#include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
#include <petsc/private/vecimpl.h>
#include <petsc/private/sfimpl.h>
#include <petsc/private/isimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>
#include <petsc/private/hashmapi.h>

/*
  Destroys an MPIAIJ matrix: frees the stash, the diagonal (A) and off-diagonal (B)
  sequential blocks, the column map, the communication scatter, and all cached work
  arrays, then clears every composed method so the object can be retyped cleanly.
*/
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
  PetscCall(MatStashDestroy_Private(&mat->stash));
  PetscCall(VecDestroy(&aij->diag));
  PetscCall(MatDestroy(&aij->A));
  PetscCall(MatDestroy(&aij->B));
  /* colmap maps global column ids to local ids of B; its representation depends on PETSC_USE_CTABLE */
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&aij->colmap));
#else
  PetscCall(PetscFree(aij->colmap));
#endif
  PetscCall(PetscFree(aij->garray));
  PetscCall(VecDestroy(&aij->lvec));
  PetscCall(VecScatterDestroy(&aij->Mvctx));
  PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));
  PetscCall(PetscFree(aij->ld));

  PetscCall(PetscFree(mat->data));

  /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
  PetscCall(PetscObjectCompose((PetscObject)mat, "MatMergeSeqsToMPI", NULL));

  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatIsTranspose_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocationCSR_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpibaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisbaij_C", NULL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcusparse_C", NULL));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijhipsparse_C", NULL));
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijkokkos_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_elemental_C", NULL));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_scalapack_C", NULL));
#endif
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_hypre_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_is_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetUseScalableIncreaseOverlap_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijperm_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijsell_C", NULL));
#if defined(PETSC_HAVE_MKL_SPARSE)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijmkl_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcrl_C", NULL));
  /* NOTE(review): "MatConvert_mpiaij_is_C" is cleared a second time here (also cleared above); harmless but redundant */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisell_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetPreallocationCOO_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetValuesCOO_C", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and MatAssemblyEnd_MPI_Hash() */
#define TYPE AIJ
#define TYPE_AIJ
#include "../src/mat/impls/aij/mpi/mpihashmat.h"
#undef TYPE
#undef TYPE_AIJ

/*
  Builds a temporary merged sequential matrix B holding this rank's rows and returns
  B's row/column index arrays; B is composed on A so MatRestoreRowIJ_MPIAIJ() can find
  it again (the local MatDestroy() only drops this function's reference).
*/
static PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
{
  Mat B;

  PetscFunctionBegin;
  PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &B));
  PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject)B));
  PetscCall(MatGetRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
  PetscCall(MatDestroy(&B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Releases the index arrays obtained by MatGetRowIJ_MPIAIJ() and drops the composed
  merged local matrix, destroying it.
*/
static PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
{
  Mat B;

  PetscFunctionBegin;
  PetscCall(PetscObjectQuery((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject *)&B));
  PetscCall(MatRestoreRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
  PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to `MATSEQAIJ` when constructed with a single process communicator,
   and `MATMPIAIJ` otherwise. As a result, for single process communicators,
   `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Key:
. -mat_type aij - sets the matrix type to `MATAIJ` during a call to `MatSetFromOptions()`

   Developer Note:
   Level: beginner

   Subclasses include `MATAIJCUSPARSE`, `MATAIJPERM`, `MATAIJSELL`, `MATAIJMKL`, `MATAIJCRL`, `MATAIJKOKKOS`, and also automatically switches over to use inodes when
   enough exist.

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateAIJ()`, `MatCreateSeqAIJ()`, `MATSEQAIJ`, `MATMPIAIJ`
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to `MATSEQAIJCRL` when constructed with a single process communicator,
   and `MATMPIAIJCRL` otherwise. As a result, for single process communicators,
   `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Key:
. -mat_type aijcrl - sets the matrix type to `MATMPIAIJCRL` during a call to `MatSetFromOptions()`

  Level: beginner

.seealso: [](ch_matrices), `Mat`, `MatCreateMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL`
M*/

/*
  Binds (or unbinds) the matrix to the CPU: forwards the flag to the diagonal and
  off-diagonal sequential blocks and to the local work vectors.
*/
static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A, PetscBool flg)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) || defined(PETSC_HAVE_VIENNACL)
  A->boundtocpu = flg;
#endif
  if (a->A) PetscCall(MatBindToCPU(a->A, flg));
  if (a->B) PetscCall(MatBindToCPU(a->B, flg));

  /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
   * This maybe seems a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
   * to differ from the parent matrix. */
  if (a->lvec) PetscCall(VecBindToCPU(a->lvec, flg));
  if (a->diag) PetscCall(VecBindToCPU(a->diag, flg));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Propagates row/column block sizes to the diagonal block A; the off-diagonal block B
  always keeps a column block size of 1.
*/
static PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data;

  PetscFunctionBegin;
  if (mat->A) {
    PetscCall(MatSetBlockSizes(mat->A, rbs, cbs));
    PetscCall(MatSetBlockSizes(mat->B, rbs, 1));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Builds an index set of the locally owned rows that contain at least one structurally
  stored, numerically nonzero entry (in either the diagonal or off-diagonal block).
  If no rank has a fully zero row, *keptrows is left NULL.
*/
static PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M, IS *keptrows)
{
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ *)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ *)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ *)mat->B->data;
  const PetscInt  *ia, *ib;
  const MatScalar *aa, *bb, *aav, *bav;
  PetscInt         na, nb, i, j, *rows, cnt = 0, n0rows;
  PetscInt         m = M->rmap->n, rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = NULL;

  ia = a->i;
  ib = b->i;
  PetscCall(MatSeqAIJGetArrayRead(mat->A, &aav));
  PetscCall(MatSeqAIJGetArrayRead(mat->B, &bav));
  /* First pass: cnt = number of local rows that are structurally empty or all numerically zero */
  for (i = 0; i < m; i++) {
    na = ia[i + 1] - ia[i];
    nb = ib[i + 1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = aav + ia[i];
    for (j = 0; j < na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = PetscSafePointerPlusOffset(bav, ib[i]);
    for (j = 0; j < nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
  ok1:;
  }
  PetscCall(MPIU_Allreduce(&cnt, &n0rows, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)M)));
  if (!n0rows) {
    /* no rank has a zero row: leave *keptrows NULL */
    PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
    PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  /* Second pass: collect the global indices of the m - cnt nonzero rows */
  PetscCall(PetscMalloc1(M->rmap->n - cnt, &rows));
  cnt = 0;
  for (i = 0; i < m; i++) {
    na = ia[i + 1] - ia[i];
    nb = ib[i + 1] - ib[i];
    if (!na && !nb) continue;
    aa = aav + ia[i];
    for (j = 0; j < na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = PetscSafePointerPlusOffset(bav, ib[i]);
    for (j = 0; j < nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
  ok2:;
  }
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), cnt, rows, PETSC_OWN_POINTER, keptrows));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Sets the diagonal of Y from vector D. When Y is assembled with congruent row/column
  layouts the diagonal lives entirely in the sequential diagonal block, so the
  operation is delegated there; otherwise the generic implementation is used.
*/
static PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y, Vec D, InsertMode is)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Y->data;
  PetscBool   cong;

  PetscFunctionBegin;
  PetscCall(MatHasCongruentLayouts(Y, &cong));
  if (Y->assembled && cong) {
    PetscCall(MatDiagonalSet(aij->A, D, is));
  } else {
    PetscCall(MatDiagonalSet_Default(Y, D, is));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Returns an index set of the locally owned rows whose diagonal entry is zero (or
  structurally missing), with indices shifted to global numbering.
*/
static PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M, IS *zrows)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)M->data;
  PetscInt    i, rstart, nrows, *rows;

  PetscFunctionBegin;
  *zrows = NULL;
  PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A, &nrows, &rows));
  PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
  for (i = 0; i < nrows; i++) rows[i] += rstart;
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), nrows, rows, PETSC_OWN_POINTER, zrows));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Computes a per-column reduction (1/2/inf norm, sum or mean of real/imaginary parts)
  over the whole matrix into reductions[], which must have length = global column count.
  Local contributions from the diagonal (A) and off-diagonal (B, via garray) blocks are
  accumulated and then combined with an Allreduce (MAX for the inf-norm, SUM otherwise).
*/
static PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A, PetscInt type, PetscReal *reductions)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)A->data;
  PetscInt           i, m, n, *garray = aij->garray;
  Mat_SeqAIJ        *a_aij = (Mat_SeqAIJ *)aij->A->data;
  Mat_SeqAIJ        *b_aij = (Mat_SeqAIJ *)aij->B->data;
  PetscReal         *work;
  const PetscScalar *dummy;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A, &m, &n));
  PetscCall(PetscCalloc1(n, &work));
  /* NOTE(review): the get/restore pairs below discard the pointer; presumably they force the
     values to be current on the host before a_aij->a / b_aij->a are read directly — confirm */
  PetscCall(MatSeqAIJGetArrayRead(aij->A, &dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &dummy));
  PetscCall(MatSeqAIJGetArrayRead(aij->B, &dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &dummy));
  if (type == NORM_2) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i] * a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i] * b_aij->a[i]);
  } else if (type == NORM_1) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
  } else if (type == NORM_INFINITY) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]), work[garray[b_aij->j[i]]]);
  } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]);
  } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]);
  } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
  if (type == NORM_INFINITY) {
    PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
  } else {
    PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscCall(PetscFree(work));
  if (type == NORM_2) {
    for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < n; i++) reductions[i] /= m;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Builds an IS of local rows that have entries outside the block diagonal: the union of
  the off-block-diagonal rows of A and the nonzero rows of B, sorted, deduplicated, and
  shifted to global numbering.
*/
static PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A, IS *is)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
  IS              sis, gis;
  const PetscInt *isis, *igis;
  PetscInt        n, *iis, nsis, ngis, rstart, i;

  PetscFunctionBegin;
  PetscCall(MatFindOffBlockDiagonalEntries(a->A, &sis));
  PetscCall(MatFindNonzeroRows(a->B, &gis));
  PetscCall(ISGetSize(gis, &ngis));
  PetscCall(ISGetSize(sis, &nsis));
  PetscCall(ISGetIndices(sis, &isis));
  PetscCall(ISGetIndices(gis, &igis));

  /* concatenate the two local-index lists, then sort and remove duplicates */
  PetscCall(PetscMalloc1(ngis + nsis, &iis));
  PetscCall(PetscArraycpy(iis, igis, ngis));
  PetscCall(PetscArraycpy(iis + ngis, isis, nsis));
  n = ngis + nsis;
  PetscCall(PetscSortRemoveDupsInt(&n, iis));
  PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
  for (i = 0; i < n; i++) iis[i] += rstart;
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A), n, iis, PETSC_OWN_POINTER, is));

  PetscCall(ISRestoreIndices(sis, &isis));
  PetscCall(ISRestoreIndices(gis, &igis));
  PetscCall(ISDestroy(&sis));
  PetscCall(ISDestroy(&gis));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it it is not scalable (each
  process has an order-N integer array, but it is fast to access).
*/
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt    n = aij->B->cmap->n, i;

  PetscFunctionBegin;
  PetscCheck(!n || aij->garray, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  /* entries are stored shifted by +1 so that 0 can mean "not present" */
  PetscCall(PetscHMapICreateWithSize(n, &aij->colmap));
  for (i = 0; i < n; i++) PetscCall(PetscHMapISet(aij->colmap, aij->garray[i] + 1, i + 1));
#else
  PetscCall(PetscCalloc1(mat->cmap->N + 1, &aij->colmap));
  for (i = 0; i < n; i++) aij->colmap[aij->garray[i]] = i + 1;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Inserts/adds (row,col,value) into the diagonal block A. Uses a binary search over the
  row (rp1/ap1), falls through to reallocation when the entry is structurally new, and
  relies on caller-scope locals (lastcol1, low1, high1, nrow1, rmax1, ailen, nonew, ...).
*/
#define MatSetValues_SeqAIJ_A_Private(row, col, value, addv, orow, ocol) \
  do { \
    if (col <= lastcol1) low1 = 0; \
    else high1 = nrow1; \
    lastcol1 = col; \
    while (high1 - low1 > 5) { \
      t = (low1 + high1) / 2; \
      if (rp1[t] > col) high1 = t; \
      else low1 = t; \
    } \
    for (_i = low1; _i < high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap1[_i] += value; \
          /* Not sure LogFlops will slow down the code or not */ \
          (void)PetscLogFlops(1.0); \
        } else ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries && row != col) { \
      low1  = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    if (nonew == 1) { \
      low1  = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A, am, 1, nrow1, row, col, rmax1, aa, ai, aj, rp1, ap1, aimax, nonew, MatScalar); \
    N = nrow1++ - 1; \
    a->nz++; \
    high1++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp1 + _i + 1, rp1 + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap1 + _i + 1, ap1 + _i, N - _i + 1)); \
    rp1[_i] = col; \
    ap1[_i] = value; \
  a_noinsert:; \
    ailen[row] = nrow1; \
  } while (0)

/*
  Off-diagonal (B) counterpart of MatSetValues_SeqAIJ_A_Private; uses the *2-suffixed
  caller-scope locals. The zero-value short-circuit has no row != col test since B
  holds no diagonal entries.
*/
#define MatSetValues_SeqAIJ_B_Private(row, col, value, addv, orow, ocol) \
  do { \
    if (col <= lastcol2) low2 = 0; \
    else high2 = nrow2; \
    lastcol2 = col; \
    while (high2 - low2 > 5) { \
      t = (low2 + high2) / 2; \
      if (rp2[t] > col) high2 = t; \
      else low2 = t; \
    } \
    for (_i = low2; _i < high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap2[_i] += value; \
          (void)PetscLogFlops(1.0); \
        } else ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) { \
      low2  = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    if (nonew == 1) { \
      low2  = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B, bm, 1, nrow2, row, col, rmax2, ba, bi, bj, rp2, ap2, bimax, nonew, MatScalar); \
    N = nrow2++ - 1; \
    b->nz++; \
    high2++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp2 + _i + 1, rp2 + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap2 + _i + 1, ap2 + _i, N - _i + 1)); \
    rp2[_i] = col; \
    ap2[_i] = value; \
  b_noinsert:; \
    bilen[row] = nrow2; \
  } while (0)

/*
  Overwrites one locally owned (global-numbered) row with the values in v, which must
  be laid out as: [entries left of the diagonal block | diagonal-block entries |
  entries right of the diagonal block], matching the existing nonzero structure.
*/
static PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A, PetscInt row, const PetscScalar v[])
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ  *a = (Mat_SeqAIJ *)mat->A->data, *b = (Mat_SeqAIJ *)mat->B->data;
  PetscInt     l, *garray = mat->garray, diag;
  PetscScalar *aa, *ba;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  PetscCall(MatGetOwnershipRange(A, &diag, NULL));
  row = row - diag;
  for (l = 0; l < b->i[row + 1] - b->i[row]; l++) {
    if (garray[b->j[b->i[row] + l]] > diag) break;
  }
  if (l) {
    PetscCall(MatSeqAIJGetArray(mat->B, &ba));
    PetscCall(PetscArraycpy(ba + b->i[row], v, l));
    PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
  }

  /* diagonal part */
  if (a->i[row + 1] - a->i[row]) {
    PetscCall(MatSeqAIJGetArray(mat->A, &aa));
    PetscCall(PetscArraycpy(aa + a->i[row], v + l, (a->i[row + 1] - a->i[row])));
    PetscCall(MatSeqAIJRestoreArray(mat->A, &aa));
  }

  /* right of diagonal part */
  if (b->i[row + 1] - b->i[row] - l) {
    PetscCall(MatSeqAIJGetArray(mat->B, &ba));
    PetscCall(PetscArraycpy(ba + b->i[row] + l, v + l + a->i[row + 1] - a->i[row], b->i[row + 1] - b->i[row] - l));
    PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatSetValues for MPIAIJ: locally owned rows are routed to the diagonal (A) or
  off-diagonal (B) sequential block via the *_Private macros above (disassembling B on
  the fly when a new off-diagonal column appears after assembly); off-process rows are
  stashed for communication during assembly.
*/
PetscErrorCode MatSetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[],
                                   const PetscScalar v[], InsertMode addv)
{
  Mat_MPIAIJ *aij         = (Mat_MPIAIJ *)mat->data;
  PetscScalar value       = 0.0;
  PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
  PetscBool   roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat         A = aij->A;
  Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
  PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
  PetscBool   ignorezeroentries = a->ignorezeroentries;
  Mat         B = aij->B;
  Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data;
  PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
  MatScalar  *aa, *ba;
  PetscInt   *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
  PetscInt    nonew;
  MatScalar  *ap1, *ap2;

  PetscFunctionBegin;
  PetscCall(MatSeqAIJGetArray(A, &aa));
  PetscCall(MatSeqAIJGetArray(B, &ba));
  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue; /* negative rows are ignored by convention */
    PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    if (im[i] >= rstart && im[i] < rend) {
      /* locally owned row: cache the search state for both the A and B row */
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = PetscSafePointerPlusOffset(aj, ai[row]);
      ap1      = PetscSafePointerPlusOffset(aa, ai[row]);
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = PetscSafePointerPlusOffset(bj, bi[row]);
      ap2      = PetscSafePointerPlusOffset(ba, bi[row]);
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j = 0; j < n; j++) {
        if (v) value = roworiented ? v[i * n + j] : v[i + j * m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
        if (in[j] >= cstart && in[j] < cend) {
          /* column lies in the diagonal block */
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
        } else if (in[j] < 0) {
          continue;
        } else {
          PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          if (mat->was_assembled) {
            if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); /* map global col ids to local ones */
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ *)aij->B->data)->nonew) { /* col < 0 means in[j] is a new col for B */
              PetscCall(MatDisAssemble_MPIAIJ(mat));               /* Change aij->B from reduced/local format to expanded/global format */
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ *)B->data;
              bimax = b->imax;
              bi    = b->i;
              bilen = b->ilen;
              bj    = b->j;
              ba    = b->a;
              rp2   = PetscSafePointerPlusOffset(bj, bi[row]);
              ap2   = PetscSafePointerPlusOffset(ba, bi[row]);
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
              if (1 == ((Mat_SeqAIJ *)aij->B->data)->nonew) {
                PetscCall(PetscInfo(mat, "Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n", (double)PetscRealPart(value), im[i], in[j]));
              } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
            }
          } else col = in[j];
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
        }
      }
    } else {
      /* off-process row: stash it for communication in MatAssemblyBegin/End */
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i * n), (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i), m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
  }
  PetscCall(MatSeqAIJRestoreArray(A, &aa)); /* aa, bb might have been free'd due to reallocation above. But we don't access them here */
  PetscCall(MatSeqAIJRestoreArray(B, &ba));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
  The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
  No off-processor parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  Mat         A   = aij->A; /* diagonal part of the matrix */
  Mat         B   = aij->B; /* off-diagonal part of the matrix */
  Mat_SeqAIJ *a   = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJ *b   = (Mat_SeqAIJ *)B->data;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, col;
  PetscInt   *ailen = a->ilen, *aj = a->j;
  PetscInt   *bilen = b->ilen, *bj = b->j;
  PetscInt    am = aij->A->rmap->n, j;
  PetscInt    diag_so_far = 0, dnz;
  PetscInt    offd_so_far = 0, onz;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j = 0; j < am; j++) {
    dnz = onz = 0;
    /* Iterate over all non-zero columns of the current row */
    for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[diag_so_far++] = mat_j[col] - cstart;
        dnz++;
      } else { /* off-diagonal entries */
        bj[offd_so_far++] = mat_j[col];
        onz++;
      }
    }
    ailen[j] = dnz;
    bilen[j] = onz;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
  The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
  No off-processor parts of the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ.
  Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
  would not be true and the more complex MatSetValues_MPIAIJ has to be used.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[], const PetscScalar mat_a[])
{
  Mat_MPIAIJ  *aij  = (Mat_MPIAIJ *)mat->data;
  Mat          A    = aij->A; /* diagonal part of the matrix */
  Mat          B    = aij->B; /* off-diagonal part of the matrix */
  Mat_SeqAIJ  *aijd = (Mat_SeqAIJ *)aij->A->data, *aijo = (Mat_SeqAIJ *)aij->B->data;
  Mat_SeqAIJ  *a    = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJ  *b    = (Mat_SeqAIJ *)B->data;
  PetscInt     cstart = mat->cmap->rstart, cend = mat->cmap->rend;
  PetscInt    *ailen = a->ilen, *aj = a->j;
  PetscInt    *bilen = b->ilen, *bj = b->j;
  PetscInt     am = aij->A->rmap->n, j;
  PetscInt    *full_diag_i = aijd->i, *full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
  PetscInt     col, dnz_row, onz_row, rowstart_diag, rowstart_offd;
  PetscScalar *aa = a->a, *ba = b->a;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j = 0; j < am; j++) {
    dnz_row = onz_row = 0;
    rowstart_offd = full_offd_i[j];
    rowstart_diag = full_diag_i[j];
    /* Iterate over all non-zero columns of the current row */
    for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[rowstart_diag + dnz_row] = mat_j[col] - cstart;
        aa[rowstart_diag + dnz_row] = mat_a[col];
        dnz_row++;
      } else { /* off-diagonal entries */
        bj[rowstart_offd + onz_row] = mat_j[col];
        ba[rowstart_offd + onz_row] = mat_a[col];
        onz_row++;
      }
    }
    ailen[j] = dnz_row;
    bilen[j] = onz_row;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Retrieves entries of locally owned rows: diagonal-block columns are read from A,
  other columns are mapped through colmap/garray and read from B (0.0 if the entry is
  not stored). Off-process rows are not supported.
*/
static PetscErrorCode MatGetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
    PetscCheck(idxm[i] >= rstart && idxm[i] < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported, row requested %" PetscInt_FMT " range [%" PetscInt_FMT " %" PetscInt_FMT ")", idxm[i], rstart, rend);
    row = idxm[i] - rstart;
    for (j = 0; j < n; j++) {
      if (idxn[j] < 0) continue; /* negative column */
      PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
      if (idxn[j] >= cstart && idxn[j] < cend) {
        col = idxn[j] - cstart;
        PetscCall(MatGetValues(aij->A, 1, &row, 1, &col, v + i * n + j));
      } else {
        if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
        PetscCall(PetscHMapIGetWithDefault(aij->colmap, idxn[j] + 1, 0, &col));
        col--;
#else
        col = aij->colmap[idxn[j]] - 1;
#endif
        if ((col < 0) || (aij->garray[col] != idxn[j])) *(v + i * n + j) = 0.0;
        else PetscCall(MatGetValues(aij->B, 1, &row, 1, &col, v + i * n + j));
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Starts communicating the stashed off-process entries; a no-op when stashing was
  disabled or MAT_NO_OFF_PROC_ENTRIES is set.
*/
static PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt    nstash, reallocs;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
  PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
  PetscCall(PetscInfo(aij->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Finishes assembly: drains the stash and inserts received rows, handles collective
  disassembly of B when any rank gained new off-diagonal columns, assembles the A and
  B blocks, sets up the multiply machinery on first final assembly, and updates the
  (globally summed) nonzero state.
*/
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *)mat->data;
  PetscMPIInt  n;
  PetscInt     i, j, rstart, ncols, flg;
  PetscInt    *row, *col;
  PetscBool    other_disassembled;
  PetscScalar *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
  /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
  if (mat->boundtocpu) {
    PetscCall(MatBindToCPU(aij->A, PETSC_TRUE));
    PetscCall(MatBindToCPU(aij->B, PETSC_TRUE));
  }
#endif
  PetscCall(MatAssemblyBegin(aij->A, mode));
  PetscCall(MatAssemblyEnd(aij->A, mode));

  /* determine if any processor has disassembled, if so we must
     also disassemble ourself, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ *)aij->B->data)->nonew) {
    PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globally it does not */
      PetscCall(MatDisAssemble_MPIAIJ(mat));
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIAIJ(mat));
  PetscCall(MatSetOption(aij->B, MAT_USE_INODES, PETSC_FALSE));
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscCall(MatAssemblyBegin(aij->B, mode));
  PetscCall(MatAssemblyEnd(aij->B, mode));

  PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));

  aij->rowvalues = NULL;

  PetscCall(VecDestroy(&aij->diag));

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ *)aij->A->data)->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
  }
#if defined(PETSC_HAVE_DEVICE)
  mat->offloadmask = PETSC_OFFLOAD_BOTH;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Zeros all stored entries of both the diagonal and off-diagonal blocks. */
static PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatZeroEntries(l->A));
  PetscCall(MatZeroEntries(l->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatZeroRows_MPIAIJ(Mat A, PetscInt N, const
PetscInt rows[], PetscScalar diag, Vec x, Vec b) 842 { 843 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 844 PetscInt *lrows; 845 PetscInt r, len; 846 PetscBool cong; 847 848 PetscFunctionBegin; 849 /* get locally owned rows */ 850 PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows)); 851 PetscCall(MatHasCongruentLayouts(A, &cong)); 852 /* fix right-hand side if needed */ 853 if (x && b) { 854 const PetscScalar *xx; 855 PetscScalar *bb; 856 857 PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout"); 858 PetscCall(VecGetArrayRead(x, &xx)); 859 PetscCall(VecGetArray(b, &bb)); 860 for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]]; 861 PetscCall(VecRestoreArrayRead(x, &xx)); 862 PetscCall(VecRestoreArray(b, &bb)); 863 } 864 865 if (diag != 0.0 && cong) { 866 PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL)); 867 PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 868 } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */ 869 Mat_SeqAIJ *aijA = (Mat_SeqAIJ *)mat->A->data; 870 Mat_SeqAIJ *aijB = (Mat_SeqAIJ *)mat->B->data; 871 PetscInt nnwA, nnwB; 872 PetscBool nnzA, nnzB; 873 874 nnwA = aijA->nonew; 875 nnwB = aijB->nonew; 876 nnzA = aijA->keepnonzeropattern; 877 nnzB = aijB->keepnonzeropattern; 878 if (!nnzA) { 879 PetscCall(PetscInfo(mat->A, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n")); 880 aijA->nonew = 0; 881 } 882 if (!nnzB) { 883 PetscCall(PetscInfo(mat->B, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n")); 884 aijB->nonew = 0; 885 } 886 /* Must zero here before the next loop */ 887 PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL)); 888 PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 889 for (r = 0; r < len; ++r) { 890 const PetscInt row = lrows[r] + 
A->rmap->rstart; 891 if (row >= A->cmap->N) continue; 892 PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES)); 893 } 894 aijA->nonew = nnwA; 895 aijB->nonew = nnwB; 896 } else { 897 PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL)); 898 PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 899 } 900 PetscCall(PetscFree(lrows)); 901 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 902 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 903 904 /* only change matrix nonzero state if pattern was allowed to be changed */ 905 if (!((Mat_SeqAIJ *)mat->A->data)->keepnonzeropattern || !((Mat_SeqAIJ *)mat->A->data)->nonew) { 906 PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate; 907 PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A))); 908 } 909 PetscFunctionReturn(PETSC_SUCCESS); 910 } 911 912 static PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 913 { 914 Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data; 915 PetscMPIInt n = A->rmap->n; 916 PetscInt i, j, r, m, len = 0; 917 PetscInt *lrows, *owners = A->rmap->range; 918 PetscMPIInt p = 0; 919 PetscSFNode *rrows; 920 PetscSF sf; 921 const PetscScalar *xx; 922 PetscScalar *bb, *mask, *aij_a; 923 Vec xmask, lmask; 924 Mat_SeqAIJ *aij = (Mat_SeqAIJ *)l->B->data; 925 const PetscInt *aj, *ii, *ridx; 926 PetscScalar *aa; 927 928 PetscFunctionBegin; 929 /* Create SF where leaves are input rows and roots are owned rows */ 930 PetscCall(PetscMalloc1(n, &lrows)); 931 for (r = 0; r < n; ++r) lrows[r] = -1; 932 PetscCall(PetscMalloc1(N, &rrows)); 933 for (r = 0; r < N; ++r) { 934 const PetscInt idx = rows[r]; 935 PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N); 936 if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this 
row too */ 937 PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p)); 938 } 939 rrows[r].rank = p; 940 rrows[r].index = rows[r] - owners[p]; 941 } 942 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 943 PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER)); 944 /* Collect flags for rows to be zeroed */ 945 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR)); 946 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR)); 947 PetscCall(PetscSFDestroy(&sf)); 948 /* Compress and put in row numbers */ 949 for (r = 0; r < n; ++r) 950 if (lrows[r] >= 0) lrows[len++] = r; 951 /* zero diagonal part of matrix */ 952 PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b)); 953 /* handle off-diagonal part of matrix */ 954 PetscCall(MatCreateVecs(A, &xmask, NULL)); 955 PetscCall(VecDuplicate(l->lvec, &lmask)); 956 PetscCall(VecGetArray(xmask, &bb)); 957 for (i = 0; i < len; i++) bb[lrows[i]] = 1; 958 PetscCall(VecRestoreArray(xmask, &bb)); 959 PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD)); 960 PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD)); 961 PetscCall(VecDestroy(&xmask)); 962 if (x && b) { /* this code is buggy when the row and column layout don't match */ 963 PetscBool cong; 964 965 PetscCall(MatHasCongruentLayouts(A, &cong)); 966 PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout"); 967 PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD)); 968 PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD)); 969 PetscCall(VecGetArrayRead(l->lvec, &xx)); 970 PetscCall(VecGetArray(b, &bb)); 971 } 972 PetscCall(VecGetArray(lmask, &mask)); 973 /* remove zeroed rows of off-diagonal matrix */ 974 PetscCall(MatSeqAIJGetArray(l->B, &aij_a)); 975 ii = aij->i; 976 for (i = 0; i < len; i++) 
PetscCall(PetscArrayzero(PetscSafePointerPlusOffset(aij_a, ii[lrows[i]]), ii[lrows[i] + 1] - ii[lrows[i]])); 977 /* loop over all elements of off process part of matrix zeroing removed columns*/ 978 if (aij->compressedrow.use) { 979 m = aij->compressedrow.nrows; 980 ii = aij->compressedrow.i; 981 ridx = aij->compressedrow.rindex; 982 for (i = 0; i < m; i++) { 983 n = ii[i + 1] - ii[i]; 984 aj = aij->j + ii[i]; 985 aa = aij_a + ii[i]; 986 987 for (j = 0; j < n; j++) { 988 if (PetscAbsScalar(mask[*aj])) { 989 if (b) bb[*ridx] -= *aa * xx[*aj]; 990 *aa = 0.0; 991 } 992 aa++; 993 aj++; 994 } 995 ridx++; 996 } 997 } else { /* do not use compressed row format */ 998 m = l->B->rmap->n; 999 for (i = 0; i < m; i++) { 1000 n = ii[i + 1] - ii[i]; 1001 aj = aij->j + ii[i]; 1002 aa = aij_a + ii[i]; 1003 for (j = 0; j < n; j++) { 1004 if (PetscAbsScalar(mask[*aj])) { 1005 if (b) bb[i] -= *aa * xx[*aj]; 1006 *aa = 0.0; 1007 } 1008 aa++; 1009 aj++; 1010 } 1011 } 1012 } 1013 if (x && b) { 1014 PetscCall(VecRestoreArray(b, &bb)); 1015 PetscCall(VecRestoreArrayRead(l->lvec, &xx)); 1016 } 1017 PetscCall(MatSeqAIJRestoreArray(l->B, &aij_a)); 1018 PetscCall(VecRestoreArray(lmask, &mask)); 1019 PetscCall(VecDestroy(&lmask)); 1020 PetscCall(PetscFree(lrows)); 1021 1022 /* only change matrix nonzero state if pattern was allowed to be changed */ 1023 if (!((Mat_SeqAIJ *)l->A->data)->nonew) { 1024 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1025 PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A))); 1026 } 1027 PetscFunctionReturn(PETSC_SUCCESS); 1028 } 1029 1030 static PetscErrorCode MatMult_MPIAIJ(Mat A, Vec xx, Vec yy) 1031 { 1032 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1033 PetscInt nt; 1034 VecScatter Mvctx = a->Mvctx; 1035 1036 PetscFunctionBegin; 1037 PetscCall(VecGetLocalSize(xx, &nt)); 1038 PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") and 
xx (%" PetscInt_FMT ")", A->cmap->n, nt); 1039 PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1040 PetscUseTypeMethod(a->A, mult, xx, yy); 1041 PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1042 PetscUseTypeMethod(a->B, multadd, a->lvec, yy, yy); 1043 PetscFunctionReturn(PETSC_SUCCESS); 1044 } 1045 1046 static PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A, Vec bb, Vec xx) 1047 { 1048 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1049 1050 PetscFunctionBegin; 1051 PetscCall(MatMultDiagonalBlock(a->A, bb, xx)); 1052 PetscFunctionReturn(PETSC_SUCCESS); 1053 } 1054 1055 static PetscErrorCode MatMultAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz) 1056 { 1057 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1058 VecScatter Mvctx = a->Mvctx; 1059 1060 PetscFunctionBegin; 1061 PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1062 PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz)); 1063 PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1064 PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz)); 1065 PetscFunctionReturn(PETSC_SUCCESS); 1066 } 1067 1068 static PetscErrorCode MatMultTranspose_MPIAIJ(Mat A, Vec xx, Vec yy) 1069 { 1070 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1071 1072 PetscFunctionBegin; 1073 /* do nondiagonal part */ 1074 PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec)); 1075 /* do local part */ 1076 PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy)); 1077 /* add partial results together */ 1078 PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE)); 1079 PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE)); 1080 PetscFunctionReturn(PETSC_SUCCESS); 1081 } 1082 1083 static PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat, Mat Bmat, PetscReal tol, PetscBool *f) 1084 { 1085 MPI_Comm comm; 1086 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)Amat->data, *Bij = (Mat_MPIAIJ *)Bmat->data; 1087 Mat Adia = Aij->A, Bdia = 
Bij->A, Aoff, Boff, *Aoffs, *Boffs; 1088 IS Me, Notme; 1089 PetscInt M, N, first, last, *notme, i; 1090 PetscBool lf; 1091 PetscMPIInt size; 1092 1093 PetscFunctionBegin; 1094 /* Easy test: symmetric diagonal block */ 1095 PetscCall(MatIsTranspose(Adia, Bdia, tol, &lf)); 1096 PetscCall(MPIU_Allreduce(&lf, f, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)Amat))); 1097 if (!*f) PetscFunctionReturn(PETSC_SUCCESS); 1098 PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm)); 1099 PetscCallMPI(MPI_Comm_size(comm, &size)); 1100 if (size == 1) PetscFunctionReturn(PETSC_SUCCESS); 1101 1102 /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */ 1103 PetscCall(MatGetSize(Amat, &M, &N)); 1104 PetscCall(MatGetOwnershipRange(Amat, &first, &last)); 1105 PetscCall(PetscMalloc1(N - last + first, ¬me)); 1106 for (i = 0; i < first; i++) notme[i] = i; 1107 for (i = last; i < M; i++) notme[i - last + first] = i; 1108 PetscCall(ISCreateGeneral(MPI_COMM_SELF, N - last + first, notme, PETSC_COPY_VALUES, &Notme)); 1109 PetscCall(ISCreateStride(MPI_COMM_SELF, last - first, first, 1, &Me)); 1110 PetscCall(MatCreateSubMatrices(Amat, 1, &Me, &Notme, MAT_INITIAL_MATRIX, &Aoffs)); 1111 Aoff = Aoffs[0]; 1112 PetscCall(MatCreateSubMatrices(Bmat, 1, &Notme, &Me, MAT_INITIAL_MATRIX, &Boffs)); 1113 Boff = Boffs[0]; 1114 PetscCall(MatIsTranspose(Aoff, Boff, tol, f)); 1115 PetscCall(MatDestroyMatrices(1, &Aoffs)); 1116 PetscCall(MatDestroyMatrices(1, &Boffs)); 1117 PetscCall(ISDestroy(&Me)); 1118 PetscCall(ISDestroy(&Notme)); 1119 PetscCall(PetscFree(notme)); 1120 PetscFunctionReturn(PETSC_SUCCESS); 1121 } 1122 1123 static PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz) 1124 { 1125 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1126 1127 PetscFunctionBegin; 1128 /* do nondiagonal part */ 1129 PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec)); 1130 /* do local part */ 1131 PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz)); 1132 /* add partial 
results together */ 1133 PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE)); 1134 PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE)); 1135 PetscFunctionReturn(PETSC_SUCCESS); 1136 } 1137 1138 /* 1139 This only works correctly for square matrices where the subblock A->A is the 1140 diagonal block 1141 */ 1142 static PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A, Vec v) 1143 { 1144 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1145 1146 PetscFunctionBegin; 1147 PetscCheck(A->rmap->N == A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block"); 1148 PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "row partition must equal col partition"); 1149 PetscCall(MatGetDiagonal(a->A, v)); 1150 PetscFunctionReturn(PETSC_SUCCESS); 1151 } 1152 1153 static PetscErrorCode MatScale_MPIAIJ(Mat A, PetscScalar aa) 1154 { 1155 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1156 1157 PetscFunctionBegin; 1158 PetscCall(MatScale(a->A, aa)); 1159 PetscCall(MatScale(a->B, aa)); 1160 PetscFunctionReturn(PETSC_SUCCESS); 1161 } 1162 1163 static PetscErrorCode MatView_MPIAIJ_Binary(Mat mat, PetscViewer viewer) 1164 { 1165 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1166 Mat_SeqAIJ *A = (Mat_SeqAIJ *)aij->A->data; 1167 Mat_SeqAIJ *B = (Mat_SeqAIJ *)aij->B->data; 1168 const PetscInt *garray = aij->garray; 1169 const PetscScalar *aa, *ba; 1170 PetscInt header[4], M, N, m, rs, cs, cnt, i, ja, jb; 1171 PetscInt64 nz, hnz; 1172 PetscInt *rowlens; 1173 PetscInt *colidxs; 1174 PetscScalar *matvals; 1175 PetscMPIInt rank; 1176 1177 PetscFunctionBegin; 1178 PetscCall(PetscViewerSetUp(viewer)); 1179 1180 M = mat->rmap->N; 1181 N = mat->cmap->N; 1182 m = mat->rmap->n; 1183 rs = mat->rmap->rstart; 1184 cs = mat->cmap->rstart; 1185 nz = A->nz + B->nz; 1186 1187 /* write matrix header */ 1188 header[0] = MAT_FILE_CLASSID; 1189 header[1] = M; 1190 
header[2] = N; 1191 PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_INT64, MPI_SUM, 0, PetscObjectComm((PetscObject)mat))); 1192 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 1193 if (rank == 0) { 1194 if (hnz > PETSC_MAX_INT) header[3] = PETSC_MAX_INT; 1195 else header[3] = (PetscInt)hnz; 1196 } 1197 PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT)); 1198 1199 /* fill in and store row lengths */ 1200 PetscCall(PetscMalloc1(m, &rowlens)); 1201 for (i = 0; i < m; i++) rowlens[i] = A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i]; 1202 PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT)); 1203 PetscCall(PetscFree(rowlens)); 1204 1205 /* fill in and store column indices */ 1206 PetscCall(PetscMalloc1(nz, &colidxs)); 1207 for (cnt = 0, i = 0; i < m; i++) { 1208 for (jb = B->i[i]; jb < B->i[i + 1]; jb++) { 1209 if (garray[B->j[jb]] > cs) break; 1210 colidxs[cnt++] = garray[B->j[jb]]; 1211 } 1212 for (ja = A->i[i]; ja < A->i[i + 1]; ja++) colidxs[cnt++] = A->j[ja] + cs; 1213 for (; jb < B->i[i + 1]; jb++) colidxs[cnt++] = garray[B->j[jb]]; 1214 } 1215 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz); 1216 PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 1217 PetscCall(PetscFree(colidxs)); 1218 1219 /* fill in and store nonzero values */ 1220 PetscCall(MatSeqAIJGetArrayRead(aij->A, &aa)); 1221 PetscCall(MatSeqAIJGetArrayRead(aij->B, &ba)); 1222 PetscCall(PetscMalloc1(nz, &matvals)); 1223 for (cnt = 0, i = 0; i < m; i++) { 1224 for (jb = B->i[i]; jb < B->i[i + 1]; jb++) { 1225 if (garray[B->j[jb]] > cs) break; 1226 matvals[cnt++] = ba[jb]; 1227 } 1228 for (ja = A->i[i]; ja < A->i[i + 1]; ja++) matvals[cnt++] = aa[ja]; 1229 for (; jb < B->i[i + 1]; jb++) matvals[cnt++] = ba[jb]; 1230 } 1231 PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &aa)); 1232 
PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &ba)); 1233 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz); 1234 PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 1235 PetscCall(PetscFree(matvals)); 1236 1237 /* write block size option to the viewer's .info file */ 1238 PetscCall(MatView_Binary_BlockSizes(mat, viewer)); 1239 PetscFunctionReturn(PETSC_SUCCESS); 1240 } 1241 1242 #include <petscdraw.h> 1243 static PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer) 1244 { 1245 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1246 PetscMPIInt rank = aij->rank, size = aij->size; 1247 PetscBool isdraw, iascii, isbinary; 1248 PetscViewer sviewer; 1249 PetscViewerFormat format; 1250 1251 PetscFunctionBegin; 1252 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1253 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 1254 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1255 if (iascii) { 1256 PetscCall(PetscViewerGetFormat(viewer, &format)); 1257 if (format == PETSC_VIEWER_LOAD_BALANCE) { 1258 PetscInt i, nmax = 0, nmin = PETSC_MAX_INT, navg = 0, *nz, nzlocal = ((Mat_SeqAIJ *)aij->A->data)->nz + ((Mat_SeqAIJ *)aij->B->data)->nz; 1259 PetscCall(PetscMalloc1(size, &nz)); 1260 PetscCallMPI(MPI_Allgather(&nzlocal, 1, MPIU_INT, nz, 1, MPIU_INT, PetscObjectComm((PetscObject)mat))); 1261 for (i = 0; i < (PetscInt)size; i++) { 1262 nmax = PetscMax(nmax, nz[i]); 1263 nmin = PetscMin(nmin, nz[i]); 1264 navg += nz[i]; 1265 } 1266 PetscCall(PetscFree(nz)); 1267 navg = navg / size; 1268 PetscCall(PetscViewerASCIIPrintf(viewer, "Load Balance - Nonzeros: Min %" PetscInt_FMT " avg %" PetscInt_FMT " max %" PetscInt_FMT "\n", nmin, navg, nmax)); 1269 PetscFunctionReturn(PETSC_SUCCESS); 1270 } 1271 
PetscCall(PetscViewerGetFormat(viewer, &format)); 1272 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1273 MatInfo info; 1274 PetscInt *inodes = NULL; 1275 1276 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 1277 PetscCall(MatGetInfo(mat, MAT_LOCAL, &info)); 1278 PetscCall(MatInodeGetInodeSizes(aij->A, NULL, &inodes, NULL)); 1279 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 1280 if (!inodes) { 1281 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, 1282 (double)info.memory)); 1283 } else { 1284 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, 1285 (double)info.memory)); 1286 } 1287 PetscCall(MatGetInfo(aij->A, MAT_LOCAL, &info)); 1288 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used)); 1289 PetscCall(MatGetInfo(aij->B, MAT_LOCAL, &info)); 1290 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used)); 1291 PetscCall(PetscViewerFlush(viewer)); 1292 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 1293 PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n")); 1294 PetscCall(VecScatterView(aij->Mvctx, viewer)); 1295 PetscFunctionReturn(PETSC_SUCCESS); 1296 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1297 PetscInt inodecount, inodelimit, *inodes; 1298 PetscCall(MatInodeGetInodeSizes(aij->A, &inodecount, &inodes, &inodelimit)); 1299 if (inodes) { 1300 PetscCall(PetscViewerASCIIPrintf(viewer, "using I-node (on process 0) routines: found %" 
PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n", inodecount, inodelimit)); 1301 } else { 1302 PetscCall(PetscViewerASCIIPrintf(viewer, "not using I-node (on process 0) routines\n")); 1303 } 1304 PetscFunctionReturn(PETSC_SUCCESS); 1305 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1306 PetscFunctionReturn(PETSC_SUCCESS); 1307 } 1308 } else if (isbinary) { 1309 if (size == 1) { 1310 PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name)); 1311 PetscCall(MatView(aij->A, viewer)); 1312 } else { 1313 PetscCall(MatView_MPIAIJ_Binary(mat, viewer)); 1314 } 1315 PetscFunctionReturn(PETSC_SUCCESS); 1316 } else if (iascii && size == 1) { 1317 PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name)); 1318 PetscCall(MatView(aij->A, viewer)); 1319 PetscFunctionReturn(PETSC_SUCCESS); 1320 } else if (isdraw) { 1321 PetscDraw draw; 1322 PetscBool isnull; 1323 PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); 1324 PetscCall(PetscDrawIsNull(draw, &isnull)); 1325 if (isnull) PetscFunctionReturn(PETSC_SUCCESS); 1326 } 1327 1328 { /* assemble the entire matrix onto first processor */ 1329 Mat A = NULL, Av; 1330 IS isrow, iscol; 1331 1332 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->rmap->N : 0, 0, 1, &isrow)); 1333 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->cmap->N : 0, 0, 1, &iscol)); 1334 PetscCall(MatCreateSubMatrix(mat, isrow, iscol, MAT_INITIAL_MATRIX, &A)); 1335 PetscCall(MatMPIAIJGetSeqAIJ(A, &Av, NULL, NULL)); 1336 /* The commented code uses MatCreateSubMatrices instead */ 1337 /* 1338 Mat *AA, A = NULL, Av; 1339 IS isrow,iscol; 1340 1341 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow)); 1342 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? 
mat->cmap->N : 0,0,1,&iscol)); 1343 PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA)); 1344 if (rank == 0) { 1345 PetscCall(PetscObjectReference((PetscObject)AA[0])); 1346 A = AA[0]; 1347 Av = AA[0]; 1348 } 1349 PetscCall(MatDestroySubMatrices(1,&AA)); 1350 */ 1351 PetscCall(ISDestroy(&iscol)); 1352 PetscCall(ISDestroy(&isrow)); 1353 /* 1354 Everyone has to call to draw the matrix since the graphics waits are 1355 synchronized across all processors that share the PetscDraw object 1356 */ 1357 PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 1358 if (rank == 0) { 1359 if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)Av, ((PetscObject)mat)->name)); 1360 PetscCall(MatView_SeqAIJ(Av, sviewer)); 1361 } 1362 PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 1363 PetscCall(MatDestroy(&A)); 1364 } 1365 PetscFunctionReturn(PETSC_SUCCESS); 1366 } 1367 1368 PetscErrorCode MatView_MPIAIJ(Mat mat, PetscViewer viewer) 1369 { 1370 PetscBool iascii, isdraw, issocket, isbinary; 1371 1372 PetscFunctionBegin; 1373 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 1374 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1375 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1376 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket)); 1377 if (iascii || isdraw || isbinary || issocket) PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat, viewer)); 1378 PetscFunctionReturn(PETSC_SUCCESS); 1379 } 1380 1381 static PetscErrorCode MatSOR_MPIAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) 1382 { 1383 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data; 1384 Vec bb1 = NULL; 1385 PetscBool hasop; 1386 1387 PetscFunctionBegin; 1388 if (flag == SOR_APPLY_UPPER) { 1389 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, 
fshift, lits, 1, xx)); 1390 PetscFunctionReturn(PETSC_SUCCESS); 1391 } 1392 1393 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) PetscCall(VecDuplicate(bb, &bb1)); 1394 1395 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1396 if (flag & SOR_ZERO_INITIAL_GUESS) { 1397 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1398 its--; 1399 } 1400 1401 while (its--) { 1402 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1403 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1404 1405 /* update rhs: bb1 = bb - B*x */ 1406 PetscCall(VecScale(mat->lvec, -1.0)); 1407 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 1408 1409 /* local sweep */ 1410 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx)); 1411 } 1412 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1413 if (flag & SOR_ZERO_INITIAL_GUESS) { 1414 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1415 its--; 1416 } 1417 while (its--) { 1418 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1419 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1420 1421 /* update rhs: bb1 = bb - B*x */ 1422 PetscCall(VecScale(mat->lvec, -1.0)); 1423 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 1424 1425 /* local sweep */ 1426 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx)); 1427 } 1428 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1429 if (flag & SOR_ZERO_INITIAL_GUESS) { 1430 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1431 its--; 1432 } 1433 while (its--) { 1434 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1435 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1436 
1437 /* update rhs: bb1 = bb - B*x */ 1438 PetscCall(VecScale(mat->lvec, -1.0)); 1439 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 1440 1441 /* local sweep */ 1442 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx)); 1443 } 1444 } else if (flag & SOR_EISENSTAT) { 1445 Vec xx1; 1446 1447 PetscCall(VecDuplicate(bb, &xx1)); 1448 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx)); 1449 1450 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1451 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1452 if (!mat->diag) { 1453 PetscCall(MatCreateVecs(matin, &mat->diag, NULL)); 1454 PetscCall(MatGetDiagonal(matin, mat->diag)); 1455 } 1456 PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop)); 1457 if (hasop) { 1458 PetscCall(MatMultDiagonalBlock(matin, xx, bb1)); 1459 } else { 1460 PetscCall(VecPointwiseMult(bb1, mat->diag, xx)); 1461 } 1462 PetscCall(VecAYPX(bb1, (omega - 2.0) / omega, bb)); 1463 1464 PetscCall(MatMultAdd(mat->B, mat->lvec, bb1, bb1)); 1465 1466 /* local sweep */ 1467 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1)); 1468 PetscCall(VecAXPY(xx, 1.0, xx1)); 1469 PetscCall(VecDestroy(&xx1)); 1470 } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel SOR not supported"); 1471 1472 PetscCall(VecDestroy(&bb1)); 1473 1474 matin->factorerrortype = mat->A->factorerrortype; 1475 PetscFunctionReturn(PETSC_SUCCESS); 1476 } 1477 1478 static PetscErrorCode MatPermute_MPIAIJ(Mat A, IS rowp, IS colp, Mat *B) 1479 { 1480 Mat aA, aB, Aperm; 1481 const PetscInt *rwant, *cwant, *gcols, *ai, *bi, *aj, *bj; 1482 PetscScalar *aa, *ba; 1483 PetscInt i, j, m, n, ng, anz, bnz, *dnnz, *onnz, *tdnnz, *tonnz, *rdest, *cdest, *work, *gcdest; 1484 
PetscSF rowsf, sf; 1485 IS parcolp = NULL; 1486 PetscBool done; 1487 1488 PetscFunctionBegin; 1489 PetscCall(MatGetLocalSize(A, &m, &n)); 1490 PetscCall(ISGetIndices(rowp, &rwant)); 1491 PetscCall(ISGetIndices(colp, &cwant)); 1492 PetscCall(PetscMalloc3(PetscMax(m, n), &work, m, &rdest, n, &cdest)); 1493 1494 /* Invert row permutation to find out where my rows should go */ 1495 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &rowsf)); 1496 PetscCall(PetscSFSetGraphLayout(rowsf, A->rmap, A->rmap->n, NULL, PETSC_OWN_POINTER, rwant)); 1497 PetscCall(PetscSFSetFromOptions(rowsf)); 1498 for (i = 0; i < m; i++) work[i] = A->rmap->rstart + i; 1499 PetscCall(PetscSFReduceBegin(rowsf, MPIU_INT, work, rdest, MPI_REPLACE)); 1500 PetscCall(PetscSFReduceEnd(rowsf, MPIU_INT, work, rdest, MPI_REPLACE)); 1501 1502 /* Invert column permutation to find out where my columns should go */ 1503 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1504 PetscCall(PetscSFSetGraphLayout(sf, A->cmap, A->cmap->n, NULL, PETSC_OWN_POINTER, cwant)); 1505 PetscCall(PetscSFSetFromOptions(sf)); 1506 for (i = 0; i < n; i++) work[i] = A->cmap->rstart + i; 1507 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, work, cdest, MPI_REPLACE)); 1508 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, work, cdest, MPI_REPLACE)); 1509 PetscCall(PetscSFDestroy(&sf)); 1510 1511 PetscCall(ISRestoreIndices(rowp, &rwant)); 1512 PetscCall(ISRestoreIndices(colp, &cwant)); 1513 PetscCall(MatMPIAIJGetSeqAIJ(A, &aA, &aB, &gcols)); 1514 1515 /* Find out where my gcols should go */ 1516 PetscCall(MatGetSize(aB, NULL, &ng)); 1517 PetscCall(PetscMalloc1(ng, &gcdest)); 1518 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1519 PetscCall(PetscSFSetGraphLayout(sf, A->cmap, ng, NULL, PETSC_OWN_POINTER, gcols)); 1520 PetscCall(PetscSFSetFromOptions(sf)); 1521 PetscCall(PetscSFBcastBegin(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE)); 1522 PetscCall(PetscSFBcastEnd(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE)); 1523 
PetscCall(PetscSFDestroy(&sf)); 1524 1525 PetscCall(PetscCalloc4(m, &dnnz, m, &onnz, m, &tdnnz, m, &tonnz)); 1526 PetscCall(MatGetRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done)); 1527 PetscCall(MatGetRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done)); 1528 for (i = 0; i < m; i++) { 1529 PetscInt row = rdest[i]; 1530 PetscMPIInt rowner; 1531 PetscCall(PetscLayoutFindOwner(A->rmap, row, &rowner)); 1532 for (j = ai[i]; j < ai[i + 1]; j++) { 1533 PetscInt col = cdest[aj[j]]; 1534 PetscMPIInt cowner; 1535 PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); /* Could build an index for the columns to eliminate this search */ 1536 if (rowner == cowner) dnnz[i]++; 1537 else onnz[i]++; 1538 } 1539 for (j = bi[i]; j < bi[i + 1]; j++) { 1540 PetscInt col = gcdest[bj[j]]; 1541 PetscMPIInt cowner; 1542 PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); 1543 if (rowner == cowner) dnnz[i]++; 1544 else onnz[i]++; 1545 } 1546 } 1547 PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE)); 1548 PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE)); 1549 PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE)); 1550 PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE)); 1551 PetscCall(PetscSFDestroy(&rowsf)); 1552 1553 PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A), A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N, 0, tdnnz, 0, tonnz, &Aperm)); 1554 PetscCall(MatSeqAIJGetArray(aA, &aa)); 1555 PetscCall(MatSeqAIJGetArray(aB, &ba)); 1556 for (i = 0; i < m; i++) { 1557 PetscInt *acols = dnnz, *bcols = onnz; /* Repurpose now-unneeded arrays */ 1558 PetscInt j0, rowlen; 1559 rowlen = ai[i + 1] - ai[i]; 1560 for (j0 = j = 0; j < rowlen; j0 = j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1561 for (; j < PetscMin(rowlen, j0 + m); j++) acols[j - j0] = cdest[aj[ai[i] + j]]; 1562 PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, acols, aa + ai[i] + j0, 
INSERT_VALUES)); 1563 } 1564 rowlen = bi[i + 1] - bi[i]; 1565 for (j0 = j = 0; j < rowlen; j0 = j) { 1566 for (; j < PetscMin(rowlen, j0 + m); j++) bcols[j - j0] = gcdest[bj[bi[i] + j]]; 1567 PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, bcols, ba + bi[i] + j0, INSERT_VALUES)); 1568 } 1569 } 1570 PetscCall(MatAssemblyBegin(Aperm, MAT_FINAL_ASSEMBLY)); 1571 PetscCall(MatAssemblyEnd(Aperm, MAT_FINAL_ASSEMBLY)); 1572 PetscCall(MatRestoreRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done)); 1573 PetscCall(MatRestoreRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done)); 1574 PetscCall(MatSeqAIJRestoreArray(aA, &aa)); 1575 PetscCall(MatSeqAIJRestoreArray(aB, &ba)); 1576 PetscCall(PetscFree4(dnnz, onnz, tdnnz, tonnz)); 1577 PetscCall(PetscFree3(work, rdest, cdest)); 1578 PetscCall(PetscFree(gcdest)); 1579 if (parcolp) PetscCall(ISDestroy(&colp)); 1580 *B = Aperm; 1581 PetscFunctionReturn(PETSC_SUCCESS); 1582 } 1583 1584 static PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[]) 1585 { 1586 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1587 1588 PetscFunctionBegin; 1589 PetscCall(MatGetSize(aij->B, NULL, nghosts)); 1590 if (ghosts) *ghosts = aij->garray; 1591 PetscFunctionReturn(PETSC_SUCCESS); 1592 } 1593 1594 static PetscErrorCode MatGetInfo_MPIAIJ(Mat matin, MatInfoType flag, MatInfo *info) 1595 { 1596 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data; 1597 Mat A = mat->A, B = mat->B; 1598 PetscLogDouble isend[5], irecv[5]; 1599 1600 PetscFunctionBegin; 1601 info->block_size = 1.0; 1602 PetscCall(MatGetInfo(A, MAT_LOCAL, info)); 1603 1604 isend[0] = info->nz_used; 1605 isend[1] = info->nz_allocated; 1606 isend[2] = info->nz_unneeded; 1607 isend[3] = info->memory; 1608 isend[4] = info->mallocs; 1609 1610 PetscCall(MatGetInfo(B, MAT_LOCAL, info)); 1611 1612 isend[0] += info->nz_used; 1613 isend[1] += info->nz_allocated; 1614 isend[2] += info->nz_unneeded; 1615 isend[3] += info->memory; 1616 isend[4] += info->mallocs; 
1617 if (flag == MAT_LOCAL) { 1618 info->nz_used = isend[0]; 1619 info->nz_allocated = isend[1]; 1620 info->nz_unneeded = isend[2]; 1621 info->memory = isend[3]; 1622 info->mallocs = isend[4]; 1623 } else if (flag == MAT_GLOBAL_MAX) { 1624 PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin))); 1625 1626 info->nz_used = irecv[0]; 1627 info->nz_allocated = irecv[1]; 1628 info->nz_unneeded = irecv[2]; 1629 info->memory = irecv[3]; 1630 info->mallocs = irecv[4]; 1631 } else if (flag == MAT_GLOBAL_SUM) { 1632 PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin))); 1633 1634 info->nz_used = irecv[0]; 1635 info->nz_allocated = irecv[1]; 1636 info->nz_unneeded = irecv[2]; 1637 info->memory = irecv[3]; 1638 info->mallocs = irecv[4]; 1639 } 1640 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1641 info->fill_ratio_needed = 0; 1642 info->factor_mallocs = 0; 1643 PetscFunctionReturn(PETSC_SUCCESS); 1644 } 1645 1646 PetscErrorCode MatSetOption_MPIAIJ(Mat A, MatOption op, PetscBool flg) 1647 { 1648 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1649 1650 PetscFunctionBegin; 1651 switch (op) { 1652 case MAT_NEW_NONZERO_LOCATIONS: 1653 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1654 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1655 case MAT_KEEP_NONZERO_PATTERN: 1656 case MAT_NEW_NONZERO_LOCATION_ERR: 1657 case MAT_USE_INODES: 1658 case MAT_IGNORE_ZERO_ENTRIES: 1659 case MAT_FORM_EXPLICIT_TRANSPOSE: 1660 MatCheckPreallocated(A, 1); 1661 PetscCall(MatSetOption(a->A, op, flg)); 1662 PetscCall(MatSetOption(a->B, op, flg)); 1663 break; 1664 case MAT_ROW_ORIENTED: 1665 MatCheckPreallocated(A, 1); 1666 a->roworiented = flg; 1667 1668 PetscCall(MatSetOption(a->A, op, flg)); 1669 PetscCall(MatSetOption(a->B, op, flg)); 1670 break; 1671 case MAT_FORCE_DIAGONAL_ENTRIES: 1672 case MAT_SORTED_FULL: 1673 PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); 1674 break; 1675 case 
MAT_IGNORE_OFF_PROC_ENTRIES: 1676 a->donotstash = flg; 1677 break; 1678 /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */ 1679 case MAT_SPD: 1680 case MAT_SYMMETRIC: 1681 case MAT_STRUCTURALLY_SYMMETRIC: 1682 case MAT_HERMITIAN: 1683 case MAT_SYMMETRY_ETERNAL: 1684 case MAT_STRUCTURAL_SYMMETRY_ETERNAL: 1685 case MAT_SPD_ETERNAL: 1686 /* if the diagonal matrix is square it inherits some of the properties above */ 1687 break; 1688 case MAT_SUBMAT_SINGLEIS: 1689 A->submat_singleis = flg; 1690 break; 1691 case MAT_STRUCTURE_ONLY: 1692 /* The option is handled directly by MatSetOption() */ 1693 break; 1694 default: 1695 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op); 1696 } 1697 PetscFunctionReturn(PETSC_SUCCESS); 1698 } 1699 1700 PetscErrorCode MatGetRow_MPIAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1701 { 1702 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data; 1703 PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p; 1704 PetscInt i, *cworkA, *cworkB, **pcA, **pcB, cstart = matin->cmap->rstart; 1705 PetscInt nztot, nzA, nzB, lrow, rstart = matin->rmap->rstart, rend = matin->rmap->rend; 1706 PetscInt *cmap, *idx_p; 1707 1708 PetscFunctionBegin; 1709 PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active"); 1710 mat->getrowactive = PETSC_TRUE; 1711 1712 if (!mat->rowvalues && (idx || v)) { 1713 /* 1714 allocate enough space to hold information from the longest row. 
1715 */ 1716 Mat_SeqAIJ *Aa = (Mat_SeqAIJ *)mat->A->data, *Ba = (Mat_SeqAIJ *)mat->B->data; 1717 PetscInt max = 1, tmp; 1718 for (i = 0; i < matin->rmap->n; i++) { 1719 tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i]; 1720 if (max < tmp) max = tmp; 1721 } 1722 PetscCall(PetscMalloc2(max, &mat->rowvalues, max, &mat->rowindices)); 1723 } 1724 1725 PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows"); 1726 lrow = row - rstart; 1727 1728 pvA = &vworkA; 1729 pcA = &cworkA; 1730 pvB = &vworkB; 1731 pcB = &cworkB; 1732 if (!v) { 1733 pvA = NULL; 1734 pvB = NULL; 1735 } 1736 if (!idx) { 1737 pcA = NULL; 1738 if (!v) pcB = NULL; 1739 } 1740 PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA)); 1741 PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB)); 1742 nztot = nzA + nzB; 1743 1744 cmap = mat->garray; 1745 if (v || idx) { 1746 if (nztot) { 1747 /* Sort by increasing column numbers, assuming A and B already sorted */ 1748 PetscInt imark = -1; 1749 if (v) { 1750 *v = v_p = mat->rowvalues; 1751 for (i = 0; i < nzB; i++) { 1752 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1753 else break; 1754 } 1755 imark = i; 1756 for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i]; 1757 for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i]; 1758 } 1759 if (idx) { 1760 *idx = idx_p = mat->rowindices; 1761 if (imark > -1) { 1762 for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i]]; 1763 } else { 1764 for (i = 0; i < nzB; i++) { 1765 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1766 else break; 1767 } 1768 imark = i; 1769 } 1770 for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart + cworkA[i]; 1771 for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i]]; 1772 } 1773 } else { 1774 if (idx) *idx = NULL; 1775 if (v) *v = NULL; 1776 } 1777 } 1778 *nz = nztot; 1779 PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA)); 1780 PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, 
&nzB, pcB, pvB)); 1781 PetscFunctionReturn(PETSC_SUCCESS); 1782 } 1783 1784 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1785 { 1786 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1787 1788 PetscFunctionBegin; 1789 PetscCheck(aij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first"); 1790 aij->getrowactive = PETSC_FALSE; 1791 PetscFunctionReturn(PETSC_SUCCESS); 1792 } 1793 1794 static PetscErrorCode MatNorm_MPIAIJ(Mat mat, NormType type, PetscReal *norm) 1795 { 1796 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1797 Mat_SeqAIJ *amat = (Mat_SeqAIJ *)aij->A->data, *bmat = (Mat_SeqAIJ *)aij->B->data; 1798 PetscInt i, j, cstart = mat->cmap->rstart; 1799 PetscReal sum = 0.0; 1800 const MatScalar *v, *amata, *bmata; 1801 1802 PetscFunctionBegin; 1803 if (aij->size == 1) { 1804 PetscCall(MatNorm(aij->A, type, norm)); 1805 } else { 1806 PetscCall(MatSeqAIJGetArrayRead(aij->A, &amata)); 1807 PetscCall(MatSeqAIJGetArrayRead(aij->B, &bmata)); 1808 if (type == NORM_FROBENIUS) { 1809 v = amata; 1810 for (i = 0; i < amat->nz; i++) { 1811 sum += PetscRealPart(PetscConj(*v) * (*v)); 1812 v++; 1813 } 1814 v = bmata; 1815 for (i = 0; i < bmat->nz; i++) { 1816 sum += PetscRealPart(PetscConj(*v) * (*v)); 1817 v++; 1818 } 1819 PetscCall(MPIU_Allreduce(&sum, norm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 1820 *norm = PetscSqrtReal(*norm); 1821 PetscCall(PetscLogFlops(2.0 * amat->nz + 2.0 * bmat->nz)); 1822 } else if (type == NORM_1) { /* max column norm */ 1823 PetscReal *tmp, *tmp2; 1824 PetscInt *jj, *garray = aij->garray; 1825 PetscCall(PetscCalloc1(mat->cmap->N + 1, &tmp)); 1826 PetscCall(PetscMalloc1(mat->cmap->N + 1, &tmp2)); 1827 *norm = 0.0; 1828 v = amata; 1829 jj = amat->j; 1830 for (j = 0; j < amat->nz; j++) { 1831 tmp[cstart + *jj++] += PetscAbsScalar(*v); 1832 v++; 1833 } 1834 v = bmata; 1835 jj = bmat->j; 1836 for (j = 0; j < bmat->nz; j++) { 1837 
tmp[garray[*jj++]] += PetscAbsScalar(*v); 1838 v++; 1839 } 1840 PetscCall(MPIU_Allreduce(tmp, tmp2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 1841 for (j = 0; j < mat->cmap->N; j++) { 1842 if (tmp2[j] > *norm) *norm = tmp2[j]; 1843 } 1844 PetscCall(PetscFree(tmp)); 1845 PetscCall(PetscFree(tmp2)); 1846 PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0))); 1847 } else if (type == NORM_INFINITY) { /* max row norm */ 1848 PetscReal ntemp = 0.0; 1849 for (j = 0; j < aij->A->rmap->n; j++) { 1850 v = PetscSafePointerPlusOffset(amata, amat->i[j]); 1851 sum = 0.0; 1852 for (i = 0; i < amat->i[j + 1] - amat->i[j]; i++) { 1853 sum += PetscAbsScalar(*v); 1854 v++; 1855 } 1856 v = PetscSafePointerPlusOffset(bmata, bmat->i[j]); 1857 for (i = 0; i < bmat->i[j + 1] - bmat->i[j]; i++) { 1858 sum += PetscAbsScalar(*v); 1859 v++; 1860 } 1861 if (sum > ntemp) ntemp = sum; 1862 } 1863 PetscCall(MPIU_Allreduce(&ntemp, norm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat))); 1864 PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0))); 1865 } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for two norm"); 1866 PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &amata)); 1867 PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &bmata)); 1868 } 1869 PetscFunctionReturn(PETSC_SUCCESS); 1870 } 1871 1872 static PetscErrorCode MatTranspose_MPIAIJ(Mat A, MatReuse reuse, Mat *matout) 1873 { 1874 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *b; 1875 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *)a->A->data, *Bloc = (Mat_SeqAIJ *)a->B->data, *sub_B_diag; 1876 PetscInt M = A->rmap->N, N = A->cmap->N, ma, na, mb, nb, row, *cols, *cols_tmp, *B_diag_ilen, i, ncol, A_diag_ncol; 1877 const PetscInt *ai, *aj, *bi, *bj, *B_diag_i; 1878 Mat B, A_diag, *B_diag; 1879 const MatScalar *pbv, *bv; 1880 1881 PetscFunctionBegin; 1882 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout)); 1883 ma = A->rmap->n; 1884 na = 
A->cmap->n; 1885 mb = a->B->rmap->n; 1886 nb = a->B->cmap->n; 1887 ai = Aloc->i; 1888 aj = Aloc->j; 1889 bi = Bloc->i; 1890 bj = Bloc->j; 1891 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1892 PetscInt *d_nnz, *g_nnz, *o_nnz; 1893 PetscSFNode *oloc; 1894 PETSC_UNUSED PetscSF sf; 1895 1896 PetscCall(PetscMalloc4(na, &d_nnz, na, &o_nnz, nb, &g_nnz, nb, &oloc)); 1897 /* compute d_nnz for preallocation */ 1898 PetscCall(PetscArrayzero(d_nnz, na)); 1899 for (i = 0; i < ai[ma]; i++) d_nnz[aj[i]]++; 1900 /* compute local off-diagonal contributions */ 1901 PetscCall(PetscArrayzero(g_nnz, nb)); 1902 for (i = 0; i < bi[ma]; i++) g_nnz[bj[i]]++; 1903 /* map those to global */ 1904 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1905 PetscCall(PetscSFSetGraphLayout(sf, A->cmap, nb, NULL, PETSC_USE_POINTER, a->garray)); 1906 PetscCall(PetscSFSetFromOptions(sf)); 1907 PetscCall(PetscArrayzero(o_nnz, na)); 1908 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM)); 1909 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM)); 1910 PetscCall(PetscSFDestroy(&sf)); 1911 1912 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 1913 PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M)); 1914 PetscCall(MatSetBlockSizes(B, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs))); 1915 PetscCall(MatSetType(B, ((PetscObject)A)->type_name)); 1916 PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz)); 1917 PetscCall(PetscFree4(d_nnz, o_nnz, g_nnz, oloc)); 1918 } else { 1919 B = *matout; 1920 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 1921 } 1922 1923 b = (Mat_MPIAIJ *)B->data; 1924 A_diag = a->A; 1925 B_diag = &b->A; 1926 sub_B_diag = (Mat_SeqAIJ *)(*B_diag)->data; 1927 A_diag_ncol = A_diag->cmap->N; 1928 B_diag_ilen = sub_B_diag->ilen; 1929 B_diag_i = sub_B_diag->i; 1930 1931 /* Set ilen for diagonal of B */ 1932 for (i = 0; i < A_diag_ncol; i++) B_diag_ilen[i] = B_diag_i[i + 1] - B_diag_i[i]; 1933 1934 /* 
Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done 1935 very quickly (=without using MatSetValues), because all writes are local. */ 1936 PetscCall(MatTransposeSetPrecursor(A_diag, *B_diag)); 1937 PetscCall(MatTranspose(A_diag, MAT_REUSE_MATRIX, B_diag)); 1938 1939 /* copy over the B part */ 1940 PetscCall(PetscMalloc1(bi[mb], &cols)); 1941 PetscCall(MatSeqAIJGetArrayRead(a->B, &bv)); 1942 pbv = bv; 1943 row = A->rmap->rstart; 1944 for (i = 0; i < bi[mb]; i++) cols[i] = a->garray[bj[i]]; 1945 cols_tmp = cols; 1946 for (i = 0; i < mb; i++) { 1947 ncol = bi[i + 1] - bi[i]; 1948 PetscCall(MatSetValues(B, ncol, cols_tmp, 1, &row, pbv, INSERT_VALUES)); 1949 row++; 1950 if (pbv) pbv += ncol; 1951 if (cols_tmp) cols_tmp += ncol; 1952 } 1953 PetscCall(PetscFree(cols)); 1954 PetscCall(MatSeqAIJRestoreArrayRead(a->B, &bv)); 1955 1956 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 1957 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 1958 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1959 *matout = B; 1960 } else { 1961 PetscCall(MatHeaderMerge(A, &B)); 1962 } 1963 PetscFunctionReturn(PETSC_SUCCESS); 1964 } 1965 1966 static PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat, Vec ll, Vec rr) 1967 { 1968 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1969 Mat a = aij->A, b = aij->B; 1970 PetscInt s1, s2, s3; 1971 1972 PetscFunctionBegin; 1973 PetscCall(MatGetLocalSize(mat, &s2, &s3)); 1974 if (rr) { 1975 PetscCall(VecGetLocalSize(rr, &s1)); 1976 PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size"); 1977 /* Overlap communication with computation. 
*/ 1978 PetscCall(VecScatterBegin(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1979 } 1980 if (ll) { 1981 PetscCall(VecGetLocalSize(ll, &s1)); 1982 PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size"); 1983 PetscUseTypeMethod(b, diagonalscale, ll, NULL); 1984 } 1985 /* scale the diagonal block */ 1986 PetscUseTypeMethod(a, diagonalscale, ll, rr); 1987 1988 if (rr) { 1989 /* Do a scatter end and then right scale the off-diagonal block */ 1990 PetscCall(VecScatterEnd(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1991 PetscUseTypeMethod(b, diagonalscale, NULL, aij->lvec); 1992 } 1993 PetscFunctionReturn(PETSC_SUCCESS); 1994 } 1995 1996 static PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 1997 { 1998 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1999 2000 PetscFunctionBegin; 2001 PetscCall(MatSetUnfactored(a->A)); 2002 PetscFunctionReturn(PETSC_SUCCESS); 2003 } 2004 2005 static PetscErrorCode MatEqual_MPIAIJ(Mat A, Mat B, PetscBool *flag) 2006 { 2007 Mat_MPIAIJ *matB = (Mat_MPIAIJ *)B->data, *matA = (Mat_MPIAIJ *)A->data; 2008 Mat a, b, c, d; 2009 PetscBool flg; 2010 2011 PetscFunctionBegin; 2012 a = matA->A; 2013 b = matA->B; 2014 c = matB->A; 2015 d = matB->B; 2016 2017 PetscCall(MatEqual(a, c, &flg)); 2018 if (flg) PetscCall(MatEqual(b, d, &flg)); 2019 PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A))); 2020 PetscFunctionReturn(PETSC_SUCCESS); 2021 } 2022 2023 static PetscErrorCode MatCopy_MPIAIJ(Mat A, Mat B, MatStructure str) 2024 { 2025 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2026 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2027 2028 PetscFunctionBegin; 2029 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
*/ 2030 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2031 /* because of the column compression in the off-processor part of the matrix a->B, 2032 the number of columns in a->B and b->B may be different, hence we cannot call 2033 the MatCopy() directly on the two parts. If need be, we can provide a more 2034 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2035 then copying the submatrices */ 2036 PetscCall(MatCopy_Basic(A, B, str)); 2037 } else { 2038 PetscCall(MatCopy(a->A, b->A, str)); 2039 PetscCall(MatCopy(a->B, b->B, str)); 2040 } 2041 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 2042 PetscFunctionReturn(PETSC_SUCCESS); 2043 } 2044 2045 /* 2046 Computes the number of nonzeros per row needed for preallocation when X and Y 2047 have different nonzero structure. 2048 */ 2049 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m, const PetscInt *xi, const PetscInt *xj, const PetscInt *xltog, const PetscInt *yi, const PetscInt *yj, const PetscInt *yltog, PetscInt *nnz) 2050 { 2051 PetscInt i, j, k, nzx, nzy; 2052 2053 PetscFunctionBegin; 2054 /* Set the number of nonzeros in the new matrix */ 2055 for (i = 0; i < m; i++) { 2056 const PetscInt *xjj = PetscSafePointerPlusOffset(xj, xi[i]), *yjj = PetscSafePointerPlusOffset(yj, yi[i]); 2057 nzx = xi[i + 1] - xi[i]; 2058 nzy = yi[i + 1] - yi[i]; 2059 nnz[i] = 0; 2060 for (j = 0, k = 0; j < nzx; j++) { /* Point in X */ 2061 for (; k < nzy && yltog[yjj[k]] < xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2062 if (k < nzy && yltog[yjj[k]] == xltog[xjj[j]]) k++; /* Skip duplicate */ 2063 nnz[i]++; 2064 } 2065 for (; k < nzy; k++) nnz[i]++; 2066 } 2067 PetscFunctionReturn(PETSC_SUCCESS); 2068 } 2069 2070 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2071 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz) 2072 { 
2073 PetscInt m = Y->rmap->N; 2074 Mat_SeqAIJ *x = (Mat_SeqAIJ *)X->data; 2075 Mat_SeqAIJ *y = (Mat_SeqAIJ *)Y->data; 2076 2077 PetscFunctionBegin; 2078 PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz)); 2079 PetscFunctionReturn(PETSC_SUCCESS); 2080 } 2081 2082 static PetscErrorCode MatAXPY_MPIAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str) 2083 { 2084 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data, *yy = (Mat_MPIAIJ *)Y->data; 2085 2086 PetscFunctionBegin; 2087 if (str == SAME_NONZERO_PATTERN) { 2088 PetscCall(MatAXPY(yy->A, a, xx->A, str)); 2089 PetscCall(MatAXPY(yy->B, a, xx->B, str)); 2090 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2091 PetscCall(MatAXPY_Basic(Y, a, X, str)); 2092 } else { 2093 Mat B; 2094 PetscInt *nnz_d, *nnz_o; 2095 2096 PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d)); 2097 PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o)); 2098 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B)); 2099 PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name)); 2100 PetscCall(MatSetLayouts(B, Y->rmap, Y->cmap)); 2101 PetscCall(MatSetType(B, ((PetscObject)Y)->type_name)); 2102 PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A, xx->A, nnz_d)); 2103 PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o)); 2104 PetscCall(MatMPIAIJSetPreallocation(B, 0, nnz_d, 0, nnz_o)); 2105 PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str)); 2106 PetscCall(MatHeaderMerge(Y, &B)); 2107 PetscCall(PetscFree(nnz_d)); 2108 PetscCall(PetscFree(nnz_o)); 2109 } 2110 PetscFunctionReturn(PETSC_SUCCESS); 2111 } 2112 2113 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat); 2114 2115 static PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2116 { 2117 PetscFunctionBegin; 2118 if (PetscDefined(USE_COMPLEX)) { 2119 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2120 2121 PetscCall(MatConjugate_SeqAIJ(aij->A)); 2122 PetscCall(MatConjugate_SeqAIJ(aij->B)); 2123 } 2124 
PetscFunctionReturn(PETSC_SUCCESS); 2125 } 2126 2127 static PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2128 { 2129 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2130 2131 PetscFunctionBegin; 2132 PetscCall(MatRealPart(a->A)); 2133 PetscCall(MatRealPart(a->B)); 2134 PetscFunctionReturn(PETSC_SUCCESS); 2135 } 2136 2137 static PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2138 { 2139 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2140 2141 PetscFunctionBegin; 2142 PetscCall(MatImaginaryPart(a->A)); 2143 PetscCall(MatImaginaryPart(a->B)); 2144 PetscFunctionReturn(PETSC_SUCCESS); 2145 } 2146 2147 static PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2148 { 2149 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2150 PetscInt i, *idxb = NULL, m = A->rmap->n; 2151 PetscScalar *va, *vv; 2152 Vec vB, vA; 2153 const PetscScalar *vb; 2154 2155 PetscFunctionBegin; 2156 PetscCall(MatCreateVecs(a->A, NULL, &vA)); 2157 PetscCall(MatGetRowMaxAbs(a->A, vA, idx)); 2158 2159 PetscCall(VecGetArrayWrite(vA, &va)); 2160 if (idx) { 2161 for (i = 0; i < m; i++) { 2162 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2163 } 2164 } 2165 2166 PetscCall(MatCreateVecs(a->B, NULL, &vB)); 2167 PetscCall(PetscMalloc1(m, &idxb)); 2168 PetscCall(MatGetRowMaxAbs(a->B, vB, idxb)); 2169 2170 PetscCall(VecGetArrayWrite(v, &vv)); 2171 PetscCall(VecGetArrayRead(vB, &vb)); 2172 for (i = 0; i < m; i++) { 2173 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2174 vv[i] = vb[i]; 2175 if (idx) idx[i] = a->garray[idxb[i]]; 2176 } else { 2177 vv[i] = va[i]; 2178 if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) idx[i] = a->garray[idxb[i]]; 2179 } 2180 } 2181 PetscCall(VecRestoreArrayWrite(vA, &vv)); 2182 PetscCall(VecRestoreArrayWrite(vA, &va)); 2183 PetscCall(VecRestoreArrayRead(vB, &vb)); 2184 PetscCall(PetscFree(idxb)); 2185 PetscCall(VecDestroy(&vA)); 2186 PetscCall(VecDestroy(&vB)); 2187 PetscFunctionReturn(PETSC_SUCCESS); 2188 } 2189 2190 static 
PetscErrorCode MatGetRowSumAbs_MPIAIJ(Mat A, Vec v) 2191 { 2192 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2193 Vec vB, vA; 2194 2195 PetscFunctionBegin; 2196 PetscCall(MatCreateVecs(a->A, NULL, &vA)); 2197 PetscCall(MatGetRowSumAbs(a->A, vA)); 2198 PetscCall(MatCreateVecs(a->B, NULL, &vB)); 2199 PetscCall(MatGetRowSumAbs(a->B, vB)); 2200 PetscCall(VecAXPY(vA, 1.0, vB)); 2201 PetscCall(VecDestroy(&vB)); 2202 PetscCall(VecCopy(vA, v)); 2203 PetscCall(VecDestroy(&vA)); 2204 PetscFunctionReturn(PETSC_SUCCESS); 2205 } 2206 2207 static PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2208 { 2209 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 2210 PetscInt m = A->rmap->n, n = A->cmap->n; 2211 PetscInt cstart = A->cmap->rstart, cend = A->cmap->rend; 2212 PetscInt *cmap = mat->garray; 2213 PetscInt *diagIdx, *offdiagIdx; 2214 Vec diagV, offdiagV; 2215 PetscScalar *a, *diagA, *offdiagA; 2216 const PetscScalar *ba, *bav; 2217 PetscInt r, j, col, ncols, *bi, *bj; 2218 Mat B = mat->B; 2219 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 2220 2221 PetscFunctionBegin; 2222 /* When a process holds entire A and other processes have no entry */ 2223 if (A->cmap->N == n) { 2224 PetscCall(VecGetArrayWrite(v, &diagA)); 2225 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV)); 2226 PetscCall(MatGetRowMinAbs(mat->A, diagV, idx)); 2227 PetscCall(VecDestroy(&diagV)); 2228 PetscCall(VecRestoreArrayWrite(v, &diagA)); 2229 PetscFunctionReturn(PETSC_SUCCESS); 2230 } else if (n == 0) { 2231 if (m) { 2232 PetscCall(VecGetArrayWrite(v, &a)); 2233 for (r = 0; r < m; r++) { 2234 a[r] = 0.0; 2235 if (idx) idx[r] = -1; 2236 } 2237 PetscCall(VecRestoreArrayWrite(v, &a)); 2238 } 2239 PetscFunctionReturn(PETSC_SUCCESS); 2240 } 2241 2242 PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx)); 2243 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2244 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2245 PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx)); 2246 2247 /* Get 
offdiagIdx[] for implicit 0.0 */ 2248 PetscCall(MatSeqAIJGetArrayRead(B, &bav)); 2249 ba = bav; 2250 bi = b->i; 2251 bj = b->j; 2252 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2253 for (r = 0; r < m; r++) { 2254 ncols = bi[r + 1] - bi[r]; 2255 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2256 offdiagA[r] = *ba; 2257 offdiagIdx[r] = cmap[0]; 2258 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2259 offdiagA[r] = 0.0; 2260 2261 /* Find first hole in the cmap */ 2262 for (j = 0; j < ncols; j++) { 2263 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2264 if (col > j && j < cstart) { 2265 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2266 break; 2267 } else if (col > j + n && j >= cstart) { 2268 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2269 break; 2270 } 2271 } 2272 if (j == ncols && ncols < A->cmap->N - n) { 2273 /* a hole is outside compressed Bcols */ 2274 if (ncols == 0) { 2275 if (cstart) { 2276 offdiagIdx[r] = 0; 2277 } else offdiagIdx[r] = cend; 2278 } else { /* ncols > 0 */ 2279 offdiagIdx[r] = cmap[ncols - 1] + 1; 2280 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2281 } 2282 } 2283 } 2284 2285 for (j = 0; j < ncols; j++) { 2286 if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) { 2287 offdiagA[r] = *ba; 2288 offdiagIdx[r] = cmap[*bj]; 2289 } 2290 ba++; 2291 bj++; 2292 } 2293 } 2294 2295 PetscCall(VecGetArrayWrite(v, &a)); 2296 PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA)); 2297 for (r = 0; r < m; ++r) { 2298 if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) { 2299 a[r] = diagA[r]; 2300 if (idx) idx[r] = cstart + diagIdx[r]; 2301 } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) { 2302 a[r] = diagA[r]; 2303 if (idx) { 2304 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2305 idx[r] = cstart + diagIdx[r]; 2306 } else idx[r] = offdiagIdx[r]; 2307 } 2308 } else { 2309 a[r] = offdiagA[r]; 2310 if (idx) 
      idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Row-wise minimum (and its global column index) of an MPIAIJ matrix.

   The per-row minimum is computed separately over the diagonal block mat->A and
   the off-diagonal block mat->B (including the *implicit* zeros of B's sparse
   rows), then the two candidates are merged; on a tie the smaller global column
   index wins. */
static PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
  PetscInt           m = A->rmap->n, n = A->cmap->n;
  PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray; /* maps compressed B column -> global column */
  PetscInt          *diagIdx, *offdiagIdx;
  Vec                diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt           r, j, col, ncols, *bi, *bj;
  Mat                B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* everything lives in the diagonal block; delegate directly into v's array */
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMin(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (n == 0) {
    /* this rank owns no columns: every local row is entirely implicit, report +inf/-1 */
    if (m) {
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = PETSC_MAX_REAL;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscCalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMin(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r]   = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse, so we already KNOW the minimum is 0.0 or lower (there is at least one implicit 0.0) */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    /* scan the stored off-diagonal entries of this row for anything below the current candidate */
    for (j = 0; j < ncols; j++) {
      if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {
        offdiagA[r]   = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  /* merge the diagonal-block and off-diagonal-block candidates row by row */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        /* tie: prefer the smaller global column index */
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Row-wise maximum (and its global column index) of an MPIAIJ matrix;
   mirror image of MatGetRowMin_MPIAIJ above. */
static PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
  PetscInt           m = A->rmap->n, n = A->cmap->n;
  PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray;
  PetscInt          *diagIdx, *offdiagIdx;
  Vec                diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt           r, j, col, ncols, *bi, *bj;
  Mat                B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMax(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (n == 0) {
    /* no local columns: report -inf/-1 for every local row */
    if (m) {
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = PETSC_MIN_REAL;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMax(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r]   = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    /* scan the stored off-diagonal entries of this row for anything above the current candidate */
    for (j = 0; j < ncols; j++) {
      if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {
        offdiagA[r]   = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  /* merge the diagonal-block and off-diagonal-block candidates row by row */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        /* tie: prefer the smaller global column index */
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Return the (sequential) nonzero structure gathered from all ranks; values are not copied. */
PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat, Mat *newmat)
{
  Mat *dummy;

  PetscFunctionBegin;
  PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat, MAT_DO_NOT_GET_VALUES, MAT_INITIAL_MATRIX, &dummy));
  *newmat = *dummy;
  PetscCall(PetscFree(dummy)); /* free only the holder array; *newmat keeps the matrix */
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Delegate block-diagonal inversion to the local diagonal block, propagating any factorization error. */
static PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A, const PetscScalar **values)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatInvertBlockDiagonal(a->A, values));
  A->factorerrortype = a->A->factorerrortype;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Fill an MPIAIJ matrix with random values; requires the matrix to be assembled or preallocated. */
static PetscErrorCode MatSetRandom_MPIAIJ(Mat x, PetscRandom rctx)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)x->data;

  PetscFunctionBegin;
  PetscCheck(x->assembled || x->preallocated, PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
  PetscCall(MatSetRandom(aij->A, rctx));
  if (x->assembled) {
    PetscCall(MatSetRandom(aij->B, rctx));
  } else {
    /* B's column space must avoid the diagonal-block column range while unassembled */
    PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B, x->cmap->rstart, x->cmap->rend, rctx));
  }
  PetscCall(MatAssemblyBegin(x, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(x, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Toggle between the scalable and the default MatIncreaseOverlap implementation. */
static PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A, PetscBool sc)
{
  PetscFunctionBegin;
  if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
  else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ;
PetscFunctionReturn(PETSC_SUCCESS); 2599 } 2600 2601 /*@ 2602 MatMPIAIJGetNumberNonzeros - gets the number of nonzeros in the matrix on this MPI rank 2603 2604 Not Collective 2605 2606 Input Parameter: 2607 . A - the matrix 2608 2609 Output Parameter: 2610 . nz - the number of nonzeros 2611 2612 Level: advanced 2613 2614 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ` 2615 @*/ 2616 PetscErrorCode MatMPIAIJGetNumberNonzeros(Mat A, PetscCount *nz) 2617 { 2618 Mat_MPIAIJ *maij = (Mat_MPIAIJ *)A->data; 2619 Mat_SeqAIJ *aaij = (Mat_SeqAIJ *)maij->A->data, *baij = (Mat_SeqAIJ *)maij->B->data; 2620 PetscBool isaij; 2621 2622 PetscFunctionBegin; 2623 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIAIJ, &isaij)); 2624 PetscCheck(isaij, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)A)->type_name); 2625 *nz = aaij->i[A->rmap->n] + baij->i[A->rmap->n]; 2626 PetscFunctionReturn(PETSC_SUCCESS); 2627 } 2628 2629 /*@ 2630 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2631 2632 Collective 2633 2634 Input Parameters: 2635 + A - the matrix 2636 - sc - `PETSC_TRUE` indicates use the scalable algorithm (default is not to use the scalable algorithm) 2637 2638 Level: advanced 2639 2640 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ` 2641 @*/ 2642 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A, PetscBool sc) 2643 { 2644 PetscFunctionBegin; 2645 PetscTryMethod(A, "MatMPIAIJSetUseScalableIncreaseOverlap_C", (Mat, PetscBool), (A, sc)); 2646 PetscFunctionReturn(PETSC_SUCCESS); 2647 } 2648 2649 PetscErrorCode MatSetFromOptions_MPIAIJ(Mat A, PetscOptionItems *PetscOptionsObject) 2650 { 2651 PetscBool sc = PETSC_FALSE, flg; 2652 2653 PetscFunctionBegin; 2654 PetscOptionsHeadBegin(PetscOptionsObject, "MPIAIJ options"); 2655 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2656 PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable", "Use 
a scalable algorithm to compute the overlap", "MatIncreaseOverlap", sc, &sc, &flg)); 2657 if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, sc)); 2658 PetscOptionsHeadEnd(); 2659 PetscFunctionReturn(PETSC_SUCCESS); 2660 } 2661 2662 static PetscErrorCode MatShift_MPIAIJ(Mat Y, PetscScalar a) 2663 { 2664 Mat_MPIAIJ *maij = (Mat_MPIAIJ *)Y->data; 2665 Mat_SeqAIJ *aij = (Mat_SeqAIJ *)maij->A->data; 2666 2667 PetscFunctionBegin; 2668 if (!Y->preallocated) { 2669 PetscCall(MatMPIAIJSetPreallocation(Y, 1, NULL, 0, NULL)); 2670 } else if (!aij->nz) { /* It does not matter if diagonals of Y only partially lie in maij->A. We just need an estimated preallocation. */ 2671 PetscInt nonew = aij->nonew; 2672 PetscCall(MatSeqAIJSetPreallocation(maij->A, 1, NULL)); 2673 aij->nonew = nonew; 2674 } 2675 PetscCall(MatShift_Basic(Y, a)); 2676 PetscFunctionReturn(PETSC_SUCCESS); 2677 } 2678 2679 static PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A, PetscBool *missing, PetscInt *d) 2680 { 2681 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2682 2683 PetscFunctionBegin; 2684 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices"); 2685 PetscCall(MatMissingDiagonal(a->A, missing, d)); 2686 if (d) { 2687 PetscInt rstart; 2688 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 2689 *d += rstart; 2690 } 2691 PetscFunctionReturn(PETSC_SUCCESS); 2692 } 2693 2694 static PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A, PetscInt nblocks, const PetscInt *bsizes, PetscScalar *diag) 2695 { 2696 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2697 2698 PetscFunctionBegin; 2699 PetscCall(MatInvertVariableBlockDiagonal(a->A, nblocks, bsizes, diag)); 2700 PetscFunctionReturn(PETSC_SUCCESS); 2701 } 2702 2703 static PetscErrorCode MatEliminateZeros_MPIAIJ(Mat A, PetscBool keep) 2704 { 2705 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2706 2707 PetscFunctionBegin; 2708 PetscCall(MatEliminateZeros_SeqAIJ(a->A, keep)); // possibly keep zero diagonal coefficients 
  PetscCall(MatEliminateZeros_SeqAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Virtual-function table for MATMPIAIJ.
   The entries are ORDER-CRITICAL: each slot corresponds to a fixed member of
   struct _MatOps; the numbered comments mark every fifth slot. NULL means the
   operation is unsupported or provided elsewhere. Do not reorder. */
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
                                       MatGetRow_MPIAIJ,
                                       MatRestoreRow_MPIAIJ,
                                       MatMult_MPIAIJ,
                                       /* 4*/ MatMultAdd_MPIAIJ,
                                       MatMultTranspose_MPIAIJ,
                                       MatMultTransposeAdd_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*10*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_MPIAIJ,
                                       MatTranspose_MPIAIJ,
                                       /*15*/ MatGetInfo_MPIAIJ,
                                       MatEqual_MPIAIJ,
                                       MatGetDiagonal_MPIAIJ,
                                       MatDiagonalScale_MPIAIJ,
                                       MatNorm_MPIAIJ,
                                       /*20*/ MatAssemblyBegin_MPIAIJ,
                                       MatAssemblyEnd_MPIAIJ,
                                       MatSetOption_MPIAIJ,
                                       MatZeroEntries_MPIAIJ,
                                       /*24*/ MatZeroRows_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*29*/ MatSetUp_MPI_Hash,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIAIJ,
                                       NULL,
                                       /*34*/ MatDuplicate_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*39*/ MatAXPY_MPIAIJ,
                                       MatCreateSubMatrices_MPIAIJ,
                                       MatIncreaseOverlap_MPIAIJ,
                                       MatGetValues_MPIAIJ,
                                       MatCopy_MPIAIJ,
                                       /*44*/ MatGetRowMax_MPIAIJ,
                                       MatScale_MPIAIJ,
                                       MatShift_MPIAIJ,
                                       MatDiagonalSet_MPIAIJ,
                                       MatZeroRowsColumns_MPIAIJ,
                                       /*49*/ MatSetRandom_MPIAIJ,
                                       MatGetRowIJ_MPIAIJ,
                                       MatRestoreRowIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*54*/ MatFDColoringCreate_MPIXAIJ,
                                       NULL,
                                       MatSetUnfactored_MPIAIJ,
                                       MatPermute_MPIAIJ,
                                       NULL,
                                       /*59*/ MatCreateSubMatrix_MPIAIJ,
                                       MatDestroy_MPIAIJ,
                                       MatView_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*64*/ NULL,
                                       MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*69*/ MatGetRowMaxAbs_MPIAIJ,
                                       MatGetRowMinAbs_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*75*/ MatFDColoringApply_AIJ,
                                       MatSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFindZeroDiagonals_MPIAIJ,
                                       /*80*/ NULL,
                                       NULL,
                                       NULL,
                                       /*83*/ MatLoad_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*89*/ NULL,
                                       NULL,
                                       MatMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatBindToCPU_MPIAIJ,
                                       /*99*/ MatProductSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIAIJ,
                                       NULL,
                                       /*104*/ MatSetValuesRow_MPIAIJ,
                                       MatRealPart_MPIAIJ,
                                       MatImaginaryPart_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*109*/ NULL,
                                       NULL,
                                       MatGetRowMin_MPIAIJ,
                                       NULL,
                                       MatMissingDiagonal_MPIAIJ,
                                       /*114*/ MatGetSeqNonzeroStructure_MPIAIJ,
                                       NULL,
                                       MatGetGhosts_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*119*/ MatMultDiagonalBlock_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatGetMultiProcBlock_MPIAIJ,
                                       /*124*/ MatFindNonzeroRows_MPIAIJ,
                                       MatGetColumnReductions_MPIAIJ,
                                       MatInvertBlockDiagonal_MPIAIJ,
                                       MatInvertVariableBlockDiagonal_MPIAIJ,
                                       MatCreateSubMatricesMPI_MPIAIJ,
                                       /*129*/ NULL,
                                       NULL,
                                       NULL,
                                       MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*139*/ MatSetBlockSizes_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFDColoringSetUp_MPIXAIJ,
                                       MatFindOffBlockDiagonalEntries_MPIAIJ,
                                       MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
                                       /*145*/ NULL,
                                       NULL,
                                       NULL,
                                       MatCreateGraph_Simple_AIJ,
                                       NULL,
                                       /*150*/ NULL,
                                       MatEliminateZeros_MPIAIJ,
                                       MatGetRowSumAbs_MPIAIJ,
                                       NULL};

/* Snapshot the numerical values of both blocks (pairs with MatRetrieveValues_MPIAIJ). */
static PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatStoreValues(aij->A));
  PetscCall(MatStoreValues(aij->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Restore the numerical values previously saved by MatStoreValues_MPIAIJ. */
static PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatRetrieveValues(aij->A));
  PetscCall(MatRetrieveValues(aij->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Type-specific implementation of MatMPIAIJSetPreallocation: (re)creates the
   local diagonal block b->A (m x n) and off-diagonal block b->B (m x N, or
   m x 0 when running on one rank) and preallocates both.
   Any existing communication structures (colmap, garray, lvec, Mvctx) are
   discarded because the nonzero pattern may change. */
PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
  PetscMPIInt size;

  PetscFunctionBegin;
  if (B->hash_active) {
    /* leave hash-based MatSetValues mode: restore the cached real ops table */
    B->ops[0]      = b->cops;
    B->hash_active = PETSC_FALSE;
  }
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));

#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&b->colmap));
#else
  PetscCall(PetscFree(b->colmap));
#endif
  PetscCall(PetscFree(b->garray));
  PetscCall(VecDestroy(&b->lvec));
  PetscCall(VecScatterDestroy(&b->Mvctx));

  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));

  /* preserve any SeqAIJ-level options across the destroy/recreate cycle */
  MatSeqXAIJGetOptions_Private(b->B);
  PetscCall(MatDestroy(&b->B));
  PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
  /* on a single rank there is no off-diagonal part, so give B zero columns */
  PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
  PetscCall(MatSetBlockSizesFromMats(b->B, B, B));
  PetscCall(MatSetType(b->B, MATSEQAIJ));
  MatSeqXAIJRestoreOptions_Private(b->B);

  MatSeqXAIJGetOptions_Private(b->A);
  PetscCall(MatDestroy(&b->A));
  PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
  PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
  PetscCall(MatSetBlockSizesFromMats(b->A, B, B));
  PetscCall(MatSetType(b->A, MATSEQAIJ));
  MatSeqXAIJRestoreOptions_Private(b->A);

  PetscCall(MatSeqAIJSetPreallocation(b->A, d_nz, d_nnz));
  PetscCall(MatSeqAIJSetPreallocation(b->B, o_nz, o_nnz));
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Reset the preallocation of both local blocks, dropping communication
   structures tied to the old nonzero pattern. */
static PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
{
  Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));

#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&b->colmap));
#else
  PetscCall(PetscFree(b->colmap));
#endif
  PetscCall(PetscFree(b->garray));
  PetscCall(VecDestroy(&b->lvec));
  PetscCall(VecScatterDestroy(&b->Mvctx));

  PetscCall(MatResetPreallocation(b->A));
  PetscCall(MatResetPreallocation(b->B));
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Duplicate an MPIAIJ matrix (layouts, flags, colmap/garray, blocks, and the
   shared scatter context), optionally copying the numerical values. */
PetscErrorCode MatDuplicate_MPIAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
{
  Mat         mat;
  Mat_MPIAIJ *a, *oldmat = (Mat_MPIAIJ *)matin->data;

  PetscFunctionBegin;
  *newmat = NULL;
  PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
  PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
  PetscCall(MatSetBlockSizesFromMats(mat, matin, matin));
  PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
  a = (Mat_MPIAIJ *)mat->data;

  mat->factortype = matin->factortype;
  mat->assembled  = matin->assembled;
  mat->insertmode = NOT_SET_VALUES;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = NULL; /* MatGetRow workspace is lazily re-created, never copied */
  a->rowvalues    = NULL;
  a->getrowactive = PETSC_FALSE;

  PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
  PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));
  if (matin->hash_active) {
    PetscCall(MatSetUp(mat));
  } else {
    mat->preallocated = matin->preallocated;
    if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
      PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
#else
      PetscCall(PetscMalloc1(mat->cmap->N, &a->colmap));
      PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, mat->cmap->N));
#endif
    } else a->colmap = NULL;
    if (oldmat->garray) {
      PetscInt len;
      len = oldmat->B->cmap->n;
      PetscCall(PetscMalloc1(len + 1, &a->garray)); /* +1 so a zero-length garray is still a valid allocation */
      if (len) PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
    } else a->garray = NULL;

    /* It may happen MatDuplicate is called with a non-assembled matrix
      In fact, MatDuplicate only requires the matrix to be preallocated
      This may happen inside a DMCreateMatrix_Shell */
    if (oldmat->lvec) PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
    if (oldmat->Mvctx) {
      /* the scatter context is immutable, so it is shared by reference, not copied */
      a->Mvctx = oldmat->Mvctx;
      PetscCall(PetscObjectReference((PetscObject)oldmat->Mvctx));
    }
    PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
    PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
  }
  PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
  *newmat = mat;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Load an MPIAIJ matrix from a viewer; dispatches on the viewer type
   (binary or, when built with HDF5, HDF5). */
PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
{
  PetscBool isbinary, ishdf5;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
  PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
  /* force binary viewer to load .info file if it has not yet done so */
  PetscCall(PetscViewerSetUp(viewer));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
  if (isbinary) {
    PetscCall(MatLoad_MPIAIJ_Binary(newMat, viewer));
  } else if (ishdf5) {
#if defined(PETSC_HAVE_HDF5)
    PetscCall(MatLoad_AIJ_HDF5(newMat, viewer));
#else
    SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
#endif
  } else {
    SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Load an MPIAIJ matrix from the PETSc binary format:
   header [classid, M, N, nz], then per-row lengths, column indices, values.
   Each rank reads its own row slice and assembles via the CSR preallocation path. */
PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
{
  PetscInt     header[4], M, N, m, nz, rows, cols, sum, i;
  PetscInt    *rowidxs, *colidxs;
  PetscScalar *matvals;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  /* read in matrix header */
  PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
  PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
  M  = header[1];
  N  = header[2];
  nz = header[3];
  PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
  PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
  /* a negative nz marks a special on-disk format this loader cannot handle */
  PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIAIJ");

  /* set block sizes from the viewer's .info file */
  PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
  /* set global sizes if not set already */
  if (mat->rmap->N < 0) mat->rmap->N = M;
  if (mat->cmap->N < 0) mat->cmap->N = N;
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));

  /* check if the matrix sizes are correct */
  PetscCall(MatGetSize(mat, &rows, &cols));
  PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);

  /* read in row lengths and build row indices */
  PetscCall(MatGetLocalSize(mat, &m, NULL));
  PetscCall(PetscMalloc1(m + 1, &rowidxs));
  PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT));
  rowidxs[0] = 0;
  /* prefix-sum the row lengths into CSR row offsets */
  for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
  if (nz != PETSC_MAX_INT) { /* PETSC_MAX_INT signals "nonzero count unknown" in the header */
    PetscCall(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer)));
    PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);
  }

  /* read in column indices and matrix values */
  PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals));
  PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
  PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
  /* store matrix indices and values */
  PetscCall(MatMPIAIJSetPreallocationCSR(mat, rowidxs, colidxs, matvals));
  PetscCall(PetscFree(rowidxs));
  PetscCall(PetscFree2(colidxs, matvals));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Not scalable because of ISAllGather() unless getting all columns. */
static PetscErrorCode ISGetSeqIS_Private(Mat mat, IS iscol, IS *isseq)
{
  IS          iscol_local;
  PetscBool   isstride;
  PetscMPIInt lisstride = 0, gisstride;

  PetscFunctionBegin;
  /* check if we are grabbing all columns*/
  PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &isstride));

  if (isstride) {
    PetscInt start, len, mstart, mlen;
    PetscCall(ISStrideGetInfo(iscol, &start, NULL));
    PetscCall(ISGetLocalSize(iscol, &len));
    PetscCall(MatGetOwnershipRangeColumn(mat, &mstart, &mlen));
    if (mstart == start && mlen - mstart == len) lisstride = 1;
  }

  /* all ranks must agree that every local piece is the full owned column range */
  PetscCall(MPIU_Allreduce(&lisstride, &gisstride, 1, MPI_INT, MPI_MIN, PetscObjectComm((PetscObject)mat)));
  if (gisstride) {
    PetscInt N;
    PetscCall(MatGetSize(mat, NULL, &N));
    PetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &iscol_local));
    PetscCall(ISSetIdentity(iscol_local));
    PetscCall(PetscInfo(mat, "Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n"));
  } else {
    PetscInt cbs;
    PetscCall(ISGetBlockSize(iscol, &cbs));
    PetscCall(ISAllGather(iscol, &iscol_local));
    PetscCall(ISSetBlockSize(iscol_local, cbs));
  }

  *isseq = iscol_local;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
  (see MatCreateSubMatrix_MPIAIJ_nonscalable)

  Input Parameters:
+   mat - matrix
.
isrow - parallel row index set; its local indices are a subset of local columns of `mat`,
          i.e., mat->rstart <= isrow[i] < mat->rend
-   iscol - parallel column index set; its local indices are a subset of local columns of `mat`,
          i.e., mat->cstart <= iscol[i] < mat->cend

  Output Parameters:
+   isrow_d - sequential row index set for retrieving mat->A
.   iscol_d - sequential column index set for retrieving mat->A
.   iscol_o - sequential column index set for retrieving mat->B
-   garray - column map; garray[i] indicates global location of iscol_o[i] in `iscol`
 */
static PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat, IS isrow, IS iscol, IS *isrow_d, IS *iscol_d, IS *iscol_o, const PetscInt *garray[])
{
  Vec             x, cmap;
  const PetscInt *is_idx;
  PetscScalar    *xarray, *cmaparray;
  PetscInt        ncols, isstart, *idx, m, rstart, *cmap1, count;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ *)mat->data;
  Mat             B = a->B;
  Vec             lvec = a->lvec, lcmap;
  PetscInt        i, cstart, cend, Bn = B->cmap->N;
  MPI_Comm        comm;
  VecScatter      Mvctx = a->Mvctx;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  PetscCall(ISGetLocalSize(iscol, &ncols));

  /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
  PetscCall(MatCreateVecs(mat, &x, NULL));
  PetscCall(VecSet(x, -1.0)); /* -1 marks columns NOT selected by iscol */
  PetscCall(VecDuplicate(x, &cmap));
  PetscCall(VecSet(cmap, -1.0));

  /* Get start indices */
  PetscCallMPI(MPI_Scan(&ncols, &isstart, 1, MPIU_INT, MPI_SUM, comm));
  isstart -= ncols; /* exclusive prefix sum: position of this rank's first iscol entry */
  PetscCall(MatGetOwnershipRangeColumn(mat, &cstart, &cend));

  PetscCall(ISGetIndices(iscol, &is_idx));
  PetscCall(VecGetArray(x, &xarray));
  PetscCall(VecGetArray(cmap, &cmaparray));
  PetscCall(PetscMalloc1(ncols, &idx));
  for (i = 0; i < ncols; i++) {
    xarray[is_idx[i] - cstart]    = (PetscScalar)is_idx[i];
    cmaparray[is_idx[i] - cstart] = i + isstart;        /* global index of iscol[i] */
    idx[i]                        = is_idx[i] - cstart; /* local index of iscol[i]  */
  }
  PetscCall(VecRestoreArray(x, &xarray));
  PetscCall(VecRestoreArray(cmap, &cmaparray));
  PetscCall(ISRestoreIndices(iscol, &is_idx));

  /* Get iscol_d */
  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, iscol_d));
  PetscCall(ISGetBlockSize(iscol, &i));
  PetscCall(ISSetBlockSize(*iscol_d, i));

  /* Get isrow_d */
  PetscCall(ISGetLocalSize(isrow, &m));
  rstart = mat->rmap->rstart;
  PetscCall(PetscMalloc1(m, &idx));
  PetscCall(ISGetIndices(isrow, &is_idx));
  for (i = 0; i < m; i++) idx[i] = is_idx[i] - rstart;
  PetscCall(ISRestoreIndices(isrow, &is_idx));

  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idx, PETSC_OWN_POINTER, isrow_d));
  PetscCall(ISGetBlockSize(isrow, &i));
  PetscCall(ISSetBlockSize(*isrow_d, i));

  /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
  PetscCall(VecScatterBegin(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));

  PetscCall(VecDuplicate(lvec, &lcmap));

  PetscCall(VecScatterBegin(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));

  /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
  /* off-process column indices */
  count = 0;
  PetscCall(PetscMalloc1(Bn, &idx));
  PetscCall(PetscMalloc1(Bn, &cmap1));

  PetscCall(VecGetArray(lvec, &xarray));
  PetscCall(VecGetArray(lcmap, &cmaparray));
  for (i = 0; i < Bn; i++) {
    if (PetscRealPart(xarray[i]) > -1.0) { /* ghost column i was selected by some rank's iscol */
      idx[count]   = i;                                     /* local column index in off-diagonal part B */
      cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */
      count++;
    }
  }
  PetscCall(VecRestoreArray(lvec, &xarray));
  PetscCall(VecRestoreArray(lcmap, &cmaparray));

  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_COPY_VALUES, iscol_o));
  /* cannot ensure iscol_o has same blocksize as iscol! */

  PetscCall(PetscFree(idx));
  *garray = cmap1; /* ownership of cmap1 transfers to the caller */

  PetscCall(VecDestroy(&x));
  PetscCall(VecDestroy(&cmap));
  PetscCall(VecDestroy(&lcmap));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *submat)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data, *asub;
  Mat         M = NULL;
  MPI_Comm    comm;
  IS          iscol_d, isrow_d, iscol_o;
  Mat         Asub = NULL, Bsub = NULL;
  PetscInt    n;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));

  if (call == MAT_REUSE_MATRIX) {
    /* Retrieve isrow_d, iscol_d and iscol_o from submat */
    PetscCall(PetscObjectQuery((PetscObject)*submat, "isrow_d", (PetscObject *)&isrow_d));
    PetscCheck(isrow_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "isrow_d passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_d", (PetscObject *)&iscol_d));
    PetscCheck(iscol_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_d passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_o", (PetscObject *)&iscol_o));
    PetscCheck(iscol_o, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_o passed in was not used before, cannot reuse");

    /* Update diagonal and off-diagonal portions of submat */
    asub = (Mat_MPIAIJ *)(*submat)->data;
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->A));
    PetscCall(ISGetLocalSize(iscol_o, &n));
    if (n) PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->B));
    PetscCall(MatAssemblyBegin(*submat, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(*submat, MAT_FINAL_ASSEMBLY));

  } else { /* call == MAT_INITIAL_MATRIX) */
    const PetscInt *garray;
    PetscInt        BsubN;

    /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
    PetscCall(ISGetSeqIS_SameColDist_Private(mat, isrow, iscol, &isrow_d, &iscol_d, &iscol_o, &garray));

    /* Create local submatrices Asub and Bsub */
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Asub));
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Bsub));

    /* Create submatrix M */
    PetscCall(MatCreateMPIAIJWithSeqAIJ(comm, Asub, Bsub, garray, &M));

    /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
    asub = (Mat_MPIAIJ *)M->data;

    PetscCall(ISGetLocalSize(iscol_o, &BsubN));
    n = asub->B->cmap->N;
    if (BsubN > n) {
      /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
      const PetscInt *idx;
      PetscInt        i, j, *idx_new, *subgarray = asub->garray;
      PetscCall(PetscInfo(M, "submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n", n, BsubN));

      PetscCall(PetscMalloc1(n, &idx_new));
      j = 0;
      PetscCall(ISGetIndices(iscol_o, &idx));
      /* match each surviving submatrix column (subgarray) to its position in the original garray */
      for (i = 0; i < n; i++) {
        if (j >= BsubN) break;
        while (subgarray[i] > garray[j]) j++;

        if (subgarray[i] == garray[j]) {
          idx_new[i] = idx[j++];
        } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT, i, subgarray[i], j, garray[j]);
      }
      PetscCall(ISRestoreIndices(iscol_o, &idx));

      PetscCall(ISDestroy(&iscol_o));
      PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx_new, PETSC_OWN_POINTER, &iscol_o));

    } else if (BsubN < n) {
      SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")", BsubN, asub->B->cmap->N);
    }

    PetscCall(PetscFree(garray));
    *submat = M;

    /* Save isrow_d,
iscol_d and iscol_o used in processor for next request */ 3334 PetscCall(PetscObjectCompose((PetscObject)M, "isrow_d", (PetscObject)isrow_d)); 3335 PetscCall(ISDestroy(&isrow_d)); 3336 3337 PetscCall(PetscObjectCompose((PetscObject)M, "iscol_d", (PetscObject)iscol_d)); 3338 PetscCall(ISDestroy(&iscol_d)); 3339 3340 PetscCall(PetscObjectCompose((PetscObject)M, "iscol_o", (PetscObject)iscol_o)); 3341 PetscCall(ISDestroy(&iscol_o)); 3342 } 3343 PetscFunctionReturn(PETSC_SUCCESS); 3344 } 3345 3346 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat) 3347 { 3348 IS iscol_local = NULL, isrow_d; 3349 PetscInt csize; 3350 PetscInt n, i, j, start, end; 3351 PetscBool sameRowDist = PETSC_FALSE, sameDist[2], tsameDist[2]; 3352 MPI_Comm comm; 3353 3354 PetscFunctionBegin; 3355 /* If isrow has same processor distribution as mat, 3356 call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */ 3357 if (call == MAT_REUSE_MATRIX) { 3358 PetscCall(PetscObjectQuery((PetscObject)*newmat, "isrow_d", (PetscObject *)&isrow_d)); 3359 if (isrow_d) { 3360 sameRowDist = PETSC_TRUE; 3361 tsameDist[1] = PETSC_TRUE; /* sameColDist */ 3362 } else { 3363 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_local)); 3364 if (iscol_local) { 3365 sameRowDist = PETSC_TRUE; 3366 tsameDist[1] = PETSC_FALSE; /* !sameColDist */ 3367 } 3368 } 3369 } else { 3370 /* Check if isrow has same processor distribution as mat */ 3371 sameDist[0] = PETSC_FALSE; 3372 PetscCall(ISGetLocalSize(isrow, &n)); 3373 if (!n) { 3374 sameDist[0] = PETSC_TRUE; 3375 } else { 3376 PetscCall(ISGetMinMax(isrow, &i, &j)); 3377 PetscCall(MatGetOwnershipRange(mat, &start, &end)); 3378 if (i >= start && j < end) sameDist[0] = PETSC_TRUE; 3379 } 3380 3381 /* Check if iscol has same processor distribution as mat */ 3382 sameDist[1] = PETSC_FALSE; 3383 PetscCall(ISGetLocalSize(iscol, &n)); 3384 if (!n) { 3385 sameDist[1] 
= PETSC_TRUE; 3386 } else { 3387 PetscCall(ISGetMinMax(iscol, &i, &j)); 3388 PetscCall(MatGetOwnershipRangeColumn(mat, &start, &end)); 3389 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3390 } 3391 3392 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3393 PetscCall(MPIU_Allreduce(&sameDist, &tsameDist, 2, MPIU_BOOL, MPI_LAND, comm)); 3394 sameRowDist = tsameDist[0]; 3395 } 3396 3397 if (sameRowDist) { 3398 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3399 /* isrow and iscol have same processor distribution as mat */ 3400 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat, isrow, iscol, call, newmat)); 3401 PetscFunctionReturn(PETSC_SUCCESS); 3402 } else { /* sameRowDist */ 3403 /* isrow has same processor distribution as mat */ 3404 if (call == MAT_INITIAL_MATRIX) { 3405 PetscBool sorted; 3406 PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local)); 3407 PetscCall(ISGetLocalSize(iscol_local, &n)); /* local size of iscol_local = global columns of newmat */ 3408 PetscCall(ISGetSize(iscol, &i)); 3409 PetscCheck(n == i, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT, n, i); 3410 3411 PetscCall(ISSorted(iscol_local, &sorted)); 3412 if (sorted) { 3413 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3414 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, iscol_local, MAT_INITIAL_MATRIX, newmat)); 3415 PetscFunctionReturn(PETSC_SUCCESS); 3416 } 3417 } else { /* call == MAT_REUSE_MATRIX */ 3418 IS iscol_sub; 3419 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub)); 3420 if (iscol_sub) { 3421 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, NULL, call, newmat)); 3422 PetscFunctionReturn(PETSC_SUCCESS); 3423 } 3424 } 3425 } 3426 } 3427 3428 /* General case: iscol -> iscol_local which has global size of iscol */ 3429 if (call == MAT_REUSE_MATRIX) { 3430 
PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local)); 3431 PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 3432 } else { 3433 if (!iscol_local) PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local)); 3434 } 3435 3436 PetscCall(ISGetLocalSize(iscol, &csize)); 3437 PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat, isrow, iscol_local, csize, call, newmat)); 3438 3439 if (call == MAT_INITIAL_MATRIX) { 3440 PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local)); 3441 PetscCall(ISDestroy(&iscol_local)); 3442 } 3443 PetscFunctionReturn(PETSC_SUCCESS); 3444 } 3445 3446 /*@C 3447 MatCreateMPIAIJWithSeqAIJ - creates a `MATMPIAIJ` matrix using `MATSEQAIJ` matrices that contain the "diagonal" 3448 and "off-diagonal" part of the matrix in CSR format. 3449 3450 Collective 3451 3452 Input Parameters: 3453 + comm - MPI communicator 3454 . A - "diagonal" portion of matrix 3455 . B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3456 - garray - global index of `B` columns 3457 3458 Output Parameter: 3459 . mat - the matrix, with input `A` as its local diagonal matrix 3460 3461 Level: advanced 3462 3463 Notes: 3464 See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3465 3466 `A` becomes part of output mat, `B` is destroyed by this routine. The user cannot use `A` and `B` anymore. 

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateMPIAIJWithSplitArrays()`
@*/
PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm, Mat A, Mat B, const PetscInt garray[], Mat *mat)
{
  Mat_MPIAIJ        *maij;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data, *bnew;
  PetscInt          *oi = b->i, *oj = b->j, i, nz, col;
  const PetscScalar *oa;
  Mat                Bnew;
  PetscInt           m, n, N;
  MatType            mpi_mat_type;

  PetscFunctionBegin;
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatGetSize(A, &m, &n));
  PetscCheck(m == B->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Am %" PetscInt_FMT " != Bm %" PetscInt_FMT, m, B->rmap->N);
  PetscCheck(PetscAbs(A->rmap->bs) == PetscAbs(B->rmap->bs), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT, A->rmap->bs, B->rmap->bs);
  /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
  /* PetscCheck(A->cmap->bs == B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */

  /* Get global columns of mat */
  PetscCall(MPIU_Allreduce(&n, &N, 1, MPIU_INT, MPI_SUM, comm));

  PetscCall(MatSetSizes(*mat, m, n, PETSC_DECIDE, N));
  /* Determine the type of MPI matrix that should be created from the type of matrix A, which holds the "diagonal" portion. */
  PetscCall(MatGetMPIMatType_Private(A, &mpi_mat_type));
  PetscCall(MatSetType(*mat, mpi_mat_type));

  if (A->rmap->bs > 1 || A->cmap->bs > 1) PetscCall(MatSetBlockSizes(*mat, A->rmap->bs, A->cmap->bs));
  maij = (Mat_MPIAIJ *)(*mat)->data;

  (*mat)->preallocated = PETSC_TRUE;

  PetscCall(PetscLayoutSetUp((*mat)->rmap));
  PetscCall(PetscLayoutSetUp((*mat)->cmap));

  /* Set A as diagonal portion of *mat */
  maij->A = A;

  /* Rewrite B's column indices in place from local (compact) numbering to global
     numbering via garray; oj aliases b->j, so B's own index array is modified here */
  nz = oi[m];
  for (i = 0; i < nz; i++) {
    col   = oj[i];
    oj[i] = garray[col];
  }

  /* Set Bnew as off-diagonal portion of *mat; Bnew shares oi/oj/oa with B */
  PetscCall(MatSeqAIJGetArrayRead(B, &oa));
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, N, oi, oj, (PetscScalar *)oa, &Bnew));
  PetscCall(MatSeqAIJRestoreArrayRead(B, &oa));
  bnew        = (Mat_SeqAIJ *)Bnew->data;
  bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
  maij->B     = Bnew;

  PetscCheck(B->rmap->N == Bnew->rmap->N, PETSC_COMM_SELF, PETSC_ERR_PLIB, "BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT, B->rmap->N, Bnew->rmap->N);

  /* Transfer ownership of the shared arrays from B to Bnew before destroying B */
  b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
  b->free_a       = PETSC_FALSE;
  b->free_ij      = PETSC_FALSE;
  PetscCall(MatDestroy(&B));

  bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
  bnew->free_a       = PETSC_TRUE;
  bnew->free_ij      = PETSC_TRUE;

  /* condense columns of maij->B */
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
  PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat, PetscInt, const IS[], const IS[], MatReuse,
PetscBool, Mat *);

/*
  MatCreateSubMatrix_MPIAIJ_SameRowDist - extract a submatrix when isrow has the same row
  distribution as mat (iscol need not be distributed the same way).

  For MAT_INITIAL_MATRIX, iscol_local must be a sorted sequential IS holding the global
  column set (duplicates allowed). For MAT_REUSE_MATRIX, iscol_local may be NULL; the
  objects composed on *newmat at creation time ("SubMatrix", "SubIScol", "Subcmap") are
  queried instead.
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat, IS isrow, IS iscol, IS iscol_local, MatReuse call, Mat *newmat)
{
  PetscInt        i, m, n, rstart, row, rend, nz, j, bs, cbs;
  PetscInt       *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ *)mat->data;
  Mat             M, Msub, B = a->B;
  MatScalar      *aa;
  Mat_SeqAIJ     *aij;
  PetscInt       *garray = a->garray, *colsub, Ncols;
  PetscInt        count, Bn = B->cmap->N, cstart = mat->cmap->rstart, cend = mat->cmap->rend;
  IS              iscol_sub, iscmap;
  const PetscInt *is_idx, *cmap;
  PetscBool       allcolumns = PETSC_FALSE;
  MPI_Comm        comm;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  if (call == MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
    PetscCheck(iscol_sub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "SubIScol passed in was not used before, cannot reuse");
    PetscCall(ISGetLocalSize(iscol_sub, &count));

    PetscCall(PetscObjectQuery((PetscObject)*newmat, "Subcmap", (PetscObject *)&iscmap));
    PetscCheck(iscmap, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Subcmap passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Msub));
    PetscCheck(Msub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");

    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_REUSE_MATRIX, PETSC_FALSE, &Msub));

  } else { /* call == MAT_INITIAL_MATRIX) */
    PetscBool flg;

    PetscCall(ISGetLocalSize(iscol, &n));
    PetscCall(ISGetSize(iscol, &Ncols));

    /* (1) iscol -> nonscalable iscol_local */
    /* Check for special case: each processor gets entire matrix columns */
    PetscCall(ISIdentity(iscol_local, &flg));
    if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
    PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (allcolumns) {
      iscol_sub = iscol_local;
      PetscCall(PetscObjectReference((PetscObject)iscol_local));
      PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &iscmap));

    } else {
      /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
      PetscInt *idx, *cmap1, k;
      PetscCall(PetscMalloc1(Ncols, &idx));
      PetscCall(PetscMalloc1(Ncols, &cmap1));
      PetscCall(ISGetIndices(iscol_local, &is_idx));
      count = 0;
      k     = 0;
      /* keep only the requested columns this rank actually touches: those in the diagonal
         block [cstart,cend) plus those present in garray (off-diagonal columns) */
      for (i = 0; i < Ncols; i++) {
        j = is_idx[i];
        if (j >= cstart && j < cend) {
          /* diagonal part of mat */
          idx[count]     = j;
          cmap1[count++] = i; /* column index in submat */
        } else if (Bn) {
          /* off-diagonal part of mat */
          if (j == garray[k]) {
            idx[count]     = j;
            cmap1[count++] = i; /* column index in submat */
          } else if (j > garray[k]) {
            while (j > garray[k] && k < Bn - 1) k++;
            if (j == garray[k]) {
              idx[count]     = j;
              cmap1[count++] = i; /* column index in submat */
            }
          }
        }
      }
      PetscCall(ISRestoreIndices(iscol_local, &is_idx));

      PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_OWN_POINTER, &iscol_sub));
      PetscCall(ISGetBlockSize(iscol, &cbs));
      PetscCall(ISSetBlockSize(iscol_sub, cbs));

      PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local), count, cmap1, PETSC_OWN_POINTER, &iscmap));
    }

    /* (3) Create sequential Msub */
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_INITIAL_MATRIX, allcolumns, &Msub));
  }

  PetscCall(ISGetLocalSize(iscol_sub, &count));
  aij = (Mat_SeqAIJ *)(Msub)->data;
  ii  = aij->i;
  PetscCall(ISGetIndices(iscmap, &cmap));

  /*
      m - number of local rows
      Ncols - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetSize(Msub, &m, NULL));

  if (call == MAT_INITIAL_MATRIX) {
    /* (4) Create parallel newmat */
    PetscMPIInt rank, size;
    PetscInt    csize;

    PetscCallMPI(MPI_Comm_size(comm, &size));
    PetscCallMPI(MPI_Comm_rank(comm, &rank));

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    PetscCall(ISGetLocalSize(iscol, &csize));
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow, &mglobal));
      if (mglobal == Ncols) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = Ncols / size + ((Ncols % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    /* prefix-sum the local column counts to get this rank's [rstart,rend) column range */
    PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
    rstart = rend - nlocal;
    PetscCheck(rank != size - 1 || rend == Ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, Ncols);

    /* next, compute all the lengths */
    jj = aij->j;
    PetscCall(PetscMalloc1(2 * m + 1, &dlens));
    olens = dlens + m; /* olens shares the single allocation with dlens */
    for (i = 0; i < m; i++) {
      jend = ii[i + 1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j = 0; j < jend; j++) {
        if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }

    PetscCall(ISGetBlockSize(isrow, &bs));
    PetscCall(ISGetBlockSize(iscol, &cbs));

    PetscCall(MatCreate(comm, &M));
    PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, Ncols));
    PetscCall(MatSetBlockSizes(M, bs, cbs));
    PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
    PetscCall(PetscFree(dlens));

  } else { /* call == MAT_REUSE_MATRIX */
    M = *newmat;
    PetscCall(MatGetLocalSize(M, &i, NULL));
    PetscCheck(i == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
       The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }

  /* (5) Set values of Msub to *newmat */
  PetscCall(PetscMalloc1(count, &colsub));
  PetscCall(MatGetOwnershipRange(M, &rstart, NULL));

  jj = aij->j;
  PetscCall(MatSeqAIJGetArrayRead(Msub, (const PetscScalar **)&aa));
  for (i = 0; i < m; i++) {
    row = rstart + i;
    nz  = ii[i + 1] - ii[i];
    for (j = 0; j < nz; j++) colsub[j] = cmap[jj[j]]; /* translate Msub columns to global columns of M */
    PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, colsub, aa, INSERT_VALUES));
    jj += nz;
    aa += nz;
  }
  /* NOTE(review): aa has been advanced past the last row here; this matches the existing
     pattern in MatCreateSubMatrix_MPIAIJ_nonscalable(), presumably Restore does not rely
     on the pointer value -- confirm against MatSeqAIJRestoreArrayRead() semantics */
  PetscCall(MatSeqAIJRestoreArrayRead(Msub, (const PetscScalar **)&aa));
  PetscCall(ISRestoreIndices(iscmap, &cmap));

  PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));

  PetscCall(PetscFree(colsub));

  /* save Msub, iscol_sub and iscmap used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    *newmat = M;
    PetscCall(PetscObjectCompose((PetscObject)*newmat, "SubMatrix", (PetscObject)Msub));
    PetscCall(MatDestroy(&Msub));

    PetscCall(PetscObjectCompose((PetscObject)*newmat, "SubIScol", (PetscObject)iscol_sub));
    PetscCall(ISDestroy(&iscol_sub));

    PetscCall(PetscObjectCompose((PetscObject)*newmat, "Subcmap", (PetscObject)iscmap));
    PetscCall(ISDestroy(&iscmap));

    if (iscol_local) {
      PetscCall(PetscObjectCompose((PetscObject)*newmat,
"ISAllGather", (PetscObject)iscol_local));
      PetscCall(ISDestroy(&iscol_local));
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
    Not great since it makes two copies of the submatrix, first an SeqAIJ
  in local and then by concatenating the local matrices the end result.
  Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()

  This requires a sequential iscol with all indices.
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat)
{
  PetscMPIInt rank, size;
  PetscInt    i, m, n, rstart, row, rend, nz, *cwork, j, bs, cbs;
  PetscInt   *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
  Mat         M, Mreuse;
  MatScalar  *aa, *vwork;
  MPI_Comm    comm;
  Mat_SeqAIJ *aij;
  PetscBool   colflag, allcolumns = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));

  /* Check for special case: each processor gets entire matrix columns */
  PetscCall(ISIdentity(iscol, &colflag));
  PetscCall(ISGetLocalSize(iscol, &n));
  if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
  /* the decision must be unanimous across ranks since it changes the collective call below */
  PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));

  if (call == MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse));
    PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_REUSE_MATRIX, allcolumns, &Mreuse));
  } else {
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_INITIAL_MATRIX, allcolumns, &Mreuse));
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetSize(Mreuse, &m, &n));
  PetscCall(MatGetBlockSizes(Mreuse, &bs, &cbs));
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ *)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow, &mglobal));
      if (mglobal == n) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = n / size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    /* prefix-sum the local column counts to get this rank's [rstart,rend) column range */
    PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
    rstart = rend - nlocal;
    PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n);

    /* next, compute all the lengths */
    PetscCall(PetscMalloc1(2 * m + 1, &dlens));
    olens = dlens + m; /* olens shares the single allocation with dlens */
    for (i = 0; i < m; i++) {
      jend = ii[i + 1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j = 0; j < jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    PetscCall(MatCreate(comm, &M));
    PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, n));
    PetscCall(MatSetBlockSizes(M, bs, cbs));
    PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
    PetscCall(PetscFree(dlens));
  } else {
    PetscInt ml, nl;

    M = *newmat;
    PetscCall(MatGetLocalSize(M, &ml, &nl));
    PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
       The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  PetscCall(MatGetOwnershipRange(M, &rstart, &rend));
  aij = (Mat_SeqAIJ *)(Mreuse)->data;
  ii  = aij->i;
  jj  = aij->j;

  /* trigger copy to CPU if needed */
  PetscCall(MatSeqAIJGetArrayRead(Mreuse, (const PetscScalar **)&aa));
  for (i = 0; i < m; i++) {
    row   = rstart + i;
    nz    = ii[i + 1] - ii[i];
    cwork = jj;
    jj    = PetscSafePointerPlusOffset(jj, nz);
    vwork = aa;
    aa    = PetscSafePointerPlusOffset(aa, nz);
    PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES));
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Mreuse, (const PetscScalar **)&aa));

  PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse));
    PetscCall(MatDestroy(&Mreuse));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Implementation behind MatMPIAIJSetPreallocationCSR(): preallocate B from the local CSR
   triple (Ii, J, v) and insert the values, then record per-row below-diagonal counts in ld */
static PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[])
{
  PetscInt        m, cstart, cend, j, nnz, i, d, *ld;
  PetscInt       *d_nnz, *o_nnz, nnz_max = 0, rstart, ii;
  const PetscInt *JJ;
  PetscBool       nooffprocentries;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)B->data;

  PetscFunctionBegin;
  PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Ii[0] must be 0 it is %" PetscInt_FMT, Ii[0]);

  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  m      = B->rmap->n;
  cstart = B->cmap->rstart;
  cend   = B->cmap->rend;
  rstart = B->rmap->rstart;

  PetscCall(PetscCalloc2(m, &d_nnz, m, &o_nnz));

  /* debug-only validation of the CSR input: non-negative row lengths and column indices in range
     (only the first/last index of each row is range-checked; assumes rows are sorted -- see manual page) */
  if (PetscDefined(USE_DEBUG)) {
    for (i = 0; i < m; i++) {
      nnz = Ii[i + 1] - Ii[i];
      JJ  = PetscSafePointerPlusOffset(J, Ii[i]);
      PetscCheck(nnz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns", i, nnz);
      PetscCheck(!nnz || !(JJ[0] < 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT, i, JJ[0]);
      PetscCheck(!nnz || !(JJ[nnz - 1] >= B->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")", i, JJ[nnz - 1], B->cmap->N);
    }
  }

  /* split each row's count into diagonal-block (d_nnz) and off-diagonal (o_nnz) parts */
  for (i = 0; i < m; i++) {
    nnz     = Ii[i + 1] - Ii[i];
    JJ      = PetscSafePointerPlusOffset(J, Ii[i]);
    nnz_max = PetscMax(nnz_max, nnz);
    d       = 0;
    for (j = 0; j < nnz; j++) {
      if (cstart <= JJ[j] && JJ[j] < cend) d++;
    }
    d_nnz[i] = d;
    o_nnz[i] = nnz - d;
  }
  PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
  PetscCall(PetscFree2(d_nnz, o_nnz));

  for (i = 0; i < m; i++) {
    ii = i + rstart;
    PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, Ii[i + 1] - Ii[i], PetscSafePointerPlusOffset(J, Ii[i]), PetscSafePointerPlusOffset(v, Ii[i]), INSERT_VALUES));
  }
  /* all entries are local by construction, so skip the off-process stash during assembly */
  nooffprocentries    = B->nooffprocentries;
  B->nooffprocentries = PETSC_TRUE;
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  B->nooffprocentries = nooffprocentries;

  /* count number of entries below block diagonal */
  PetscCall(PetscFree(Aij->ld));
  PetscCall(PetscCalloc1(m, &ld));
  Aij->ld = ld;
  for (i = 0; i < m; i++) {
    nnz = Ii[i + 1] - Ii[i];
    j   = 0;
    /* ld[i] = leading entries of row i whose global column precedes the diagonal block */
    while (j < nnz && J[j] < cstart) j++;
    ld[i] = j;
    if (J) J += nnz;
  }

  PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in `MATAIJ` format
  (the default parallel PETSc format).

  Collective

  Input Parameters:
+ B - the matrix
. i - the indices into `j` for the start of each local row (indices start with zero)
. j - the column indices for each local row (indices start with zero)
- v - optional values in the matrix

  Level: developer

  Notes:
  The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc;
  thus you CANNOT change the matrix entries by changing the values of `v` after you have
  called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.

  The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array.

  A convenience routine for this functionality is `MatCreateMPIAIJWithArrays()`.

  You can update the matrix with new numerical values using `MatUpdateMPIAIJWithArrays()` after this call if the column indices in `j` are sorted.

  If you do **not** use `MatUpdateMPIAIJWithArrays()`, the column indices in `j` do not need to be sorted. If you will use
  `MatUpdateMPIAIJWithArrays()`, the column indices **must** be sorted.

  The format which is used for the sparse matrix input, is equivalent to a
  row-major ordering,
i.e for the following matrix, the input data expected is
  as shown
.vb
        1 0 0
        2 0 3     P0
       -------
        4 5 6     P1

  Process0 [P0] rows_owned=[0,1]
    i =  {0,1,3}  [size = nrow+1  = 2+1]
    j =  {0,0,2}  [size = 3]
    v =  {1,2,3}  [size = 3]

  Process1 [P1] rows_owned=[2]
    i =  {0,3}    [size = nrow+1  = 1+1]
    j =  {0,1,2}  [size = 3]
    v =  {4,5,6}  [size = 3]
.ve

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`,
          `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()`, `MatCreateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
@*/
PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
{
  PetscFunctionBegin;
  /* dispatch to the type-specific implementation (MatMPIAIJSetPreallocationCSR_MPIAIJ for MATMPIAIJ);
     a no-op if the matrix type does not provide the method */
  PetscTryMethod(B, "MatMPIAIJSetPreallocationCSR_C", (Mat, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, i, j, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in `MATMPIAIJ` format
  (the default parallel PETSc format). For good matrix assembly performance
  the user should preallocate the matrix storage by setting the parameters
  `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).

  Collective

  Input Parameters:
+ B - the matrix
. d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
  (same value is used for all local rows)
. d_nnz - array containing the number of nonzeros in the various rows of the
  DIAGONAL portion of the local submatrix (possibly different for each row)
  or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `d_nz` is used to specify the nonzero structure.
  The size of this array is equal to the number of local rows, i.e 'm'.
  For matrices that will be factored, you must leave room for (and set)
  the diagonal entry even if it is zero.
. o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
  submatrix (same value is used for all local rows).
- o_nnz - array containing the number of nonzeros in the various rows of the
  OFF-DIAGONAL portion of the local submatrix (possibly different for
  each row) or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `o_nz` is used to specify the nonzero
  structure. The size of this array is equal to the number
  of local rows, i.e 'm'.

  Example Usage:
  Consider the following 8x8 matrix with 34 non-zero values, that is
  assembled across 3 processors. Let's assume that proc0 owns 3 rows,
  proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
  as follows

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

  This can be represented as a collection of submatrices as
.vb
      A B C
      D E F
      G H I
.ve

  Where the submatrices A,B,C are owned by proc0, D,E,F are
  owned by proc1, G,H,I are owned by proc2.

  The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
  The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
  The 'M','N' parameters are 8,8, and have the same values on all procs.

  The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
  submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
  corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
  Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
  part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ`
  matrix, and [DF] as another `MATSEQAIJ` matrix.

  When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
  allocated for every row of the local diagonal submatrix, and `o_nz`
  storage locations are allocated for every row of the OFF-DIAGONAL submat.
  One way to choose `d_nz` and `o_nz` is to use the max nonzeros per local
  rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
  In this case, the values of `d_nz`, `o_nz` are
.vb
   proc0  dnz = 2, o_nz = 2
   proc1  dnz = 3, o_nz = 2
   proc2  dnz = 1, o_nz = 4
.ve
  We are allocating `m`*(`d_nz`+`o_nz`) storage locations for every proc. This
  translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
  for proc2. i.e we are using 12+15+10=37 storage locations to store
  34 values.

  When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
  for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
  In the above case the values for `d_nnz`, `o_nnz` are
.vb
   proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2]
   proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1]
   proc2 d_nnz = [1,1]   and o_nnz = [4,4]
.ve
  Here the space allocated is sum of all the above values i.e 34, and
  hence pre-allocation is perfect.

  Level: intermediate

  Notes:
  If the *_nnz parameter is given then the *_nz parameter is ignored

  The `MATAIJ` format, also called compressed row storage (CSR), is compatible with standard Fortran
  storage. The stored row and column indices begin with zero.
  See [Sparse Matrices](sec_matsparse) for details.

  The parallel matrix is partitioned such that the first m0 rows belong to
  process 0, the next m1 rows belong to process 1, the next m2 rows belong
  to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
4120 4121 The DIAGONAL portion of the local submatrix of a processor can be defined 4122 as the submatrix which is obtained by extraction the part corresponding to 4123 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 4124 first row that belongs to the processor, r2 is the last row belonging to 4125 the this processor, and c1-c2 is range of indices of the local part of a 4126 vector suitable for applying the matrix to. This is an mxn matrix. In the 4127 common case of a square matrix, the row and column ranges are the same and 4128 the DIAGONAL part is also square. The remaining portion of the local 4129 submatrix (mxN) constitute the OFF-DIAGONAL portion. 4130 4131 If `o_nnz` and `d_nnz` are specified, then `o_nz` and `d_nz` are ignored. 4132 4133 You can call `MatGetInfo()` to get information on how effective the preallocation was; 4134 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 4135 You can also run with the option `-info` and look for messages with the string 4136 malloc in them to see if additional memory allocation was needed. 4137 4138 .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MATMPIAIJ`, `MATAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`, 4139 `MatGetInfo()`, `PetscSplitOwnership()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4140 @*/ 4141 PetscErrorCode MatMPIAIJSetPreallocation(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[]) 4142 { 4143 PetscFunctionBegin; 4144 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 4145 PetscValidType(B, 1); 4146 PetscTryMethod(B, "MatMPIAIJSetPreallocation_C", (Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, d_nz, d_nnz, o_nz, o_nnz)); 4147 PetscFunctionReturn(PETSC_SUCCESS); 4148 } 4149 4150 /*@ 4151 MatCreateMPIAIJWithArrays - creates a `MATMPIAIJ` matrix using arrays that contain in standard 4152 CSR format for the local rows. 

  Collective

  Input Parameters:
+ comm - MPI communicator
. m    - number of local rows (Cannot be `PETSC_DECIDE`)
. n    - This value should be the same as the local size used in creating the
         x vector for the matrix-vector product $ y = Ax$. (or `PETSC_DECIDE` to have
         calculated if `N` is given) For square matrices n is almost always `m`.
. M    - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
. N    - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
. i    - row indices (of length m+1); that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
. j    - global column indices
- a    - optional matrix values

  Output Parameter:
. mat - the matrix

  Level: intermediate

  Notes:
  The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc;
  thus you CANNOT change the matrix entries by changing the values of `a[]` after you have
  called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.

  The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array.

  Once you have created the matrix you can update it with new numerical values using `MatUpdateMPIAIJWithArray()`

  If you do **not** use `MatUpdateMPIAIJWithArray()`, the column indices in `j` do not need to be sorted. If you will use
  `MatUpdateMPIAIJWithArrays()`, the column indices **must** be sorted.

  The format which is used for the sparse matrix input, is equivalent to a
  row-major ordering, i.e., for the following matrix, the input data expected is
  as shown
.vb
        1 0 0
        2 0 3   P0
       -------
        4 5 6   P1

     Process0 [P0] rows_owned=[0,1]
        i =  {0,1,3}  [size = nrow+1  = 2+1]
        j =  {0,0,2}  [size = 3]
        v =  {1,2,3}  [size = 3]

     Process1 [P1] rows_owned=[2]
        i =  {0,3}    [size = nrow+1  = 1+1]
        j =  {0,1,2}  [size = 3]
        v =  {4,5,6}  [size = 3]
.ve

.seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
          `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
@*/
PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
{
  PetscFunctionBegin;
  /* CSR convention: the first row offset must be 0 (a NULL i is tolerated by this check) */
  PetscCheck(!i || !i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatSetSizes(*mat, m, n, M, N));
  /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */
  PetscCall(MatSetType(*mat, MATMPIAIJ));
  PetscCall(MatMPIAIJSetPreallocationCSR(*mat, i, j, a));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatUpdateMPIAIJWithArrays - updates a `MATMPIAIJ` matrix using arrays that contain in standard
  CSR format for the local rows.
  Only the numerical values are updated; the other arrays must be identical to what was passed
  from `MatCreateMPIAIJWithArrays()`

  Deprecated: Use `MatUpdateMPIAIJWithArray()`

  Collective

  Input Parameters:
+ mat - the matrix
. m   - number of local rows (Cannot be `PETSC_DECIDE`)
. n   - This value should be the same as the local size used in creating the
        x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
        calculated if N is given) For square matrices n is almost always m.
. M   - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
. N   - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
. Ii  - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
. J   - column indices
- v   - matrix values

  Level: deprecated

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
          `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
@*/
PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[])
{
  PetscInt        nnz, i;
  PetscBool       nooffprocentries;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)mat->data;
  Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ *)Aij->A->data;
  PetscScalar    *ad, *ao;
  PetscInt        ldi, Iii, md;
  const PetscInt *Adi = Ad->i;  /* row offsets of the local diagonal block */
  PetscInt       *ld  = Aij->ld; /* per-row count of off-diagonal entries preceding the diagonal block -- TODO confirm against Mat_MPIAIJ */

  PetscFunctionBegin;
  PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCheck(m == mat->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
  PetscCheck(n == mat->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");

  PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad));
  PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao));

  for (i = 0; i < m; i++) {
    if (PetscDefined(USE_DEBUG)) {
      /* in debug builds verify that each row's column indices are sorted and unique, as the copy below requires */
      for (PetscInt j = Ii[i] + 1; j < Ii[i + 1]; ++j) {
        PetscCheck(J[j] >= J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is not sorted", j - Ii[i], J[j], i);
        PetscCheck(J[j] != J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is identical to previous entry", j - Ii[i], J[j], i);
      }
    }
    /* row i of v is laid out as [ldi off-diagonal entries left of the diagonal block | md diagonal-block entries | remaining off-diagonal entries] */
    nnz = Ii[i + 1] - Ii[i];
    Iii = Ii[i];
    ldi = ld[i];
    md  = Adi[i + 1] - Adi[i];
    PetscCall(PetscArraycpy(ao, v + Iii, ldi));
    PetscCall(PetscArraycpy(ad, v + Iii + ldi, md));
    PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md));
    ad += md;
    ao += nnz - md;
  }
  /* values were written directly into the local blocks, so assembly needs no off-process communication */
  nooffprocentries      = mat->nooffprocentries;
  mat->nooffprocentries = PETSC_TRUE;
  PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad));
  PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao));
  PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
  PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
  mat->nooffprocentries = nooffprocentries;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatUpdateMPIAIJWithArray - updates an `MATMPIAIJ` matrix using an array that contains the
  nonzero values

  Collective

  Input Parameters:
+ mat - the matrix
- v   - matrix values, stored by row

  Level: intermediate

  Notes:
  The matrix must have been obtained with `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()`

  The column indices in the call to `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` must have been sorted for this call to work correctly

.seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
          `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
@*/
PetscErrorCode MatUpdateMPIAIJWithArray(Mat mat, const PetscScalar v[])
{
  PetscInt        nnz, i, m;
  PetscBool       nooffprocentries;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)mat->data;
  Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ *)Aij->A->data;
  Mat_SeqAIJ     *Ao  = (Mat_SeqAIJ *)Aij->B->data;
  PetscScalar    *ad, *ao;
  const PetscInt *Adi = Ad->i, *Adj = Ao->i; /* row offsets of the diagonal (A) and off-diagonal (B) blocks */
  PetscInt        ldi, Iii, md;
  PetscInt       *ld = Aij->ld; /* per-row count of off-diagonal entries preceding the diagonal block -- TODO confirm against Mat_MPIAIJ */

  PetscFunctionBegin;
  m = mat->rmap->n;

  PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad));
  PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao));
  Iii = 0; /* running offset of row i within v */
  for (i = 0; i < m; i++) {
    /* total entries in row i = diagonal-block entries + off-diagonal entries */
    nnz = Adi[i + 1] - Adi[i] + Adj[i + 1] - Adj[i];
    ldi = ld[i];
    md  = Adi[i + 1] - Adi[i];
    /* row i of v is laid out as [ldi off-diagonal entries left of the diagonal block | md diagonal-block entries | remaining off-diagonal entries] */
    PetscCall(PetscArraycpy(ad, v + Iii + ldi, md));
    ad += md;
    if (ao) { /* ao may be NULL, presumably when the off-diagonal block holds no values -- NOTE(review): confirm */
      PetscCall(PetscArraycpy(ao, v + Iii, ldi));
      PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md));
      ao += nnz - md;
    }
    Iii += nnz;
  }
  /* values were written directly into the local blocks, so assembly needs no off-process communication */
  nooffprocentries      = mat->nooffprocentries;
  mat->nooffprocentries = PETSC_TRUE;
  PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad));
  PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao));
  PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
  PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
  mat->nooffprocentries = nooffprocentries;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatCreateAIJ - Creates a sparse parallel matrix in `MATAIJ` format
  (the default parallel PETSc format). For good matrix assembly performance
  the user should preallocate the matrix storage by setting the parameters
  `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).

  Collective

  Input Parameters:
+ comm - MPI communicator
. m    - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
         This value should be the same as the local size used in creating the
         y vector for the matrix-vector product y = Ax.
. n    - This value should be the same as the local size used in creating the
         x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
         calculated if N is given) For square matrices n is almost always m.
. M    - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
. N    - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
. d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
         (same value is used for all local rows)
. d_nnz - array containing the number of nonzeros in the various rows of the
          DIAGONAL portion of the local submatrix (possibly different for each row)
          or `NULL`, if `d_nz` is used to specify the nonzero structure.
          The size of this array is equal to the number of local rows, i.e. 'm'.
. o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
         submatrix (same value is used for all local rows).
- o_nnz - array containing the number of nonzeros in the various rows of the
          OFF-DIAGONAL portion of the local submatrix (possibly different for
          each row) or `NULL`, if `o_nz` is used to specify the nonzero
          structure. The size of this array is equal to the number
          of local rows, i.e. 'm'.

  Output Parameter:
. A - the matrix

  Options Database Keys:
+ -mat_no_inode            - Do not use inodes
. -mat_inode_limit <limit> - Sets inode limit (max limit=5)
- -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in `MatMult()` of sparse parallel matrices.
                                      See viewer types in manual of `MatView()`. Of them, ascii_matlab, draw or binary cause the `VecScatter`
                                      to be viewed as a matrix. Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one `MatMult()` call.

  Level: intermediate

  Notes:
  It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
  MatXXXXSetPreallocation() paradigm instead of this routine directly.
  [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]

  If the *_nnz parameter is given then the *_nz parameter is ignored

  The `m`,`n`,`M`,`N` parameters specify the size of the matrix, and its partitioning across
  processors, while `d_nz`,`d_nnz`,`o_nz`,`o_nnz` parameters specify the approximate
  storage requirements for this matrix.

  If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one
  processor then it must be used on all processors that share the object for
  that argument.

  If `m` and `n` are not `PETSC_DECIDE`, then the values determine the `PetscLayout` of the matrix and the ranges returned by
  `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, and `MatGetOwnershipRangesColumn()`.

  The user MUST specify either the local or global matrix dimensions
  (possibly both).

  The parallel matrix is partitioned across processors such that the
  first `m0` rows belong to process 0, the next `m1` rows belong to
  process 1, the next `m2` rows belong to process 2, etc., where
  `m0`, `m1`, `m2`... are the input parameter `m` on each MPI process. I.e., each MPI process stores
  values corresponding to [m x N] submatrix.

  The columns are logically partitioned with the n0 columns belonging
  to 0th partition, the next n1 columns belonging to the next
  partition etc., where n0,n1,n2... are the input parameter 'n'.

  The DIAGONAL portion of the local submatrix on any given processor
  is the submatrix corresponding to the rows and columns m,n
  corresponding to the given processor. i.e diagonal matrix on
  process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
  etc. The remaining portion of the local submatrix [m x (N-n)]
  constitutes the OFF-DIAGONAL portion. The example below better
  illustrates this concept.

  For a square global matrix we define each processor's diagonal portion
  to be its local rows and the corresponding columns (a square submatrix);
  each processor's off-diagonal portion encompasses the remainder of the
  local matrix (a rectangular submatrix).

  If `o_nnz`, `d_nnz` are specified, then `o_nz`, and `d_nz` are ignored.

  When calling this routine with a single process communicator, a matrix of
  type `MATSEQAIJ` is returned. If a matrix of type `MATMPIAIJ` is desired for this
  type of communicator, use the construction mechanism
.vb
  MatCreate(..., &A);
  MatSetType(A, MATMPIAIJ);
  MatSetSizes(A, m, n, M, N);
  MatMPIAIJSetPreallocation(A, ...);
.ve

  By default, this format uses inodes (identical nodes) when possible.
  We search for consecutive rows with the same nonzero structure, thereby
  reusing matrix information to achieve increased efficiency.

  Example Usage:
  Consider the following 8x8 matrix with 34 non-zero values, that is
  assembled across 3 processors. Let's assume that proc0 owns 3 rows,
  proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
  as follows

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

  This can be represented as a collection of submatrices as

.vb
      A B C
      D E F
      G H I
.ve

  Where the submatrices A,B,C are owned by proc0, D,E,F are
  owned by proc1, G,H,I are owned by proc2.

  The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
  The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
  The 'M','N' parameters are 8,8, and have the same values on all procs.

  The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
  submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
  corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
  Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
  part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ`
  matrix, and [DF] as another SeqAIJ matrix.

  When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
  allocated for every row of the local diagonal submatrix, and `o_nz`
  storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
  One way to choose `d_nz` and `o_nz` is to use the max nonzeros per local
  row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
  In this case, the values of `d_nz`,`o_nz` are
.vb
     proc0  d_nz = 2, o_nz = 2
     proc1  d_nz = 3, o_nz = 2
     proc2  d_nz = 1, o_nz = 4
.ve
  We are allocating m*(`d_nz`+`o_nz`) storage locations for every proc. This
  translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
  for proc2, i.e. we are using 12+15+10=37 storage locations to store
  34 values.

  When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
  for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
  In the above case the values for d_nnz,o_nnz are
.vb
     proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2 d_nnz = [1,1]   and o_nnz = [4,4]
.ve
  Here the space allocated is the sum of all the above values, i.e. 34, and
  hence pre-allocation is perfect.

.seealso: [](ch_matrices), `Mat`, [Sparse Matrix Creation](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
          `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()`, `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`,
          `MatGetOwnershipRangesColumn()`, `PetscLayout`
@*/
PetscErrorCode MatCreateAIJ(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
{
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCall(MatCreate(comm, A));
  PetscCall(MatSetSizes(*A, m, n, M, N));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (size > 1) {
    PetscCall(MatSetType(*A, MATMPIAIJ));
    PetscCall(MatMPIAIJSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz));
  } else {
    /* single-process communicator: a MATSEQAIJ matrix is created instead (see Notes above) */
    PetscCall(MatSetType(*A, MATSEQAIJ));
    PetscCall(MatSeqAIJSetPreallocation(*A, d_nz, d_nnz));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
  MatMPIAIJGetSeqAIJF90 - Returns the local pieces of this distributed matrix

  Synopsis:
  MatMPIAIJGetSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr)

  Not Collective

  Input Parameter:
. A - the `MATMPIAIJ` matrix

  Output Parameters:
+ Ad - the diagonal portion of the matrix
. Ao - the off-diagonal portion of the matrix
.
colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
- ierr - error code

  Level: advanced

  Note:
  Use `MatMPIAIJRestoreSeqAIJF90()` when you no longer need access to the matrices and `colmap`

.seealso: [](ch_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJRestoreSeqAIJF90()`
M*/

/*MC
  MatMPIAIJRestoreSeqAIJF90 - call after `MatMPIAIJGetSeqAIJF90()` when you no longer need access to the matrices and `colmap`

  Synopsis:
  MatMPIAIJRestoreSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr)

  Not Collective

  Input Parameters:
+ A      - the `MATMPIAIJ` matrix
. Ad     - the diagonal portion of the matrix
. Ao     - the off-diagonal portion of the matrix
. colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
- ierr   - error code

  Level: advanced

.seealso: [](ch_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJGetSeqAIJF90()`
M*/

/*@C
  MatMPIAIJGetSeqAIJ - Returns the local pieces of this distributed matrix

  Not Collective

  Input Parameter:
. A - The `MATMPIAIJ` matrix

  Output Parameters:
+ Ad     - The local diagonal block as a `MATSEQAIJ` matrix
. Ao     - The local off-diagonal block as a `MATSEQAIJ` matrix
- colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix

  Level: intermediate

  Note:
  The rows in `Ad` and `Ao` are in [0, Nr), where Nr is the number of local rows on this process. The columns
  in `Ad` are in [0, Nc) where Nc is the number of local columns. The columns of `Ao` are in [0, Nco), where Nco is
  the number of nonzero columns in the local off-diagonal piece of the matrix `A`. The array colmap maps these
  local column numbers to global column numbers in the original matrix.

  Fortran Notes:
  `MatMPIAIJGetSeqAIJ()` Fortran binding is deprecated (since PETSc 3.19), use `MatMPIAIJGetSeqAIJF90()`

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJF90()`, `MatMPIAIJRestoreSeqAIJF90()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATSEQAIJ`
@*/
PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[])
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  PetscBool   flg;

  PetscFunctionBegin;
  /* accept MATMPIAIJ and any type whose name begins with "mpiaij" (prefix match, not exact type compare) */
  PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &flg));
  PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIAIJ matrix as input");
  if (Ad) *Ad = a->A;      /* diagonal block */
  if (Ao) *Ao = a->B;      /* off-diagonal block */
  if (colmap) *colmap = a->garray; /* local-to-global column map of the off-diagonal block */
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Concatenate the rows of the sequential matrices held by the processes of comm into one parallel matrix.
   n is the local column size of the result (or PETSC_DECIDE); scall selects symbolic+numeric (MAT_INITIAL_MATRIX)
   or numeric-only reuse of *outmat. */
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
{
  PetscInt     m, N, i, rstart, nnz, Ii;
  PetscInt    *indx;
  PetscScalar *values;
  MatType      rootType;

  PetscFunctionBegin;
  PetscCall(MatGetSize(inmat, &m, &N));
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    PetscInt *dnz, *onz, sum, bs, cbs;

    if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N));
    /* Check sum(n) = N */
    PetscCall(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm));
    PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N);

    /* rstart = global index of this process's first row */
    PetscCallMPI(MPI_Scan(&m, &rstart, 1, MPIU_INT, MPI_SUM, comm));
    rstart -= m;

    MatPreallocateBegin(comm, m, n, dnz, onz);
    for (i = 0; i < m; i++) {
      PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, NULL));
      PetscCall(MatPreallocateSet(i + rstart, nnz, indx, dnz, onz));
      PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, NULL));
    }

    PetscCall(MatCreate(comm, outmat));
    PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
    PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
    PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
    PetscCall(MatGetRootType_Private(inmat, &rootType));
    PetscCall(MatSetType(*outmat, rootType));
    /* only one of these preallocations takes effect, depending on whether the actual type is seq or mpi */
    PetscCall(MatSeqAIJSetPreallocation(*outmat, 0, dnz));
    PetscCall(MatMPIAIJSetPreallocation(*outmat, 0, dnz, 0, onz));
    MatPreallocateEnd(dnz, onz);
    PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  }

  /* numeric phase: copy the local rows into the parallel matrix */
  PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));
  for (i = 0; i < m; i++) {
    PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, &values));
    Ii = i + rstart;
    PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES));
    PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, &values));
  }
  PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* PetscContainer destructor for the Mat_Merge_SeqsToMPI context attached by MatCreateMPIAIJSumSeqAIJSymbolic() */
static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data)
{
  Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data;

  PetscFunctionBegin;
  if (!merge) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCall(PetscFree(merge->id_r));
  PetscCall(PetscFree(merge->len_s));
  PetscCall(PetscFree(merge->len_r));
  PetscCall(PetscFree(merge->bi));
  PetscCall(PetscFree(merge->bj));
  /* buf_ri/buf_rj are arrays of pointers whose storage was allocated in one chunk anchored at [0] */
  PetscCall(PetscFree(merge->buf_ri[0]));
  PetscCall(PetscFree(merge->buf_ri));
  PetscCall(PetscFree(merge->buf_rj[0]));
  PetscCall(PetscFree(merge->buf_rj));
  PetscCall(PetscFree(merge->coi));
  PetscCall(PetscFree(merge->coj));
  PetscCall(PetscFree(merge->owners_co));
  PetscCall(PetscLayoutDestroy(&merge->rowmap));
  PetscCall(PetscFree(merge));
  PetscFunctionReturn(PETSC_SUCCESS);
}

#include <../src/mat/utils/freespace.h>
#include <petscbt.h>

/* Numeric phase: fill mpimat (created by MatCreateMPIAIJSumSeqAIJSymbolic()) with the sum of the
   per-process sequential matrices seqmat, reusing the communication structure stored in the
   attached Mat_Merge_SeqsToMPI context. */
PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat, Mat mpimat)
{
  MPI_Comm             comm;
  Mat_SeqAIJ          *a = (Mat_SeqAIJ *)seqmat->data;
  PetscMPIInt          size, rank, taga, *len_s;
  PetscInt             N = mpimat->cmap->N, i, j, *owners, *ai = a->i, *aj;
  PetscInt             proc, m;
  PetscInt           **buf_ri, **buf_rj;
  PetscInt             k, anzi, *bj_i, *bi, *bj, arow, bnzi, nextaj;
  PetscInt             nrows, **buf_ri_k, **nextrow, **nextai;
  MPI_Request         *s_waits, *r_waits;
  MPI_Status          *status;
  const MatScalar     *aa, *a_a;
  MatScalar          **abuf_r, *ba_i;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer       container;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mpimat, &comm));
  PetscCall(PetscLogEventBegin(MAT_Seqstompinum, seqmat, 0, 0, 0));

  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));

  /* the symbolic phase attached the merge context to mpimat; fail if it is absent */
  PetscCall(PetscObjectQuery((PetscObject)mpimat, "MatMergeSeqsToMPI", (PetscObject *)&container));
  PetscCheck(container, PetscObjectComm((PetscObject)mpimat), PETSC_ERR_PLIB, "Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic");
  PetscCall(PetscContainerGetPointer(container, (void **)&merge));
  PetscCall(MatSeqAIJGetArrayRead(seqmat, &a_a));
  aa = a_a;

  bi     = merge->bi;
  bj     = merge->bj;
  buf_ri = merge->buf_ri;
  buf_rj = merge->buf_rj;

  PetscCall(PetscMalloc1(size, &status));
  owners = merge->rowmap->range;
  len_s  = merge->len_s;

  /* send and recv matrix values */
  PetscCall(PetscObjectGetNewTag((PetscObject)mpimat, &taga));
  PetscCall(PetscPostIrecvScalar(comm, taga, merge->nrecv, merge->id_r, merge->len_r, &abuf_r, &r_waits));

  PetscCall(PetscMalloc1(merge->nsend + 1, &s_waits));
  for (proc = 0, k = 0; proc < size; proc++) {
    if (!len_s[proc]) continue;
    i = owners[proc]; /* first row owned by proc; its values start at aa + ai[i] */
    PetscCallMPI(MPI_Isend(aa + ai[i], len_s[proc], MPIU_MATSCALAR, proc, taga, comm, s_waits + k));
    k++;
  }

  if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, r_waits, status));
  if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, s_waits, status));
  PetscCall(PetscFree(status));

  PetscCall(PetscFree(s_waits));
  PetscCall(PetscFree(r_waits));

  /* insert mat values of mpimat */
  PetscCall(PetscMalloc1(N, &ba_i));
  PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai));

  for (k = 0; k < merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */
  }

  /* set values of ba */
  m = merge->rowmap->n;
  for (i = 0; i < m; i++) {
    arow = owners[rank] + i; /* global row index in seqmat's numbering */
    bj_i = bj + bi[i];       /* col indices of the i-th row of mpimat */
    bnzi = bi[i + 1] - bi[i];
    PetscCall(PetscArrayzero(ba_i, bnzi));

    /* add local non-zero vals of this proc's seqmat into ba */
    anzi   = ai[arow + 1] - ai[arow];
    aj     = a->j + ai[arow];
    aa     = a_a + ai[arow];
    nextaj = 0;
    /* lockstep scan: advance j through bj_i until it matches each column of aj in turn */
    for (j = 0; nextaj < anzi; j++) {
      if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
        ba_i[j] += aa[nextaj++];
      }
    }

    /* add received vals into ba */
    for (k = 0; k < merge->nrecv; k++) { /* k-th received message */
      /* i-th row */
      if (i == *nextrow[k]) {
        anzi   = *(nextai[k] + 1) - *nextai[k];
        aj     = buf_rj[k] + *nextai[k];
        aa     = abuf_r[k] + *nextai[k];
        nextaj = 0;
        for (j = 0; nextaj < anzi; j++) {
          if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
            ba_i[j] += aa[nextaj++];
          }
        }
        nextrow[k]++;
        nextai[k]++;
      }
    }
    PetscCall(MatSetValues(mpimat, 1, &arow, bnzi, bj_i, ba_i, INSERT_VALUES));
  }
  PetscCall(MatSeqAIJRestoreArrayRead(seqmat, &a_a));
  PetscCall(MatAssemblyBegin(mpimat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mpimat, MAT_FINAL_ASSEMBLY));

  PetscCall(PetscFree(abuf_r[0]));
  PetscCall(PetscFree(abuf_r));
  PetscCall(PetscFree(ba_i));
  PetscCall(PetscFree3(buf_ri_k, nextrow, nextai));
  PetscCall(PetscLogEventEnd(MAT_Seqstompinum, seqmat, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Symbolic phase companion of MatCreateMPIAIJSumSeqAIJNumeric(): determines the parallel nonzero
   structure of the sum of the per-process sequential matrices and attaches the communication
   context to the created matrix. */
PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, Mat *mpimat)
{
  Mat                  B_mpi;
  Mat_SeqAIJ          *a = (Mat_SeqAIJ *)seqmat->data;
  PetscMPIInt          size, rank, tagi, tagj, *len_s, *len_si, *len_ri;
  PetscInt           **buf_rj, **buf_ri, **buf_ri_k;
  PetscInt             M = seqmat->rmap->n, N = seqmat->cmap->n, i, *owners, *ai = a->i, *aj = a->j;
  PetscInt             len, proc, *dnz, *onz, bs, cbs;
  PetscInt             k, anzi, *bi, *bj, *lnk, nlnk, arow, bnzi;
  PetscInt             nrows, *buf_s, *buf_si, *buf_si_i, **nextrow, **nextai;
  MPI_Request         *si_waits, *sj_waits, *ri_waits, *rj_waits;
  MPI_Status          *status;
  PetscFreeSpaceList   free_space = NULL, current_space = NULL;
  PetscBT              lnkbt;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer       container;

  PetscFunctionBegin;
  PetscCall(PetscLogEventBegin(MAT_Seqstompisym, seqmat, 0, 0, 0));

  /* make sure it is a PETSc comm */
  PetscCall(PetscCommDuplicate(comm, &comm, NULL));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));

  PetscCall(PetscNew(&merge));
  PetscCall(PetscMalloc1(size, &status));

  /* determine row ownership */
  PetscCall(PetscLayoutCreate(comm, &merge->rowmap));
  PetscCall(PetscLayoutSetLocalSize(merge->rowmap, m));
PetscCall(PetscLayoutSetSize(merge->rowmap, M)); 4868 PetscCall(PetscLayoutSetBlockSize(merge->rowmap, 1)); 4869 PetscCall(PetscLayoutSetUp(merge->rowmap)); 4870 PetscCall(PetscMalloc1(size, &len_si)); 4871 PetscCall(PetscMalloc1(size, &merge->len_s)); 4872 4873 m = merge->rowmap->n; 4874 owners = merge->rowmap->range; 4875 4876 /* determine the number of messages to send, their lengths */ 4877 len_s = merge->len_s; 4878 4879 len = 0; /* length of buf_si[] */ 4880 merge->nsend = 0; 4881 for (proc = 0; proc < size; proc++) { 4882 len_si[proc] = 0; 4883 if (proc == rank) { 4884 len_s[proc] = 0; 4885 } else { 4886 len_si[proc] = owners[proc + 1] - owners[proc] + 1; 4887 len_s[proc] = ai[owners[proc + 1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4888 } 4889 if (len_s[proc]) { 4890 merge->nsend++; 4891 nrows = 0; 4892 for (i = owners[proc]; i < owners[proc + 1]; i++) { 4893 if (ai[i + 1] > ai[i]) nrows++; 4894 } 4895 len_si[proc] = 2 * (nrows + 1); 4896 len += len_si[proc]; 4897 } 4898 } 4899 4900 /* determine the number and length of messages to receive for ij-structure */ 4901 PetscCall(PetscGatherNumberOfMessages(comm, NULL, len_s, &merge->nrecv)); 4902 PetscCall(PetscGatherMessageLengths2(comm, merge->nsend, merge->nrecv, len_s, len_si, &merge->id_r, &merge->len_r, &len_ri)); 4903 4904 /* post the Irecv of j-structure */ 4905 PetscCall(PetscCommGetNewTag(comm, &tagj)); 4906 PetscCall(PetscPostIrecvInt(comm, tagj, merge->nrecv, merge->id_r, merge->len_r, &buf_rj, &rj_waits)); 4907 4908 /* post the Isend of j-structure */ 4909 PetscCall(PetscMalloc2(merge->nsend, &si_waits, merge->nsend, &sj_waits)); 4910 4911 for (proc = 0, k = 0; proc < size; proc++) { 4912 if (!len_s[proc]) continue; 4913 i = owners[proc]; 4914 PetscCallMPI(MPI_Isend(aj + ai[i], len_s[proc], MPIU_INT, proc, tagj, comm, sj_waits + k)); 4915 k++; 4916 } 4917 4918 /* receives and sends of j-structure are complete */ 4919 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, 
rj_waits, status)); 4920 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, sj_waits, status)); 4921 4922 /* send and recv i-structure */ 4923 PetscCall(PetscCommGetNewTag(comm, &tagi)); 4924 PetscCall(PetscPostIrecvInt(comm, tagi, merge->nrecv, merge->id_r, len_ri, &buf_ri, &ri_waits)); 4925 4926 PetscCall(PetscMalloc1(len + 1, &buf_s)); 4927 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4928 for (proc = 0, k = 0; proc < size; proc++) { 4929 if (!len_s[proc]) continue; 4930 /* form outgoing message for i-structure: 4931 buf_si[0]: nrows to be sent 4932 [1:nrows]: row index (global) 4933 [nrows+1:2*nrows+1]: i-structure index 4934 */ 4935 nrows = len_si[proc] / 2 - 1; 4936 buf_si_i = buf_si + nrows + 1; 4937 buf_si[0] = nrows; 4938 buf_si_i[0] = 0; 4939 nrows = 0; 4940 for (i = owners[proc]; i < owners[proc + 1]; i++) { 4941 anzi = ai[i + 1] - ai[i]; 4942 if (anzi) { 4943 buf_si_i[nrows + 1] = buf_si_i[nrows] + anzi; /* i-structure */ 4944 buf_si[nrows + 1] = i - owners[proc]; /* local row index */ 4945 nrows++; 4946 } 4947 } 4948 PetscCallMPI(MPI_Isend(buf_si, len_si[proc], MPIU_INT, proc, tagi, comm, si_waits + k)); 4949 k++; 4950 buf_si += len_si[proc]; 4951 } 4952 4953 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, ri_waits, status)); 4954 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, si_waits, status)); 4955 4956 PetscCall(PetscInfo(seqmat, "nsend: %d, nrecv: %d\n", merge->nsend, merge->nrecv)); 4957 for (i = 0; i < merge->nrecv; i++) PetscCall(PetscInfo(seqmat, "recv len_ri=%d, len_rj=%d from [%d]\n", len_ri[i], merge->len_r[i], merge->id_r[i])); 4958 4959 PetscCall(PetscFree(len_si)); 4960 PetscCall(PetscFree(len_ri)); 4961 PetscCall(PetscFree(rj_waits)); 4962 PetscCall(PetscFree2(si_waits, sj_waits)); 4963 PetscCall(PetscFree(ri_waits)); 4964 PetscCall(PetscFree(buf_s)); 4965 PetscCall(PetscFree(status)); 4966 4967 /* compute a local seq matrix in each processor */ 4968 /* allocate bi array and free space 
for accumulating nonzero column info */ 4969 PetscCall(PetscMalloc1(m + 1, &bi)); 4970 bi[0] = 0; 4971 4972 /* create and initialize a linked list */ 4973 nlnk = N + 1; 4974 PetscCall(PetscLLCreate(N, N, nlnk, lnk, lnkbt)); 4975 4976 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4977 len = ai[owners[rank + 1]] - ai[owners[rank]]; 4978 PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2, len) + 1, &free_space)); 4979 4980 current_space = free_space; 4981 4982 /* determine symbolic info for each local row */ 4983 PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai)); 4984 4985 for (k = 0; k < merge->nrecv; k++) { 4986 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4987 nrows = *buf_ri_k[k]; 4988 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4989 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4990 } 4991 4992 MatPreallocateBegin(comm, m, n, dnz, onz); 4993 len = 0; 4994 for (i = 0; i < m; i++) { 4995 bnzi = 0; 4996 /* add local non-zero cols of this proc's seqmat into lnk */ 4997 arow = owners[rank] + i; 4998 anzi = ai[arow + 1] - ai[arow]; 4999 aj = a->j + ai[arow]; 5000 PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt)); 5001 bnzi += nlnk; 5002 /* add received col data into lnk */ 5003 for (k = 0; k < merge->nrecv; k++) { /* k-th received message */ 5004 if (i == *nextrow[k]) { /* i-th row */ 5005 anzi = *(nextai[k] + 1) - *nextai[k]; 5006 aj = buf_rj[k] + *nextai[k]; 5007 PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt)); 5008 bnzi += nlnk; 5009 nextrow[k]++; 5010 nextai[k]++; 5011 } 5012 } 5013 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 5014 5015 /* if free space is not available, make more free space */ 5016 if (current_space->local_remaining < bnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi, current_space->total_array_size), ¤t_space)); 5017 /* copy data into free 
space, then initialize lnk */ 5018 PetscCall(PetscLLClean(N, N, bnzi, lnk, current_space->array, lnkbt)); 5019 PetscCall(MatPreallocateSet(i + owners[rank], bnzi, current_space->array, dnz, onz)); 5020 5021 current_space->array += bnzi; 5022 current_space->local_used += bnzi; 5023 current_space->local_remaining -= bnzi; 5024 5025 bi[i + 1] = bi[i] + bnzi; 5026 } 5027 5028 PetscCall(PetscFree3(buf_ri_k, nextrow, nextai)); 5029 5030 PetscCall(PetscMalloc1(bi[m] + 1, &bj)); 5031 PetscCall(PetscFreeSpaceContiguous(&free_space, bj)); 5032 PetscCall(PetscLLDestroy(lnk, lnkbt)); 5033 5034 /* create symbolic parallel matrix B_mpi */ 5035 PetscCall(MatGetBlockSizes(seqmat, &bs, &cbs)); 5036 PetscCall(MatCreate(comm, &B_mpi)); 5037 if (n == PETSC_DECIDE) { 5038 PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, N)); 5039 } else { 5040 PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 5041 } 5042 PetscCall(MatSetBlockSizes(B_mpi, bs, cbs)); 5043 PetscCall(MatSetType(B_mpi, MATMPIAIJ)); 5044 PetscCall(MatMPIAIJSetPreallocation(B_mpi, 0, dnz, 0, onz)); 5045 MatPreallocateEnd(dnz, onz); 5046 PetscCall(MatSetOption(B_mpi, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE)); 5047 5048 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 5049 B_mpi->assembled = PETSC_FALSE; 5050 merge->bi = bi; 5051 merge->bj = bj; 5052 merge->buf_ri = buf_ri; 5053 merge->buf_rj = buf_rj; 5054 merge->coi = NULL; 5055 merge->coj = NULL; 5056 merge->owners_co = NULL; 5057 5058 PetscCall(PetscCommDestroy(&comm)); 5059 5060 /* attach the supporting struct to B_mpi for reuse */ 5061 PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container)); 5062 PetscCall(PetscContainerSetPointer(container, merge)); 5063 PetscCall(PetscContainerSetUserDestroy(container, MatDestroy_MPIAIJ_SeqsToMPI)); 5064 PetscCall(PetscObjectCompose((PetscObject)B_mpi, "MatMergeSeqsToMPI", (PetscObject)container)); 5065 PetscCall(PetscContainerDestroy(&container)); 5066 
  *mpimat = B_mpi;

  PetscCall(PetscLogEventEnd(MAT_Seqstompisym, seqmat, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatCreateMPIAIJSumSeqAIJ - Creates a `MATMPIAIJ` matrix by adding sequential
  matrices from each processor

  Collective

  Input Parameters:
+ comm - the communicators the parallel matrix will live on
. seqmat - the input sequential matrices
. m - number of local rows (or `PETSC_DECIDE`)
. n - number of local columns (or `PETSC_DECIDE`)
- scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`

  Output Parameter:
. mpimat - the parallel matrix generated

  Level: advanced

  Note:
  The dimensions of the sequential matrix in each processor MUST be the same.
  The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
  destroyed when `mpimat` is destroyed. Call `PetscObjectQuery()` to access `seqmat`.

.seealso: [](ch_matrices), `Mat`, `MatCreateAIJ()`
@*/
PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, MatReuse scall, Mat *mpimat)
{
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (size == 1) {
    /* single-rank communicator: the "sum" is seqmat itself, so just duplicate/copy it */
    PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0));
    if (scall == MAT_INITIAL_MATRIX) {
      PetscCall(MatDuplicate(seqmat, MAT_COPY_VALUES, mpimat));
    } else {
      PetscCall(MatCopy(seqmat, *mpimat, SAME_NONZERO_PATTERN));
    }
    PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0));
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0));
  /* symbolic phase only on first use; the numeric phase (re)fills the values */
  if (scall == MAT_INITIAL_MATRIX) PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm, seqmat, m, n, mpimat));
  PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat, *mpimat));
  PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATAIJ` matrix.

  Not Collective

  Input Parameter:
. A - the matrix

  Output Parameter:
. A_loc - the local sequential matrix generated

  Level: developer

  Notes:
  The matrix is created by taking `A`'s local rows and putting them into a sequential matrix
  with `mlocal` rows and `n` columns. Where `mlocal` is obtained with `MatGetLocalSize()` and
  `n` is the global column count obtained with `MatGetSize()`

  In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix.

  For parallel matrices this creates an entirely new matrix. If the matrix is sequential it merely increases the reference count.

  Destroy the matrix with `MatDestroy()`

.seealso: [](ch_matrices), `Mat`, `MatMPIAIJGetLocalMat()`
@*/
PetscErrorCode MatAIJGetLocalMat(Mat A, Mat *A_loc)
{
  PetscBool mpi;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &mpi));
  if (mpi) {
    /* parallel AIJ: build a brand-new sequential matrix from the local rows */
    PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, A_loc));
  } else {
    /* already sequential: hand back A itself with an extra reference */
    *A_loc = A;
    PetscCall(PetscObjectReference((PetscObject)*A_loc));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMPIAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix.

  Not Collective

  Input Parameters:
+ A - the matrix
- scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`

  Output Parameter:
.
A_loc - the local sequential matrix generated 5172 5173 Level: developer 5174 5175 Notes: 5176 The matrix is created by taking all `A`'s local rows and putting them into a sequential 5177 matrix with `mlocal` rows and `n` columns.`mlocal` is the row count obtained with 5178 `MatGetLocalSize()` and `n` is the global column count obtained with `MatGetSize()`. 5179 5180 In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix. 5181 5182 When `A` is sequential and `MAT_INITIAL_MATRIX` is requested, the matrix returned is the diagonal part of `A` (which contains the entire matrix), 5183 with its reference count increased by one. Hence changing values of `A_loc` changes `A`. If `MAT_REUSE_MATRIX` is requested on a sequential matrix 5184 then `MatCopy`(Adiag,*`A_loc`,`SAME_NONZERO_PATTERN`) is called to fill `A_loc`. Thus one can preallocate the appropriate sequential matrix `A_loc` 5185 and then call this routine with `MAT_REUSE_MATRIX`. In this case, one can modify the values of `A_loc` without affecting the original sequential matrix. 
5186 5187 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()` 5188 @*/ 5189 PetscErrorCode MatMPIAIJGetLocalMat(Mat A, MatReuse scall, Mat *A_loc) 5190 { 5191 Mat_MPIAIJ *mpimat = (Mat_MPIAIJ *)A->data; 5192 Mat_SeqAIJ *mat, *a, *b; 5193 PetscInt *ai, *aj, *bi, *bj, *cmap = mpimat->garray; 5194 const PetscScalar *aa, *ba, *aav, *bav; 5195 PetscScalar *ca, *cam; 5196 PetscMPIInt size; 5197 PetscInt am = A->rmap->n, i, j, k, cstart = A->cmap->rstart; 5198 PetscInt *ci, *cj, col, ncols_d, ncols_o, jo; 5199 PetscBool match; 5200 5201 PetscFunctionBegin; 5202 PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &match)); 5203 PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input"); 5204 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 5205 if (size == 1) { 5206 if (scall == MAT_INITIAL_MATRIX) { 5207 PetscCall(PetscObjectReference((PetscObject)mpimat->A)); 5208 *A_loc = mpimat->A; 5209 } else if (scall == MAT_REUSE_MATRIX) { 5210 PetscCall(MatCopy(mpimat->A, *A_loc, SAME_NONZERO_PATTERN)); 5211 } 5212 PetscFunctionReturn(PETSC_SUCCESS); 5213 } 5214 5215 PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0)); 5216 a = (Mat_SeqAIJ *)mpimat->A->data; 5217 b = (Mat_SeqAIJ *)mpimat->B->data; 5218 ai = a->i; 5219 aj = a->j; 5220 bi = b->i; 5221 bj = b->j; 5222 PetscCall(MatSeqAIJGetArrayRead(mpimat->A, &aav)); 5223 PetscCall(MatSeqAIJGetArrayRead(mpimat->B, &bav)); 5224 aa = aav; 5225 ba = bav; 5226 if (scall == MAT_INITIAL_MATRIX) { 5227 PetscCall(PetscMalloc1(1 + am, &ci)); 5228 ci[0] = 0; 5229 for (i = 0; i < am; i++) ci[i + 1] = ci[i] + (ai[i + 1] - ai[i]) + (bi[i + 1] - bi[i]); 5230 PetscCall(PetscMalloc1(1 + ci[am], &cj)); 5231 PetscCall(PetscMalloc1(1 + ci[am], &ca)); 5232 k = 0; 5233 for (i = 0; i < am; i++) { 5234 ncols_o = bi[i + 1] - bi[i]; 5235 ncols_d = ai[i + 1] - ai[i]; 5236 /* off-diagonal 
portion of A */
      /* columns of B whose global index precedes the diagonal block come first */
      for (jo = 0; jo < ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        cj[k] = col;
        bj++;
        ca[k++] = *ba++;
      }
      /* diagonal portion of A */
      for (j = 0; j < ncols_d; j++) {
        cj[k] = cstart + *aj++; /* shift local diagonal column to its global index */
        ca[k++] = *aa++;
      }
      /* off-diagonal portion of A */
      for (j = jo; j < ncols_o; j++) {
        cj[k] = cmap[*bj++];
        ca[k++] = *ba++;
      }
    }
    /* put together the new matrix */
    PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, A->cmap->N, ci, cj, ca, A_loc));
    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    mat          = (Mat_SeqAIJ *)(*A_loc)->data;
    mat->free_a  = PETSC_TRUE;
    mat->free_ij = PETSC_TRUE;
    mat->nonew   = 0;
  } else if (scall == MAT_REUSE_MATRIX) {
    /* reuse: the pattern is unchanged, only the numerical values are copied,
       in the same column order produced by the MAT_INITIAL_MATRIX branch above */
    mat = (Mat_SeqAIJ *)(*A_loc)->data;
    ci  = mat->i;
    cj  = mat->j;
    PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &cam));
    for (i = 0; i < am; i++) {
      /* off-diagonal portion of A */
      ncols_o = bi[i + 1] - bi[i];
      for (jo = 0; jo < ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        *cam++ = *ba++;
        bj++;
      }
      /* diagonal portion of A */
      ncols_d = ai[i + 1] - ai[i];
      for (j = 0; j < ncols_d; j++) *cam++ = *aa++;
      /* off-diagonal portion of A */
      for (j = jo; j < ncols_o; j++) {
        *cam++ = *ba++;
        bj++;
      }
    }
    PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &cam));
  } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
  /* restore with the saved (unadvanced) base pointers, not the walked aa/ba copies */
  PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A, &aav));
  PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B, &bav));
  PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMPIAIJGetLocalMatMerge - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by
taking all its local rows and putting them into a sequential matrix with 5296 mlocal rows and n columns. Where n is the sum of the number of columns of the diagonal and off-diagonal part 5297 5298 Not Collective 5299 5300 Input Parameters: 5301 + A - the matrix 5302 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5303 5304 Output Parameters: 5305 + glob - sequential `IS` with global indices associated with the columns of the local sequential matrix generated (can be `NULL`) 5306 - A_loc - the local sequential matrix generated 5307 5308 Level: developer 5309 5310 Note: 5311 This is different from `MatMPIAIJGetLocalMat()` since the first columns in the returning matrix are those associated with the diagonal 5312 part, then those associated with the off-diagonal part (in its local ordering) 5313 5314 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()` 5315 @*/ 5316 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A, MatReuse scall, IS *glob, Mat *A_loc) 5317 { 5318 Mat Ao, Ad; 5319 const PetscInt *cmap; 5320 PetscMPIInt size; 5321 PetscErrorCode (*f)(Mat, MatReuse, IS *, Mat *); 5322 5323 PetscFunctionBegin; 5324 PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &cmap)); 5325 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 5326 if (size == 1) { 5327 if (scall == MAT_INITIAL_MATRIX) { 5328 PetscCall(PetscObjectReference((PetscObject)Ad)); 5329 *A_loc = Ad; 5330 } else if (scall == MAT_REUSE_MATRIX) { 5331 PetscCall(MatCopy(Ad, *A_loc, SAME_NONZERO_PATTERN)); 5332 } 5333 if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad), Ad->cmap->n, Ad->cmap->rstart, 1, glob)); 5334 PetscFunctionReturn(PETSC_SUCCESS); 5335 } 5336 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJGetLocalMatMerge_C", &f)); 5337 PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0)); 5338 if (f) { 5339 PetscCall((*f)(A, scall, glob, A_loc)); 5340 } else { 5341 Mat_SeqAIJ 
*a = (Mat_SeqAIJ *)Ad->data; 5342 Mat_SeqAIJ *b = (Mat_SeqAIJ *)Ao->data; 5343 Mat_SeqAIJ *c; 5344 PetscInt *ai = a->i, *aj = a->j; 5345 PetscInt *bi = b->i, *bj = b->j; 5346 PetscInt *ci, *cj; 5347 const PetscScalar *aa, *ba; 5348 PetscScalar *ca; 5349 PetscInt i, j, am, dn, on; 5350 5351 PetscCall(MatGetLocalSize(Ad, &am, &dn)); 5352 PetscCall(MatGetLocalSize(Ao, NULL, &on)); 5353 PetscCall(MatSeqAIJGetArrayRead(Ad, &aa)); 5354 PetscCall(MatSeqAIJGetArrayRead(Ao, &ba)); 5355 if (scall == MAT_INITIAL_MATRIX) { 5356 PetscInt k; 5357 PetscCall(PetscMalloc1(1 + am, &ci)); 5358 PetscCall(PetscMalloc1(ai[am] + bi[am], &cj)); 5359 PetscCall(PetscMalloc1(ai[am] + bi[am], &ca)); 5360 ci[0] = 0; 5361 for (i = 0, k = 0; i < am; i++) { 5362 const PetscInt ncols_o = bi[i + 1] - bi[i]; 5363 const PetscInt ncols_d = ai[i + 1] - ai[i]; 5364 ci[i + 1] = ci[i] + ncols_o + ncols_d; 5365 /* diagonal portion of A */ 5366 for (j = 0; j < ncols_d; j++, k++) { 5367 cj[k] = *aj++; 5368 ca[k] = *aa++; 5369 } 5370 /* off-diagonal portion of A */ 5371 for (j = 0; j < ncols_o; j++, k++) { 5372 cj[k] = dn + *bj++; 5373 ca[k] = *ba++; 5374 } 5375 } 5376 /* put together the new matrix */ 5377 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, dn + on, ci, cj, ca, A_loc)); 5378 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5379 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5380 c = (Mat_SeqAIJ *)(*A_loc)->data; 5381 c->free_a = PETSC_TRUE; 5382 c->free_ij = PETSC_TRUE; 5383 c->nonew = 0; 5384 PetscCall(MatSetType(*A_loc, ((PetscObject)Ad)->type_name)); 5385 } else if (scall == MAT_REUSE_MATRIX) { 5386 PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &ca)); 5387 for (i = 0; i < am; i++) { 5388 const PetscInt ncols_d = ai[i + 1] - ai[i]; 5389 const PetscInt ncols_o = bi[i + 1] - bi[i]; 5390 /* diagonal portion of A */ 5391 for (j = 0; j < ncols_d; j++) *ca++ = *aa++; 5392 /* off-diagonal portion of A */ 5393 for (j = 0; j < ncols_o; j++) *ca++ = *ba++; 5394 } 5395 PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &ca)); 5396 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall); 5397 PetscCall(MatSeqAIJRestoreArrayRead(Ad, &aa)); 5398 PetscCall(MatSeqAIJRestoreArrayRead(Ao, &aa)); 5399 if (glob) { 5400 PetscInt cst, *gidx; 5401 5402 PetscCall(MatGetOwnershipRangeColumn(A, &cst, NULL)); 5403 PetscCall(PetscMalloc1(dn + on, &gidx)); 5404 for (i = 0; i < dn; i++) gidx[i] = cst + i; 5405 for (i = 0; i < on; i++) gidx[i + dn] = cmap[i]; 5406 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad), dn + on, gidx, PETSC_OWN_POINTER, glob)); 5407 } 5408 } 5409 PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0)); 5410 PetscFunctionReturn(PETSC_SUCCESS); 5411 } 5412 5413 /*@C 5414 MatMPIAIJGetLocalMatCondensed - Creates a `MATSEQAIJ` matrix from an `MATMPIAIJ` matrix by taking all its local rows and NON-ZERO columns 5415 5416 Not Collective 5417 5418 Input Parameters: 5419 + A - the matrix 5420 . scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5421 . row - index set of rows to extract (or `NULL`) 5422 - col - index set of columns to extract (or `NULL`) 5423 5424 Output Parameter: 5425 . 
A_loc - the local sequential matrix generated 5426 5427 Level: developer 5428 5429 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()` 5430 @*/ 5431 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A, MatReuse scall, IS *row, IS *col, Mat *A_loc) 5432 { 5433 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5434 PetscInt i, start, end, ncols, nzA, nzB, *cmap, imark, *idx; 5435 IS isrowa, iscola; 5436 Mat *aloc; 5437 PetscBool match; 5438 5439 PetscFunctionBegin; 5440 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &match)); 5441 PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input"); 5442 PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed, A, 0, 0, 0)); 5443 if (!row) { 5444 start = A->rmap->rstart; 5445 end = A->rmap->rend; 5446 PetscCall(ISCreateStride(PETSC_COMM_SELF, end - start, start, 1, &isrowa)); 5447 } else { 5448 isrowa = *row; 5449 } 5450 if (!col) { 5451 start = A->cmap->rstart; 5452 cmap = a->garray; 5453 nzA = a->A->cmap->n; 5454 nzB = a->B->cmap->n; 5455 PetscCall(PetscMalloc1(nzA + nzB, &idx)); 5456 ncols = 0; 5457 for (i = 0; i < nzB; i++) { 5458 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5459 else break; 5460 } 5461 imark = i; 5462 for (i = 0; i < nzA; i++) idx[ncols++] = start + i; 5463 for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; 5464 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &iscola)); 5465 } else { 5466 iscola = *col; 5467 } 5468 if (scall != MAT_INITIAL_MATRIX) { 5469 PetscCall(PetscMalloc1(1, &aloc)); 5470 aloc[0] = *A_loc; 5471 } 5472 PetscCall(MatCreateSubMatrices(A, 1, &isrowa, &iscola, scall, &aloc)); 5473 if (!col) { /* attach global id of condensed columns */ 5474 PetscCall(PetscObjectCompose((PetscObject)aloc[0], "_petsc_GetLocalMatCondensed_iscol", (PetscObject)iscola)); 5475 } 5476 *A_loc = aloc[0]; 5477 PetscCall(PetscFree(aloc)); 5478 if (!row) PetscCall(ISDestroy(&isrowa)); 5479 if (!col) 
PetscCall(ISDestroy(&iscola)); 5480 PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed, A, 0, 0, 0)); 5481 PetscFunctionReturn(PETSC_SUCCESS); 5482 } 5483 5484 /* 5485 * Create a sequential AIJ matrix based on row indices. a whole column is extracted once a row is matched. 5486 * Row could be local or remote.The routine is designed to be scalable in memory so that nothing is based 5487 * on a global size. 5488 * */ 5489 static PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P, IS rows, Mat *P_oth) 5490 { 5491 Mat_MPIAIJ *p = (Mat_MPIAIJ *)P->data; 5492 Mat_SeqAIJ *pd = (Mat_SeqAIJ *)p->A->data, *po = (Mat_SeqAIJ *)p->B->data, *p_oth; 5493 PetscInt plocalsize, nrows, *ilocal, *oilocal, i, lidx, *nrcols, *nlcols, ncol; 5494 PetscMPIInt owner; 5495 PetscSFNode *iremote, *oiremote; 5496 const PetscInt *lrowindices; 5497 PetscSF sf, osf; 5498 PetscInt pcstart, *roffsets, *loffsets, *pnnz, j; 5499 PetscInt ontotalcols, dntotalcols, ntotalcols, nout; 5500 MPI_Comm comm; 5501 ISLocalToGlobalMapping mapping; 5502 const PetscScalar *pd_a, *po_a; 5503 5504 PetscFunctionBegin; 5505 PetscCall(PetscObjectGetComm((PetscObject)P, &comm)); 5506 /* plocalsize is the number of roots 5507 * nrows is the number of leaves 5508 * */ 5509 PetscCall(MatGetLocalSize(P, &plocalsize, NULL)); 5510 PetscCall(ISGetLocalSize(rows, &nrows)); 5511 PetscCall(PetscCalloc1(nrows, &iremote)); 5512 PetscCall(ISGetIndices(rows, &lrowindices)); 5513 for (i = 0; i < nrows; i++) { 5514 /* Find a remote index and an owner for a row 5515 * The row could be local or remote 5516 * */ 5517 owner = 0; 5518 lidx = 0; 5519 PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, &lidx)); 5520 iremote[i].index = lidx; 5521 iremote[i].rank = owner; 5522 } 5523 /* Create SF to communicate how many nonzero columns for each row */ 5524 PetscCall(PetscSFCreate(comm, &sf)); 5525 /* SF will figure out the number of nonzero columns for each row, and their 5526 * offsets 5527 * */ 5528 
PetscCall(PetscSFSetGraph(sf, plocalsize, nrows, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 5529 PetscCall(PetscSFSetFromOptions(sf)); 5530 PetscCall(PetscSFSetUp(sf)); 5531 5532 PetscCall(PetscCalloc1(2 * (plocalsize + 1), &roffsets)); 5533 PetscCall(PetscCalloc1(2 * plocalsize, &nrcols)); 5534 PetscCall(PetscCalloc1(nrows, &pnnz)); 5535 roffsets[0] = 0; 5536 roffsets[1] = 0; 5537 for (i = 0; i < plocalsize; i++) { 5538 /* diagonal */ 5539 nrcols[i * 2 + 0] = pd->i[i + 1] - pd->i[i]; 5540 /* off-diagonal */ 5541 nrcols[i * 2 + 1] = po->i[i + 1] - po->i[i]; 5542 /* compute offsets so that we relative location for each row */ 5543 roffsets[(i + 1) * 2 + 0] = roffsets[i * 2 + 0] + nrcols[i * 2 + 0]; 5544 roffsets[(i + 1) * 2 + 1] = roffsets[i * 2 + 1] + nrcols[i * 2 + 1]; 5545 } 5546 PetscCall(PetscCalloc1(2 * nrows, &nlcols)); 5547 PetscCall(PetscCalloc1(2 * nrows, &loffsets)); 5548 /* 'r' means root, and 'l' means leaf */ 5549 PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE)); 5550 PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE)); 5551 PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE)); 5552 PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE)); 5553 PetscCall(PetscSFDestroy(&sf)); 5554 PetscCall(PetscFree(roffsets)); 5555 PetscCall(PetscFree(nrcols)); 5556 dntotalcols = 0; 5557 ontotalcols = 0; 5558 ncol = 0; 5559 for (i = 0; i < nrows; i++) { 5560 pnnz[i] = nlcols[i * 2 + 0] + nlcols[i * 2 + 1]; 5561 ncol = PetscMax(pnnz[i], ncol); 5562 /* diagonal */ 5563 dntotalcols += nlcols[i * 2 + 0]; 5564 /* off-diagonal */ 5565 ontotalcols += nlcols[i * 2 + 1]; 5566 } 5567 /* We do not need to figure the right number of columns 5568 * since all the calculations will be done by going through the raw data 5569 * */ 5570 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, nrows, ncol, 0, pnnz, P_oth)); 5571 PetscCall(MatSetUp(*P_oth)); 5572 PetscCall(PetscFree(pnnz)); 5573 p_oth = 
(Mat_SeqAIJ *)(*P_oth)->data; 5574 /* diagonal */ 5575 PetscCall(PetscCalloc1(dntotalcols, &iremote)); 5576 /* off-diagonal */ 5577 PetscCall(PetscCalloc1(ontotalcols, &oiremote)); 5578 /* diagonal */ 5579 PetscCall(PetscCalloc1(dntotalcols, &ilocal)); 5580 /* off-diagonal */ 5581 PetscCall(PetscCalloc1(ontotalcols, &oilocal)); 5582 dntotalcols = 0; 5583 ontotalcols = 0; 5584 ntotalcols = 0; 5585 for (i = 0; i < nrows; i++) { 5586 owner = 0; 5587 PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, NULL)); 5588 /* Set iremote for diag matrix */ 5589 for (j = 0; j < nlcols[i * 2 + 0]; j++) { 5590 iremote[dntotalcols].index = loffsets[i * 2 + 0] + j; 5591 iremote[dntotalcols].rank = owner; 5592 /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */ 5593 ilocal[dntotalcols++] = ntotalcols++; 5594 } 5595 /* off-diagonal */ 5596 for (j = 0; j < nlcols[i * 2 + 1]; j++) { 5597 oiremote[ontotalcols].index = loffsets[i * 2 + 1] + j; 5598 oiremote[ontotalcols].rank = owner; 5599 oilocal[ontotalcols++] = ntotalcols++; 5600 } 5601 } 5602 PetscCall(ISRestoreIndices(rows, &lrowindices)); 5603 PetscCall(PetscFree(loffsets)); 5604 PetscCall(PetscFree(nlcols)); 5605 PetscCall(PetscSFCreate(comm, &sf)); 5606 /* P serves as roots and P_oth is leaves 5607 * Diag matrix 5608 * */ 5609 PetscCall(PetscSFSetGraph(sf, pd->i[plocalsize], dntotalcols, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 5610 PetscCall(PetscSFSetFromOptions(sf)); 5611 PetscCall(PetscSFSetUp(sf)); 5612 5613 PetscCall(PetscSFCreate(comm, &osf)); 5614 /* off-diagonal */ 5615 PetscCall(PetscSFSetGraph(osf, po->i[plocalsize], ontotalcols, oilocal, PETSC_OWN_POINTER, oiremote, PETSC_OWN_POINTER)); 5616 PetscCall(PetscSFSetFromOptions(osf)); 5617 PetscCall(PetscSFSetUp(osf)); 5618 PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a)); 5619 PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a)); 5620 /* operate on the matrix internal data to save memory */ 5621 
PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5622 PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5623 PetscCall(MatGetOwnershipRangeColumn(P, &pcstart, NULL)); 5624 /* Convert to global indices for diag matrix */ 5625 for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] += pcstart; 5626 PetscCall(PetscSFBcastBegin(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE)); 5627 /* We want P_oth store global indices */ 5628 PetscCall(ISLocalToGlobalMappingCreate(comm, 1, p->B->cmap->n, p->garray, PETSC_COPY_VALUES, &mapping)); 5629 /* Use memory scalable approach */ 5630 PetscCall(ISLocalToGlobalMappingSetType(mapping, ISLOCALTOGLOBALMAPPINGHASH)); 5631 PetscCall(ISLocalToGlobalMappingApply(mapping, po->i[plocalsize], po->j, po->j)); 5632 PetscCall(PetscSFBcastBegin(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE)); 5633 PetscCall(PetscSFBcastEnd(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE)); 5634 /* Convert back to local indices */ 5635 for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] -= pcstart; 5636 PetscCall(PetscSFBcastEnd(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE)); 5637 nout = 0; 5638 PetscCall(ISGlobalToLocalMappingApply(mapping, IS_GTOLM_DROP, po->i[plocalsize], po->j, &nout, po->j)); 5639 PetscCheck(nout == po->i[plocalsize], comm, PETSC_ERR_ARG_INCOMP, "n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ", po->i[plocalsize], nout); 5640 PetscCall(ISLocalToGlobalMappingDestroy(&mapping)); 5641 /* Exchange values */ 5642 PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5643 PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5644 PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a)); 5645 PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a)); 5646 /* Stop PETSc from shrinking memory */ 5647 for (i = 0; i < nrows; i++) p_oth->ilen[i] = p_oth->imax[i]; 5648 PetscCall(MatAssemblyBegin(*P_oth, MAT_FINAL_ASSEMBLY)); 5649 PetscCall(MatAssemblyEnd(*P_oth, 
MAT_FINAL_ASSEMBLY));
  /* Attach the PetscSF objects to P_oth so the communication pattern can be reused
     by a later MAT_REUSE_MATRIX call (see the reuse branch below) */
  PetscCall(PetscObjectCompose((PetscObject)*P_oth, "diagsf", (PetscObject)sf));
  PetscCall(PetscObjectCompose((PetscObject)*P_oth, "offdiagsf", (PetscObject)osf));
  /* PetscObjectCompose() took references; drop the local ones */
  PetscCall(PetscSFDestroy(&sf));
  PetscCall(PetscSFDestroy(&osf));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
 * Creates a SeqAIJ matrix (P_oth) holding the rows of P that correspond to nonzero columns
 * of the off-diagonal part of the local A.
 * This supports MPIAIJ and MAIJ: 'dof' is the block size used to map off-diagonal column
 * indices of A to rows of P (each index is divided by dof).
 * */
PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A, Mat P, PetscInt dof, MatReuse reuse, Mat *P_oth)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *p = (Mat_MPIAIJ *)P->data;
  Mat_SeqAIJ *p_oth;
  IS          rows, map;
  PetscHMapI  hamp; /* hash map: global row of P -> position in the compacted row list */
  PetscInt    i, htsize, *rowindices, off, *mapping, key, count;
  MPI_Comm    comm;
  PetscSF     sf, osf;
  PetscBool   has;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, P, 0, 0));
  /* If it is the first time, create an index set of off-diag nonzero columns of A,
   * and then create a submatrix (that often is an overlapping matrix)
   * */
  if (reuse == MAT_INITIAL_MATRIX) {
    /* Use a hash table to figure out unique keys */
    PetscCall(PetscHMapICreateWithSize(a->B->cmap->n, &hamp));
    PetscCall(PetscCalloc1(a->B->cmap->n, &mapping));
    count = 0;
    /* Assume that a->garray is sorted, otherwise the following does not make sense */
    for (i = 0; i < a->B->cmap->n; i++) {
      key = a->garray[i] / dof; /* global row of P this off-diag column of A maps to */
      PetscCall(PetscHMapIHas(hamp, key, &has));
      if (!has) { /* New unique key: assign it the next position and remember it */
        mapping[i] = count;
        PetscCall(PetscHMapISet(hamp, key, count++));
      } else {
        /* Same key as the previous iteration (garray is sorted), so it maps to the
           most recently assigned position */
        mapping[i] = count - 1;
      }
    }
    PetscCall(ISCreateGeneral(comm, a->B->cmap->n, mapping, PETSC_OWN_POINTER, &map));
    PetscCall(PetscHMapIGetSize(hamp, &htsize));
    PetscCheck(htsize == count, comm, PETSC_ERR_ARG_INCOMP, " Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT, htsize, count);
    /* Extract the unique rows of P to fetch, and sort them into ascending order */
    PetscCall(PetscCalloc1(htsize, &rowindices));
    off = 0;
    PetscCall(PetscHMapIGetKeys(hamp, &off, rowindices));
    PetscCall(PetscHMapIDestroy(&hamp));
    PetscCall(PetscSortInt(htsize, rowindices));
    PetscCall(ISCreateGeneral(comm, htsize, rowindices, PETSC_OWN_POINTER, &rows));
    /* In case, the matrix was already created but users want to recreate the matrix */
    PetscCall(MatDestroy(P_oth));
    PetscCall(MatCreateSeqSubMatrixWithRows_Private(P, rows, P_oth));
    /* Keep the offdiag-column-of-A -> row-of-P_oth mapping for later lookups */
    PetscCall(PetscObjectCompose((PetscObject)*P_oth, "aoffdiagtopothmapping", (PetscObject)map));
    PetscCall(ISDestroy(&map));
    PetscCall(ISDestroy(&rows));
  } else if (reuse == MAT_REUSE_MATRIX) {
    /* If matrix was already created, we simply update values using SF objects
     * that were attached to the matrix earlier.
     */
    const PetscScalar *pd_a, *po_a;

    PetscCall(PetscObjectQuery((PetscObject)*P_oth, "diagsf", (PetscObject *)&sf));
    PetscCall(PetscObjectQuery((PetscObject)*P_oth, "offdiagsf", (PetscObject *)&osf));
    PetscCheck(sf && osf, comm, PETSC_ERR_ARG_NULL, "Matrix is not initialized yet");
    p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
    /* Update values in place: broadcast the diag (p->A) and offdiag (p->B) values of P
       into P_oth's value array through the saved SFs */
    PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
    PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
    PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
    PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
    PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
    PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
  } else SETERRQ(comm, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown reuse type");
  PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, P, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatGetBrowsOfAcols - Returns `IS` that contain rows of `B` that equal to nonzero columns of local `A`

  Collective

  Input Parameters:
+ A - the first matrix in `MATMPIAIJ` format
. B - the second matrix in `MATMPIAIJ` format
- scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`

  Output Parameters:
+ rowb - On input index sets of rows of B to extract (or `NULL`), modified on output
.
colb - On input index sets of columns of B to extract (or `NULL`), modified on output 5748 - B_seq - the sequential matrix generated 5749 5750 Level: developer 5751 5752 .seealso: `Mat`, `MATMPIAIJ`, `IS`, `MatReuse` 5753 @*/ 5754 PetscErrorCode MatGetBrowsOfAcols(Mat A, Mat B, MatReuse scall, IS *rowb, IS *colb, Mat *B_seq) 5755 { 5756 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5757 PetscInt *idx, i, start, ncols, nzA, nzB, *cmap, imark; 5758 IS isrowb, iscolb; 5759 Mat *bseq = NULL; 5760 5761 PetscFunctionBegin; 5762 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 5763 A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 5764 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols, A, B, 0, 0)); 5765 5766 if (scall == MAT_INITIAL_MATRIX) { 5767 start = A->cmap->rstart; 5768 cmap = a->garray; 5769 nzA = a->A->cmap->n; 5770 nzB = a->B->cmap->n; 5771 PetscCall(PetscMalloc1(nzA + nzB, &idx)); 5772 ncols = 0; 5773 for (i = 0; i < nzB; i++) { /* row < local row index */ 5774 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5775 else break; 5776 } 5777 imark = i; 5778 for (i = 0; i < nzA; i++) idx[ncols++] = start + i; /* local rows */ 5779 for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5780 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &isrowb)); 5781 PetscCall(ISCreateStride(PETSC_COMM_SELF, B->cmap->N, 0, 1, &iscolb)); 5782 } else { 5783 PetscCheck(rowb && colb, PETSC_COMM_SELF, PETSC_ERR_SUP, "IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5784 isrowb = *rowb; 5785 iscolb = *colb; 5786 PetscCall(PetscMalloc1(1, &bseq)); 5787 bseq[0] = *B_seq; 5788 } 5789 PetscCall(MatCreateSubMatrices(B, 1, &isrowb, &iscolb, scall, &bseq)); 5790 *B_seq = bseq[0]; 5791 PetscCall(PetscFree(bseq)); 5792 if (!rowb) { 5793 
PetscCall(ISDestroy(&isrowb)); 5794 } else { 5795 *rowb = isrowb; 5796 } 5797 if (!colb) { 5798 PetscCall(ISDestroy(&iscolb)); 5799 } else { 5800 *colb = iscolb; 5801 } 5802 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols, A, B, 0, 0)); 5803 PetscFunctionReturn(PETSC_SUCCESS); 5804 } 5805 5806 /* 5807 MatGetBrowsOfAoCols_MPIAIJ - Creates a `MATSEQAIJ` matrix by taking rows of B that equal to nonzero columns 5808 of the OFF-DIAGONAL portion of local A 5809 5810 Collective 5811 5812 Input Parameters: 5813 + A,B - the matrices in `MATMPIAIJ` format 5814 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5815 5816 Output Parameter: 5817 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5818 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5819 . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5820 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5821 5822 Developer Note: 5823 This directly accesses information inside the VecScatter associated with the matrix-vector product 5824 for this matrix. This is not desirable.. 
5825 5826 Level: developer 5827 5828 */ 5829 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A, Mat B, MatReuse scall, PetscInt **startsj_s, PetscInt **startsj_r, MatScalar **bufa_ptr, Mat *B_oth) 5830 { 5831 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5832 Mat_SeqAIJ *b_oth; 5833 VecScatter ctx; 5834 MPI_Comm comm; 5835 const PetscMPIInt *rprocs, *sprocs; 5836 const PetscInt *srow, *rstarts, *sstarts; 5837 PetscInt *rowlen, *bufj, *bufJ, ncols = 0, aBn = a->B->cmap->n, row, *b_othi, *b_othj, *rvalues = NULL, *svalues = NULL, *cols, sbs, rbs; 5838 PetscInt i, j, k = 0, l, ll, nrecvs, nsends, nrows, *rstartsj = NULL, *sstartsj, len; 5839 PetscScalar *b_otha, *bufa, *bufA, *vals = NULL; 5840 MPI_Request *reqs = NULL, *rwaits = NULL, *swaits = NULL; 5841 PetscMPIInt size, tag, rank, nreqs; 5842 5843 PetscFunctionBegin; 5844 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 5845 PetscCallMPI(MPI_Comm_size(comm, &size)); 5846 5847 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 5848 A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 5849 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, B, 0, 0)); 5850 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 5851 5852 if (size == 1) { 5853 startsj_s = NULL; 5854 bufa_ptr = NULL; 5855 *B_oth = NULL; 5856 PetscFunctionReturn(PETSC_SUCCESS); 5857 } 5858 5859 ctx = a->Mvctx; 5860 tag = ((PetscObject)ctx)->tag; 5861 5862 PetscCall(VecScatterGetRemote_Private(ctx, PETSC_TRUE /*send*/, &nsends, &sstarts, &srow, &sprocs, &sbs)); 5863 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5864 PetscCall(VecScatterGetRemoteOrdered_Private(ctx, PETSC_FALSE /*recv*/, &nrecvs, &rstarts, NULL /*indices not needed*/, &rprocs, &rbs)); 5865 
PetscCall(PetscMPIIntCast(nsends + nrecvs, &nreqs)); 5866 PetscCall(PetscMalloc1(nreqs, &reqs)); 5867 rwaits = reqs; 5868 swaits = PetscSafePointerPlusOffset(reqs, nrecvs); 5869 5870 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5871 if (scall == MAT_INITIAL_MATRIX) { 5872 /* i-array */ 5873 /* post receives */ 5874 if (nrecvs) PetscCall(PetscMalloc1(rbs * (rstarts[nrecvs] - rstarts[0]), &rvalues)); /* rstarts can be NULL when nrecvs=0 */ 5875 for (i = 0; i < nrecvs; i++) { 5876 rowlen = rvalues + rstarts[i] * rbs; 5877 nrows = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of indices to be received */ 5878 PetscCallMPI(MPI_Irecv(rowlen, nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i)); 5879 } 5880 5881 /* pack the outgoing message */ 5882 PetscCall(PetscMalloc2(nsends + 1, &sstartsj, nrecvs + 1, &rstartsj)); 5883 5884 sstartsj[0] = 0; 5885 rstartsj[0] = 0; 5886 len = 0; /* total length of j or a array to be sent */ 5887 if (nsends) { 5888 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5889 PetscCall(PetscMalloc1(sbs * (sstarts[nsends] - sstarts[0]), &svalues)); 5890 } 5891 for (i = 0; i < nsends; i++) { 5892 rowlen = svalues + (sstarts[i] - sstarts[0]) * sbs; 5893 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5894 for (j = 0; j < nrows; j++) { 5895 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5896 for (l = 0; l < sbs; l++) { 5897 PetscCall(MatGetRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); /* rowlength */ 5898 5899 rowlen[j * sbs + l] = ncols; 5900 5901 len += ncols; 5902 PetscCall(MatRestoreRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); 5903 } 5904 k++; 5905 } 5906 PetscCallMPI(MPI_Isend(rowlen, nrows * sbs, MPIU_INT, sprocs[i], tag, comm, swaits + i)); 5907 5908 sstartsj[i + 1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5909 } 5910 /* recvs and sends of i-array are completed */ 5911 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5912 
PetscCall(PetscFree(svalues)); 5913 5914 /* allocate buffers for sending j and a arrays */ 5915 PetscCall(PetscMalloc1(len + 1, &bufj)); 5916 PetscCall(PetscMalloc1(len + 1, &bufa)); 5917 5918 /* create i-array of B_oth */ 5919 PetscCall(PetscMalloc1(aBn + 2, &b_othi)); 5920 5921 b_othi[0] = 0; 5922 len = 0; /* total length of j or a array to be received */ 5923 k = 0; 5924 for (i = 0; i < nrecvs; i++) { 5925 rowlen = rvalues + (rstarts[i] - rstarts[0]) * rbs; 5926 nrows = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of rows to be received */ 5927 for (j = 0; j < nrows; j++) { 5928 b_othi[k + 1] = b_othi[k] + rowlen[j]; 5929 PetscCall(PetscIntSumError(rowlen[j], len, &len)); 5930 k++; 5931 } 5932 rstartsj[i + 1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5933 } 5934 PetscCall(PetscFree(rvalues)); 5935 5936 /* allocate space for j and a arrays of B_oth */ 5937 PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_othj)); 5938 PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_otha)); 5939 5940 /* j-array */ 5941 /* post receives of j-array */ 5942 for (i = 0; i < nrecvs; i++) { 5943 nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */ 5944 PetscCallMPI(MPI_Irecv(b_othj + rstartsj[i], nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i)); 5945 } 5946 5947 /* pack the outgoing message j-array */ 5948 if (nsends) k = sstarts[0]; 5949 for (i = 0; i < nsends; i++) { 5950 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5951 bufJ = bufj + sstartsj[i]; 5952 for (j = 0; j < nrows; j++) { 5953 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5954 for (ll = 0; ll < sbs; ll++) { 5955 PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL)); 5956 for (l = 0; l < ncols; l++) *bufJ++ = cols[l]; 5957 PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL)); 5958 } 5959 } 5960 PetscCallMPI(MPI_Isend(bufj + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_INT, sprocs[i], tag, comm, swaits + i)); 5961 } 5962 5963 /* recvs 
and sends of j-array are completed */ 5964 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5965 } else if (scall == MAT_REUSE_MATRIX) { 5966 sstartsj = *startsj_s; 5967 rstartsj = *startsj_r; 5968 bufa = *bufa_ptr; 5969 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 5970 PetscCall(MatSeqAIJGetArrayWrite(*B_oth, &b_otha)); 5971 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container"); 5972 5973 /* a-array */ 5974 /* post receives of a-array */ 5975 for (i = 0; i < nrecvs; i++) { 5976 nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */ 5977 PetscCallMPI(MPI_Irecv(b_otha + rstartsj[i], nrows, MPIU_SCALAR, rprocs[i], tag, comm, rwaits + i)); 5978 } 5979 5980 /* pack the outgoing message a-array */ 5981 if (nsends) k = sstarts[0]; 5982 for (i = 0; i < nsends; i++) { 5983 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5984 bufA = bufa + sstartsj[i]; 5985 for (j = 0; j < nrows; j++) { 5986 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5987 for (ll = 0; ll < sbs; ll++) { 5988 PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals)); 5989 for (l = 0; l < ncols; l++) *bufA++ = vals[l]; 5990 PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals)); 5991 } 5992 } 5993 PetscCallMPI(MPI_Isend(bufa + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_SCALAR, sprocs[i], tag, comm, swaits + i)); 5994 } 5995 /* recvs and sends of a-array are completed */ 5996 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5997 PetscCall(PetscFree(reqs)); 5998 5999 if (scall == MAT_INITIAL_MATRIX) { 6000 /* put together the new matrix */ 6001 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, aBn, B->cmap->N, b_othi, b_othj, b_otha, B_oth)); 6002 6003 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 6004 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 6005 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 6006 b_oth->free_a = PETSC_TRUE; 6007 b_oth->free_ij = PETSC_TRUE; 6008 b_oth->nonew = 0; 6009 6010 PetscCall(PetscFree(bufj)); 6011 if (!startsj_s || !bufa_ptr) { 6012 PetscCall(PetscFree2(sstartsj, rstartsj)); 6013 PetscCall(PetscFree(bufa_ptr)); 6014 } else { 6015 *startsj_s = sstartsj; 6016 *startsj_r = rstartsj; 6017 *bufa_ptr = bufa; 6018 } 6019 } else if (scall == MAT_REUSE_MATRIX) { 6020 PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth, &b_otha)); 6021 } 6022 6023 PetscCall(VecScatterRestoreRemote_Private(ctx, PETSC_TRUE, &nsends, &sstarts, &srow, &sprocs, &sbs)); 6024 PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx, PETSC_FALSE, &nrecvs, &rstarts, NULL, &rprocs, &rbs)); 6025 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, B, 0, 0)); 6026 PetscFunctionReturn(PETSC_SUCCESS); 6027 } 6028 6029 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat, MatType, MatReuse, Mat *); 6030 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat, MatType, MatReuse, Mat *); 6031 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat, MatType, MatReuse, Mat *); 6032 #if defined(PETSC_HAVE_MKL_SPARSE) 6033 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat, MatType, MatReuse, Mat *); 6034 #endif 6035 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat, MatType, MatReuse, Mat *); 6036 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *); 6037 #if defined(PETSC_HAVE_ELEMENTAL) 6038 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat, MatType, MatReuse, Mat *); 6039 #endif 6040 #if defined(PETSC_HAVE_SCALAPACK) 6041 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *); 6042 #endif 6043 #if defined(PETSC_HAVE_HYPRE) 6044 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *); 6045 #endif 6046 #if defined(PETSC_HAVE_CUDA) 6047 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat, MatType, 
MatReuse, Mat *); 6048 #endif 6049 #if defined(PETSC_HAVE_HIP) 6050 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJHIPSPARSE(Mat, MatType, MatReuse, Mat *); 6051 #endif 6052 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 6053 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat, MatType, MatReuse, Mat *); 6054 #endif 6055 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat, MatType, MatReuse, Mat *); 6056 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *); 6057 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat); 6058 6059 /* 6060 Computes (B'*A')' since computing B*A directly is untenable 6061 6062 n p p 6063 [ ] [ ] [ ] 6064 m [ A ] * n [ B ] = m [ C ] 6065 [ ] [ ] [ ] 6066 6067 */ 6068 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A, Mat B, Mat C) 6069 { 6070 Mat At, Bt, Ct; 6071 6072 PetscFunctionBegin; 6073 PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &At)); 6074 PetscCall(MatTranspose(B, MAT_INITIAL_MATRIX, &Bt)); 6075 PetscCall(MatMatMult(Bt, At, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &Ct)); 6076 PetscCall(MatDestroy(&At)); 6077 PetscCall(MatDestroy(&Bt)); 6078 PetscCall(MatTransposeSetPrecursor(Ct, C)); 6079 PetscCall(MatTranspose(Ct, MAT_REUSE_MATRIX, &C)); 6080 PetscCall(MatDestroy(&Ct)); 6081 PetscFunctionReturn(PETSC_SUCCESS); 6082 } 6083 6084 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A, Mat B, PetscReal fill, Mat C) 6085 { 6086 PetscBool cisdense; 6087 6088 PetscFunctionBegin; 6089 PetscCheck(A->cmap->n == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT, A->cmap->n, B->rmap->n); 6090 PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N)); 6091 PetscCall(MatSetBlockSizesFromMats(C, A, B)); 6092 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, MATMPIDENSEHIP, "")); 6093 if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 6094 
PetscCall(MatSetUp(C));

  C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; /* numeric stage computes C via (B'*A')' */
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Product setup for C = A*B with A MPIDENSE and B MPIAIJ: validate the local layouts
   and install the symbolic kernel */
static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
{
  Mat_Product *product = C->product;
  Mat          A = product->A, B = product->B;

  PetscFunctionBegin;
  /* A's local column range must match B's local row range for A*B */
  PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
             A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
  C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
  C->ops->productsymbolic = MatProductSymbolic_AB;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Dispatch for MatProductSetFromOptions() on an MPIDENSE*MPIAIJ product; only MATPRODUCT_AB is handled */
PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
{
  Mat_Product *product = C->product;

  PetscFunctionBegin;
  if (product->type == MATPRODUCT_AB) PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix

  Input Parameters:

  j1,rowBegin1,rowEnd1,jmap1: describe the first set of nonzeros (Set1)
  j2,rowBegin2,rowEnd2,jmap2: describe the second set of nonzeros (Set2)

  mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat

  For Set1, j1[] contains column indices of the nonzeros.
  For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k
  respectively (note rowEnd1[k] is not necessarily equal to rowBegin1[k+1]). Indices in this range of j1[] are sorted,
  but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1.

  Similar for Set2.

  This routine merges the two sets of nonzeros row by row and removes repeats.

  Output Parameters: (memory is allocated by the caller)

  i[],j[]: the CSR of the merged matrix, which has m rows.
  imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix.
  imap2[]: similar to imap1[], but for Set2.
  Note we order nonzeros row-by-row and from left to right.
*/
static PetscErrorCode MatMergeEntries_Internal(Mat mat, const PetscInt j1[], const PetscInt j2[], const PetscCount rowBegin1[], const PetscCount rowEnd1[], const PetscCount rowBegin2[], const PetscCount rowEnd2[], const PetscCount jmap1[], const PetscCount jmap2[], PetscCount imap1[], PetscCount imap2[], PetscInt i[], PetscInt j[])
{
  PetscInt   r, m; /* Row index of mat */
  PetscCount t, t1, t2, b1, e1, b2, e2;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(mat, &m, NULL));
  t1 = t2 = t = 0; /* Count unique nonzeros in Set1, Set2 and the merged set respectively */
  i[0] = 0;
  for (r = 0; r < m; r++) { /* Do row by row merging */
    b1 = rowBegin1[r];
    e1 = rowEnd1[r];
    b2 = rowBegin2[r];
    e2 = rowEnd2[r];
    /* Two-way merge of the sorted (possibly repeated) column lists of row r;
       b1/b2 advance by the repeat count of each unique entry */
    while (b1 < e1 && b2 < e2) {
      if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */
        j[t]      = j1[b1];
        imap1[t1] = t;
        imap2[t2] = t;
        b1 += jmap1[t1 + 1] - jmap1[t1]; /* Jump to next unique local nonzero */
        b2 += jmap2[t2 + 1] - jmap2[t2]; /* Jump to next unique remote nonzero */
        t1++;
        t2++;
        t++;
      } else if (j1[b1] < j2[b2]) { /* Entry only in Set1 */
        j[t]      = j1[b1];
        imap1[t1] = t;
        b1 += jmap1[t1 + 1] - jmap1[t1];
        t1++;
        t++;
      } else { /* Entry only in Set2 */
        j[t]      = j2[b2];
        imap2[t2] = t;
        b2 += jmap2[t2 + 1] - jmap2[t2];
        t2++;
        t++;
      }
    }
    /* Merge the remaining in either j1[] or j2[] */
    while (b1 < e1) {
      j[t]      = j1[b1];
      imap1[t1] = t;
      b1 += jmap1[t1 + 1] - jmap1[t1];
      t1++;
      t++;
    }
    while (b2 < e2) {
      j[t]      = j2[b2];
      imap2[t2] = t;
      b2 += jmap2[t2 + 1] - jmap2[t2];
      t2++;
      t++;
    }
    i[r + 1] = t; /* CSR row pointer of the merged matrix */
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block

  Input Parameters:
    mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m.
    n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[]
      respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n.

    i[] is already sorted, but within a row, j[] is not sorted and might have repeats.
    i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting.

  Output Parameters:
    j[],perm[]: the routine needs to sort j[] within each row along with perm[].
    rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller.
      They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block,
      and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block.

    Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine.
      Atot: number of entries belonging to the diagonal block.
      Annz: number of unique nonzeros belonging to the diagonal block.
      Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count
        repeats (i.e., same 'i,j' pair).
      Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block.
More precisely, Ajmap[t+1] - Ajmap[t]
      is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0.

    Atot: number of entries belonging to the diagonal block
    Annz: number of unique nonzeros belonging to the diagonal block.

    Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block.

    Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1().
*/
static PetscErrorCode MatSplitEntries_Internal(Mat mat, PetscCount n, const PetscInt i[], PetscInt j[], PetscCount perm[], PetscCount rowBegin[], PetscCount rowMid[], PetscCount rowEnd[], PetscCount *Atot_, PetscCount **Aperm_, PetscCount *Annz_, PetscCount **Ajmap_, PetscCount *Btot_, PetscCount **Bperm_, PetscCount *Bnnz_, PetscCount **Bjmap_)
{
  PetscInt    cstart, cend, rstart, rend, row, col;
  PetscCount  Atot = 0, Btot = 0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */
  PetscCount  Annz = 0, Bnnz = 0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */
  PetscCount  k, m, p, q, r, s, mid;
  PetscCount *Aperm, *Bperm, *Ajmap, *Bjmap;

  PetscFunctionBegin;
  PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
  PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
  m = rend - rstart;

  /* Skip negative rows */
  for (k = 0; k < n; k++)
    if (i[k] >= 0) break;

  /* Process [k,n): sort and partition each local row into diag and offdiag portions,
     fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz.
  */
  while (k < n) {
    row = i[k];
    /* Entries in [k,s) are in one row. Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */
    for (s = k; s < n; s++)
      if (i[s] != row) break;

    /* Shift diag columns to range of [-PETSC_MAX_INT, -1] */
    for (p = k; p < s; p++) {
      if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_MAX_INT;
      /* NOTE(review): this accepts j[p] == mat->cmap->N, but valid global columns are < N — confirm the intended bound */
      else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index %" PetscInt_FMT " is out of range", j[p]);
    }
    PetscCall(PetscSortIntWithCountArray(s - k, j + k, perm + k));
    PetscCall(PetscSortedIntUpperBound(j, k, s, -1, &mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */
    rowBegin[row - rstart] = k;
    rowMid[row - rstart]   = mid;
    rowEnd[row - rstart]   = s;

    /* Count nonzeros of this diag/offdiag row, which might have repeats */
    Atot += mid - k;
    Btot += s - mid;

    /* Count unique nonzeros of this diag row */
    for (p = k; p < mid;) {
      col = j[p];
      do {
        j[p] += PETSC_MAX_INT; /* Revert the modified diagonal indices */
        p++;
      } while (p < mid && j[p] == col); /* compare against the still-shifted value 'col' */
      Annz++;
    }

    /* Count unique nonzeros of this offdiag row */
    for (p = mid; p < s;) {
      col = j[p];
      do {
        p++;
      } while (p < s && j[p] == col);
      Bnnz++;
    }
    k = s;
  }

  /* Allocation according to Atot, Btot, Annz, Bnnz */
  PetscCall(PetscMalloc1(Atot, &Aperm));
  PetscCall(PetscMalloc1(Btot, &Bperm));
  PetscCall(PetscMalloc1(Annz + 1, &Ajmap));
  PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap));

  /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */
  Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0; /* counters restart from zero for the fill pass */
  for (r = 0; r < m; r++) {
    k   = rowBegin[r];
    mid = rowMid[r];
    s   = rowEnd[r];
    PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Aperm, Atot), PetscSafePointerPlusOffset(perm, k), mid - k));
    PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Bperm, Btot), PetscSafePointerPlusOffset(perm, mid), s - mid));
    Atot += mid - k;
    Btot += s - mid;

    /* Scan column indices in this row and find out how many repeats each unique nonzero has */
    for (p = k; p < mid;) {
      col = j[p];
      q   = p;
      do {
        p++;
      } while (p < mid && j[p] == col);
      Ajmap[Annz + 1] = Ajmap[Annz] + (p - q);
      Annz++;
    }

    for (p = mid; p < s;) {
      col = j[p];
      q   = p;
      do {
        p++;
      } while (p < s && j[p] == col);
      Bjmap[Bnnz + 1] = Bjmap[Bnnz] + (p - q);
      Bnnz++;
    }
  }
  /* Output */
  *Aperm_ = Aperm;
  *Annz_  = Annz;
  *Atot_  = Atot;
  *Ajmap_ = Ajmap;
  *Bperm_ = Bperm;
  *Bnnz_  = Bnnz;
  *Btot_  = Btot;
  *Bjmap_ = Bjmap;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix

  Input Parameters:
    nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[]
    nnz: number of unique nonzeros in the merged matrix
    imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix
    jmap[nnz1+1]: i-th nonzero in the set has jmap[i+1] - jmap[i] repeats in the set

  Output Parameter: (memory is allocated by the caller)
    jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set

  Example:
    nnz1 = 4
    nnz  = 6
    imap = [1,3,4,5]
    jmap = [0,3,5,6,7]
  then,
    jmap_new = [0,0,3,3,5,6,7]
*/
static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1, PetscCount nnz, const PetscCount imap[], const PetscCount jmap[], PetscCount jmap_new[])
{
  PetscCount k, p;

  PetscFunctionBegin;
  jmap_new[0] = 0;
  p = nnz;                          /* p loops over jmap_new[] backwards */
  for (k = nnz1 - 1; k >= 0; k--) { /* k loops over imap[] */
    for (; p >
imap[k]; p--) jmap_new[p] = jmap[k + 1];
  }
  for (; p >= 0; p--) jmap_new[p] = jmap[0]; /* positions at or before imap[0] get jmap[0] */
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Container destructor: frees the COO assembly metadata attached to an MPIAIJ matrix */
static PetscErrorCode MatCOOStructDestroy_MPIAIJ(void *data)
{
  MatCOOStruct_MPIAIJ *coo = (MatCOOStruct_MPIAIJ *)data;

  PetscFunctionBegin;
  PetscCall(PetscSFDestroy(&coo->sf));
  PetscCall(PetscFree(coo->Aperm1));
  PetscCall(PetscFree(coo->Bperm1));
  PetscCall(PetscFree(coo->Ajmap1));
  PetscCall(PetscFree(coo->Bjmap1));
  PetscCall(PetscFree(coo->Aimap2));
  PetscCall(PetscFree(coo->Bimap2));
  PetscCall(PetscFree(coo->Aperm2));
  PetscCall(PetscFree(coo->Bperm2));
  PetscCall(PetscFree(coo->Ajmap2));
  PetscCall(PetscFree(coo->Bjmap2));
  PetscCall(PetscFree(coo->Cperm1));
  PetscCall(PetscFree2(coo->sendbuf, coo->recvbuf));
  PetscCall(PetscFree(coo));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Preallocate an MPIAIJ matrix from a list of (i,j) coordinates; subsequent MatSetValuesCOO()
   calls supply only the values. Negative indices mark entries to be ignored. */
PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, PetscInt coo_i[], PetscInt coo_j[])
{
  MPI_Comm             comm;
  PetscMPIInt          rank, size;
  PetscInt             m, n, M, N, rstart, rend, cstart, cend; /* Sizes, indices of row/col, therefore with type PetscInt */
  PetscCount           k, p, q, rem; /* Loop variables over coo arrays */
  Mat_MPIAIJ          *mpiaij = (Mat_MPIAIJ *)mat->data;
  PetscContainer       container;
  MatCOOStruct_MPIAIJ *coo;

  PetscFunctionBegin;
  /* Wipe any existing off-diagonal metadata; COO preallocation rebuilds it from scratch */
  PetscCall(PetscFree(mpiaij->garray));
  PetscCall(VecDestroy(&mpiaij->lvec));
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&mpiaij->colmap));
#else
  PetscCall(PetscFree(mpiaij->colmap));
#endif
  PetscCall(VecScatterDestroy(&mpiaij->Mvctx));
  mat->assembled     = PETSC_FALSE;
  mat->was_assembled = PETSC_FALSE;

  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCall(PetscLayoutSetUp(mat->rmap));
6432 PetscCall(PetscLayoutSetUp(mat->cmap)); 6433 PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend)); 6434 PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend)); 6435 PetscCall(MatGetLocalSize(mat, &m, &n)); 6436 PetscCall(MatGetSize(mat, &M, &N)); 6437 6438 /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */ 6439 /* entries come first, then local rows, then remote rows. */ 6440 PetscCount n1 = coo_n, *perm1; 6441 PetscInt *i1 = coo_i, *j1 = coo_j; 6442 6443 PetscCall(PetscMalloc1(n1, &perm1)); 6444 for (k = 0; k < n1; k++) perm1[k] = k; 6445 6446 /* Manipulate indices so that entries with negative row or col indices will have smallest 6447 row indices, local entries will have greater but negative row indices, and remote entries 6448 will have positive row indices. 6449 */ 6450 for (k = 0; k < n1; k++) { 6451 if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_MIN_INT; /* e.g., -2^31, minimal to move them ahead */ 6452 else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_MAX_INT; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_MAX_INT, -1] */ 6453 else { 6454 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_USER_INPUT, "MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows"); 6455 if (mpiaij->donotstash) i1[k] = PETSC_MIN_INT; /* Ignore offproc entries as if they had negative indices */ 6456 } 6457 } 6458 6459 /* Sort by row; after that, [0,k) have ignored entries, [k,rem) have local rows and [rem,n1) have remote rows */ 6460 PetscCall(PetscSortIntWithIntCountArrayPair(n1, i1, j1, perm1)); 6461 6462 /* Advance k to the first entry we need to take care of */ 6463 for (k = 0; k < n1; k++) 6464 if (i1[k] > PETSC_MIN_INT) break; 6465 PetscInt i1start = k; 6466 6467 PetscCall(PetscSortedIntUpperBound(i1, k, n1, rend - 1 - PETSC_MAX_INT, &rem)); /* rem is upper bound of the last local row */ 6468 for (; k < rem; k++) i1[k] += PETSC_MAX_INT; /* Revert row indices of local rows*/ 6469 6470 /* Send remote rows 
to their owner */ 6471 /* Find which rows should be sent to which remote ranks*/ 6472 PetscInt nsend = 0; /* Number of MPI ranks to send data to */ 6473 PetscMPIInt *sendto; /* [nsend], storing remote ranks */ 6474 PetscInt *nentries; /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */ 6475 const PetscInt *ranges; 6476 PetscInt maxNsend = size >= 128 ? 128 : size; /* Assume max 128 neighbors; realloc when needed */ 6477 6478 PetscCall(PetscLayoutGetRanges(mat->rmap, &ranges)); 6479 PetscCall(PetscMalloc2(maxNsend, &sendto, maxNsend, &nentries)); 6480 for (k = rem; k < n1;) { 6481 PetscMPIInt owner; 6482 PetscInt firstRow, lastRow; 6483 6484 /* Locate a row range */ 6485 firstRow = i1[k]; /* first row of this owner */ 6486 PetscCall(PetscLayoutFindOwner(mat->rmap, firstRow, &owner)); 6487 lastRow = ranges[owner + 1] - 1; /* last row of this owner */ 6488 6489 /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */ 6490 PetscCall(PetscSortedIntUpperBound(i1, k, n1, lastRow, &p)); 6491 6492 /* All entries in [k,p) belong to this remote owner */ 6493 if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */ 6494 PetscMPIInt *sendto2; 6495 PetscInt *nentries2; 6496 PetscInt maxNsend2 = (maxNsend <= size / 2) ? 
maxNsend * 2 : size; 6497 6498 PetscCall(PetscMalloc2(maxNsend2, &sendto2, maxNsend2, &nentries2)); 6499 PetscCall(PetscArraycpy(sendto2, sendto, maxNsend)); 6500 PetscCall(PetscArraycpy(nentries2, nentries2, maxNsend + 1)); 6501 PetscCall(PetscFree2(sendto, nentries2)); 6502 sendto = sendto2; 6503 nentries = nentries2; 6504 maxNsend = maxNsend2; 6505 } 6506 sendto[nsend] = owner; 6507 nentries[nsend] = p - k; 6508 PetscCall(PetscCountCast(p - k, &nentries[nsend])); 6509 nsend++; 6510 k = p; 6511 } 6512 6513 /* Build 1st SF to know offsets on remote to send data */ 6514 PetscSF sf1; 6515 PetscInt nroots = 1, nroots2 = 0; 6516 PetscInt nleaves = nsend, nleaves2 = 0; 6517 PetscInt *offsets; 6518 PetscSFNode *iremote; 6519 6520 PetscCall(PetscSFCreate(comm, &sf1)); 6521 PetscCall(PetscMalloc1(nsend, &iremote)); 6522 PetscCall(PetscMalloc1(nsend, &offsets)); 6523 for (k = 0; k < nsend; k++) { 6524 iremote[k].rank = sendto[k]; 6525 iremote[k].index = 0; 6526 nleaves2 += nentries[k]; 6527 PetscCheck(nleaves2 >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF leaves is too large for PetscInt"); 6528 } 6529 PetscCall(PetscSFSetGraph(sf1, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 6530 PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1, MPIU_INT, PETSC_MEMTYPE_HOST, &nroots2 /*rootdata*/, PETSC_MEMTYPE_HOST, nentries /*leafdata*/, PETSC_MEMTYPE_HOST, offsets /*leafupdate*/, MPI_SUM)); 6531 PetscCall(PetscSFFetchAndOpEnd(sf1, MPIU_INT, &nroots2, nentries, offsets, MPI_SUM)); /* Would nroots2 overflow, we check offsets[] below */ 6532 PetscCall(PetscSFDestroy(&sf1)); 6533 PetscAssert(nleaves2 == n1 - rem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT, nleaves2, n1 - rem); 6534 6535 /* Build 2nd SF to send remote COOs to their owner */ 6536 PetscSF sf2; 6537 nroots = nroots2; 6538 nleaves = nleaves2; 6539 PetscCall(PetscSFCreate(comm, &sf2)); 6540 
PetscCall(PetscSFSetFromOptions(sf2)); 6541 PetscCall(PetscMalloc1(nleaves, &iremote)); 6542 p = 0; 6543 for (k = 0; k < nsend; k++) { 6544 PetscCheck(offsets[k] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF roots is too large for PetscInt"); 6545 for (q = 0; q < nentries[k]; q++, p++) { 6546 iremote[p].rank = sendto[k]; 6547 iremote[p].index = offsets[k] + q; 6548 } 6549 } 6550 PetscCall(PetscSFSetGraph(sf2, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 6551 6552 /* Send the remote COOs to their owner */ 6553 PetscInt n2 = nroots, *i2, *j2; /* Buffers for received COOs from other ranks, along with a permutation array */ 6554 PetscCount *perm2; /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */ 6555 PetscCall(PetscMalloc3(n2, &i2, n2, &j2, n2, &perm2)); 6556 PetscAssert(rem == 0 || i1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null"); 6557 PetscAssert(rem == 0 || j1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null"); 6558 PetscInt *i1prem = i1 ? i1 + rem : NULL; /* silence ubsan warnings about pointer arithmetic on null pointer */ 6559 PetscInt *j1prem = j1 ? 
j1 + rem : NULL; 6560 PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, i1prem, PETSC_MEMTYPE_HOST, i2, MPI_REPLACE)); 6561 PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, i1prem, i2, MPI_REPLACE)); 6562 PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, j1prem, PETSC_MEMTYPE_HOST, j2, MPI_REPLACE)); 6563 PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, j1prem, j2, MPI_REPLACE)); 6564 6565 PetscCall(PetscFree(offsets)); 6566 PetscCall(PetscFree2(sendto, nentries)); 6567 6568 /* Sort received COOs by row along with the permutation array */ 6569 for (k = 0; k < n2; k++) perm2[k] = k; 6570 PetscCall(PetscSortIntWithIntCountArrayPair(n2, i2, j2, perm2)); 6571 6572 /* sf2 only sends contiguous leafdata to contiguous rootdata. We record the permutation which will be used to fill leafdata */ 6573 PetscCount *Cperm1; 6574 PetscAssert(rem == 0 || perm1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null"); 6575 PetscCount *perm1prem = perm1 ? perm1 + rem : NULL; 6576 PetscCall(PetscMalloc1(nleaves, &Cperm1)); 6577 PetscCall(PetscArraycpy(Cperm1, perm1prem, nleaves)); 6578 6579 /* Support for HYPRE matrices, kind of a hack. 
6580 Swap min column with diagonal so that diagonal values will go first */ 6581 PetscBool hypre; 6582 const char *name; 6583 PetscCall(PetscObjectGetName((PetscObject)mat, &name)); 6584 PetscCall(PetscStrcmp("_internal_COO_mat_for_hypre", name, &hypre)); 6585 if (hypre) { 6586 PetscInt *minj; 6587 PetscBT hasdiag; 6588 6589 PetscCall(PetscBTCreate(m, &hasdiag)); 6590 PetscCall(PetscMalloc1(m, &minj)); 6591 for (k = 0; k < m; k++) minj[k] = PETSC_MAX_INT; 6592 for (k = i1start; k < rem; k++) { 6593 if (j1[k] < cstart || j1[k] >= cend) continue; 6594 const PetscInt rindex = i1[k] - rstart; 6595 if ((j1[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex)); 6596 minj[rindex] = PetscMin(minj[rindex], j1[k]); 6597 } 6598 for (k = 0; k < n2; k++) { 6599 if (j2[k] < cstart || j2[k] >= cend) continue; 6600 const PetscInt rindex = i2[k] - rstart; 6601 if ((j2[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex)); 6602 minj[rindex] = PetscMin(minj[rindex], j2[k]); 6603 } 6604 for (k = i1start; k < rem; k++) { 6605 const PetscInt rindex = i1[k] - rstart; 6606 if (j1[k] < cstart || j1[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue; 6607 if (j1[k] == minj[rindex]) j1[k] = i1[k] + (cstart - rstart); 6608 else if ((j1[k] - cstart) == rindex) j1[k] = minj[rindex]; 6609 } 6610 for (k = 0; k < n2; k++) { 6611 const PetscInt rindex = i2[k] - rstart; 6612 if (j2[k] < cstart || j2[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue; 6613 if (j2[k] == minj[rindex]) j2[k] = i2[k] + (cstart - rstart); 6614 else if ((j2[k] - cstart) == rindex) j2[k] = minj[rindex]; 6615 } 6616 PetscCall(PetscBTDestroy(&hasdiag)); 6617 PetscCall(PetscFree(minj)); 6618 } 6619 6620 /* Split local COOs and received COOs into diag/offdiag portions */ 6621 PetscCount *rowBegin1, *rowMid1, *rowEnd1; 6622 PetscCount *Ajmap1, *Aperm1, *Bjmap1, *Bperm1; 6623 PetscCount Annz1, Bnnz1, Atot1, Btot1; 6624 PetscCount *rowBegin2, *rowMid2, *rowEnd2; 6625 PetscCount *Ajmap2, *Aperm2, 
*Bjmap2, *Bperm2; 6626 PetscCount Annz2, Bnnz2, Atot2, Btot2; 6627 6628 PetscCall(PetscCalloc3(m, &rowBegin1, m, &rowMid1, m, &rowEnd1)); 6629 PetscCall(PetscCalloc3(m, &rowBegin2, m, &rowMid2, m, &rowEnd2)); 6630 PetscCall(MatSplitEntries_Internal(mat, rem, i1, j1, perm1, rowBegin1, rowMid1, rowEnd1, &Atot1, &Aperm1, &Annz1, &Ajmap1, &Btot1, &Bperm1, &Bnnz1, &Bjmap1)); 6631 PetscCall(MatSplitEntries_Internal(mat, n2, i2, j2, perm2, rowBegin2, rowMid2, rowEnd2, &Atot2, &Aperm2, &Annz2, &Ajmap2, &Btot2, &Bperm2, &Bnnz2, &Bjmap2)); 6632 6633 /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */ 6634 PetscInt *Ai, *Bi; 6635 PetscInt *Aj, *Bj; 6636 6637 PetscCall(PetscMalloc1(m + 1, &Ai)); 6638 PetscCall(PetscMalloc1(m + 1, &Bi)); 6639 PetscCall(PetscMalloc1(Annz1 + Annz2, &Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */ 6640 PetscCall(PetscMalloc1(Bnnz1 + Bnnz2, &Bj)); 6641 6642 PetscCount *Aimap1, *Bimap1, *Aimap2, *Bimap2; 6643 PetscCall(PetscMalloc1(Annz1, &Aimap1)); 6644 PetscCall(PetscMalloc1(Bnnz1, &Bimap1)); 6645 PetscCall(PetscMalloc1(Annz2, &Aimap2)); 6646 PetscCall(PetscMalloc1(Bnnz2, &Bimap2)); 6647 6648 PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowBegin1, rowMid1, rowBegin2, rowMid2, Ajmap1, Ajmap2, Aimap1, Aimap2, Ai, Aj)); 6649 PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowMid1, rowEnd1, rowMid2, rowEnd2, Bjmap1, Bjmap2, Bimap1, Bimap2, Bi, Bj)); 6650 6651 /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we */ 6652 /* expect nonzeros in A/B most likely have local contributing entries */ 6653 PetscInt Annz = Ai[m]; 6654 PetscInt Bnnz = Bi[m]; 6655 PetscCount *Ajmap1_new, *Bjmap1_new; 6656 6657 PetscCall(PetscMalloc1(Annz + 1, &Ajmap1_new)); 6658 PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap1_new)); 6659 6660 PetscCall(ExpandJmap_Internal(Annz1, Annz, Aimap1, Ajmap1, Ajmap1_new)); 6661 PetscCall(ExpandJmap_Internal(Bnnz1, Bnnz, Bimap1, Bjmap1, Bjmap1_new)); 6662 
6663 PetscCall(PetscFree(Aimap1)); 6664 PetscCall(PetscFree(Ajmap1)); 6665 PetscCall(PetscFree(Bimap1)); 6666 PetscCall(PetscFree(Bjmap1)); 6667 PetscCall(PetscFree3(rowBegin1, rowMid1, rowEnd1)); 6668 PetscCall(PetscFree3(rowBegin2, rowMid2, rowEnd2)); 6669 PetscCall(PetscFree(perm1)); 6670 PetscCall(PetscFree3(i2, j2, perm2)); 6671 6672 Ajmap1 = Ajmap1_new; 6673 Bjmap1 = Bjmap1_new; 6674 6675 /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */ 6676 if (Annz < Annz1 + Annz2) { 6677 PetscInt *Aj_new; 6678 PetscCall(PetscMalloc1(Annz, &Aj_new)); 6679 PetscCall(PetscArraycpy(Aj_new, Aj, Annz)); 6680 PetscCall(PetscFree(Aj)); 6681 Aj = Aj_new; 6682 } 6683 6684 if (Bnnz < Bnnz1 + Bnnz2) { 6685 PetscInt *Bj_new; 6686 PetscCall(PetscMalloc1(Bnnz, &Bj_new)); 6687 PetscCall(PetscArraycpy(Bj_new, Bj, Bnnz)); 6688 PetscCall(PetscFree(Bj)); 6689 Bj = Bj_new; 6690 } 6691 6692 /* Create new submatrices for on-process and off-process coupling */ 6693 PetscScalar *Aa, *Ba; 6694 MatType rtype; 6695 Mat_SeqAIJ *a, *b; 6696 PetscObjectState state; 6697 PetscCall(PetscCalloc1(Annz, &Aa)); /* Zero matrix on device */ 6698 PetscCall(PetscCalloc1(Bnnz, &Ba)); 6699 /* make Aj[] local, i.e, based off the start column of the diagonal portion */ 6700 if (cstart) { 6701 for (k = 0; k < Annz; k++) Aj[k] -= cstart; 6702 } 6703 6704 PetscCall(MatGetRootType_Private(mat, &rtype)); 6705 6706 MatSeqXAIJGetOptions_Private(mpiaij->A); 6707 PetscCall(MatDestroy(&mpiaij->A)); 6708 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, Ai, Aj, Aa, &mpiaij->A)); 6709 PetscCall(MatSetBlockSizesFromMats(mpiaij->A, mat, mat)); 6710 MatSeqXAIJRestoreOptions_Private(mpiaij->A); 6711 6712 MatSeqXAIJGetOptions_Private(mpiaij->B); 6713 PetscCall(MatDestroy(&mpiaij->B)); 6714 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, mat->cmap->N, Bi, Bj, Ba, &mpiaij->B)); 6715 PetscCall(MatSetBlockSizesFromMats(mpiaij->B, mat, mat)); 6716 
MatSeqXAIJRestoreOptions_Private(mpiaij->B); 6717 6718 PetscCall(MatSetUpMultiply_MPIAIJ(mat)); 6719 mat->was_assembled = PETSC_TRUE; // was_assembled in effect means the Mvctx is built; doing so avoids redundant MatSetUpMultiply_MPIAIJ 6720 state = mpiaij->A->nonzerostate + mpiaij->B->nonzerostate; 6721 PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat))); 6722 6723 a = (Mat_SeqAIJ *)mpiaij->A->data; 6724 b = (Mat_SeqAIJ *)mpiaij->B->data; 6725 a->singlemalloc = b->singlemalloc = PETSC_FALSE; /* Let newmat own Ai,Aj,Aa,Bi,Bj,Ba */ 6726 a->free_a = b->free_a = PETSC_TRUE; 6727 a->free_ij = b->free_ij = PETSC_TRUE; 6728 6729 /* conversion must happen AFTER multiply setup */ 6730 PetscCall(MatConvert(mpiaij->A, rtype, MAT_INPLACE_MATRIX, &mpiaij->A)); 6731 PetscCall(MatConvert(mpiaij->B, rtype, MAT_INPLACE_MATRIX, &mpiaij->B)); 6732 PetscCall(VecDestroy(&mpiaij->lvec)); 6733 PetscCall(MatCreateVecs(mpiaij->B, &mpiaij->lvec, NULL)); 6734 6735 // Put the COO struct in a container and then attach that to the matrix 6736 PetscCall(PetscMalloc1(1, &coo)); 6737 coo->n = coo_n; 6738 coo->sf = sf2; 6739 coo->sendlen = nleaves; 6740 coo->recvlen = nroots; 6741 coo->Annz = Annz; 6742 coo->Bnnz = Bnnz; 6743 coo->Annz2 = Annz2; 6744 coo->Bnnz2 = Bnnz2; 6745 coo->Atot1 = Atot1; 6746 coo->Atot2 = Atot2; 6747 coo->Btot1 = Btot1; 6748 coo->Btot2 = Btot2; 6749 coo->Ajmap1 = Ajmap1; 6750 coo->Aperm1 = Aperm1; 6751 coo->Bjmap1 = Bjmap1; 6752 coo->Bperm1 = Bperm1; 6753 coo->Aimap2 = Aimap2; 6754 coo->Ajmap2 = Ajmap2; 6755 coo->Aperm2 = Aperm2; 6756 coo->Bimap2 = Bimap2; 6757 coo->Bjmap2 = Bjmap2; 6758 coo->Bperm2 = Bperm2; 6759 coo->Cperm1 = Cperm1; 6760 // Allocate in preallocation. 
// If not used, it has zero cost on host
  PetscCall(PetscMalloc2(coo->sendlen, &coo->sendbuf, coo->recvlen, &coo->recvbuf));
  PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container));
  PetscCall(PetscContainerSetPointer(container, coo));
  PetscCall(PetscContainerSetUserDestroy(container, MatCOOStructDestroy_MPIAIJ));
  PetscCall(PetscObjectCompose((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject)container));
  PetscCall(PetscContainerDestroy(&container));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Insert/add the values v[], given in the original COO order passed to MatSetPreallocationCOO(),
   into the diagonal (A) and off-diagonal (B) blocks, using the maps precomputed in the attached
   MatCOOStruct_MPIAIJ. Remote entries are shipped to their owners via coo->sf, overlapped with
   the local accumulation. */
static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat, const PetscScalar v[], InsertMode imode)
{
  Mat_MPIAIJ          *mpiaij = (Mat_MPIAIJ *)mat->data;
  Mat                  A = mpiaij->A, B = mpiaij->B;
  PetscScalar         *Aa, *Ba;
  PetscScalar         *sendbuf, *recvbuf;
  const PetscCount    *Ajmap1, *Ajmap2, *Aimap2;
  const PetscCount    *Bjmap1, *Bjmap2, *Bimap2;
  const PetscCount    *Aperm1, *Aperm2, *Bperm1, *Bperm2;
  const PetscCount    *Cperm1;
  PetscContainer       container;
  MatCOOStruct_MPIAIJ *coo;

  PetscFunctionBegin;
  /* The COO maps were attached to the matrix by MatSetPreallocationCOO_MPIAIJ() */
  PetscCall(PetscObjectQuery((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject *)&container));
  PetscCheck(container, PetscObjectComm((PetscObject)mat), PETSC_ERR_PLIB, "Not found MatCOOStruct on this matrix");
  PetscCall(PetscContainerGetPointer(container, (void **)&coo));
  sendbuf = coo->sendbuf;
  recvbuf = coo->recvbuf;
  Ajmap1  = coo->Ajmap1;
  Ajmap2  = coo->Ajmap2;
  Aimap2  = coo->Aimap2;
  Bjmap1  = coo->Bjmap1;
  Bjmap2  = coo->Bjmap2;
  Bimap2  = coo->Bimap2;
  Aperm1  = coo->Aperm1;
  Aperm2  = coo->Aperm2;
  Bperm1  = coo->Bperm1;
  Bperm2  = coo->Bperm2;
  Cperm1  = coo->Cperm1;

  PetscCall(MatSeqAIJGetArray(A, &Aa)); /* Might read and write matrix values */
  PetscCall(MatSeqAIJGetArray(B, &Ba));

  /* Pack entries to be sent to remote */
  for (PetscCount i = 0; i < coo->sendlen; i++) sendbuf[i] = v[Cperm1[i]];

  /* Send remote entries to their owner and overlap the communication with local computation */
  PetscCall(PetscSFReduceWithMemTypeBegin(coo->sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, sendbuf, PETSC_MEMTYPE_HOST, recvbuf, MPI_REPLACE));
  /* Add local entries to A and B */
  for (PetscCount i = 0; i < coo->Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */
    PetscScalar sum = 0.0;                     /* Do partial summation first to improve numerical stability */
    for (PetscCount k = Ajmap1[i]; k < Ajmap1[i + 1]; k++) sum += v[Aperm1[k]];
    Aa[i] = (imode == INSERT_VALUES ? 0.0 : Aa[i]) + sum;
  }
  for (PetscCount i = 0; i < coo->Bnnz; i++) {
    PetscScalar sum = 0.0;
    for (PetscCount k = Bjmap1[i]; k < Bjmap1[i + 1]; k++) sum += v[Bperm1[k]];
    Ba[i] = (imode == INSERT_VALUES ? 0.0 : Ba[i]) + sum;
  }
  PetscCall(PetscSFReduceEnd(coo->sf, MPIU_SCALAR, sendbuf, recvbuf, MPI_REPLACE));

  /* Add received remote entries to A and B; imode was already applied above, so these are pure additions */
  for (PetscCount i = 0; i < coo->Annz2; i++) {
    for (PetscCount k = Ajmap2[i]; k < Ajmap2[i + 1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]];
  }
  for (PetscCount i = 0; i < coo->Bnnz2; i++) {
    for (PetscCount k = Bjmap2[i]; k < Bjmap2[i + 1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]];
  }
  PetscCall(MatSeqAIJRestoreArray(A, &Aa));
  PetscCall(MatSeqAIJRestoreArray(B, &Ba));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
   MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.

   Options Database Keys:
.
-mat_type mpiaij - sets the matrix type to `MATMPIAIJ` during a call to `MatSetFromOptions()`

   Level: beginner

   Notes:
   `MatSetValues()` may be called for this matrix type with a `NULL` argument for the numerical values,
   in this case the values associated with the rows and columns one passes in are set to zero
   in the matrix

   `MatSetOptions`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this no
   space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored

.seealso: [](ch_matrices), `Mat`, `MATSEQAIJ`, `MATAIJ`, `MatCreateAIJ()`
M*/
/* Type constructor registered for MATMPIAIJ: installs the function table, initializes the
   per-process data, and registers the conversion/product/COO plugin entry points. */
PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ *b;
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));

  PetscCall(PetscNew(&b));
  B->data       = (void *)b;
  B->ops[0]     = MatOps_Values;
  B->assembled  = PETSC_FALSE;
  B->insertmode = NOT_SET_VALUES;
  b->size       = size;

  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));

  /* build cache for off array entries formed */
  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL;
  b->garray      = NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = NULL;
  b->rowvalues    = NULL;
  b->getrowactive = PETSC_FALSE;

  /* flexible pointer used in CUSPARSE classes */
  b->spptr = NULL;

  /* Register type-specific entry points queried by name elsewhere in PETSc */
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetUseScalableIncreaseOverlap_C", MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocation_C", MatMPIAIJSetPreallocation_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetPreallocation_C", MatResetPreallocation_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocationCSR_C", MatMPIAIJSetPreallocationCSR_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijperm_C", MatConvert_MPIAIJ_MPIAIJPERM));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijsell_C", MatConvert_MPIAIJ_MPIAIJSELL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcusparse_C", MatConvert_MPIAIJ_MPIAIJCUSPARSE));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijhipsparse_C", MatConvert_MPIAIJ_MPIAIJHIPSPARSE));
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijkokkos_C", MatConvert_MPIAIJ_MPIAIJKokkos));
#endif
#if defined(PETSC_HAVE_MKL_SPARSE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijmkl_C", MatConvert_MPIAIJ_MPIAIJMKL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcrl_C", MatConvert_MPIAIJ_MPIAIJCRL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpibaij_C", MatConvert_MPIAIJ_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisbaij_C", MatConvert_MPIAIJ_MPISBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_elemental_C", MatConvert_MPIAIJ_Elemental));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_scalapack_C", MatConvert_AIJ_ScaLAPACK));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_is_C", MatConvert_XAIJ_IS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisell_C", MatConvert_MPIAIJ_MPISELL));
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_hypre_C", MatConvert_AIJ_HYPRE));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", MatProductSetFromOptions_Transpose_AIJ_AIJ));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_is_mpiaij_C", MatProductSetFromOptions_IS_XAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpiaij_C", MatProductSetFromOptions_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetPreallocationCOO_C", MatSetPreallocationCOO_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetValuesCOO_C", MatSetValuesCOO_MPIAIJ));
  PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIAIJ));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
   MatCreateMPIAIJWithSplitArrays - creates a `MATMPIAIJ` matrix using arrays that contain the "diagonal"
   and "off-diagonal" part of the matrix in CSR format.

   Collective

   Input Parameters:
+ comm - MPI communicator
. m - number of local rows (Cannot be `PETSC_DECIDE`)
.
n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product $y = Ax$. (or `PETSC_DECIDE` to have
       calculated if `N` is given) For square matrices `n` is almost always `m`.
. M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
. N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
. i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
. j - column indices, which must be local, i.e., based off the start column of the diagonal portion
. a - matrix values
. oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
. oj - column indices, which must be global, representing global columns in the `MATMPIAIJ` matrix
- oa - matrix values

   Output Parameter:
. mat - the matrix

   Level: advanced

   Notes:
       The `i`, `j`, and `a` arrays ARE NOT copied by this routine into the internal format used by PETSc (even in Fortran). The user
       must free the arrays once the matrix has been destroyed and not before.

       The `i` and `j` indices are 0 based

       See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix

       This sets local rows and cannot be used to set off-processor values.

       Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
       legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
       not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
       the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
       keep track of the underlying array. Use `MatSetOption`(A,`MAT_NO_OFF_PROC_ENTRIES`,`PETSC_TRUE`) to disable all
       communication if it is known that only local entries will be set.

.seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
          `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()`
@*/
PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt i[], PetscInt j[], PetscScalar a[], PetscInt oi[], PetscInt oj[], PetscScalar oa[], Mat *mat)
{
  Mat_MPIAIJ *maij;

  PetscFunctionBegin;
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
  PetscCheck(oi[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "oi (row indices) must start with 0");
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatSetSizes(*mat, m, n, M, N));
  PetscCall(MatSetType(*mat, MATMPIAIJ));
  maij = (Mat_MPIAIJ *)(*mat)->data;

  (*mat)->preallocated = PETSC_TRUE;

  PetscCall(PetscLayoutSetUp((*mat)->rmap));
  PetscCall(PetscLayoutSetUp((*mat)->cmap));

  /* wrap the user arrays: A gets local column indices, B gets global ones (width cmap->N) */
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, i, j, a, &maij->A));
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, (*mat)->cmap->N, oi, oj, oa, &maij->B));

  /* assembly is purely local by construction, so suppress any stash communication */
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
  PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* State carried between the symbolic and numeric phases of the backend mat-mat products */
typedef struct {
  Mat       *mp;    /* intermediate products */
  PetscBool *mptmp; /* is the intermediate product temporary ? */
  PetscInt   cp;    /* number of intermediate products */

  /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */
  PetscInt    *startsj_s, *startsj_r;
  PetscScalar *bufa;
  Mat          P_oth;

  /* may take advantage of merging product->B */
  Mat Bloc; /* B-local by merging diag and off-diag */

  /* cusparse does not have support to split between symbolic and numeric phases.
     When api_user is true, we don't need to update the numerical values
     of the temporary storage */
  PetscBool reusesym;

  /* support for COO values insertion */
  PetscScalar *coo_v, *coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */
  PetscInt   **own;           /* own[i] points to address of on-process COO indices for Mat mp[i] */
  PetscInt   **off;           /* off[i] points to address of off-process COO indices for Mat mp[i] */
  PetscBool    hasoffproc;    /* if true, have off-process values insertion (i.e.
AtB or PtAP) */
  PetscSF      sf; /* used for non-local values insertion and memory malloc */
  PetscMemType mtype;

  /* customization */
  PetscBool abmerge;
  PetscBool P_oth_bind;
} MatMatMPIAIJBACKEND;

/* Destroy callback for the MatMatMPIAIJBACKEND data attached to a product matrix:
   releases every buffer and intermediate matrix allocated by the symbolic phase. */
static PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data)
{
  MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND *)data;
  PetscInt             i;

  PetscFunctionBegin;
  PetscCall(PetscFree2(mmdata->startsj_s, mmdata->startsj_r));
  PetscCall(PetscFree(mmdata->bufa));
  /* coo_v/coo_w were allocated through the SF with memtype mtype, so release them the same way */
  PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_v));
  PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_w));
  PetscCall(MatDestroy(&mmdata->P_oth));
  PetscCall(MatDestroy(&mmdata->Bloc));
  PetscCall(PetscSFDestroy(&mmdata->sf));
  for (i = 0; i < mmdata->cp; i++) PetscCall(MatDestroy(&mmdata->mp[i]));
  PetscCall(PetscFree2(mmdata->mp, mmdata->mptmp));
  /* own[0]/off[0] own the flat index storage the per-product pointers point into */
  PetscCall(PetscFree(mmdata->own[0]));
  PetscCall(PetscFree(mmdata->own));
  PetscCall(PetscFree(mmdata->off[0]));
  PetscCall(PetscFree(mmdata->off));
  PetscCall(PetscFree(mmdata));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Copy selected n entries with indices in idx[] of A to v[].
   If idx is NULL, copy the whole data array of A to v[]
*/
static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[])
{
  PetscErrorCode (*f)(Mat, PetscInt, const PetscInt[], PetscScalar[]);

  PetscFunctionBegin;
  /* Prefer a type-specific (e.g. device-aware) implementation if the Mat provides one */
  PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJCopySubArray_C", &f));
  if (f) {
    PetscCall((*f)(A, n, idx, v));
  } else {
    const PetscScalar *vv;

    PetscCall(MatSeqAIJGetArrayRead(A, &vv));
    if (n && idx) {
      PetscScalar    *w  = v;
      const PetscInt *oi = idx;
      PetscInt        j;

      for (j = 0; j < n; j++) *w++ = vv[*oi++];
    } else {
      /* NULL idx: contiguous copy of the first n values */
      PetscCall(PetscArraycpy(v, vv, n));
    }
    PetscCall(MatSeqAIJRestoreArrayRead(A, &vv));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Numeric phase of the backend products: recompute the intermediate products, then
   scatter their values into the COO buffers and insert them into C via MatSetValuesCOO(). */
static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C)
{
  MatMatMPIAIJBACKEND *mmdata;
  PetscInt             i, n_d, n_o;

  PetscFunctionBegin;
  MatCheckProduct(C, 1);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  mmdata = (MatMatMPIAIJBACKEND *)C->product->data;
  if (!mmdata->reusesym) { /* update temporary matrices */
    if (mmdata->P_oth) PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A, C->product->B, MAT_REUSE_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
    if (mmdata->Bloc) PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B, MAT_REUSE_MATRIX, NULL, &mmdata->Bloc));
  }
  mmdata->reusesym = PETSC_FALSE; /* first numeric after symbolic may reuse; later calls must refresh */

  for (i = 0; i < mmdata->cp; i++) {
    PetscCheck(mmdata->mp[i]->ops->productnumeric, PetscObjectComm((PetscObject)mmdata->mp[i]), PETSC_ERR_PLIB, "Missing numeric op for %s", MatProductTypes[mmdata->mp[i]->product->type]);
    PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]));
  }
  /* Gather values of the non-temporary products: n_d counts on-process, n_o off-process scalars */
  for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) {
    PetscInt noff = mmdata->off[i + 1] - mmdata->off[i];

    if (mmdata->mptmp[i]) continue;
    if (noff) {
      PetscInt nown = mmdata->own[i + 1] - mmdata->own[i];

      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], noff, mmdata->off[i], mmdata->coo_w + n_o));
      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], nown, mmdata->own[i], mmdata->coo_v + n_d));
      n_o += noff;
      n_d += nown;
    } else {
      Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mmdata->mp[i]->data;

      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], mm->nz, NULL, mmdata->coo_v + n_d));
      n_d += mm->nz;
    }
  }
  if (mmdata->hasoffproc) { /* offprocess insertion */
    PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
    PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
  }
  PetscCall(MatSetValuesCOO(C, mmdata->coo_v, INSERT_VALUES));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Support for Pt * A, A * P, or Pt * A * P */
#define MAX_NUMBER_INTERMEDIATE 4
PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C)
{
  Mat_Product            *product = C->product;
  Mat                     A, P, mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */
  Mat_MPIAIJ             *a, *p;
  MatMatMPIAIJBACKEND    *mmdata;
  ISLocalToGlobalMapping  P_oth_l2g = NULL;
  IS                      glob      = NULL;
  const char             *prefix;
  char                    pprefix[256];
  const PetscInt         *globidx, *P_oth_idx;
  PetscInt                i, j, cp, m, n, M, N, *coo_i, *coo_j;
  PetscCount              ncoo, ncoo_d, ncoo_o, ncoo_oown;
  PetscInt                cmapt[MAX_NUMBER_INTERMEDIATE], rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[].
*/ 7154 /* type-0: consecutive, start from 0; type-1: consecutive with */ 7155 /* a base offset; type-2: sparse with a local to global map table */ 7156 const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE], *rmapa[MAX_NUMBER_INTERMEDIATE]; /* col/row local to global map array (table) for type-2 map type */ 7157 7158 MatProductType ptype; 7159 PetscBool mptmp[MAX_NUMBER_INTERMEDIATE], hasoffproc = PETSC_FALSE, iscuda, iship, iskokk; 7160 PetscMPIInt size; 7161 7162 PetscFunctionBegin; 7163 MatCheckProduct(C, 1); 7164 PetscCheck(!product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 7165 ptype = product->type; 7166 if (product->A->symmetric == PETSC_BOOL3_TRUE && ptype == MATPRODUCT_AtB) { 7167 ptype = MATPRODUCT_AB; 7168 product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE; 7169 } 7170 switch (ptype) { 7171 case MATPRODUCT_AB: 7172 A = product->A; 7173 P = product->B; 7174 m = A->rmap->n; 7175 n = P->cmap->n; 7176 M = A->rmap->N; 7177 N = P->cmap->N; 7178 hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */ 7179 break; 7180 case MATPRODUCT_AtB: 7181 P = product->A; 7182 A = product->B; 7183 m = P->cmap->n; 7184 n = A->cmap->n; 7185 M = P->cmap->N; 7186 N = A->cmap->N; 7187 hasoffproc = PETSC_TRUE; 7188 break; 7189 case MATPRODUCT_PtAP: 7190 A = product->A; 7191 P = product->B; 7192 m = P->cmap->n; 7193 n = P->cmap->n; 7194 M = P->cmap->N; 7195 N = P->cmap->N; 7196 hasoffproc = PETSC_TRUE; 7197 break; 7198 default: 7199 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]); 7200 } 7201 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C), &size)); 7202 if (size == 1) hasoffproc = PETSC_FALSE; 7203 7204 /* defaults */ 7205 for (i = 0; i < MAX_NUMBER_INTERMEDIATE; i++) { 7206 mp[i] = NULL; 7207 mptmp[i] = PETSC_FALSE; 7208 rmapt[i] = -1; 7209 cmapt[i] = -1; 7210 rmapa[i] = NULL; 7211 cmapa[i] = NULL; 7212 } 7213 7214 /* customization */ 
7215 PetscCall(PetscNew(&mmdata)); 7216 mmdata->reusesym = product->api_user; 7217 if (ptype == MATPRODUCT_AB) { 7218 if (product->api_user) { 7219 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatMult", "Mat"); 7220 PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL)); 7221 PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7222 PetscOptionsEnd(); 7223 } else { 7224 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat"); 7225 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL)); 7226 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7227 PetscOptionsEnd(); 7228 } 7229 } else if (ptype == MATPRODUCT_PtAP) { 7230 if (product->api_user) { 7231 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatPtAP", "Mat"); 7232 PetscCall(PetscOptionsBool("-matptap_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7233 PetscOptionsEnd(); 7234 } else { 7235 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat"); 7236 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7237 PetscOptionsEnd(); 7238 } 7239 } 7240 a = (Mat_MPIAIJ *)A->data; 7241 p = (Mat_MPIAIJ *)P->data; 7242 PetscCall(MatSetSizes(C, m, n, M, N)); 7243 PetscCall(PetscLayoutSetUp(C->rmap)); 7244 PetscCall(PetscLayoutSetUp(C->cmap)); 7245 PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 7246 
PetscCall(MatGetOptionsPrefix(C, &prefix)); 7247 7248 cp = 0; 7249 switch (ptype) { 7250 case MATPRODUCT_AB: /* A * P */ 7251 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7252 7253 /* A_diag * P_local (merged or not) */ 7254 if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */ 7255 /* P is product->B */ 7256 PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7257 PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp])); 7258 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7259 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7260 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7261 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7262 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7263 mp[cp]->product->api_user = product->api_user; 7264 PetscCall(MatProductSetFromOptions(mp[cp])); 7265 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7266 PetscCall(ISGetIndices(glob, &globidx)); 7267 rmapt[cp] = 1; 7268 cmapt[cp] = 2; 7269 cmapa[cp] = globidx; 7270 mptmp[cp] = PETSC_FALSE; 7271 cp++; 7272 } else { /* A_diag * P_diag and A_diag * P_off */ 7273 PetscCall(MatProductCreate(a->A, p->A, NULL, &mp[cp])); 7274 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7275 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7276 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7277 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7278 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7279 mp[cp]->product->api_user = product->api_user; 7280 PetscCall(MatProductSetFromOptions(mp[cp])); 7281 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7282 rmapt[cp] = 1; 7283 cmapt[cp] = 1; 7284 mptmp[cp] = PETSC_FALSE; 7285 cp++; 7286 PetscCall(MatProductCreate(a->A, p->B, NULL, &mp[cp])); 7287 
PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7288 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7289 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7290 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7291 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7292 mp[cp]->product->api_user = product->api_user; 7293 PetscCall(MatProductSetFromOptions(mp[cp])); 7294 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7295 rmapt[cp] = 1; 7296 cmapt[cp] = 2; 7297 cmapa[cp] = p->garray; 7298 mptmp[cp] = PETSC_FALSE; 7299 cp++; 7300 } 7301 7302 /* A_off * P_other */ 7303 if (mmdata->P_oth) { 7304 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); /* make P_oth use local col ids */ 7305 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx)); 7306 PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)a->B)->type_name)); 7307 PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind)); 7308 PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp])); 7309 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7310 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7311 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7312 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7313 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7314 mp[cp]->product->api_user = product->api_user; 7315 PetscCall(MatProductSetFromOptions(mp[cp])); 7316 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7317 rmapt[cp] = 1; 7318 cmapt[cp] = 2; 7319 cmapa[cp] = P_oth_idx; 7320 mptmp[cp] = PETSC_FALSE; 7321 cp++; 7322 } 7323 break; 7324 7325 case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */ 7326 /* A is product->B */ 7327 PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7328 if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */ 7329 PetscCall(MatProductCreate(mmdata->Bloc, mmdata->Bloc, 
NULL, &mp[cp])); 7330 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7331 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7332 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7333 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7334 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7335 mp[cp]->product->api_user = product->api_user; 7336 PetscCall(MatProductSetFromOptions(mp[cp])); 7337 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7338 PetscCall(ISGetIndices(glob, &globidx)); 7339 rmapt[cp] = 2; 7340 rmapa[cp] = globidx; 7341 cmapt[cp] = 2; 7342 cmapa[cp] = globidx; 7343 mptmp[cp] = PETSC_FALSE; 7344 cp++; 7345 } else { 7346 PetscCall(MatProductCreate(p->A, mmdata->Bloc, NULL, &mp[cp])); 7347 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7348 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7349 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7350 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7351 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7352 mp[cp]->product->api_user = product->api_user; 7353 PetscCall(MatProductSetFromOptions(mp[cp])); 7354 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7355 PetscCall(ISGetIndices(glob, &globidx)); 7356 rmapt[cp] = 1; 7357 cmapt[cp] = 2; 7358 cmapa[cp] = globidx; 7359 mptmp[cp] = PETSC_FALSE; 7360 cp++; 7361 PetscCall(MatProductCreate(p->B, mmdata->Bloc, NULL, &mp[cp])); 7362 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7363 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7364 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7365 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7366 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7367 mp[cp]->product->api_user = product->api_user; 7368 PetscCall(MatProductSetFromOptions(mp[cp])); 7369 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7370 rmapt[cp] = 2; 7371 rmapa[cp] = p->garray; 7372 cmapt[cp] = 
2; 7373 cmapa[cp] = globidx; 7374 mptmp[cp] = PETSC_FALSE; 7375 cp++; 7376 } 7377 break; 7378 case MATPRODUCT_PtAP: 7379 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7380 /* P is product->B */ 7381 PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7382 PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp])); 7383 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_PtAP)); 7384 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7385 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7386 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7387 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7388 mp[cp]->product->api_user = product->api_user; 7389 PetscCall(MatProductSetFromOptions(mp[cp])); 7390 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7391 PetscCall(ISGetIndices(glob, &globidx)); 7392 rmapt[cp] = 2; 7393 rmapa[cp] = globidx; 7394 cmapt[cp] = 2; 7395 cmapa[cp] = globidx; 7396 mptmp[cp] = PETSC_FALSE; 7397 cp++; 7398 if (mmdata->P_oth) { 7399 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); 7400 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx)); 7401 PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)a->B)->type_name)); 7402 PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind)); 7403 PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp])); 7404 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7405 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7406 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7407 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7408 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7409 mp[cp]->product->api_user = product->api_user; 7410 PetscCall(MatProductSetFromOptions(mp[cp])); 7411 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7412 mptmp[cp] = PETSC_TRUE; 
7413 cp++; 7414 PetscCall(MatProductCreate(mmdata->Bloc, mp[1], NULL, &mp[cp])); 7415 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7416 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7417 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7418 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7419 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7420 mp[cp]->product->api_user = product->api_user; 7421 PetscCall(MatProductSetFromOptions(mp[cp])); 7422 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7423 rmapt[cp] = 2; 7424 rmapa[cp] = globidx; 7425 cmapt[cp] = 2; 7426 cmapa[cp] = P_oth_idx; 7427 mptmp[cp] = PETSC_FALSE; 7428 cp++; 7429 } 7430 break; 7431 default: 7432 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]); 7433 } 7434 /* sanity check */ 7435 if (size > 1) 7436 for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected offproc map type for product %" PetscInt_FMT, i); 7437 7438 PetscCall(PetscMalloc2(cp, &mmdata->mp, cp, &mmdata->mptmp)); 7439 for (i = 0; i < cp; i++) { 7440 mmdata->mp[i] = mp[i]; 7441 mmdata->mptmp[i] = mptmp[i]; 7442 } 7443 mmdata->cp = cp; 7444 C->product->data = mmdata; 7445 C->product->destroy = MatDestroy_MatMatMPIAIJBACKEND; 7446 C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND; 7447 7448 /* memory type */ 7449 mmdata->mtype = PETSC_MEMTYPE_HOST; 7450 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iscuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, "")); 7451 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iship, MATSEQAIJHIPSPARSE, MATMPIAIJHIPSPARSE, "")); 7452 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iskokk, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, "")); 7453 if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA; 7454 else if (iship) mmdata->mtype = PETSC_MEMTYPE_HIP; 7455 else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS; 7456 7457 /* prepare coo 
coordinates for values insertion */ 7458 7459 /* count total nonzeros of those intermediate seqaij Mats 7460 ncoo_d: # of nonzeros of matrices that do not have offproc entries 7461 ncoo_o: # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs 7462 ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally 7463 */ 7464 for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) { 7465 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7466 if (mptmp[cp]) continue; 7467 if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scatter to all processes (might include self) */ 7468 const PetscInt *rmap = rmapa[cp]; 7469 const PetscInt mr = mp[cp]->rmap->n; 7470 const PetscInt rs = C->rmap->rstart; 7471 const PetscInt re = C->rmap->rend; 7472 const PetscInt *ii = mm->i; 7473 for (i = 0; i < mr; i++) { 7474 const PetscInt gr = rmap[i]; 7475 const PetscInt nz = ii[i + 1] - ii[i]; 7476 if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */ 7477 else ncoo_oown += nz; /* this row is local */ 7478 } 7479 } else ncoo_d += mm->nz; 7480 } 7481 7482 /* 7483 ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc 7484 7485 ncoo = ncoo_d + ncoo_oown + ncoo2, which ncoo2 is number of nonzeros inserted to me by other procs. 7486 7487 off[0] points to a big index array, which is shared by off[1,2,...]. Similarly, for own[0]. 7488 7489 off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others 7490 own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally 7491 so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others. 7492 7493 coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc. 7494 Ex. 
coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaining part stores i of nonzeros I will receive. 7495 */ 7496 PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->off)); /* +1 to make a csr-like data structure */ 7497 PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->own)); 7498 7499 /* gather (i,j) of nonzeros inserted by remote procs */ 7500 if (hasoffproc) { 7501 PetscSF msf; 7502 PetscInt ncoo2, *coo_i2, *coo_j2; 7503 7504 PetscCall(PetscMalloc1(ncoo_o, &mmdata->off[0])); 7505 PetscCall(PetscMalloc1(ncoo_oown, &mmdata->own[0])); 7506 PetscCall(PetscMalloc2(ncoo_o, &coo_i, ncoo_o, &coo_j)); /* to collect (i,j) of entries to be sent to others */ 7507 7508 for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) { 7509 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7510 PetscInt *idxoff = mmdata->off[cp]; 7511 PetscInt *idxown = mmdata->own[cp]; 7512 if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */ 7513 const PetscInt *rmap = rmapa[cp]; 7514 const PetscInt *cmap = cmapa[cp]; 7515 const PetscInt *ii = mm->i; 7516 PetscInt *coi = coo_i + ncoo_o; 7517 PetscInt *coj = coo_j + ncoo_o; 7518 const PetscInt mr = mp[cp]->rmap->n; 7519 const PetscInt rs = C->rmap->rstart; 7520 const PetscInt re = C->rmap->rend; 7521 const PetscInt cs = C->cmap->rstart; 7522 for (i = 0; i < mr; i++) { 7523 const PetscInt *jj = mm->j + ii[i]; 7524 const PetscInt gr = rmap[i]; 7525 const PetscInt nz = ii[i + 1] - ii[i]; 7526 if (gr < rs || gr >= re) { /* this is an offproc row */ 7527 for (j = ii[i]; j < ii[i + 1]; j++) { 7528 *coi++ = gr; 7529 *idxoff++ = j; 7530 } 7531 if (!cmapt[cp]) { /* already global */ 7532 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7533 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7534 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7535 } else { /* offdiag */ 7536 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7537 } 7538 ncoo_o += nz; 7539 } else { /* this is a local row */ 7540 for (j = ii[i]; j < 
ii[i + 1]; j++) *idxown++ = j; 7541 } 7542 } 7543 } 7544 mmdata->off[cp + 1] = idxoff; 7545 mmdata->own[cp + 1] = idxown; 7546 } 7547 7548 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf)); 7549 PetscCall(PetscSFSetGraphLayout(mmdata->sf, C->rmap, ncoo_o /*nleaves*/, NULL /*ilocal*/, PETSC_OWN_POINTER, coo_i)); 7550 PetscCall(PetscSFGetMultiSF(mmdata->sf, &msf)); 7551 PetscCall(PetscSFGetGraph(msf, &ncoo2 /*nroots*/, NULL, NULL, NULL)); 7552 ncoo = ncoo_d + ncoo_oown + ncoo2; 7553 PetscCall(PetscMalloc2(ncoo, &coo_i2, ncoo, &coo_j2)); 7554 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */ 7555 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); 7556 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown)); 7557 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown)); 7558 PetscCall(PetscFree2(coo_i, coo_j)); 7559 /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */ 7560 PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo_o * sizeof(PetscScalar), (void **)&mmdata->coo_w)); 7561 coo_i = coo_i2; 7562 coo_j = coo_j2; 7563 } else { /* no offproc values insertion */ 7564 ncoo = ncoo_d; 7565 PetscCall(PetscMalloc2(ncoo, &coo_i, ncoo, &coo_j)); 7566 7567 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf)); 7568 PetscCall(PetscSFSetGraph(mmdata->sf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER)); 7569 PetscCall(PetscSFSetUp(mmdata->sf)); 7570 } 7571 mmdata->hasoffproc = hasoffproc; 7572 7573 /* gather (i,j) of nonzeros inserted locally */ 7574 for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) { 7575 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7576 PetscInt *coi = coo_i + ncoo_d; 7577 PetscInt *coj = coo_j + ncoo_d; 7578 const PetscInt *jj = mm->j; 7579 const PetscInt *ii = mm->i; 7580 const PetscInt *cmap = 
cmapa[cp]; 7581 const PetscInt *rmap = rmapa[cp]; 7582 const PetscInt mr = mp[cp]->rmap->n; 7583 const PetscInt rs = C->rmap->rstart; 7584 const PetscInt re = C->rmap->rend; 7585 const PetscInt cs = C->cmap->rstart; 7586 7587 if (mptmp[cp]) continue; 7588 if (rmapt[cp] == 1) { /* consecutive rows */ 7589 /* fill coo_i */ 7590 for (i = 0; i < mr; i++) { 7591 const PetscInt gr = i + rs; 7592 for (j = ii[i]; j < ii[i + 1]; j++) coi[j] = gr; 7593 } 7594 /* fill coo_j */ 7595 if (!cmapt[cp]) { /* type-0, already global */ 7596 PetscCall(PetscArraycpy(coj, jj, mm->nz)); 7597 } else if (cmapt[cp] == 1) { /* type-1, local to global for consecutive columns of C */ 7598 for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */ 7599 } else { /* type-2, local to global for sparse columns */ 7600 for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]]; 7601 } 7602 ncoo_d += mm->nz; 7603 } else if (rmapt[cp] == 2) { /* sparse rows */ 7604 for (i = 0; i < mr; i++) { 7605 const PetscInt *jj = mm->j + ii[i]; 7606 const PetscInt gr = rmap[i]; 7607 const PetscInt nz = ii[i + 1] - ii[i]; 7608 if (gr >= rs && gr < re) { /* local rows */ 7609 for (j = ii[i]; j < ii[i + 1]; j++) *coi++ = gr; 7610 if (!cmapt[cp]) { /* type-0, already global */ 7611 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7612 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7613 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7614 } else { /* type-2, local to global for sparse columns */ 7615 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7616 } 7617 ncoo_d += nz; 7618 } 7619 } 7620 } 7621 } 7622 if (glob) PetscCall(ISRestoreIndices(glob, &globidx)); 7623 PetscCall(ISDestroy(&glob)); 7624 if (P_oth_l2g) PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g, &P_oth_idx)); 7625 PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g)); 7626 /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */ 7627 PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, 
ncoo * sizeof(PetscScalar), (void **)&mmdata->coo_v)); 7628 7629 /* preallocate with COO data */ 7630 PetscCall(MatSetPreallocationCOO(C, ncoo, coo_i, coo_j)); 7631 PetscCall(PetscFree2(coo_i, coo_j)); 7632 PetscFunctionReturn(PETSC_SUCCESS); 7633 } 7634 7635 PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat) 7636 { 7637 Mat_Product *product = mat->product; 7638 #if defined(PETSC_HAVE_DEVICE) 7639 PetscBool match = PETSC_FALSE; 7640 PetscBool usecpu = PETSC_FALSE; 7641 #else 7642 PetscBool match = PETSC_TRUE; 7643 #endif 7644 7645 PetscFunctionBegin; 7646 MatCheckProduct(mat, 1); 7647 #if defined(PETSC_HAVE_DEVICE) 7648 if (!product->A->boundtocpu && !product->B->boundtocpu) PetscCall(PetscObjectTypeCompare((PetscObject)product->B, ((PetscObject)product->A)->type_name, &match)); 7649 if (match) { /* we can always fallback to the CPU if requested */ 7650 switch (product->type) { 7651 case MATPRODUCT_AB: 7652 if (product->api_user) { 7653 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatMatMult", "Mat"); 7654 PetscCall(PetscOptionsBool("-matmatmult_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL)); 7655 PetscOptionsEnd(); 7656 } else { 7657 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AB", "Mat"); 7658 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL)); 7659 PetscOptionsEnd(); 7660 } 7661 break; 7662 case MATPRODUCT_AtB: 7663 if (product->api_user) { 7664 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatTransposeMatMult", "Mat"); 7665 PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL)); 7666 PetscOptionsEnd(); 7667 } else { 7668 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AtB", "Mat"); 7669 
PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL)); 7670 PetscOptionsEnd(); 7671 } 7672 break; 7673 case MATPRODUCT_PtAP: 7674 if (product->api_user) { 7675 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatPtAP", "Mat"); 7676 PetscCall(PetscOptionsBool("-matptap_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL)); 7677 PetscOptionsEnd(); 7678 } else { 7679 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_PtAP", "Mat"); 7680 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL)); 7681 PetscOptionsEnd(); 7682 } 7683 break; 7684 default: 7685 break; 7686 } 7687 match = (PetscBool)!usecpu; 7688 } 7689 #endif 7690 if (match) { 7691 switch (product->type) { 7692 case MATPRODUCT_AB: 7693 case MATPRODUCT_AtB: 7694 case MATPRODUCT_PtAP: 7695 mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND; 7696 break; 7697 default: 7698 break; 7699 } 7700 } 7701 /* fallback to MPIAIJ ops */ 7702 if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat)); 7703 PetscFunctionReturn(PETSC_SUCCESS); 7704 } 7705 7706 /* 7707 Produces a set of block column indices of the matrix row, one for each block represented in the original row 7708 7709 n - the number of block indices in cc[] 7710 cc - the block indices (must be large enough to contain the indices) 7711 */ 7712 static inline PetscErrorCode MatCollapseRow(Mat Amat, PetscInt row, PetscInt bs, PetscInt *n, PetscInt *cc) 7713 { 7714 PetscInt cnt = -1, nidx, j; 7715 const PetscInt *idx; 7716 7717 PetscFunctionBegin; 7718 PetscCall(MatGetRow(Amat, row, &nidx, &idx, NULL)); 7719 if (nidx) { 7720 cnt = 0; 7721 cc[cnt] = idx[0] / bs; 7722 for (j = 1; j < nidx; j++) { 7723 if (cc[cnt] < idx[j] / bs) cc[++cnt] = idx[j] / bs; 7724 } 7725 } 7726 PetscCall(MatRestoreRow(Amat, row, &nidx, &idx, 
NULL)); 7727 *n = cnt + 1; 7728 PetscFunctionReturn(PETSC_SUCCESS); 7729 } 7730 7731 /* 7732 Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows 7733 7734 ncollapsed - the number of block indices 7735 collapsed - the block indices (must be large enough to contain the indices) 7736 */ 7737 static inline PetscErrorCode MatCollapseRows(Mat Amat, PetscInt start, PetscInt bs, PetscInt *w0, PetscInt *w1, PetscInt *w2, PetscInt *ncollapsed, PetscInt **collapsed) 7738 { 7739 PetscInt i, nprev, *cprev = w0, ncur = 0, *ccur = w1, *merged = w2, *cprevtmp; 7740 7741 PetscFunctionBegin; 7742 PetscCall(MatCollapseRow(Amat, start, bs, &nprev, cprev)); 7743 for (i = start + 1; i < start + bs; i++) { 7744 PetscCall(MatCollapseRow(Amat, i, bs, &ncur, ccur)); 7745 PetscCall(PetscMergeIntArray(nprev, cprev, ncur, ccur, &nprev, &merged)); 7746 cprevtmp = cprev; 7747 cprev = merged; 7748 merged = cprevtmp; 7749 } 7750 *ncollapsed = nprev; 7751 if (collapsed) *collapsed = cprev; 7752 PetscFunctionReturn(PETSC_SUCCESS); 7753 } 7754 7755 /* 7756 MatCreateGraph_Simple_AIJ - create simple scalar matrix (graph) from potentially blocked matrix 7757 7758 Input Parameter: 7759 . Amat - matrix 7760 - symmetrize - make the result symmetric 7761 + scale - scale with diagonal 7762 7763 Output Parameter: 7764 . 
a_Gmat - output scalar graph >= 0 7765 7766 */ 7767 PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, PetscReal filter, PetscInt index_size, PetscInt index[], Mat *a_Gmat) 7768 { 7769 PetscInt Istart, Iend, Ii, jj, kk, ncols, nloc, NN, MM, bs; 7770 MPI_Comm comm; 7771 Mat Gmat; 7772 PetscBool ismpiaij, isseqaij; 7773 Mat a, b, c; 7774 MatType jtype; 7775 7776 PetscFunctionBegin; 7777 PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm)); 7778 PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend)); 7779 PetscCall(MatGetSize(Amat, &MM, &NN)); 7780 PetscCall(MatGetBlockSize(Amat, &bs)); 7781 nloc = (Iend - Istart) / bs; 7782 7783 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATSEQAIJ, &isseqaij)); 7784 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATMPIAIJ, &ismpiaij)); 7785 PetscCheck(isseqaij || ismpiaij, comm, PETSC_ERR_USER, "Require (MPI)AIJ matrix type"); 7786 7787 /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */ 7788 /* A solution consists in providing a new API, MatAIJGetCollapsedAIJ, and each class can provide a fast 7789 implementation */ 7790 if (bs > 1) { 7791 PetscCall(MatGetType(Amat, &jtype)); 7792 PetscCall(MatCreate(comm, &Gmat)); 7793 PetscCall(MatSetType(Gmat, jtype)); 7794 PetscCall(MatSetSizes(Gmat, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE)); 7795 PetscCall(MatSetBlockSizes(Gmat, 1, 1)); 7796 if (isseqaij || ((Mat_MPIAIJ *)Amat->data)->garray) { 7797 PetscInt *d_nnz, *o_nnz; 7798 MatScalar *aa, val, *AA; 7799 PetscInt *aj, *ai, *AJ, nc, nmax = 0; 7800 if (isseqaij) { 7801 a = Amat; 7802 b = NULL; 7803 } else { 7804 Mat_MPIAIJ *d = (Mat_MPIAIJ *)Amat->data; 7805 a = d->A; 7806 b = d->B; 7807 } 7808 PetscCall(PetscInfo(Amat, "New bs>1 Graph. nloc=%" PetscInt_FMT "\n", nloc)); 7809 PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 
0 : nloc, &o_nnz)); 7810 for (c = a, kk = 0; c && kk < 2; c = b, kk++) { 7811 PetscInt *nnz = (c == a) ? d_nnz : o_nnz; 7812 const PetscInt *cols1, *cols2; 7813 for (PetscInt brow = 0, nc1, nc2, ok = 1; brow < nloc * bs; brow += bs) { // block rows 7814 PetscCall(MatGetRow(c, brow, &nc2, &cols2, NULL)); 7815 nnz[brow / bs] = nc2 / bs; 7816 if (nc2 % bs) ok = 0; 7817 if (nnz[brow / bs] > nmax) nmax = nnz[brow / bs]; 7818 for (PetscInt ii = 1; ii < bs; ii++) { // check for non-dense blocks 7819 PetscCall(MatGetRow(c, brow + ii, &nc1, &cols1, NULL)); 7820 if (nc1 != nc2) ok = 0; 7821 else { 7822 for (PetscInt jj = 0; jj < nc1 && ok == 1; jj++) { 7823 if (cols1[jj] != cols2[jj]) ok = 0; 7824 if (cols1[jj] % bs != jj % bs) ok = 0; 7825 } 7826 } 7827 PetscCall(MatRestoreRow(c, brow + ii, &nc1, &cols1, NULL)); 7828 } 7829 PetscCall(MatRestoreRow(c, brow, &nc2, &cols2, NULL)); 7830 if (!ok) { 7831 PetscCall(PetscFree2(d_nnz, o_nnz)); 7832 PetscCall(PetscInfo(Amat, "Found sparse blocks - revert to slow method\n")); 7833 goto old_bs; 7834 } 7835 } 7836 } 7837 PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz)); 7838 PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz)); 7839 PetscCall(PetscFree2(d_nnz, o_nnz)); 7840 PetscCall(PetscMalloc2(nmax, &AA, nmax, &AJ)); 7841 // diag 7842 for (PetscInt brow = 0, n, grow; brow < nloc * bs; brow += bs) { // block rows 7843 Mat_SeqAIJ *aseq = (Mat_SeqAIJ *)a->data; 7844 ai = aseq->i; 7845 n = ai[brow + 1] - ai[brow]; 7846 aj = aseq->j + ai[brow]; 7847 for (int k = 0; k < n; k += bs) { // block columns 7848 AJ[k / bs] = aj[k] / bs + Istart / bs; // diag starts at (Istart,Istart) 7849 val = 0; 7850 if (index_size == 0) { 7851 for (int ii = 0; ii < bs; ii++) { // rows in block 7852 aa = aseq->a + ai[brow + ii] + k; 7853 for (int jj = 0; jj < bs; jj++) { // columns in block 7854 val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm 7855 } 7856 } 7857 } else { // use (index,index) value if provided 7858 for (int iii = 0; iii < 
index_size; iii++) { // rows in block 7859 int ii = index[iii]; 7860 aa = aseq->a + ai[brow + ii] + k; 7861 for (int jjj = 0; jjj < index_size; jjj++) { // columns in block 7862 int jj = index[jjj]; 7863 val += PetscAbs(PetscRealPart(aa[jj])); 7864 } 7865 } 7866 } 7867 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%d) >= nmax (%d)", (int)(k / bs), (int)nmax); 7868 AA[k / bs] = val; 7869 } 7870 grow = Istart / bs + brow / bs; 7871 PetscCall(MatSetValues(Gmat, 1, &grow, n / bs, AJ, AA, ADD_VALUES)); 7872 } 7873 // off-diag 7874 if (ismpiaij) { 7875 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Amat->data; 7876 const PetscScalar *vals; 7877 const PetscInt *cols, *garray = aij->garray; 7878 PetscCheck(garray, PETSC_COMM_SELF, PETSC_ERR_USER, "No garray ?"); 7879 for (PetscInt brow = 0, grow; brow < nloc * bs; brow += bs) { // block rows 7880 PetscCall(MatGetRow(b, brow, &ncols, &cols, NULL)); 7881 for (int k = 0, cidx = 0; k < ncols; k += bs, cidx++) { 7882 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs >= nmax"); 7883 AA[k / bs] = 0; 7884 AJ[cidx] = garray[cols[k]] / bs; 7885 } 7886 nc = ncols / bs; 7887 PetscCall(MatRestoreRow(b, brow, &ncols, &cols, NULL)); 7888 if (index_size == 0) { 7889 for (int ii = 0; ii < bs; ii++) { // rows in block 7890 PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals)); 7891 for (int k = 0; k < ncols; k += bs) { 7892 for (int jj = 0; jj < bs; jj++) { // cols in block 7893 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%d) >= nmax (%d)", (int)(k / bs), (int)nmax); 7894 AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj])); 7895 } 7896 } 7897 PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals)); 7898 } 7899 } else { // use (index,index) value if provided 7900 for (int iii = 0; iii < index_size; iii++) { // rows in block 7901 int ii = index[iii]; 7902 PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals)); 7903 for (int k = 0; k < ncols; k += bs) { 7904 for (int jjj = 0; jjj < index_size; jjj++) { // cols in 
block 7905 int jj = index[jjj]; 7906 AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj])); 7907 } 7908 } 7909 PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals)); 7910 } 7911 } 7912 grow = Istart / bs + brow / bs; 7913 PetscCall(MatSetValues(Gmat, 1, &grow, nc, AJ, AA, ADD_VALUES)); 7914 } 7915 } 7916 PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY)); 7917 PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY)); 7918 PetscCall(PetscFree2(AA, AJ)); 7919 } else { 7920 const PetscScalar *vals; 7921 const PetscInt *idx; 7922 PetscInt *d_nnz, *o_nnz, *w0, *w1, *w2; 7923 old_bs: 7924 /* 7925 Determine the preallocation needed for the scalar matrix derived from the vector matrix. 7926 */ 7927 PetscCall(PetscInfo(Amat, "OLD bs>1 CreateGraph\n")); 7928 PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 0 : nloc, &o_nnz)); 7929 if (isseqaij) { 7930 PetscInt max_d_nnz; 7931 /* 7932 Determine exact preallocation count for (sequential) scalar matrix 7933 */ 7934 PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat, &max_d_nnz)); 7935 max_d_nnz = PetscMin(nloc, bs * max_d_nnz); 7936 PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2)); 7937 for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) PetscCall(MatCollapseRows(Amat, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL)); 7938 PetscCall(PetscFree3(w0, w1, w2)); 7939 } else if (ismpiaij) { 7940 Mat Daij, Oaij; 7941 const PetscInt *garray; 7942 PetscInt max_d_nnz; 7943 PetscCall(MatMPIAIJGetSeqAIJ(Amat, &Daij, &Oaij, &garray)); 7944 /* 7945 Determine exact preallocation count for diagonal block portion of scalar matrix 7946 */ 7947 PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij, &max_d_nnz)); 7948 max_d_nnz = PetscMin(nloc, bs * max_d_nnz); 7949 PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2)); 7950 for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) PetscCall(MatCollapseRows(Daij, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL)); 7951 PetscCall(PetscFree3(w0, w1, w2)); 7952 /* 7953 Over estimate (usually grossly 
over), preallocation count for off-diagonal portion of scalar matrix 7954 */ 7955 for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) { 7956 o_nnz[jj] = 0; 7957 for (kk = 0; kk < bs; kk++) { /* rows that get collapsed to a single row */ 7958 PetscCall(MatGetRow(Oaij, Ii + kk, &ncols, NULL, NULL)); 7959 o_nnz[jj] += ncols; 7960 PetscCall(MatRestoreRow(Oaij, Ii + kk, &ncols, NULL, NULL)); 7961 } 7962 if (o_nnz[jj] > (NN / bs - nloc)) o_nnz[jj] = NN / bs - nloc; 7963 } 7964 } else SETERRQ(comm, PETSC_ERR_USER, "Require AIJ matrix type"); 7965 /* get scalar copy (norms) of matrix */ 7966 PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz)); 7967 PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz)); 7968 PetscCall(PetscFree2(d_nnz, o_nnz)); 7969 for (Ii = Istart; Ii < Iend; Ii++) { 7970 PetscInt dest_row = Ii / bs; 7971 PetscCall(MatGetRow(Amat, Ii, &ncols, &idx, &vals)); 7972 for (jj = 0; jj < ncols; jj++) { 7973 PetscInt dest_col = idx[jj] / bs; 7974 PetscScalar sv = PetscAbs(PetscRealPart(vals[jj])); 7975 PetscCall(MatSetValues(Gmat, 1, &dest_row, 1, &dest_col, &sv, ADD_VALUES)); 7976 } 7977 PetscCall(MatRestoreRow(Amat, Ii, &ncols, &idx, &vals)); 7978 } 7979 PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY)); 7980 PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY)); 7981 } 7982 } else { 7983 if (symmetrize || filter >= 0 || scale) PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat)); 7984 else { 7985 Gmat = Amat; 7986 PetscCall(PetscObjectReference((PetscObject)Gmat)); 7987 } 7988 if (isseqaij) { 7989 a = Gmat; 7990 b = NULL; 7991 } else { 7992 Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data; 7993 a = d->A; 7994 b = d->B; 7995 } 7996 if (filter >= 0 || scale) { 7997 /* take absolute value of each entry */ 7998 for (c = a, kk = 0; c && kk < 2; c = b, kk++) { 7999 MatInfo info; 8000 PetscScalar *avals; 8001 PetscCall(MatGetInfo(c, MAT_LOCAL, &info)); 8002 PetscCall(MatSeqAIJGetArray(c, &avals)); 8003 for (int jj = 0; jj < info.nz_used; jj++) avals[jj] = 
PetscAbsScalar(avals[jj]); 8004 PetscCall(MatSeqAIJRestoreArray(c, &avals)); 8005 } 8006 } 8007 } 8008 if (symmetrize) { 8009 PetscBool isset, issym; 8010 PetscCall(MatIsSymmetricKnown(Amat, &isset, &issym)); 8011 if (!isset || !issym) { 8012 Mat matTrans; 8013 PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans)); 8014 PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric == PETSC_BOOL3_TRUE ? SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN)); 8015 PetscCall(MatDestroy(&matTrans)); 8016 } 8017 PetscCall(MatSetOption(Gmat, MAT_SYMMETRIC, PETSC_TRUE)); 8018 } else if (Amat != Gmat) PetscCall(MatPropagateSymmetryOptions(Amat, Gmat)); 8019 if (scale) { 8020 /* scale c for all diagonal values = 1 or -1 */ 8021 Vec diag; 8022 PetscCall(MatCreateVecs(Gmat, &diag, NULL)); 8023 PetscCall(MatGetDiagonal(Gmat, diag)); 8024 PetscCall(VecReciprocal(diag)); 8025 PetscCall(VecSqrtAbs(diag)); 8026 PetscCall(MatDiagonalScale(Gmat, diag, diag)); 8027 PetscCall(VecDestroy(&diag)); 8028 } 8029 PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view")); 8030 8031 if (filter >= 0) { 8032 PetscCall(MatFilter(Gmat, filter, PETSC_TRUE, PETSC_TRUE)); 8033 PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_filter_graph_view")); 8034 } 8035 *a_Gmat = Gmat; 8036 PetscFunctionReturn(PETSC_SUCCESS); 8037 } 8038 8039 /* 8040 Special version for direct calls from Fortran 8041 */ 8042 8043 /* Change these macros so can be used in void function */ 8044 /* Identical to PetscCallVoid, except it assigns to *_ierr */ 8045 #undef PetscCall 8046 #define PetscCall(...) \ 8047 do { \ 8048 PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \ 8049 if (PetscUnlikely(ierr_msv_mpiaij)) { \ 8050 *_ierr = PetscError(PETSC_COMM_SELF, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr_msv_mpiaij, PETSC_ERROR_REPEAT, " "); \ 8051 return; \ 8052 } \ 8053 } while (0) 8054 8055 #undef SETERRQ 8056 #define SETERRQ(comm, ierr, ...) 
  \
  do { \
    *_ierr = PetscError(comm, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr, PETSC_ERROR_INITIAL, __VA_ARGS__); \
    return; \
  } while (0)

/* Map the C symbol onto the name the Fortran compiler's mangling scheme expects */
#if defined(PETSC_HAVE_FORTRAN_CAPS)
#define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
#define matsetvaluesmpiaij_ matsetvaluesmpiaij
#else
#endif
/*
  matsetvaluesmpiaij_ - Fortran direct-call version of MatSetValues() for MPIAIJ matrices.

  All arguments arrive by reference (Fortran calling convention); the error code is
  returned through *_ierr rather than as the function result, which is why PetscCall()
  and SETERRQ() were redefined above to assign *_ierr and `return;` from this void function.

  Input Parameters:
.   mmat  - the MPIAIJ matrix
.   mm/im - number of rows and their global indices
.   mn/in - number of columns and their global indices
.   v     - the values (row- or column-oriented per aij->roworiented)
.   maddv - INSERT_VALUES or ADD_VALUES

  Output Parameter:
.   _ierr - error code (0 on success)
*/
PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat, PetscInt *mm, const PetscInt im[], PetscInt *mn, const PetscInt in[], const PetscScalar v[], InsertMode *maddv, PetscErrorCode *_ierr)
{
  Mat         mat  = *mmat;
  PetscInt    m    = *mm, n = *mn;
  InsertMode  addv = *maddv;
  Mat_MPIAIJ *aij  = (Mat_MPIAIJ *)mat->data;
  PetscScalar value;

  MatCheckPreallocated(mat, 1);
  /* First call fixes the insert mode; subsequent calls may not mix INSERT and ADD */
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
  else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values");
  {
    PetscInt  i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
    PetscInt  cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
    PetscBool roworiented = aij->roworiented;

    /* Some Variables required in the macro */
    /* Cached fields of the local diagonal block A, read by MatSetValues_SeqAIJ_A_Private() */
    Mat         A     = aij->A;
    Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
    PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
    MatScalar  *aa;
    PetscBool   ignorezeroentries = ((a->ignorezeroentries && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
    /* Cached fields of the off-diagonal block B, read by MatSetValues_SeqAIJ_B_Private();
       these are re-seated below if MatDisAssemble_MPIAIJ() replaces B */
    Mat         B     = aij->B;
    Mat_SeqAIJ *b     = (Mat_SeqAIJ *)B->data;
    PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
    MatScalar  *ba;
    /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
     * cannot use "#if defined" inside a macro. */
    PETSC_UNUSED PetscBool inserted = PETSC_FALSE;

    /* Scratch state used by the per-row binary-search/insert macros */
    PetscInt   *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
    PetscInt    nonew = a->nonew;
    MatScalar  *ap1, *ap2;

    PetscFunctionBegin;
    PetscCall(MatSeqAIJGetArray(A, &aa));
    PetscCall(MatSeqAIJGetArray(B, &ba));
    for (i = 0; i < m; i++) {
      if (im[i] < 0) continue; /* negative row index means: skip this row */
      PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
      if (im[i] >= rstart && im[i] < rend) {
        /* Row is owned locally: seed the cached row pointers for both A and B */
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j = 0; j < n; j++) {
          /* v is row-major or column-major depending on the matrix option */
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          /* Optionally drop explicit zeros on ADD, but never drop diagonal entries */
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
          if (in[j] >= cstart && in[j] < cend) {
            /* Column in the diagonal block */
            col = in[j] - cstart;
            MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
          } else if (in[j] < 0) continue; /* negative column index means: skip this entry */
          else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
            SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          } else {
            /* Column in the off-diagonal block B */
            if (mat->was_assembled) {
              /* After assembly B is stored with compacted local column indices; translate
                 the global column through the colmap (1-based so 0 can mean "absent") */
              if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
              PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col));
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              if (col < 0 && !((Mat_SeqAIJ *)aij->A->data)->nonew) {
                /* New off-process column not in B's pattern: expand B back to global indexing */
                PetscCall(MatDisAssemble_MPIAIJ(mat));
                col = in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
                B     = aij->B;
                b     = (Mat_SeqAIJ *)B->data;
                bimax = b->imax;
                bi    = b->i;
                bilen = b->ilen;
                bj    = b->j;
                /* NOTE(review): ap2 is computed from the pre-disassembly `ba` here, while `ba`
                   is only refreshed from b->a a few lines below — verify this ordering is
                   intentional (compare with MatSetValues_MPIAIJ) */
                rp2   = bj + bi[row];
                ap2   = ba + bi[row];
                rmax2 = bimax[row];
                nrow2 = bilen[row];
                low2  = 0;
                high2 = nrow2;
                bm    = aij->B->rmap->n;
                ba    = b->a;
                inserted = PETSC_FALSE;
              }
            } else col = in[j]; /* before first assembly B uses global column indices directly */
            MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
          }
        }
      } else if (!aij->donotstash) {
        /* Row owned by another process: stash for communication at assembly time */
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
    PetscCall(MatSeqAIJRestoreArray(A, &aa));
    PetscCall(MatSeqAIJRestoreArray(B, &ba));
  }
  PetscFunctionReturnVoid();
}

/* Undefining these here since they were redefined from their original definition above! No
 * other PETSc functions should be defined past this point, as it is impossible to recover the
 * original definitions */
#undef PetscCall
#undef SETERRQ