1 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 2 #include <petsc/private/vecimpl.h> 3 #include <petsc/private/sfimpl.h> 4 #include <petsc/private/isimpl.h> 5 #include <petscblaslapack.h> 6 #include <petscsf.h> 7 #include <petsc/private/hashmapi.h> 8 9 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 10 { 11 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 12 13 PetscFunctionBegin; 14 PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N)); 15 PetscCall(MatStashDestroy_Private(&mat->stash)); 16 PetscCall(VecDestroy(&aij->diag)); 17 PetscCall(MatDestroy(&aij->A)); 18 PetscCall(MatDestroy(&aij->B)); 19 #if defined(PETSC_USE_CTABLE) 20 PetscCall(PetscHMapIDestroy(&aij->colmap)); 21 #else 22 PetscCall(PetscFree(aij->colmap)); 23 #endif 24 PetscCall(PetscFree(aij->garray)); 25 PetscCall(VecDestroy(&aij->lvec)); 26 PetscCall(VecScatterDestroy(&aij->Mvctx)); 27 PetscCall(PetscFree2(aij->rowvalues, aij->rowindices)); 28 PetscCall(PetscFree(aij->ld)); 29 30 PetscCall(PetscFree(mat->data)); 31 32 /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */ 33 PetscCall(PetscObjectCompose((PetscObject)mat, "MatMergeSeqsToMPI", NULL)); 34 35 PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL)); 36 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL)); 37 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL)); 38 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatIsTranspose_C", NULL)); 39 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocation_C", NULL)); 40 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetPreallocation_C", NULL)); 41 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocationCSR_C", NULL)); 42 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL)); 43 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpibaij_C", NULL)); 44 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisbaij_C", NULL)); 45 #if defined(PETSC_HAVE_CUDA) 46 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcusparse_C", NULL)); 47 #endif 48 #if defined(PETSC_HAVE_HIP) 49 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijhipsparse_C", NULL)); 50 #endif 51 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 52 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijkokkos_C", NULL)); 53 #endif 54 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL)); 55 #if defined(PETSC_HAVE_ELEMENTAL) 56 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_elemental_C", NULL)); 57 #endif 58 #if defined(PETSC_HAVE_SCALAPACK) 59 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_scalapack_C", NULL)); 60 #endif 61 #if defined(PETSC_HAVE_HYPRE) 62 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_hypre_C", NULL)); 63 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", NULL)); 64 #endif 65 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL)); 66 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_is_mpiaij_C", NULL)); 67 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpiaij_C", NULL)); 68 
PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetUseScalableIncreaseOverlap_C", NULL)); 69 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijperm_C", NULL)); 70 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijsell_C", NULL)); 71 #if defined(PETSC_HAVE_MKL_SPARSE) 72 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijmkl_C", NULL)); 73 #endif 74 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcrl_C", NULL)); 75 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL)); 76 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisell_C", NULL)); 77 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetPreallocationCOO_C", NULL)); 78 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetValuesCOO_C", NULL)); 79 PetscFunctionReturn(PETSC_SUCCESS); 80 } 81 82 /* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and MatAssemblyEnd_MPI_Hash() */ 83 #define TYPE AIJ 84 #define TYPE_AIJ 85 #include "../src/mat/impls/aij/mpi/mpihashmat.h" 86 #undef TYPE 87 #undef TYPE_AIJ 88 89 static PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) 90 { 91 Mat B; 92 93 PetscFunctionBegin; 94 PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &B)); 95 PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject)B)); 96 PetscCall(MatGetRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done)); 97 PetscCall(MatDestroy(&B)); 98 PetscFunctionReturn(PETSC_SUCCESS); 99 } 100 101 static PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) 102 { 103 Mat B; 104 105 PetscFunctionBegin; 106 PetscCall(PetscObjectQuery((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject *)&B)); 107 PetscCall(MatRestoreRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done)); 108 PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", NULL)); 109 PetscFunctionReturn(PETSC_SUCCESS); 110 } 111 112 /*MC 113 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 114 115 This matrix type is identical to` MATSEQAIJ` when constructed with a single process communicator, 116 and `MATMPIAIJ` otherwise. As a result, for single process communicators, 117 `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported 118 for communicators controlling multiple processes. It is recommended that you call both of 119 the above preallocation routines for simplicity. 120 121 Options Database Key: 122 . -mat_type aij - sets the matrix type to `MATAIJ` during a call to `MatSetFromOptions()` 123 124 Developer Note: 125 Level: beginner 126 127 Subclasses include `MATAIJCUSPARSE`, `MATAIJPERM`, `MATAIJSELL`, `MATAIJMKL`, `MATAIJCRL`, `MATAIJKOKKOS`,and also automatically switches over to use inodes when 128 enough exist. 129 130 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateAIJ()`, `MatCreateSeqAIJ()`, `MATSEQAIJ`, `MATMPIAIJ` 131 M*/ 132 133 /*MC 134 MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices. 135 136 This matrix type is identical to `MATSEQAIJCRL` when constructed with a single process communicator, 137 and `MATMPIAIJCRL` otherwise. 
As a result, for single process communicators, 138 `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported 139 for communicators controlling multiple processes. It is recommended that you call both of 140 the above preallocation routines for simplicity. 141 142 Options Database Key: 143 . -mat_type aijcrl - sets the matrix type to `MATMPIAIJCRL` during a call to `MatSetFromOptions()` 144 145 Level: beginner 146 147 .seealso: [](ch_matrices), `Mat`, `MatCreateMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL` 148 M*/ 149 150 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A, PetscBool flg) 151 { 152 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 153 154 PetscFunctionBegin; 155 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) || defined(PETSC_HAVE_VIENNACL) 156 A->boundtocpu = flg; 157 #endif 158 if (a->A) PetscCall(MatBindToCPU(a->A, flg)); 159 if (a->B) PetscCall(MatBindToCPU(a->B, flg)); 160 161 /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products. 162 * This maybe seems a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors 163 * to differ from the parent matrix. */ 164 if (a->lvec) PetscCall(VecBindToCPU(a->lvec, flg)); 165 if (a->diag) PetscCall(VecBindToCPU(a->diag, flg)); 166 PetscFunctionReturn(PETSC_SUCCESS); 167 } 168 169 static PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs) 170 { 171 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data; 172 173 PetscFunctionBegin; 174 if (mat->A) { 175 PetscCall(MatSetBlockSizes(mat->A, rbs, cbs)); 176 PetscCall(MatSetBlockSizes(mat->B, rbs, 1)); 177 } 178 PetscFunctionReturn(PETSC_SUCCESS); 179 } 180 181 static PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M, IS *keptrows) 182 { 183 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data; 184 Mat_SeqAIJ *a = (Mat_SeqAIJ *)mat->A->data; 185 Mat_SeqAIJ *b = (Mat_SeqAIJ *)mat->B->data; 186 const PetscInt *ia, *ib; 187 const MatScalar *aa, *bb, *aav, *bav; 188 PetscInt na, nb, i, j, *rows, cnt = 0, n0rows; 189 PetscInt m = M->rmap->n, rstart = M->rmap->rstart; 190 191 PetscFunctionBegin; 192 *keptrows = NULL; 193 194 ia = a->i; 195 ib = b->i; 196 PetscCall(MatSeqAIJGetArrayRead(mat->A, &aav)); 197 PetscCall(MatSeqAIJGetArrayRead(mat->B, &bav)); 198 for (i = 0; i < m; i++) { 199 na = ia[i + 1] - ia[i]; 200 nb = ib[i + 1] - ib[i]; 201 if (!na && !nb) { 202 cnt++; 203 goto ok1; 204 } 205 aa = aav + ia[i]; 206 for (j = 0; j < na; j++) { 207 if (aa[j] != 0.0) goto ok1; 208 } 209 bb = PetscSafePointerPlusOffset(bav, ib[i]); 210 for (j = 0; j < nb; j++) { 211 if (bb[j] != 0.0) goto ok1; 212 } 213 cnt++; 214 ok1:; 215 } 216 PetscCall(MPIU_Allreduce(&cnt, &n0rows, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)M))); 217 if (!n0rows) { 218 PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav)); 219 PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav)); 220 PetscFunctionReturn(PETSC_SUCCESS); 221 } 222 PetscCall(PetscMalloc1(M->rmap->n - cnt, &rows)); 223 cnt = 0; 224 for (i = 0; i < m; i++) { 225 na = ia[i + 1] - ia[i]; 226 nb = ib[i + 1] - ib[i]; 227 if (!na && !nb) continue; 228 aa = aav + ia[i]; 229 for (j = 0; j < na; j++) { 230 if (aa[j] != 0.0) { 231 rows[cnt++] = rstart + i; 232 goto ok2; 233 } 234 } 235 bb = PetscSafePointerPlusOffset(bav, ib[i]); 236 for (j = 0; j < nb; j++) { 237 if (bb[j] != 0.0) { 238 rows[cnt++] = rstart + i; 239 goto ok2; 240 } 241 } 242 ok2:; 243 } 244 
PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), cnt, rows, PETSC_OWN_POINTER, keptrows)); 245 PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav)); 246 PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav)); 247 PetscFunctionReturn(PETSC_SUCCESS); 248 } 249 250 static PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y, Vec D, InsertMode is) 251 { 252 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Y->data; 253 PetscBool cong; 254 255 PetscFunctionBegin; 256 PetscCall(MatHasCongruentLayouts(Y, &cong)); 257 if (Y->assembled && cong) { 258 PetscCall(MatDiagonalSet(aij->A, D, is)); 259 } else { 260 PetscCall(MatDiagonalSet_Default(Y, D, is)); 261 } 262 PetscFunctionReturn(PETSC_SUCCESS); 263 } 264 265 static PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M, IS *zrows) 266 { 267 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)M->data; 268 PetscInt i, rstart, nrows, *rows; 269 270 PetscFunctionBegin; 271 *zrows = NULL; 272 PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A, &nrows, &rows)); 273 PetscCall(MatGetOwnershipRange(M, &rstart, NULL)); 274 for (i = 0; i < nrows; i++) rows[i] += rstart; 275 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), nrows, rows, PETSC_OWN_POINTER, zrows)); 276 PetscFunctionReturn(PETSC_SUCCESS); 277 } 278 279 static PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A, PetscInt type, PetscReal *reductions) 280 { 281 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)A->data; 282 PetscInt i, m, n, *garray = aij->garray; 283 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ *)aij->A->data; 284 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ *)aij->B->data; 285 PetscReal *work; 286 const PetscScalar *dummy; 287 288 PetscFunctionBegin; 289 PetscCall(MatGetSize(A, &m, &n)); 290 PetscCall(PetscCalloc1(n, &work)); 291 PetscCall(MatSeqAIJGetArrayRead(aij->A, &dummy)); 292 PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &dummy)); 293 PetscCall(MatSeqAIJGetArrayRead(aij->B, &dummy)); 294 PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &dummy)); 295 if (type == NORM_2) { 296 for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i] * a_aij->a[i]); 297 for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i] * b_aij->a[i]); 298 } else if (type == NORM_1) { 299 for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 300 for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 301 } else if (type == NORM_INFINITY) { 302 for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 303 for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]), work[garray[b_aij->j[i]]]); 304 } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) { 305 for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]); 306 for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]); 307 } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) { 308 for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]); 309 for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]); 310 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type"); 311 
if (type == NORM_INFINITY) { 312 PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A))); 313 } else { 314 PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A))); 315 } 316 PetscCall(PetscFree(work)); 317 if (type == NORM_2) { 318 for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]); 319 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 320 for (i = 0; i < n; i++) reductions[i] /= m; 321 } 322 PetscFunctionReturn(PETSC_SUCCESS); 323 } 324 325 static PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A, IS *is) 326 { 327 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 328 IS sis, gis; 329 const PetscInt *isis, *igis; 330 PetscInt n, *iis, nsis, ngis, rstart, i; 331 332 PetscFunctionBegin; 333 PetscCall(MatFindOffBlockDiagonalEntries(a->A, &sis)); 334 PetscCall(MatFindNonzeroRows(a->B, &gis)); 335 PetscCall(ISGetSize(gis, &ngis)); 336 PetscCall(ISGetSize(sis, &nsis)); 337 PetscCall(ISGetIndices(sis, &isis)); 338 PetscCall(ISGetIndices(gis, &igis)); 339 340 PetscCall(PetscMalloc1(ngis + nsis, &iis)); 341 PetscCall(PetscArraycpy(iis, igis, ngis)); 342 PetscCall(PetscArraycpy(iis + ngis, isis, nsis)); 343 n = ngis + nsis; 344 PetscCall(PetscSortRemoveDupsInt(&n, iis)); 345 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 346 for (i = 0; i < n; i++) iis[i] += rstart; 347 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A), n, iis, PETSC_OWN_POINTER, is)); 348 349 PetscCall(ISRestoreIndices(sis, &isis)); 350 PetscCall(ISRestoreIndices(gis, &igis)); 351 PetscCall(ISDestroy(&sis)); 352 PetscCall(ISDestroy(&gis)); 353 PetscFunctionReturn(PETSC_SUCCESS); 354 } 355 356 /* 357 Local utility routine that creates a mapping from the global column 358 number to the local number in the off-diagonal part of the local 359 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 360 a slightly higher hash table cost; without it it is not scalable (each processor 361 has an order N integer array but is fast to access. 
362 */ 363 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 364 { 365 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 366 PetscInt n = aij->B->cmap->n, i; 367 368 PetscFunctionBegin; 369 PetscCheck(!n || aij->garray, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MPIAIJ Matrix was assembled but is missing garray"); 370 #if defined(PETSC_USE_CTABLE) 371 PetscCall(PetscHMapICreateWithSize(n, &aij->colmap)); 372 for (i = 0; i < n; i++) PetscCall(PetscHMapISet(aij->colmap, aij->garray[i] + 1, i + 1)); 373 #else 374 PetscCall(PetscCalloc1(mat->cmap->N + 1, &aij->colmap)); 375 for (i = 0; i < n; i++) aij->colmap[aij->garray[i]] = i + 1; 376 #endif 377 PetscFunctionReturn(PETSC_SUCCESS); 378 } 379 380 #define MatSetValues_SeqAIJ_A_Private(row, col, value, addv, orow, ocol) \ 381 do { \ 382 if (col <= lastcol1) low1 = 0; \ 383 else high1 = nrow1; \ 384 lastcol1 = col; \ 385 while (high1 - low1 > 5) { \ 386 t = (low1 + high1) / 2; \ 387 if (rp1[t] > col) high1 = t; \ 388 else low1 = t; \ 389 } \ 390 for (_i = low1; _i < high1; _i++) { \ 391 if (rp1[_i] > col) break; \ 392 if (rp1[_i] == col) { \ 393 if (addv == ADD_VALUES) { \ 394 ap1[_i] += value; \ 395 /* Not sure LogFlops will slow dow the code or not */ \ 396 (void)PetscLogFlops(1.0); \ 397 } else ap1[_i] = value; \ 398 goto a_noinsert; \ 399 } \ 400 } \ 401 if (value == 0.0 && ignorezeroentries && row != col) { \ 402 low1 = 0; \ 403 high1 = nrow1; \ 404 goto a_noinsert; \ 405 } \ 406 if (nonew == 1) { \ 407 low1 = 0; \ 408 high1 = nrow1; \ 409 goto a_noinsert; \ 410 } \ 411 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 412 MatSeqXAIJReallocateAIJ(A, am, 1, nrow1, row, col, rmax1, aa, ai, aj, rp1, ap1, aimax, nonew, MatScalar); \ 413 N = nrow1++ - 1; \ 414 a->nz++; \ 415 high1++; \ 416 /* shift up all the later entries in this row */ \ 417 PetscCall(PetscArraymove(rp1 + _i + 1, rp1 + _i, N - _i + 1)); \ 418 PetscCall(PetscArraymove(ap1 + _i + 1, ap1 + _i, N - _i + 1)); \ 419 rp1[_i] = col; \ 420 ap1[_i] = value; \ 421 A->nonzerostate++; \ 422 a_noinsert:; \ 423 ailen[row] = nrow1; \ 424 } while (0) 425 426 #define MatSetValues_SeqAIJ_B_Private(row, col, value, addv, orow, ocol) \ 427 do { \ 428 if (col <= lastcol2) low2 = 0; \ 429 else high2 = nrow2; \ 430 lastcol2 = col; \ 431 while (high2 - low2 > 5) { \ 432 t = (low2 + high2) / 2; \ 433 if (rp2[t] > col) high2 = t; \ 434 else low2 = t; \ 435 } \ 436 for (_i = low2; _i < high2; _i++) { \ 437 if (rp2[_i] > col) break; \ 438 if (rp2[_i] == col) { \ 439 if (addv == ADD_VALUES) { \ 440 ap2[_i] += value; \ 441 (void)PetscLogFlops(1.0); \ 442 } else ap2[_i] = value; \ 443 goto b_noinsert; \ 444 } \ 445 } \ 446 if (value == 0.0 && ignorezeroentries) { \ 447 low2 = 0; \ 448 high2 = nrow2; \ 449 goto b_noinsert; \ 450 } \ 451 if (nonew == 1) { \ 452 low2 = 0; \ 453 high2 = nrow2; \ 454 goto b_noinsert; \ 455 } \ 456 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 457 MatSeqXAIJReallocateAIJ(B, bm, 1, nrow2, row, col, rmax2, ba, bi, bj, rp2, ap2, bimax, nonew, MatScalar); \ 458 N = nrow2++ - 1; \ 459 b->nz++; \ 460 high2++; \ 461 /* shift up all the later entries in this row */ \ 462 PetscCall(PetscArraymove(rp2 + _i + 1, rp2 + _i, N - _i + 1)); \ 463 PetscCall(PetscArraymove(ap2 + _i + 1, ap2 + _i, N - _i + 1)); \ 464 rp2[_i] = col; \ 465 ap2[_i] = 
value; \ 466 B->nonzerostate++; \ 467 b_noinsert:; \ 468 bilen[row] = nrow2; \ 469 } while (0) 470 471 static PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A, PetscInt row, const PetscScalar v[]) 472 { 473 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 474 Mat_SeqAIJ *a = (Mat_SeqAIJ *)mat->A->data, *b = (Mat_SeqAIJ *)mat->B->data; 475 PetscInt l, *garray = mat->garray, diag; 476 PetscScalar *aa, *ba; 477 478 PetscFunctionBegin; 479 /* code only works for square matrices A */ 480 481 /* find size of row to the left of the diagonal part */ 482 PetscCall(MatGetOwnershipRange(A, &diag, NULL)); 483 row = row - diag; 484 for (l = 0; l < b->i[row + 1] - b->i[row]; l++) { 485 if (garray[b->j[b->i[row] + l]] > diag) break; 486 } 487 if (l) { 488 PetscCall(MatSeqAIJGetArray(mat->B, &ba)); 489 PetscCall(PetscArraycpy(ba + b->i[row], v, l)); 490 PetscCall(MatSeqAIJRestoreArray(mat->B, &ba)); 491 } 492 493 /* diagonal part */ 494 if (a->i[row + 1] - a->i[row]) { 495 PetscCall(MatSeqAIJGetArray(mat->A, &aa)); 496 PetscCall(PetscArraycpy(aa + a->i[row], v + l, (a->i[row + 1] - a->i[row]))); 497 PetscCall(MatSeqAIJRestoreArray(mat->A, &aa)); 498 } 499 500 /* right of diagonal part */ 501 if (b->i[row + 1] - b->i[row] - l) { 502 PetscCall(MatSeqAIJGetArray(mat->B, &ba)); 503 PetscCall(PetscArraycpy(ba + b->i[row] + l, v + l + a->i[row + 1] - a->i[row], b->i[row + 1] - b->i[row] - l)); 504 PetscCall(MatSeqAIJRestoreArray(mat->B, &ba)); 505 } 506 PetscFunctionReturn(PETSC_SUCCESS); 507 } 508 509 PetscErrorCode MatSetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) 510 { 511 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 512 PetscScalar value = 0.0; 513 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend; 514 PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col; 515 PetscBool roworiented = aij->roworiented; 516 517 /* Some Variables required in the macro */ 518 Mat A = aij->A; 519 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 520 PetscInt *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j; 521 PetscBool ignorezeroentries = a->ignorezeroentries; 522 Mat B = aij->B; 523 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 524 PetscInt *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n; 525 MatScalar *aa, *ba; 526 PetscInt *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2; 527 PetscInt nonew; 528 MatScalar *ap1, *ap2; 529 530 PetscFunctionBegin; 531 PetscCall(MatSeqAIJGetArray(A, &aa)); 532 PetscCall(MatSeqAIJGetArray(B, &ba)); 533 for (i = 0; i < m; i++) { 534 if (im[i] < 0) continue; 535 PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1); 536 if (im[i] >= rstart && im[i] < rend) { 537 row = im[i] - rstart; 538 lastcol1 = -1; 539 rp1 = PetscSafePointerPlusOffset(aj, ai[row]); 540 ap1 = PetscSafePointerPlusOffset(aa, ai[row]); 541 rmax1 = aimax[row]; 542 nrow1 = ailen[row]; 543 low1 = 0; 544 high1 = nrow1; 545 lastcol2 = -1; 546 rp2 = PetscSafePointerPlusOffset(bj, bi[row]); 547 ap2 = PetscSafePointerPlusOffset(ba, bi[row]); 548 rmax2 = bimax[row]; 549 nrow2 = bilen[row]; 550 low2 = 0; 551 high2 = nrow2; 552 553 for (j = 0; j < n; j++) { 554 if (v) value = roworiented ? 
v[i * n + j] : v[i + j * m]; 555 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 556 if (in[j] >= cstart && in[j] < cend) { 557 col = in[j] - cstart; 558 nonew = a->nonew; 559 MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]); 560 } else if (in[j] < 0) { 561 continue; 562 } else { 563 PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1); 564 if (mat->was_assembled) { 565 if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 566 #if defined(PETSC_USE_CTABLE) 567 PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); /* map global col ids to local ones */ 568 col--; 569 #else 570 col = aij->colmap[in[j]] - 1; 571 #endif 572 if (col < 0 && !((Mat_SeqAIJ *)aij->B->data)->nonew) { /* col < 0 means in[j] is a new col for B */ 573 PetscCall(MatDisAssemble_MPIAIJ(mat)); /* Change aij->B from reduced/local format to expanded/global format */ 574 col = in[j]; 575 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 576 B = aij->B; 577 b = (Mat_SeqAIJ *)B->data; 578 bimax = b->imax; 579 bi = b->i; 580 bilen = b->ilen; 581 bj = b->j; 582 ba = b->a; 583 rp2 = bj + bi[row]; 584 ap2 = ba + bi[row]; 585 rmax2 = bimax[row]; 586 nrow2 = bilen[row]; 587 low2 = 0; 588 high2 = nrow2; 589 bm = aij->B->rmap->n; 590 ba = b->a; 591 } else if (col < 0 && !(ignorezeroentries && value == 0.0)) { 592 if (1 == ((Mat_SeqAIJ *)aij->B->data)->nonew) { 593 PetscCall(PetscInfo(mat, "Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n", (double)PetscRealPart(value), im[i], in[j])); 594 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]); 595 } 596 } else col = in[j]; 597 nonew = b->nonew; 598 MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]); 599 } 600 } 601 } else { 602 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]); 603 if (!aij->donotstash) { 604 mat->assembled = PETSC_FALSE; 605 if (roworiented) { 606 PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i * n), (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 607 } else { 608 PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i), m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 609 } 610 } 611 } 612 } 613 PetscCall(MatSeqAIJRestoreArray(A, &aa)); /* aa, bb might have been free'd due to reallocation above. But we don't access them here */ 614 PetscCall(MatSeqAIJRestoreArray(B, &ba)); 615 PetscFunctionReturn(PETSC_SUCCESS); 616 } 617 618 /* 619 This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix. 620 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 621 No off-processor parts off the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE. 
622 */ 623 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[]) 624 { 625 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 626 Mat A = aij->A; /* diagonal part of the matrix */ 627 Mat B = aij->B; /* off-diagonal part of the matrix */ 628 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 629 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 630 PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, col; 631 PetscInt *ailen = a->ilen, *aj = a->j; 632 PetscInt *bilen = b->ilen, *bj = b->j; 633 PetscInt am = aij->A->rmap->n, j; 634 PetscInt diag_so_far = 0, dnz; 635 PetscInt offd_so_far = 0, onz; 636 637 PetscFunctionBegin; 638 /* Iterate over all rows of the matrix */ 639 for (j = 0; j < am; j++) { 640 dnz = onz = 0; 641 /* Iterate over all non-zero columns of the current row */ 642 for (col = mat_i[j]; col < mat_i[j + 1]; col++) { 643 /* If column is in the diagonal */ 644 if (mat_j[col] >= cstart && mat_j[col] < cend) { 645 aj[diag_so_far++] = mat_j[col] - cstart; 646 dnz++; 647 } else { /* off-diagonal entries */ 648 bj[offd_so_far++] = mat_j[col]; 649 onz++; 650 } 651 } 652 ailen[j] = dnz; 653 bilen[j] = onz; 654 } 655 PetscFunctionReturn(PETSC_SUCCESS); 656 } 657 658 /* 659 This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix. 660 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 661 No off-processor parts off the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ. 662 Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart; 663 would not be true and the more complex MatSetValues_MPIAIJ has to be used. 664 */ 665 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[], const PetscScalar mat_a[]) 666 { 667 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 668 Mat A = aij->A; /* diagonal part of the matrix */ 669 Mat B = aij->B; /* off-diagonal part of the matrix */ 670 Mat_SeqAIJ *aijd = (Mat_SeqAIJ *)aij->A->data, *aijo = (Mat_SeqAIJ *)aij->B->data; 671 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 672 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 673 PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend; 674 PetscInt *ailen = a->ilen, *aj = a->j; 675 PetscInt *bilen = b->ilen, *bj = b->j; 676 PetscInt am = aij->A->rmap->n, j; 677 PetscInt *full_diag_i = aijd->i, *full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point. 
*/ 678 PetscInt col, dnz_row, onz_row, rowstart_diag, rowstart_offd; 679 PetscScalar *aa = a->a, *ba = b->a; 680 681 PetscFunctionBegin; 682 /* Iterate over all rows of the matrix */ 683 for (j = 0; j < am; j++) { 684 dnz_row = onz_row = 0; 685 rowstart_offd = full_offd_i[j]; 686 rowstart_diag = full_diag_i[j]; 687 /* Iterate over all non-zero columns of the current row */ 688 for (col = mat_i[j]; col < mat_i[j + 1]; col++) { 689 /* If column is in the diagonal */ 690 if (mat_j[col] >= cstart && mat_j[col] < cend) { 691 aj[rowstart_diag + dnz_row] = mat_j[col] - cstart; 692 aa[rowstart_diag + dnz_row] = mat_a[col]; 693 dnz_row++; 694 } else { /* off-diagonal entries */ 695 bj[rowstart_offd + onz_row] = mat_j[col]; 696 ba[rowstart_offd + onz_row] = mat_a[col]; 697 onz_row++; 698 } 699 } 700 ailen[j] = dnz_row; 701 bilen[j] = onz_row; 702 } 703 PetscFunctionReturn(PETSC_SUCCESS); 704 } 705 706 static PetscErrorCode MatGetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[]) 707 { 708 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 709 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend; 710 PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col; 711 712 PetscFunctionBegin; 713 for (i = 0; i < m; i++) { 714 if (idxm[i] < 0) continue; /* negative row */ 715 PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1); 716 PetscCheck(idxm[i] >= rstart && idxm[i] < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported, row requested %" PetscInt_FMT " range [%" PetscInt_FMT " %" PetscInt_FMT ")", idxm[i], rstart, rend); 717 row = idxm[i] - rstart; 718 for (j = 0; j < n; j++) { 719 if (idxn[j] < 0) continue; /* negative column */ 720 PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1); 721 if (idxn[j] >= cstart && idxn[j] < cend) { 722 col = idxn[j] - cstart; 723 PetscCall(MatGetValues(aij->A, 1, &row, 1, &col, v + i * n + j)); 724 } else { 725 if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 726 #if defined(PETSC_USE_CTABLE) 727 PetscCall(PetscHMapIGetWithDefault(aij->colmap, idxn[j] + 1, 0, &col)); 728 col--; 729 #else 730 col = aij->colmap[idxn[j]] - 1; 731 #endif 732 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v + i * n + j) = 0.0; 733 else PetscCall(MatGetValues(aij->B, 1, &row, 1, &col, v + i * n + j)); 734 } 735 } 736 } 737 PetscFunctionReturn(PETSC_SUCCESS); 738 } 739 740 static PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat, MatAssemblyType mode) 741 { 742 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 743 PetscInt nstash, reallocs; 744 745 PetscFunctionBegin; 746 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS); 747 748 PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range)); 749 PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs)); 750 PetscCall(PetscInfo(aij->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs)); 751 PetscFunctionReturn(PETSC_SUCCESS); 752 } 753 754 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat, MatAssemblyType mode) 755 { 756 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 757 PetscMPIInt n; 758 PetscInt i, j, rstart, ncols, flg; 759 PetscInt *row, *col; 760 PetscBool other_disassembled; 761 PetscScalar *val; 762 763 /* do not use 'b 
= (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 764 765 PetscFunctionBegin; 766 if (!aij->donotstash && !mat->nooffprocentries) { 767 while (1) { 768 PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg)); 769 if (!flg) break; 770 771 for (i = 0; i < n;) { 772 /* Now identify the consecutive vals belonging to the same row */ 773 for (j = i, rstart = row[j]; j < n; j++) { 774 if (row[j] != rstart) break; 775 } 776 if (j < n) ncols = j - i; 777 else ncols = n - i; 778 /* Now assemble all these values with a single function call */ 779 PetscCall(MatSetValues_MPIAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode)); 780 i = j; 781 } 782 } 783 PetscCall(MatStashScatterEnd_Private(&mat->stash)); 784 } 785 #if defined(PETSC_HAVE_DEVICE) 786 if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU; 787 /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */ 788 if (mat->boundtocpu) { 789 PetscCall(MatBindToCPU(aij->A, PETSC_TRUE)); 790 PetscCall(MatBindToCPU(aij->B, PETSC_TRUE)); 791 } 792 #endif 793 PetscCall(MatAssemblyBegin(aij->A, mode)); 794 PetscCall(MatAssemblyEnd(aij->A, mode)); 795 796 /* determine if any processor has disassembled, if so we must 797 also disassemble ourself, in order that we may reassemble. */ 798 /* 799 if nonzero structure of submatrix B cannot change then we know that 800 no processor disassembled thus we can skip this stuff 801 */ 802 if (!((Mat_SeqAIJ *)aij->B->data)->nonew) { 803 PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 804 if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globally it does not */ 805 PetscCall(MatDisAssemble_MPIAIJ(mat)); 806 } 807 } 808 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIAIJ(mat)); 809 PetscCall(MatSetOption(aij->B, MAT_USE_INODES, PETSC_FALSE)); 810 #if defined(PETSC_HAVE_DEVICE) 811 if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU; 812 #endif 813 PetscCall(MatAssemblyBegin(aij->B, mode)); 814 PetscCall(MatAssemblyEnd(aij->B, mode)); 815 816 PetscCall(PetscFree2(aij->rowvalues, aij->rowindices)); 817 818 aij->rowvalues = NULL; 819 820 PetscCall(VecDestroy(&aij->diag)); 821 822 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 823 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ *)aij->A->data)->nonew) { 824 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 825 PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat))); 826 } 827 #if defined(PETSC_HAVE_DEVICE) 828 mat->offloadmask = PETSC_OFFLOAD_BOTH; 829 #endif 830 PetscFunctionReturn(PETSC_SUCCESS); 831 } 832 833 static PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 834 { 835 Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data; 836 837 PetscFunctionBegin; 838 PetscCall(MatZeroEntries(l->A)); 839 PetscCall(MatZeroEntries(l->B)); 840 PetscFunctionReturn(PETSC_SUCCESS); 841 } 842 843 static PetscErrorCode MatZeroRows_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 844 { 845 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 846 PetscInt *lrows; 847 PetscInt r, len; 848 PetscBool cong; 849 850 
PetscFunctionBegin; 851 /* get locally owned rows */ 852 PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows)); 853 PetscCall(MatHasCongruentLayouts(A, &cong)); 854 /* fix right-hand side if needed */ 855 if (x && b) { 856 const PetscScalar *xx; 857 PetscScalar *bb; 858 859 PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout"); 860 PetscCall(VecGetArrayRead(x, &xx)); 861 PetscCall(VecGetArray(b, &bb)); 862 for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]]; 863 PetscCall(VecRestoreArrayRead(x, &xx)); 864 PetscCall(VecRestoreArray(b, &bb)); 865 } 866 867 if (diag != 0.0 && cong) { 868 PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL)); 869 PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 870 } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */ 871 Mat_SeqAIJ *aijA = (Mat_SeqAIJ *)mat->A->data; 872 Mat_SeqAIJ *aijB = (Mat_SeqAIJ *)mat->B->data; 873 PetscInt nnwA, nnwB; 874 PetscBool nnzA, nnzB; 875 876 nnwA = aijA->nonew; 877 nnwB = aijB->nonew; 878 nnzA = aijA->keepnonzeropattern; 879 nnzB = aijB->keepnonzeropattern; 880 if (!nnzA) { 881 PetscCall(PetscInfo(mat->A, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n")); 882 aijA->nonew = 0; 883 } 884 if (!nnzB) { 885 PetscCall(PetscInfo(mat->B, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n")); 886 aijB->nonew = 0; 887 } 888 /* Must zero here before the next loop */ 889 PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL)); 890 PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 891 for (r = 0; r < len; ++r) { 892 const PetscInt row = lrows[r] + A->rmap->rstart; 893 if (row >= A->cmap->N) continue; 894 PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES)); 895 } 896 aijA->nonew = nnwA; 897 aijB->nonew = nnwB; 898 } else { 899 PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL)); 900 PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 901 } 902 PetscCall(PetscFree(lrows)); 903 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 904 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 905 906 /* only change matrix nonzero state if pattern was allowed to be changed */ 907 if (!((Mat_SeqAIJ *)mat->A->data)->keepnonzeropattern || !((Mat_SeqAIJ *)mat->A->data)->nonew) { 908 PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate; 909 PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A))); 910 } 911 PetscFunctionReturn(PETSC_SUCCESS); 912 } 913 914 static PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 915 { 916 Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data; 917 PetscMPIInt n = A->rmap->n; 918 PetscInt i, j, r, m, len = 0; 919 PetscInt *lrows, *owners = A->rmap->range; 920 PetscMPIInt p = 0; 921 PetscSFNode *rrows; 922 PetscSF sf; 923 const PetscScalar *xx; 924 PetscScalar *bb, *mask, *aij_a; 925 Vec xmask, lmask; 926 Mat_SeqAIJ *aij = (Mat_SeqAIJ *)l->B->data; 927 const PetscInt *aj, *ii, *ridx; 928 PetscScalar *aa; 929 930 PetscFunctionBegin; 931 /* Create SF where leaves are input rows and roots are owned rows */ 932 PetscCall(PetscMalloc1(n, &lrows)); 933 for (r = 0; r < n; ++r) lrows[r] = -1; 934 PetscCall(PetscMalloc1(N, &rrows)); 935 for (r = 0; r < N; ++r) { 936 const PetscInt idx = 
rows[r]; 937 PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N); 938 if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */ 939 PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p)); 940 } 941 rrows[r].rank = p; 942 rrows[r].index = rows[r] - owners[p]; 943 } 944 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 945 PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER)); 946 /* Collect flags for rows to be zeroed */ 947 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR)); 948 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR)); 949 PetscCall(PetscSFDestroy(&sf)); 950 /* Compress and put in row numbers */ 951 for (r = 0; r < n; ++r) 952 if (lrows[r] >= 0) lrows[len++] = r; 953 /* zero diagonal part of matrix */ 954 PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b)); 955 /* handle off-diagonal part of matrix */ 956 PetscCall(MatCreateVecs(A, &xmask, NULL)); 957 PetscCall(VecDuplicate(l->lvec, &lmask)); 958 PetscCall(VecGetArray(xmask, &bb)); 959 for (i = 0; i < len; i++) bb[lrows[i]] = 1; 960 PetscCall(VecRestoreArray(xmask, &bb)); 961 PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD)); 962 PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD)); 963 PetscCall(VecDestroy(&xmask)); 964 if (x && b) { /* this code is buggy when the row and column layout don't match */ 965 PetscBool cong; 966 967 PetscCall(MatHasCongruentLayouts(A, &cong)); 968 PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout"); 969 PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD)); 970 PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD)); 971 PetscCall(VecGetArrayRead(l->lvec, &xx)); 972 PetscCall(VecGetArray(b, &bb)); 973 } 974 PetscCall(VecGetArray(lmask, &mask)); 975 /* remove zeroed rows of off-diagonal matrix */ 976 PetscCall(MatSeqAIJGetArray(l->B, &aij_a)); 977 ii = aij->i; 978 for (i = 0; i < len; i++) PetscCall(PetscArrayzero(PetscSafePointerPlusOffset(aij_a, ii[lrows[i]]), ii[lrows[i] + 1] - ii[lrows[i]])); 979 /* loop over all elements of off process part of matrix zeroing removed columns*/ 980 if (aij->compressedrow.use) { 981 m = aij->compressedrow.nrows; 982 ii = aij->compressedrow.i; 983 ridx = aij->compressedrow.rindex; 984 for (i = 0; i < m; i++) { 985 n = ii[i + 1] - ii[i]; 986 aj = aij->j + ii[i]; 987 aa = aij_a + ii[i]; 988 989 for (j = 0; j < n; j++) { 990 if (PetscAbsScalar(mask[*aj])) { 991 if (b) bb[*ridx] -= *aa * xx[*aj]; 992 *aa = 0.0; 993 } 994 aa++; 995 aj++; 996 } 997 ridx++; 998 } 999 } else { /* do not use compressed row format */ 1000 m = l->B->rmap->n; 1001 for (i = 0; i < m; i++) { 1002 n = ii[i + 1] - ii[i]; 1003 aj = aij->j + ii[i]; 1004 aa = aij_a + ii[i]; 1005 for (j = 0; j < n; j++) { 1006 if (PetscAbsScalar(mask[*aj])) { 1007 if (b) bb[i] -= *aa * xx[*aj]; 1008 *aa = 0.0; 1009 } 1010 aa++; 1011 aj++; 1012 } 1013 } 1014 } 1015 if (x && b) { 1016 PetscCall(VecRestoreArray(b, &bb)); 1017 PetscCall(VecRestoreArrayRead(l->lvec, &xx)); 1018 } 1019 PetscCall(MatSeqAIJRestoreArray(l->B, &aij_a)); 1020 PetscCall(VecRestoreArray(lmask, &mask)); 1021 PetscCall(VecDestroy(&lmask)); 1022 PetscCall(PetscFree(lrows)); 1023 1024 /* only change matrix nonzero state if pattern was allowed to 
be changed */ 1025 if (!((Mat_SeqAIJ *)l->A->data)->nonew) { 1026 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1027 PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A))); 1028 } 1029 PetscFunctionReturn(PETSC_SUCCESS); 1030 } 1031 1032 static PetscErrorCode MatMult_MPIAIJ(Mat A, Vec xx, Vec yy) 1033 { 1034 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1035 PetscInt nt; 1036 VecScatter Mvctx = a->Mvctx; 1037 1038 PetscFunctionBegin; 1039 PetscCall(VecGetLocalSize(xx, &nt)); 1040 PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")", A->cmap->n, nt); 1041 PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1042 PetscUseTypeMethod(a->A, mult, xx, yy); 1043 PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1044 PetscUseTypeMethod(a->B, multadd, a->lvec, yy, yy); 1045 PetscFunctionReturn(PETSC_SUCCESS); 1046 } 1047 1048 static PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A, Vec bb, Vec xx) 1049 { 1050 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1051 1052 PetscFunctionBegin; 1053 PetscCall(MatMultDiagonalBlock(a->A, bb, xx)); 1054 PetscFunctionReturn(PETSC_SUCCESS); 1055 } 1056 1057 static PetscErrorCode MatMultAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz) 1058 { 1059 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1060 VecScatter Mvctx = a->Mvctx; 1061 1062 PetscFunctionBegin; 1063 PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1064 PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz)); 1065 PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1066 PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz)); 1067 PetscFunctionReturn(PETSC_SUCCESS); 1068 } 1069 1070 static PetscErrorCode MatMultTranspose_MPIAIJ(Mat A, Vec xx, Vec yy) 1071 { 1072 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1073 1074 PetscFunctionBegin; 1075 /* do nondiagonal part */ 1076 PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec)); 1077 /* do local part */ 1078 PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy)); 1079 /* add partial results together */ 1080 PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE)); 1081 PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE)); 1082 PetscFunctionReturn(PETSC_SUCCESS); 1083 } 1084 1085 static PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat, Mat Bmat, PetscReal tol, PetscBool *f) 1086 { 1087 MPI_Comm comm; 1088 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)Amat->data, *Bij = (Mat_MPIAIJ *)Bmat->data; 1089 Mat Adia = Aij->A, Bdia = Bij->A, Aoff, Boff, *Aoffs, *Boffs; 1090 IS Me, Notme; 1091 PetscInt M, N, first, last, *notme, i; 1092 PetscBool lf; 1093 PetscMPIInt size; 1094 1095 PetscFunctionBegin; 1096 /* Easy test: symmetric diagonal block */ 1097 PetscCall(MatIsTranspose(Adia, Bdia, tol, &lf)); 1098 PetscCall(MPIU_Allreduce(&lf, f, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)Amat))); 1099 if (!*f) PetscFunctionReturn(PETSC_SUCCESS); 1100 PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm)); 1101 PetscCallMPI(MPI_Comm_size(comm, &size)); 1102 if (size == 1) PetscFunctionReturn(PETSC_SUCCESS); 1103 1104 /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. 
*/ 1105 PetscCall(MatGetSize(Amat, &M, &N)); 1106 PetscCall(MatGetOwnershipRange(Amat, &first, &last)); 1107 PetscCall(PetscMalloc1(N - last + first, ¬me)); 1108 for (i = 0; i < first; i++) notme[i] = i; 1109 for (i = last; i < M; i++) notme[i - last + first] = i; 1110 PetscCall(ISCreateGeneral(MPI_COMM_SELF, N - last + first, notme, PETSC_COPY_VALUES, &Notme)); 1111 PetscCall(ISCreateStride(MPI_COMM_SELF, last - first, first, 1, &Me)); 1112 PetscCall(MatCreateSubMatrices(Amat, 1, &Me, &Notme, MAT_INITIAL_MATRIX, &Aoffs)); 1113 Aoff = Aoffs[0]; 1114 PetscCall(MatCreateSubMatrices(Bmat, 1, &Notme, &Me, MAT_INITIAL_MATRIX, &Boffs)); 1115 Boff = Boffs[0]; 1116 PetscCall(MatIsTranspose(Aoff, Boff, tol, f)); 1117 PetscCall(MatDestroyMatrices(1, &Aoffs)); 1118 PetscCall(MatDestroyMatrices(1, &Boffs)); 1119 PetscCall(ISDestroy(&Me)); 1120 PetscCall(ISDestroy(&Notme)); 1121 PetscCall(PetscFree(notme)); 1122 PetscFunctionReturn(PETSC_SUCCESS); 1123 } 1124 1125 static PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz) 1126 { 1127 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1128 1129 PetscFunctionBegin; 1130 /* do nondiagonal part */ 1131 PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec)); 1132 /* do local part */ 1133 PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz)); 1134 /* add partial results together */ 1135 PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE)); 1136 PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE)); 1137 PetscFunctionReturn(PETSC_SUCCESS); 1138 } 1139 1140 /* 1141 This only works correctly for square matrices where the subblock A->A is the 1142 diagonal block 1143 */ 1144 static PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A, Vec v) 1145 { 1146 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1147 1148 PetscFunctionBegin; 1149 PetscCheck(A->rmap->N == A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block"); 1150 PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "row partition must equal col partition"); 1151 PetscCall(MatGetDiagonal(a->A, v)); 1152 PetscFunctionReturn(PETSC_SUCCESS); 1153 } 1154 1155 static PetscErrorCode MatScale_MPIAIJ(Mat A, PetscScalar aa) 1156 { 1157 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1158 1159 PetscFunctionBegin; 1160 PetscCall(MatScale(a->A, aa)); 1161 PetscCall(MatScale(a->B, aa)); 1162 PetscFunctionReturn(PETSC_SUCCESS); 1163 } 1164 1165 static PetscErrorCode MatView_MPIAIJ_Binary(Mat mat, PetscViewer viewer) 1166 { 1167 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1168 Mat_SeqAIJ *A = (Mat_SeqAIJ *)aij->A->data; 1169 Mat_SeqAIJ *B = (Mat_SeqAIJ *)aij->B->data; 1170 const PetscInt *garray = aij->garray; 1171 const PetscScalar *aa, *ba; 1172 PetscInt header[4], M, N, m, rs, cs, cnt, i, ja, jb; 1173 PetscInt64 nz, hnz; 1174 PetscInt *rowlens; 1175 PetscInt *colidxs; 1176 PetscScalar *matvals; 1177 PetscMPIInt rank; 1178 1179 PetscFunctionBegin; 1180 PetscCall(PetscViewerSetUp(viewer)); 1181 1182 M = mat->rmap->N; 1183 N = mat->cmap->N; 1184 m = mat->rmap->n; 1185 rs = mat->rmap->rstart; 1186 cs = mat->cmap->rstart; 1187 nz = A->nz + B->nz; 1188 1189 /* write matrix header */ 1190 header[0] = MAT_FILE_CLASSID; 1191 header[1] = M; 1192 header[2] = N; 1193 PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_INT64, MPI_SUM, 0, PetscObjectComm((PetscObject)mat))); 1194 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 1195 if (rank == 0) { 1196 if 
(hnz > PETSC_MAX_INT) header[3] = PETSC_MAX_INT; 1197 else header[3] = (PetscInt)hnz; 1198 } 1199 PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT)); 1200 1201 /* fill in and store row lengths */ 1202 PetscCall(PetscMalloc1(m, &rowlens)); 1203 for (i = 0; i < m; i++) rowlens[i] = A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i]; 1204 PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT)); 1205 PetscCall(PetscFree(rowlens)); 1206 1207 /* fill in and store column indices */ 1208 PetscCall(PetscMalloc1(nz, &colidxs)); 1209 for (cnt = 0, i = 0; i < m; i++) { 1210 for (jb = B->i[i]; jb < B->i[i + 1]; jb++) { 1211 if (garray[B->j[jb]] > cs) break; 1212 colidxs[cnt++] = garray[B->j[jb]]; 1213 } 1214 for (ja = A->i[i]; ja < A->i[i + 1]; ja++) colidxs[cnt++] = A->j[ja] + cs; 1215 for (; jb < B->i[i + 1]; jb++) colidxs[cnt++] = garray[B->j[jb]]; 1216 } 1217 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz); 1218 PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 1219 PetscCall(PetscFree(colidxs)); 1220 1221 /* fill in and store nonzero values */ 1222 PetscCall(MatSeqAIJGetArrayRead(aij->A, &aa)); 1223 PetscCall(MatSeqAIJGetArrayRead(aij->B, &ba)); 1224 PetscCall(PetscMalloc1(nz, &matvals)); 1225 for (cnt = 0, i = 0; i < m; i++) { 1226 for (jb = B->i[i]; jb < B->i[i + 1]; jb++) { 1227 if (garray[B->j[jb]] > cs) break; 1228 matvals[cnt++] = ba[jb]; 1229 } 1230 for (ja = A->i[i]; ja < A->i[i + 1]; ja++) matvals[cnt++] = aa[ja]; 1231 for (; jb < B->i[i + 1]; jb++) matvals[cnt++] = ba[jb]; 1232 } 1233 PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &aa)); 1234 PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &ba)); 1235 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz); 1236 PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 1237 PetscCall(PetscFree(matvals)); 1238 1239 /* write block size option to the viewer's .info file */ 1240 PetscCall(MatView_Binary_BlockSizes(mat, viewer)); 1241 PetscFunctionReturn(PETSC_SUCCESS); 1242 } 1243 1244 #include <petscdraw.h> 1245 static PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer) 1246 { 1247 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1248 PetscMPIInt rank = aij->rank, size = aij->size; 1249 PetscBool isdraw, iascii, isbinary; 1250 PetscViewer sviewer; 1251 PetscViewerFormat format; 1252 1253 PetscFunctionBegin; 1254 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1255 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 1256 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1257 if (iascii) { 1258 PetscCall(PetscViewerGetFormat(viewer, &format)); 1259 if (format == PETSC_VIEWER_LOAD_BALANCE) { 1260 PetscInt i, nmax = 0, nmin = PETSC_MAX_INT, navg = 0, *nz, nzlocal = ((Mat_SeqAIJ *)aij->A->data)->nz + ((Mat_SeqAIJ *)aij->B->data)->nz; 1261 PetscCall(PetscMalloc1(size, &nz)); 1262 PetscCallMPI(MPI_Allgather(&nzlocal, 1, MPIU_INT, nz, 1, MPIU_INT, PetscObjectComm((PetscObject)mat))); 1263 for (i = 0; i < (PetscInt)size; i++) { 1264 nmax = PetscMax(nmax, nz[i]); 1265 nmin = PetscMin(nmin, nz[i]); 1266 navg += nz[i]; 1267 } 1268 PetscCall(PetscFree(nz)); 1269 navg = navg / size; 1270 PetscCall(PetscViewerASCIIPrintf(viewer, "Load Balance - 
Nonzeros: Min %" PetscInt_FMT " avg %" PetscInt_FMT " max %" PetscInt_FMT "\n", nmin, navg, nmax)); 1271 PetscFunctionReturn(PETSC_SUCCESS); 1272 } 1273 PetscCall(PetscViewerGetFormat(viewer, &format)); 1274 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1275 MatInfo info; 1276 PetscInt *inodes = NULL; 1277 1278 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 1279 PetscCall(MatGetInfo(mat, MAT_LOCAL, &info)); 1280 PetscCall(MatInodeGetInodeSizes(aij->A, NULL, &inodes, NULL)); 1281 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 1282 if (!inodes) { 1283 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, 1284 (double)info.memory)); 1285 } else { 1286 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, 1287 (double)info.memory)); 1288 } 1289 PetscCall(MatGetInfo(aij->A, MAT_LOCAL, &info)); 1290 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used)); 1291 PetscCall(MatGetInfo(aij->B, MAT_LOCAL, &info)); 1292 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used)); 1293 PetscCall(PetscViewerFlush(viewer)); 1294 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 1295 PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n")); 1296 PetscCall(VecScatterView(aij->Mvctx, viewer)); 1297 PetscFunctionReturn(PETSC_SUCCESS); 1298 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1299 PetscInt inodecount, inodelimit, *inodes; 1300 PetscCall(MatInodeGetInodeSizes(aij->A, &inodecount, &inodes, &inodelimit)); 1301 if (inodes) { 1302 PetscCall(PetscViewerASCIIPrintf(viewer, "using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n", inodecount, inodelimit)); 1303 } else { 1304 PetscCall(PetscViewerASCIIPrintf(viewer, "not using I-node (on process 0) routines\n")); 1305 } 1306 PetscFunctionReturn(PETSC_SUCCESS); 1307 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1308 PetscFunctionReturn(PETSC_SUCCESS); 1309 } 1310 } else if (isbinary) { 1311 if (size == 1) { 1312 PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name)); 1313 PetscCall(MatView(aij->A, viewer)); 1314 } else { 1315 PetscCall(MatView_MPIAIJ_Binary(mat, viewer)); 1316 } 1317 PetscFunctionReturn(PETSC_SUCCESS); 1318 } else if (iascii && size == 1) { 1319 PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name)); 1320 PetscCall(MatView(aij->A, viewer)); 1321 PetscFunctionReturn(PETSC_SUCCESS); 1322 } else if (isdraw) { 1323 PetscDraw draw; 1324 PetscBool isnull; 1325 PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); 1326 PetscCall(PetscDrawIsNull(draw, &isnull)); 1327 if (isnull) PetscFunctionReturn(PETSC_SUCCESS); 1328 } 1329 1330 { /* assemble the entire matrix onto first processor */ 1331 Mat A = NULL, Av; 1332 IS isrow, iscol; 1333 1334 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->rmap->N : 0, 0, 1, &isrow)); 1335 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? 
mat->cmap->N : 0, 0, 1, &iscol)); 1336 PetscCall(MatCreateSubMatrix(mat, isrow, iscol, MAT_INITIAL_MATRIX, &A)); 1337 PetscCall(MatMPIAIJGetSeqAIJ(A, &Av, NULL, NULL)); 1338 /* The commented code uses MatCreateSubMatrices instead */ 1339 /* 1340 Mat *AA, A = NULL, Av; 1341 IS isrow,iscol; 1342 1343 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow)); 1344 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol)); 1345 PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA)); 1346 if (rank == 0) { 1347 PetscCall(PetscObjectReference((PetscObject)AA[0])); 1348 A = AA[0]; 1349 Av = AA[0]; 1350 } 1351 PetscCall(MatDestroySubMatrices(1,&AA)); 1352 */ 1353 PetscCall(ISDestroy(&iscol)); 1354 PetscCall(ISDestroy(&isrow)); 1355 /* 1356 Everyone has to call to draw the matrix since the graphics waits are 1357 synchronized across all processors that share the PetscDraw object 1358 */ 1359 PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 1360 if (rank == 0) { 1361 if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)Av, ((PetscObject)mat)->name)); 1362 PetscCall(MatView_SeqAIJ(Av, sviewer)); 1363 } 1364 PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 1365 PetscCall(MatDestroy(&A)); 1366 } 1367 PetscFunctionReturn(PETSC_SUCCESS); 1368 } 1369 1370 PetscErrorCode MatView_MPIAIJ(Mat mat, PetscViewer viewer) 1371 { 1372 PetscBool iascii, isdraw, issocket, isbinary; 1373 1374 PetscFunctionBegin; 1375 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 1376 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1377 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1378 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket)); 1379 if (iascii || isdraw || isbinary || issocket) PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat, viewer)); 1380 PetscFunctionReturn(PETSC_SUCCESS); 1381 } 1382 1383 static PetscErrorCode MatSOR_MPIAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) 1384 { 1385 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data; 1386 Vec bb1 = NULL; 1387 PetscBool hasop; 1388 1389 PetscFunctionBegin; 1390 if (flag == SOR_APPLY_UPPER) { 1391 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1392 PetscFunctionReturn(PETSC_SUCCESS); 1393 } 1394 1395 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) PetscCall(VecDuplicate(bb, &bb1)); 1396 1397 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1398 if (flag & SOR_ZERO_INITIAL_GUESS) { 1399 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1400 its--; 1401 } 1402 1403 while (its--) { 1404 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1405 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1406 1407 /* update rhs: bb1 = bb - B*x */ 1408 PetscCall(VecScale(mat->lvec, -1.0)); 1409 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 1410 1411 /* local sweep */ 1412 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx)); 1413 } 1414 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1415 if (flag & SOR_ZERO_INITIAL_GUESS) { 1416 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, 
xx)); 1417 its--; 1418 } 1419 while (its--) { 1420 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1421 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1422 1423 /* update rhs: bb1 = bb - B*x */ 1424 PetscCall(VecScale(mat->lvec, -1.0)); 1425 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 1426 1427 /* local sweep */ 1428 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx)); 1429 } 1430 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1431 if (flag & SOR_ZERO_INITIAL_GUESS) { 1432 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1433 its--; 1434 } 1435 while (its--) { 1436 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1437 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1438 1439 /* update rhs: bb1 = bb - B*x */ 1440 PetscCall(VecScale(mat->lvec, -1.0)); 1441 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 1442 1443 /* local sweep */ 1444 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx)); 1445 } 1446 } else if (flag & SOR_EISENSTAT) { 1447 Vec xx1; 1448 1449 PetscCall(VecDuplicate(bb, &xx1)); 1450 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx)); 1451 1452 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1453 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1454 if (!mat->diag) { 1455 PetscCall(MatCreateVecs(matin, &mat->diag, NULL)); 1456 PetscCall(MatGetDiagonal(matin, mat->diag)); 1457 } 1458 PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop)); 1459 if (hasop) { 1460 PetscCall(MatMultDiagonalBlock(matin, xx, bb1)); 1461 } else { 1462 PetscCall(VecPointwiseMult(bb1, mat->diag, xx)); 1463 } 1464 PetscCall(VecAYPX(bb1, (omega - 2.0) / omega, bb)); 1465 1466 PetscCall(MatMultAdd(mat->B, mat->lvec, bb1, bb1)); 1467 1468 /* local sweep */ 1469 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1)); 1470 PetscCall(VecAXPY(xx, 1.0, xx1)); 1471 PetscCall(VecDestroy(&xx1)); 1472 } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel SOR not supported"); 1473 1474 PetscCall(VecDestroy(&bb1)); 1475 1476 matin->factorerrortype = mat->A->factorerrortype; 1477 PetscFunctionReturn(PETSC_SUCCESS); 1478 } 1479 1480 static PetscErrorCode MatPermute_MPIAIJ(Mat A, IS rowp, IS colp, Mat *B) 1481 { 1482 Mat aA, aB, Aperm; 1483 const PetscInt *rwant, *cwant, *gcols, *ai, *bi, *aj, *bj; 1484 PetscScalar *aa, *ba; 1485 PetscInt i, j, m, n, ng, anz, bnz, *dnnz, *onnz, *tdnnz, *tonnz, *rdest, *cdest, *work, *gcdest; 1486 PetscSF rowsf, sf; 1487 IS parcolp = NULL; 1488 PetscBool done; 1489 1490 PetscFunctionBegin; 1491 PetscCall(MatGetLocalSize(A, &m, &n)); 1492 PetscCall(ISGetIndices(rowp, &rwant)); 1493 PetscCall(ISGetIndices(colp, &cwant)); 1494 PetscCall(PetscMalloc3(PetscMax(m, n), &work, m, &rdest, n, &cdest)); 1495 1496 /* Invert row permutation to find out where my rows should go */ 1497 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &rowsf)); 1498 PetscCall(PetscSFSetGraphLayout(rowsf, A->rmap, A->rmap->n, NULL, PETSC_OWN_POINTER, rwant)); 1499 PetscCall(PetscSFSetFromOptions(rowsf)); 1500 for (i = 0; i < m; i++) work[i] = 
A->rmap->rstart + i; 1501 PetscCall(PetscSFReduceBegin(rowsf, MPIU_INT, work, rdest, MPI_REPLACE)); 1502 PetscCall(PetscSFReduceEnd(rowsf, MPIU_INT, work, rdest, MPI_REPLACE)); 1503 1504 /* Invert column permutation to find out where my columns should go */ 1505 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1506 PetscCall(PetscSFSetGraphLayout(sf, A->cmap, A->cmap->n, NULL, PETSC_OWN_POINTER, cwant)); 1507 PetscCall(PetscSFSetFromOptions(sf)); 1508 for (i = 0; i < n; i++) work[i] = A->cmap->rstart + i; 1509 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, work, cdest, MPI_REPLACE)); 1510 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, work, cdest, MPI_REPLACE)); 1511 PetscCall(PetscSFDestroy(&sf)); 1512 1513 PetscCall(ISRestoreIndices(rowp, &rwant)); 1514 PetscCall(ISRestoreIndices(colp, &cwant)); 1515 PetscCall(MatMPIAIJGetSeqAIJ(A, &aA, &aB, &gcols)); 1516 1517 /* Find out where my gcols should go */ 1518 PetscCall(MatGetSize(aB, NULL, &ng)); 1519 PetscCall(PetscMalloc1(ng, &gcdest)); 1520 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1521 PetscCall(PetscSFSetGraphLayout(sf, A->cmap, ng, NULL, PETSC_OWN_POINTER, gcols)); 1522 PetscCall(PetscSFSetFromOptions(sf)); 1523 PetscCall(PetscSFBcastBegin(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE)); 1524 PetscCall(PetscSFBcastEnd(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE)); 1525 PetscCall(PetscSFDestroy(&sf)); 1526 1527 PetscCall(PetscCalloc4(m, &dnnz, m, &onnz, m, &tdnnz, m, &tonnz)); 1528 PetscCall(MatGetRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done)); 1529 PetscCall(MatGetRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done)); 1530 for (i = 0; i < m; i++) { 1531 PetscInt row = rdest[i]; 1532 PetscMPIInt rowner; 1533 PetscCall(PetscLayoutFindOwner(A->rmap, row, &rowner)); 1534 for (j = ai[i]; j < ai[i + 1]; j++) { 1535 PetscInt col = cdest[aj[j]]; 1536 PetscMPIInt cowner; 1537 PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); /* Could build an index for the columns to eliminate this search */ 1538 if (rowner == cowner) dnnz[i]++; 1539 else onnz[i]++; 1540 } 1541 for (j = bi[i]; j < bi[i + 1]; j++) { 1542 PetscInt col = gcdest[bj[j]]; 1543 PetscMPIInt cowner; 1544 PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); 1545 if (rowner == cowner) dnnz[i]++; 1546 else onnz[i]++; 1547 } 1548 } 1549 PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE)); 1550 PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE)); 1551 PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE)); 1552 PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE)); 1553 PetscCall(PetscSFDestroy(&rowsf)); 1554 1555 PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A), A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N, 0, tdnnz, 0, tonnz, &Aperm)); 1556 PetscCall(MatSeqAIJGetArray(aA, &aa)); 1557 PetscCall(MatSeqAIJGetArray(aB, &ba)); 1558 for (i = 0; i < m; i++) { 1559 PetscInt *acols = dnnz, *bcols = onnz; /* Repurpose now-unneeded arrays */ 1560 PetscInt j0, rowlen; 1561 rowlen = ai[i + 1] - ai[i]; 1562 for (j0 = j = 0; j < rowlen; j0 = j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1563 for (; j < PetscMin(rowlen, j0 + m); j++) acols[j - j0] = cdest[aj[ai[i] + j]]; 1564 PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, acols, aa + ai[i] + j0, INSERT_VALUES)); 1565 } 1566 rowlen = bi[i + 1] - bi[i]; 1567 for (j0 = j = 0; j < rowlen; j0 = j) { 1568 for (; j < PetscMin(rowlen, j0 + m); j++) bcols[j - j0] = gcdest[bj[bi[i] + j]]; 1569 
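      /* bcols now holds this batch's permuted global column numbers for the off-diagonal block
         (gcdest maps aB's compressed ghost columns to their permuted destinations); insert the
         batch into destination row rdest[i] together with the matching values from ba */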
PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, bcols, ba + bi[i] + j0, INSERT_VALUES)); 1570 } 1571 } 1572 PetscCall(MatAssemblyBegin(Aperm, MAT_FINAL_ASSEMBLY)); 1573 PetscCall(MatAssemblyEnd(Aperm, MAT_FINAL_ASSEMBLY)); 1574 PetscCall(MatRestoreRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done)); 1575 PetscCall(MatRestoreRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done)); 1576 PetscCall(MatSeqAIJRestoreArray(aA, &aa)); 1577 PetscCall(MatSeqAIJRestoreArray(aB, &ba)); 1578 PetscCall(PetscFree4(dnnz, onnz, tdnnz, tonnz)); 1579 PetscCall(PetscFree3(work, rdest, cdest)); 1580 PetscCall(PetscFree(gcdest)); 1581 if (parcolp) PetscCall(ISDestroy(&colp)); 1582 *B = Aperm; 1583 PetscFunctionReturn(PETSC_SUCCESS); 1584 } 1585 1586 static PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[]) 1587 { 1588 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1589 1590 PetscFunctionBegin; 1591 PetscCall(MatGetSize(aij->B, NULL, nghosts)); 1592 if (ghosts) *ghosts = aij->garray; 1593 PetscFunctionReturn(PETSC_SUCCESS); 1594 } 1595 1596 static PetscErrorCode MatGetInfo_MPIAIJ(Mat matin, MatInfoType flag, MatInfo *info) 1597 { 1598 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data; 1599 Mat A = mat->A, B = mat->B; 1600 PetscLogDouble isend[5], irecv[5]; 1601 1602 PetscFunctionBegin; 1603 info->block_size = 1.0; 1604 PetscCall(MatGetInfo(A, MAT_LOCAL, info)); 1605 1606 isend[0] = info->nz_used; 1607 isend[1] = info->nz_allocated; 1608 isend[2] = info->nz_unneeded; 1609 isend[3] = info->memory; 1610 isend[4] = info->mallocs; 1611 1612 PetscCall(MatGetInfo(B, MAT_LOCAL, info)); 1613 1614 isend[0] += info->nz_used; 1615 isend[1] += info->nz_allocated; 1616 isend[2] += info->nz_unneeded; 1617 isend[3] += info->memory; 1618 isend[4] += info->mallocs; 1619 if (flag == MAT_LOCAL) { 1620 info->nz_used = isend[0]; 1621 info->nz_allocated = isend[1]; 1622 info->nz_unneeded = isend[2]; 1623 info->memory = isend[3]; 1624 info->mallocs = isend[4]; 1625 } else if (flag == MAT_GLOBAL_MAX) { 1626 PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin))); 1627 1628 info->nz_used = irecv[0]; 1629 info->nz_allocated = irecv[1]; 1630 info->nz_unneeded = irecv[2]; 1631 info->memory = irecv[3]; 1632 info->mallocs = irecv[4]; 1633 } else if (flag == MAT_GLOBAL_SUM) { 1634 PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin))); 1635 1636 info->nz_used = irecv[0]; 1637 info->nz_allocated = irecv[1]; 1638 info->nz_unneeded = irecv[2]; 1639 info->memory = irecv[3]; 1640 info->mallocs = irecv[4]; 1641 } 1642 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1643 info->fill_ratio_needed = 0; 1644 info->factor_mallocs = 0; 1645 PetscFunctionReturn(PETSC_SUCCESS); 1646 } 1647 1648 PetscErrorCode MatSetOption_MPIAIJ(Mat A, MatOption op, PetscBool flg) 1649 { 1650 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1651 1652 PetscFunctionBegin; 1653 switch (op) { 1654 case MAT_NEW_NONZERO_LOCATIONS: 1655 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1656 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1657 case MAT_KEEP_NONZERO_PATTERN: 1658 case MAT_NEW_NONZERO_LOCATION_ERR: 1659 case MAT_USE_INODES: 1660 case MAT_IGNORE_ZERO_ENTRIES: 1661 case MAT_FORM_EXPLICIT_TRANSPOSE: 1662 MatCheckPreallocated(A, 1); 1663 PetscCall(MatSetOption(a->A, op, flg)); 1664 PetscCall(MatSetOption(a->B, op, flg)); 1665 break; 1666 case MAT_ROW_ORIENTED: 1667 MatCheckPreallocated(A, 1); 1668 a->roworiented = flg; 1669 1670 
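    /* forward the orientation to the sequential diagonal (A) and off-diagonal (B) blocks so that
       MatSetValues() on them interprets incoming value arrays the same way as the parallel matrix */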
PetscCall(MatSetOption(a->A, op, flg)); 1671 PetscCall(MatSetOption(a->B, op, flg)); 1672 break; 1673 case MAT_FORCE_DIAGONAL_ENTRIES: 1674 case MAT_SORTED_FULL: 1675 PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); 1676 break; 1677 case MAT_IGNORE_OFF_PROC_ENTRIES: 1678 a->donotstash = flg; 1679 break; 1680 /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */ 1681 case MAT_SPD: 1682 case MAT_SYMMETRIC: 1683 case MAT_STRUCTURALLY_SYMMETRIC: 1684 case MAT_HERMITIAN: 1685 case MAT_SYMMETRY_ETERNAL: 1686 case MAT_STRUCTURAL_SYMMETRY_ETERNAL: 1687 case MAT_SPD_ETERNAL: 1688 /* if the diagonal matrix is square it inherits some of the properties above */ 1689 break; 1690 case MAT_SUBMAT_SINGLEIS: 1691 A->submat_singleis = flg; 1692 break; 1693 case MAT_STRUCTURE_ONLY: 1694 /* The option is handled directly by MatSetOption() */ 1695 break; 1696 default: 1697 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op); 1698 } 1699 PetscFunctionReturn(PETSC_SUCCESS); 1700 } 1701 1702 PetscErrorCode MatGetRow_MPIAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1703 { 1704 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data; 1705 PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p; 1706 PetscInt i, *cworkA, *cworkB, **pcA, **pcB, cstart = matin->cmap->rstart; 1707 PetscInt nztot, nzA, nzB, lrow, rstart = matin->rmap->rstart, rend = matin->rmap->rend; 1708 PetscInt *cmap, *idx_p; 1709 1710 PetscFunctionBegin; 1711 PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active"); 1712 mat->getrowactive = PETSC_TRUE; 1713 1714 if (!mat->rowvalues && (idx || v)) { 1715 /* 1716 allocate enough space to hold information from the longest row. 1717 */ 1718 Mat_SeqAIJ *Aa = (Mat_SeqAIJ *)mat->A->data, *Ba = (Mat_SeqAIJ *)mat->B->data; 1719 PetscInt max = 1, tmp; 1720 for (i = 0; i < matin->rmap->n; i++) { 1721 tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i]; 1722 if (max < tmp) max = tmp; 1723 } 1724 PetscCall(PetscMalloc2(max, &mat->rowvalues, max, &mat->rowindices)); 1725 } 1726 1727 PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows"); 1728 lrow = row - rstart; 1729 1730 pvA = &vworkA; 1731 pcA = &cworkA; 1732 pvB = &vworkB; 1733 pcB = &cworkB; 1734 if (!v) { 1735 pvA = NULL; 1736 pvB = NULL; 1737 } 1738 if (!idx) { 1739 pcA = NULL; 1740 if (!v) pcB = NULL; 1741 } 1742 PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA)); 1743 PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB)); 1744 nztot = nzA + nzB; 1745 1746 cmap = mat->garray; 1747 if (v || idx) { 1748 if (nztot) { 1749 /* Sort by increasing column numbers, assuming A and B already sorted */ 1750 PetscInt imark = -1; 1751 if (v) { 1752 *v = v_p = mat->rowvalues; 1753 for (i = 0; i < nzB; i++) { 1754 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1755 else break; 1756 } 1757 imark = i; 1758 for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i]; 1759 for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i]; 1760 } 1761 if (idx) { 1762 *idx = idx_p = mat->rowindices; 1763 if (imark > -1) { 1764 for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i]]; 1765 } else { 1766 for (i = 0; i < nzB; i++) { 1767 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1768 else break; 1769 } 1770 imark = i; 1771 } 1772 for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart + cworkA[i]; 1773 for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i]]; 1774 } 1775 } else { 1776 if 
(idx) *idx = NULL; 1777 if (v) *v = NULL; 1778 } 1779 } 1780 *nz = nztot; 1781 PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA)); 1782 PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB)); 1783 PetscFunctionReturn(PETSC_SUCCESS); 1784 } 1785 1786 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1787 { 1788 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1789 1790 PetscFunctionBegin; 1791 PetscCheck(aij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first"); 1792 aij->getrowactive = PETSC_FALSE; 1793 PetscFunctionReturn(PETSC_SUCCESS); 1794 } 1795 1796 static PetscErrorCode MatNorm_MPIAIJ(Mat mat, NormType type, PetscReal *norm) 1797 { 1798 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1799 Mat_SeqAIJ *amat = (Mat_SeqAIJ *)aij->A->data, *bmat = (Mat_SeqAIJ *)aij->B->data; 1800 PetscInt i, j, cstart = mat->cmap->rstart; 1801 PetscReal sum = 0.0; 1802 const MatScalar *v, *amata, *bmata; 1803 1804 PetscFunctionBegin; 1805 if (aij->size == 1) { 1806 PetscCall(MatNorm(aij->A, type, norm)); 1807 } else { 1808 PetscCall(MatSeqAIJGetArrayRead(aij->A, &amata)); 1809 PetscCall(MatSeqAIJGetArrayRead(aij->B, &bmata)); 1810 if (type == NORM_FROBENIUS) { 1811 v = amata; 1812 for (i = 0; i < amat->nz; i++) { 1813 sum += PetscRealPart(PetscConj(*v) * (*v)); 1814 v++; 1815 } 1816 v = bmata; 1817 for (i = 0; i < bmat->nz; i++) { 1818 sum += PetscRealPart(PetscConj(*v) * (*v)); 1819 v++; 1820 } 1821 PetscCall(MPIU_Allreduce(&sum, norm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 1822 *norm = PetscSqrtReal(*norm); 1823 PetscCall(PetscLogFlops(2.0 * amat->nz + 2.0 * bmat->nz)); 1824 } else if (type == NORM_1) { /* max column norm */ 1825 PetscReal *tmp, *tmp2; 1826 PetscInt *jj, *garray = aij->garray; 1827 PetscCall(PetscCalloc1(mat->cmap->N + 1, &tmp)); 1828 PetscCall(PetscMalloc1(mat->cmap->N + 1, &tmp2)); 1829 *norm = 0.0; 1830 v = amata; 1831 jj = amat->j; 1832 for (j = 0; j < amat->nz; j++) { 1833 tmp[cstart + *jj++] += PetscAbsScalar(*v); 1834 v++; 1835 } 1836 v = bmata; 1837 jj = bmat->j; 1838 for (j = 0; j < bmat->nz; j++) { 1839 tmp[garray[*jj++]] += PetscAbsScalar(*v); 1840 v++; 1841 } 1842 PetscCall(MPIU_Allreduce(tmp, tmp2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 1843 for (j = 0; j < mat->cmap->N; j++) { 1844 if (tmp2[j] > *norm) *norm = tmp2[j]; 1845 } 1846 PetscCall(PetscFree(tmp)); 1847 PetscCall(PetscFree(tmp2)); 1848 PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0))); 1849 } else if (type == NORM_INFINITY) { /* max row norm */ 1850 PetscReal ntemp = 0.0; 1851 for (j = 0; j < aij->A->rmap->n; j++) { 1852 v = PetscSafePointerPlusOffset(amata, amat->i[j]); 1853 sum = 0.0; 1854 for (i = 0; i < amat->i[j + 1] - amat->i[j]; i++) { 1855 sum += PetscAbsScalar(*v); 1856 v++; 1857 } 1858 v = PetscSafePointerPlusOffset(bmata, bmat->i[j]); 1859 for (i = 0; i < bmat->i[j + 1] - bmat->i[j]; i++) { 1860 sum += PetscAbsScalar(*v); 1861 v++; 1862 } 1863 if (sum > ntemp) ntemp = sum; 1864 } 1865 PetscCall(MPIU_Allreduce(&ntemp, norm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat))); 1866 PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0))); 1867 } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for two norm"); 1868 PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &amata)); 1869 PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &bmata)); 1870 } 1871 PetscFunctionReturn(PETSC_SUCCESS); 1872 } 1873 
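/* Illustrative usage sketch (not part of the PETSc library; the function name below is hypothetical
   and unused elsewhere): the three norms supported by MatNorm_MPIAIJ() above, assuming A is an
   assembled MATMPIAIJ matrix. NORM_2 is not supported in parallel and would trigger the SETERRQ()
   in the implementation. */
static PETSC_UNUSED PetscErrorCode MatNormUsageSketch_MPIAIJ(Mat A)
{
  PetscReal fro, one, inf;

  PetscFunctionBegin;
  PetscCall(MatNorm(A, NORM_FROBENIUS, &fro)); /* sqrt of the globally summed |a_ij|^2 */
  PetscCall(MatNorm(A, NORM_1, &one));         /* largest column sum of |a_ij|, reduced over all ranks */
  PetscCall(MatNorm(A, NORM_INFINITY, &inf));  /* largest row sum of |a_ij|; rows are rank-local */
  PetscCall(PetscPrintf(PetscObjectComm((PetscObject)A), "Frobenius %g, 1-norm %g, inf-norm %g\n", (double)fro, (double)one, (double)inf));
  PetscFunctionReturn(PETSC_SUCCESS);
}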
1874 static PetscErrorCode MatTranspose_MPIAIJ(Mat A, MatReuse reuse, Mat *matout) 1875 { 1876 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *b; 1877 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *)a->A->data, *Bloc = (Mat_SeqAIJ *)a->B->data, *sub_B_diag; 1878 PetscInt M = A->rmap->N, N = A->cmap->N, ma, na, mb, nb, row, *cols, *cols_tmp, *B_diag_ilen, i, ncol, A_diag_ncol; 1879 const PetscInt *ai, *aj, *bi, *bj, *B_diag_i; 1880 Mat B, A_diag, *B_diag; 1881 const MatScalar *pbv, *bv; 1882 1883 PetscFunctionBegin; 1884 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout)); 1885 ma = A->rmap->n; 1886 na = A->cmap->n; 1887 mb = a->B->rmap->n; 1888 nb = a->B->cmap->n; 1889 ai = Aloc->i; 1890 aj = Aloc->j; 1891 bi = Bloc->i; 1892 bj = Bloc->j; 1893 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1894 PetscInt *d_nnz, *g_nnz, *o_nnz; 1895 PetscSFNode *oloc; 1896 PETSC_UNUSED PetscSF sf; 1897 1898 PetscCall(PetscMalloc4(na, &d_nnz, na, &o_nnz, nb, &g_nnz, nb, &oloc)); 1899 /* compute d_nnz for preallocation */ 1900 PetscCall(PetscArrayzero(d_nnz, na)); 1901 for (i = 0; i < ai[ma]; i++) d_nnz[aj[i]]++; 1902 /* compute local off-diagonal contributions */ 1903 PetscCall(PetscArrayzero(g_nnz, nb)); 1904 for (i = 0; i < bi[ma]; i++) g_nnz[bj[i]]++; 1905 /* map those to global */ 1906 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1907 PetscCall(PetscSFSetGraphLayout(sf, A->cmap, nb, NULL, PETSC_USE_POINTER, a->garray)); 1908 PetscCall(PetscSFSetFromOptions(sf)); 1909 PetscCall(PetscArrayzero(o_nnz, na)); 1910 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM)); 1911 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM)); 1912 PetscCall(PetscSFDestroy(&sf)); 1913 1914 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 1915 PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M)); 1916 PetscCall(MatSetBlockSizes(B, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs))); 1917 PetscCall(MatSetType(B, ((PetscObject)A)->type_name)); 1918 PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz)); 1919 PetscCall(PetscFree4(d_nnz, o_nnz, g_nnz, oloc)); 1920 } else { 1921 B = *matout; 1922 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 1923 } 1924 1925 b = (Mat_MPIAIJ *)B->data; 1926 A_diag = a->A; 1927 B_diag = &b->A; 1928 sub_B_diag = (Mat_SeqAIJ *)(*B_diag)->data; 1929 A_diag_ncol = A_diag->cmap->N; 1930 B_diag_ilen = sub_B_diag->ilen; 1931 B_diag_i = sub_B_diag->i; 1932 1933 /* Set ilen for diagonal of B */ 1934 for (i = 0; i < A_diag_ncol; i++) B_diag_ilen[i] = B_diag_i[i + 1] - B_diag_i[i]; 1935 1936 /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done 1937 very quickly (=without using MatSetValues), because all writes are local. 
*/ 1938 PetscCall(MatTransposeSetPrecursor(A_diag, *B_diag)); 1939 PetscCall(MatTranspose(A_diag, MAT_REUSE_MATRIX, B_diag)); 1940 1941 /* copy over the B part */ 1942 PetscCall(PetscMalloc1(bi[mb], &cols)); 1943 PetscCall(MatSeqAIJGetArrayRead(a->B, &bv)); 1944 pbv = bv; 1945 row = A->rmap->rstart; 1946 for (i = 0; i < bi[mb]; i++) cols[i] = a->garray[bj[i]]; 1947 cols_tmp = cols; 1948 for (i = 0; i < mb; i++) { 1949 ncol = bi[i + 1] - bi[i]; 1950 PetscCall(MatSetValues(B, ncol, cols_tmp, 1, &row, pbv, INSERT_VALUES)); 1951 row++; 1952 if (pbv) pbv += ncol; 1953 if (cols_tmp) cols_tmp += ncol; 1954 } 1955 PetscCall(PetscFree(cols)); 1956 PetscCall(MatSeqAIJRestoreArrayRead(a->B, &bv)); 1957 1958 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 1959 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 1960 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1961 *matout = B; 1962 } else { 1963 PetscCall(MatHeaderMerge(A, &B)); 1964 } 1965 PetscFunctionReturn(PETSC_SUCCESS); 1966 } 1967 1968 static PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat, Vec ll, Vec rr) 1969 { 1970 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1971 Mat a = aij->A, b = aij->B; 1972 PetscInt s1, s2, s3; 1973 1974 PetscFunctionBegin; 1975 PetscCall(MatGetLocalSize(mat, &s2, &s3)); 1976 if (rr) { 1977 PetscCall(VecGetLocalSize(rr, &s1)); 1978 PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size"); 1979 /* Overlap communication with computation. */ 1980 PetscCall(VecScatterBegin(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1981 } 1982 if (ll) { 1983 PetscCall(VecGetLocalSize(ll, &s1)); 1984 PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size"); 1985 PetscUseTypeMethod(b, diagonalscale, ll, NULL); 1986 } 1987 /* scale the diagonal block */ 1988 PetscUseTypeMethod(a, diagonalscale, ll, rr); 1989 1990 if (rr) { 1991 /* Do a scatter end and then right scale the off-diagonal block */ 1992 PetscCall(VecScatterEnd(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1993 PetscUseTypeMethod(b, diagonalscale, NULL, aij->lvec); 1994 } 1995 PetscFunctionReturn(PETSC_SUCCESS); 1996 } 1997 1998 static PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 1999 { 2000 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2001 2002 PetscFunctionBegin; 2003 PetscCall(MatSetUnfactored(a->A)); 2004 PetscFunctionReturn(PETSC_SUCCESS); 2005 } 2006 2007 static PetscErrorCode MatEqual_MPIAIJ(Mat A, Mat B, PetscBool *flag) 2008 { 2009 Mat_MPIAIJ *matB = (Mat_MPIAIJ *)B->data, *matA = (Mat_MPIAIJ *)A->data; 2010 Mat a, b, c, d; 2011 PetscBool flg; 2012 2013 PetscFunctionBegin; 2014 a = matA->A; 2015 b = matA->B; 2016 c = matB->A; 2017 d = matB->B; 2018 2019 PetscCall(MatEqual(a, c, &flg)); 2020 if (flg) PetscCall(MatEqual(b, d, &flg)); 2021 PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A))); 2022 PetscFunctionReturn(PETSC_SUCCESS); 2023 } 2024 2025 static PetscErrorCode MatCopy_MPIAIJ(Mat A, Mat B, MatStructure str) 2026 { 2027 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2028 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2029 2030 PetscFunctionBegin; 2031 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
*/ 2032 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2033 /* because of the column compression in the off-processor part of the matrix a->B, 2034 the number of columns in a->B and b->B may be different, hence we cannot call 2035 the MatCopy() directly on the two parts. If need be, we can provide a more 2036 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2037 then copying the submatrices */ 2038 PetscCall(MatCopy_Basic(A, B, str)); 2039 } else { 2040 PetscCall(MatCopy(a->A, b->A, str)); 2041 PetscCall(MatCopy(a->B, b->B, str)); 2042 } 2043 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 2044 PetscFunctionReturn(PETSC_SUCCESS); 2045 } 2046 2047 /* 2048 Computes the number of nonzeros per row needed for preallocation when X and Y 2049 have different nonzero structure. 2050 */ 2051 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m, const PetscInt *xi, const PetscInt *xj, const PetscInt *xltog, const PetscInt *yi, const PetscInt *yj, const PetscInt *yltog, PetscInt *nnz) 2052 { 2053 PetscInt i, j, k, nzx, nzy; 2054 2055 PetscFunctionBegin; 2056 /* Set the number of nonzeros in the new matrix */ 2057 for (i = 0; i < m; i++) { 2058 const PetscInt *xjj = PetscSafePointerPlusOffset(xj, xi[i]), *yjj = PetscSafePointerPlusOffset(yj, yi[i]); 2059 nzx = xi[i + 1] - xi[i]; 2060 nzy = yi[i + 1] - yi[i]; 2061 nnz[i] = 0; 2062 for (j = 0, k = 0; j < nzx; j++) { /* Point in X */ 2063 for (; k < nzy && yltog[yjj[k]] < xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2064 if (k < nzy && yltog[yjj[k]] == xltog[xjj[j]]) k++; /* Skip duplicate */ 2065 nnz[i]++; 2066 } 2067 for (; k < nzy; k++) nnz[i]++; 2068 } 2069 PetscFunctionReturn(PETSC_SUCCESS); 2070 } 2071 2072 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2073 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz) 2074 { 2075 PetscInt m = Y->rmap->N; 2076 Mat_SeqAIJ *x = (Mat_SeqAIJ *)X->data; 2077 Mat_SeqAIJ *y = (Mat_SeqAIJ *)Y->data; 2078 2079 PetscFunctionBegin; 2080 PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz)); 2081 PetscFunctionReturn(PETSC_SUCCESS); 2082 } 2083 2084 static PetscErrorCode MatAXPY_MPIAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str) 2085 { 2086 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data, *yy = (Mat_MPIAIJ *)Y->data; 2087 2088 PetscFunctionBegin; 2089 if (str == SAME_NONZERO_PATTERN) { 2090 PetscCall(MatAXPY(yy->A, a, xx->A, str)); 2091 PetscCall(MatAXPY(yy->B, a, xx->B, str)); 2092 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2093 PetscCall(MatAXPY_Basic(Y, a, X, str)); 2094 } else { 2095 Mat B; 2096 PetscInt *nnz_d, *nnz_o; 2097 2098 PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d)); 2099 PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o)); 2100 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B)); 2101 PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name)); 2102 PetscCall(MatSetLayouts(B, Y->rmap, Y->cmap)); 2103 PetscCall(MatSetType(B, ((PetscObject)Y)->type_name)); 2104 PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A, xx->A, nnz_d)); 2105 PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o)); 2106 PetscCall(MatMPIAIJSetPreallocation(B, 0, nnz_d, 0, nnz_o)); 2107 PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str)); 2108 PetscCall(MatHeaderMerge(Y, &B)); 2109 PetscCall(PetscFree(nnz_d)); 
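    /* nnz_o (like nnz_d above) was only needed to preallocate B, which has now replaced Y via MatHeaderMerge() */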
2110 PetscCall(PetscFree(nnz_o)); 2111 } 2112 PetscFunctionReturn(PETSC_SUCCESS); 2113 } 2114 2115 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat); 2116 2117 static PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2118 { 2119 PetscFunctionBegin; 2120 if (PetscDefined(USE_COMPLEX)) { 2121 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2122 2123 PetscCall(MatConjugate_SeqAIJ(aij->A)); 2124 PetscCall(MatConjugate_SeqAIJ(aij->B)); 2125 } 2126 PetscFunctionReturn(PETSC_SUCCESS); 2127 } 2128 2129 static PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2130 { 2131 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2132 2133 PetscFunctionBegin; 2134 PetscCall(MatRealPart(a->A)); 2135 PetscCall(MatRealPart(a->B)); 2136 PetscFunctionReturn(PETSC_SUCCESS); 2137 } 2138 2139 static PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2140 { 2141 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2142 2143 PetscFunctionBegin; 2144 PetscCall(MatImaginaryPart(a->A)); 2145 PetscCall(MatImaginaryPart(a->B)); 2146 PetscFunctionReturn(PETSC_SUCCESS); 2147 } 2148 2149 static PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2150 { 2151 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2152 PetscInt i, *idxb = NULL, m = A->rmap->n; 2153 PetscScalar *va, *vv; 2154 Vec vB, vA; 2155 const PetscScalar *vb; 2156 2157 PetscFunctionBegin; 2158 PetscCall(MatCreateVecs(a->A, NULL, &vA)); 2159 PetscCall(MatGetRowMaxAbs(a->A, vA, idx)); 2160 2161 PetscCall(VecGetArrayWrite(vA, &va)); 2162 if (idx) { 2163 for (i = 0; i < m; i++) { 2164 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2165 } 2166 } 2167 2168 PetscCall(MatCreateVecs(a->B, NULL, &vB)); 2169 PetscCall(PetscMalloc1(m, &idxb)); 2170 PetscCall(MatGetRowMaxAbs(a->B, vB, idxb)); 2171 2172 PetscCall(VecGetArrayWrite(v, &vv)); 2173 PetscCall(VecGetArrayRead(vB, &vb)); 2174 for (i = 0; i < m; i++) { 2175 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2176 vv[i] = vb[i]; 2177 if (idx) idx[i] = a->garray[idxb[i]]; 2178 } else { 2179 vv[i] = va[i]; 2180 if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) idx[i] = a->garray[idxb[i]]; 2181 } 2182 } 2183 PetscCall(VecRestoreArrayWrite(vA, &vv)); 2184 PetscCall(VecRestoreArrayWrite(vA, &va)); 2185 PetscCall(VecRestoreArrayRead(vB, &vb)); 2186 PetscCall(PetscFree(idxb)); 2187 PetscCall(VecDestroy(&vA)); 2188 PetscCall(VecDestroy(&vB)); 2189 PetscFunctionReturn(PETSC_SUCCESS); 2190 } 2191 2192 static PetscErrorCode MatGetRowSumAbs_MPIAIJ(Mat A, Vec v) 2193 { 2194 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2195 Vec vB, vA; 2196 2197 PetscFunctionBegin; 2198 PetscCall(MatCreateVecs(a->A, NULL, &vA)); 2199 PetscCall(MatGetRowSumAbs(a->A, vA)); 2200 PetscCall(MatCreateVecs(a->B, NULL, &vB)); 2201 PetscCall(MatGetRowSumAbs(a->B, vB)); 2202 PetscCall(VecAXPY(vA, 1.0, vB)); 2203 PetscCall(VecDestroy(&vB)); 2204 PetscCall(VecCopy(vA, v)); 2205 PetscCall(VecDestroy(&vA)); 2206 PetscFunctionReturn(PETSC_SUCCESS); 2207 } 2208 2209 static PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2210 { 2211 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 2212 PetscInt m = A->rmap->n, n = A->cmap->n; 2213 PetscInt cstart = A->cmap->rstart, cend = A->cmap->rend; 2214 PetscInt *cmap = mat->garray; 2215 PetscInt *diagIdx, *offdiagIdx; 2216 Vec diagV, offdiagV; 2217 PetscScalar *a, *diagA, *offdiagA; 2218 const PetscScalar *ba, *bav; 2219 PetscInt r, j, col, ncols, *bi, *bj; 2220 Mat B = mat->B; 2221 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 2222 2223 PetscFunctionBegin; 2224 /* When a process holds entire A and other 
processes have no entry */ 2225 if (A->cmap->N == n) { 2226 PetscCall(VecGetArrayWrite(v, &diagA)); 2227 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV)); 2228 PetscCall(MatGetRowMinAbs(mat->A, diagV, idx)); 2229 PetscCall(VecDestroy(&diagV)); 2230 PetscCall(VecRestoreArrayWrite(v, &diagA)); 2231 PetscFunctionReturn(PETSC_SUCCESS); 2232 } else if (n == 0) { 2233 if (m) { 2234 PetscCall(VecGetArrayWrite(v, &a)); 2235 for (r = 0; r < m; r++) { 2236 a[r] = 0.0; 2237 if (idx) idx[r] = -1; 2238 } 2239 PetscCall(VecRestoreArrayWrite(v, &a)); 2240 } 2241 PetscFunctionReturn(PETSC_SUCCESS); 2242 } 2243 2244 PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx)); 2245 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2246 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2247 PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx)); 2248 2249 /* Get offdiagIdx[] for implicit 0.0 */ 2250 PetscCall(MatSeqAIJGetArrayRead(B, &bav)); 2251 ba = bav; 2252 bi = b->i; 2253 bj = b->j; 2254 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2255 for (r = 0; r < m; r++) { 2256 ncols = bi[r + 1] - bi[r]; 2257 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2258 offdiagA[r] = *ba; 2259 offdiagIdx[r] = cmap[0]; 2260 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2261 offdiagA[r] = 0.0; 2262 2263 /* Find first hole in the cmap */ 2264 for (j = 0; j < ncols; j++) { 2265 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2266 if (col > j && j < cstart) { 2267 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2268 break; 2269 } else if (col > j + n && j >= cstart) { 2270 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2271 break; 2272 } 2273 } 2274 if (j == ncols && ncols < A->cmap->N - n) { 2275 /* a hole is outside compressed Bcols */ 2276 if (ncols == 0) { 2277 if (cstart) { 2278 offdiagIdx[r] = 0; 2279 } else offdiagIdx[r] = cend; 2280 } else { /* ncols > 0 */ 2281 offdiagIdx[r] = cmap[ncols - 1] + 1; 2282 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2283 } 2284 } 2285 } 2286 2287 for (j = 0; j < ncols; j++) { 2288 if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) { 2289 offdiagA[r] = *ba; 2290 offdiagIdx[r] = cmap[*bj]; 2291 } 2292 ba++; 2293 bj++; 2294 } 2295 } 2296 2297 PetscCall(VecGetArrayWrite(v, &a)); 2298 PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA)); 2299 for (r = 0; r < m; ++r) { 2300 if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) { 2301 a[r] = diagA[r]; 2302 if (idx) idx[r] = cstart + diagIdx[r]; 2303 } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) { 2304 a[r] = diagA[r]; 2305 if (idx) { 2306 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2307 idx[r] = cstart + diagIdx[r]; 2308 } else idx[r] = offdiagIdx[r]; 2309 } 2310 } else { 2311 a[r] = offdiagA[r]; 2312 if (idx) idx[r] = offdiagIdx[r]; 2313 } 2314 } 2315 PetscCall(MatSeqAIJRestoreArrayRead(B, &bav)); 2316 PetscCall(VecRestoreArrayWrite(v, &a)); 2317 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA)); 2318 PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 2319 PetscCall(VecDestroy(&diagV)); 2320 PetscCall(VecDestroy(&offdiagV)); 2321 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2322 PetscFunctionReturn(PETSC_SUCCESS); 2323 } 2324 2325 static PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2326 { 2327 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 2328 PetscInt m = A->rmap->n, n = A->cmap->n; 2329 PetscInt cstart = A->cmap->rstart, cend = A->cmap->rend; 
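  /* mat->garray maps the compressed local column numbering of the off-diagonal block B back to
     global column numbers, so cmap[bj[k]] below is the global column of the k-th stored entry of B */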
2330 PetscInt *cmap = mat->garray; 2331 PetscInt *diagIdx, *offdiagIdx; 2332 Vec diagV, offdiagV; 2333 PetscScalar *a, *diagA, *offdiagA; 2334 const PetscScalar *ba, *bav; 2335 PetscInt r, j, col, ncols, *bi, *bj; 2336 Mat B = mat->B; 2337 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 2338 2339 PetscFunctionBegin; 2340 /* When a process holds entire A and other processes have no entry */ 2341 if (A->cmap->N == n) { 2342 PetscCall(VecGetArrayWrite(v, &diagA)); 2343 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV)); 2344 PetscCall(MatGetRowMin(mat->A, diagV, idx)); 2345 PetscCall(VecDestroy(&diagV)); 2346 PetscCall(VecRestoreArrayWrite(v, &diagA)); 2347 PetscFunctionReturn(PETSC_SUCCESS); 2348 } else if (n == 0) { 2349 if (m) { 2350 PetscCall(VecGetArrayWrite(v, &a)); 2351 for (r = 0; r < m; r++) { 2352 a[r] = PETSC_MAX_REAL; 2353 if (idx) idx[r] = -1; 2354 } 2355 PetscCall(VecRestoreArrayWrite(v, &a)); 2356 } 2357 PetscFunctionReturn(PETSC_SUCCESS); 2358 } 2359 2360 PetscCall(PetscCalloc2(m, &diagIdx, m, &offdiagIdx)); 2361 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2362 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2363 PetscCall(MatGetRowMin(mat->A, diagV, diagIdx)); 2364 2365 /* Get offdiagIdx[] for implicit 0.0 */ 2366 PetscCall(MatSeqAIJGetArrayRead(B, &bav)); 2367 ba = bav; 2368 bi = b->i; 2369 bj = b->j; 2370 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2371 for (r = 0; r < m; r++) { 2372 ncols = bi[r + 1] - bi[r]; 2373 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2374 offdiagA[r] = *ba; 2375 offdiagIdx[r] = cmap[0]; 2376 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2377 offdiagA[r] = 0.0; 2378 2379 /* Find first hole in the cmap */ 2380 for (j = 0; j < ncols; j++) { 2381 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2382 if (col > j && j < cstart) { 2383 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2384 break; 2385 } else if (col > j + n && j >= cstart) { 2386 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2387 break; 2388 } 2389 } 2390 if (j == ncols && ncols < A->cmap->N - n) { 2391 /* a hole is outside compressed Bcols */ 2392 if (ncols == 0) { 2393 if (cstart) { 2394 offdiagIdx[r] = 0; 2395 } else offdiagIdx[r] = cend; 2396 } else { /* ncols > 0 */ 2397 offdiagIdx[r] = cmap[ncols - 1] + 1; 2398 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2399 } 2400 } 2401 } 2402 2403 for (j = 0; j < ncols; j++) { 2404 if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) { 2405 offdiagA[r] = *ba; 2406 offdiagIdx[r] = cmap[*bj]; 2407 } 2408 ba++; 2409 bj++; 2410 } 2411 } 2412 2413 PetscCall(VecGetArrayWrite(v, &a)); 2414 PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA)); 2415 for (r = 0; r < m; ++r) { 2416 if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) { 2417 a[r] = diagA[r]; 2418 if (idx) idx[r] = cstart + diagIdx[r]; 2419 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2420 a[r] = diagA[r]; 2421 if (idx) { 2422 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2423 idx[r] = cstart + diagIdx[r]; 2424 } else idx[r] = offdiagIdx[r]; 2425 } 2426 } else { 2427 a[r] = offdiagA[r]; 2428 if (idx) idx[r] = offdiagIdx[r]; 2429 } 2430 } 2431 PetscCall(MatSeqAIJRestoreArrayRead(B, &bav)); 2432 PetscCall(VecRestoreArrayWrite(v, &a)); 2433 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA)); 2434 PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 2435 PetscCall(VecDestroy(&diagV)); 2436 
PetscCall(VecDestroy(&offdiagV)); 2437 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2438 PetscFunctionReturn(PETSC_SUCCESS); 2439 } 2440 2441 static PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2442 { 2443 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 2444 PetscInt m = A->rmap->n, n = A->cmap->n; 2445 PetscInt cstart = A->cmap->rstart, cend = A->cmap->rend; 2446 PetscInt *cmap = mat->garray; 2447 PetscInt *diagIdx, *offdiagIdx; 2448 Vec diagV, offdiagV; 2449 PetscScalar *a, *diagA, *offdiagA; 2450 const PetscScalar *ba, *bav; 2451 PetscInt r, j, col, ncols, *bi, *bj; 2452 Mat B = mat->B; 2453 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 2454 2455 PetscFunctionBegin; 2456 /* When a process holds entire A and other processes have no entry */ 2457 if (A->cmap->N == n) { 2458 PetscCall(VecGetArrayWrite(v, &diagA)); 2459 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV)); 2460 PetscCall(MatGetRowMax(mat->A, diagV, idx)); 2461 PetscCall(VecDestroy(&diagV)); 2462 PetscCall(VecRestoreArrayWrite(v, &diagA)); 2463 PetscFunctionReturn(PETSC_SUCCESS); 2464 } else if (n == 0) { 2465 if (m) { 2466 PetscCall(VecGetArrayWrite(v, &a)); 2467 for (r = 0; r < m; r++) { 2468 a[r] = PETSC_MIN_REAL; 2469 if (idx) idx[r] = -1; 2470 } 2471 PetscCall(VecRestoreArrayWrite(v, &a)); 2472 } 2473 PetscFunctionReturn(PETSC_SUCCESS); 2474 } 2475 2476 PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx)); 2477 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2478 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2479 PetscCall(MatGetRowMax(mat->A, diagV, diagIdx)); 2480 2481 /* Get offdiagIdx[] for implicit 0.0 */ 2482 PetscCall(MatSeqAIJGetArrayRead(B, &bav)); 2483 ba = bav; 2484 bi = b->i; 2485 bj = b->j; 2486 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2487 for (r = 0; r < m; r++) { 2488 ncols = bi[r + 1] - bi[r]; 2489 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2490 offdiagA[r] = *ba; 2491 offdiagIdx[r] = cmap[0]; 2492 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2493 offdiagA[r] = 0.0; 2494 2495 /* Find first hole in the cmap */ 2496 for (j = 0; j < ncols; j++) { 2497 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2498 if (col > j && j < cstart) { 2499 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2500 break; 2501 } else if (col > j + n && j >= cstart) { 2502 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2503 break; 2504 } 2505 } 2506 if (j == ncols && ncols < A->cmap->N - n) { 2507 /* a hole is outside compressed Bcols */ 2508 if (ncols == 0) { 2509 if (cstart) { 2510 offdiagIdx[r] = 0; 2511 } else offdiagIdx[r] = cend; 2512 } else { /* ncols > 0 */ 2513 offdiagIdx[r] = cmap[ncols - 1] + 1; 2514 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2515 } 2516 } 2517 } 2518 2519 for (j = 0; j < ncols; j++) { 2520 if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) { 2521 offdiagA[r] = *ba; 2522 offdiagIdx[r] = cmap[*bj]; 2523 } 2524 ba++; 2525 bj++; 2526 } 2527 } 2528 2529 PetscCall(VecGetArrayWrite(v, &a)); 2530 PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA)); 2531 for (r = 0; r < m; ++r) { 2532 if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) { 2533 a[r] = diagA[r]; 2534 if (idx) idx[r] = cstart + diagIdx[r]; 2535 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2536 a[r] = diagA[r]; 2537 if (idx) { 2538 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2539 idx[r] = cstart + diagIdx[r]; 2540 } else idx[r] = offdiagIdx[r]; 2541 } 2542 } 
else { 2543 a[r] = offdiagA[r]; 2544 if (idx) idx[r] = offdiagIdx[r]; 2545 } 2546 } 2547 PetscCall(MatSeqAIJRestoreArrayRead(B, &bav)); 2548 PetscCall(VecRestoreArrayWrite(v, &a)); 2549 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA)); 2550 PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 2551 PetscCall(VecDestroy(&diagV)); 2552 PetscCall(VecDestroy(&offdiagV)); 2553 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2554 PetscFunctionReturn(PETSC_SUCCESS); 2555 } 2556 2557 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat, Mat *newmat) 2558 { 2559 Mat *dummy; 2560 2561 PetscFunctionBegin; 2562 PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat, MAT_DO_NOT_GET_VALUES, MAT_INITIAL_MATRIX, &dummy)); 2563 *newmat = *dummy; 2564 PetscCall(PetscFree(dummy)); 2565 PetscFunctionReturn(PETSC_SUCCESS); 2566 } 2567 2568 static PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A, const PetscScalar **values) 2569 { 2570 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2571 2572 PetscFunctionBegin; 2573 PetscCall(MatInvertBlockDiagonal(a->A, values)); 2574 A->factorerrortype = a->A->factorerrortype; 2575 PetscFunctionReturn(PETSC_SUCCESS); 2576 } 2577 2578 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x, PetscRandom rctx) 2579 { 2580 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)x->data; 2581 2582 PetscFunctionBegin; 2583 PetscCheck(x->assembled || x->preallocated, PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed"); 2584 PetscCall(MatSetRandom(aij->A, rctx)); 2585 if (x->assembled) { 2586 PetscCall(MatSetRandom(aij->B, rctx)); 2587 } else { 2588 PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B, x->cmap->rstart, x->cmap->rend, rctx)); 2589 } 2590 PetscCall(MatAssemblyBegin(x, MAT_FINAL_ASSEMBLY)); 2591 PetscCall(MatAssemblyEnd(x, MAT_FINAL_ASSEMBLY)); 2592 PetscFunctionReturn(PETSC_SUCCESS); 2593 } 2594 2595 static PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A, PetscBool sc) 2596 { 2597 PetscFunctionBegin; 2598 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2599 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2600 PetscFunctionReturn(PETSC_SUCCESS); 2601 } 2602 2603 /*@ 2604 MatMPIAIJGetNumberNonzeros - gets the number of nonzeros in the matrix on this MPI rank 2605 2606 Not Collective 2607 2608 Input Parameter: 2609 . A - the matrix 2610 2611 Output Parameter: 2612 . 
nz - the number of nonzeros 2613 2614 Level: advanced 2615 2616 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ` 2617 @*/ 2618 PetscErrorCode MatMPIAIJGetNumberNonzeros(Mat A, PetscCount *nz) 2619 { 2620 Mat_MPIAIJ *maij = (Mat_MPIAIJ *)A->data; 2621 Mat_SeqAIJ *aaij = (Mat_SeqAIJ *)maij->A->data, *baij = (Mat_SeqAIJ *)maij->B->data; 2622 PetscBool isaij; 2623 2624 PetscFunctionBegin; 2625 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIAIJ, &isaij)); 2626 PetscCheck(isaij, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)A)->type_name); 2627 *nz = aaij->i[A->rmap->n] + baij->i[A->rmap->n]; 2628 PetscFunctionReturn(PETSC_SUCCESS); 2629 } 2630 2631 /*@ 2632 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2633 2634 Collective 2635 2636 Input Parameters: 2637 + A - the matrix 2638 - sc - `PETSC_TRUE` indicates use the scalable algorithm (default is not to use the scalable algorithm) 2639 2640 Level: advanced 2641 2642 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ` 2643 @*/ 2644 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A, PetscBool sc) 2645 { 2646 PetscFunctionBegin; 2647 PetscTryMethod(A, "MatMPIAIJSetUseScalableIncreaseOverlap_C", (Mat, PetscBool), (A, sc)); 2648 PetscFunctionReturn(PETSC_SUCCESS); 2649 } 2650 2651 PetscErrorCode MatSetFromOptions_MPIAIJ(Mat A, PetscOptionItems *PetscOptionsObject) 2652 { 2653 PetscBool sc = PETSC_FALSE, flg; 2654 2655 PetscFunctionBegin; 2656 PetscOptionsHeadBegin(PetscOptionsObject, "MPIAIJ options"); 2657 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2658 PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable", "Use a scalable algorithm to compute the overlap", "MatIncreaseOverlap", sc, &sc, &flg)); 2659 if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, sc)); 2660 PetscOptionsHeadEnd(); 2661 PetscFunctionReturn(PETSC_SUCCESS); 2662 } 2663 2664 static PetscErrorCode MatShift_MPIAIJ(Mat Y, PetscScalar a) 2665 { 2666 Mat_MPIAIJ *maij = (Mat_MPIAIJ *)Y->data; 2667 Mat_SeqAIJ *aij = (Mat_SeqAIJ *)maij->A->data; 2668 2669 PetscFunctionBegin; 2670 if (!Y->preallocated) { 2671 PetscCall(MatMPIAIJSetPreallocation(Y, 1, NULL, 0, NULL)); 2672 } else if (!aij->nz) { /* It does not matter if diagonals of Y only partially lie in maij->A. We just need an estimated preallocation. 
*/ 2673 PetscInt nonew = aij->nonew; 2674 PetscCall(MatSeqAIJSetPreallocation(maij->A, 1, NULL)); 2675 aij->nonew = nonew; 2676 } 2677 PetscCall(MatShift_Basic(Y, a)); 2678 PetscFunctionReturn(PETSC_SUCCESS); 2679 } 2680 2681 static PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A, PetscBool *missing, PetscInt *d) 2682 { 2683 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2684 2685 PetscFunctionBegin; 2686 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices"); 2687 PetscCall(MatMissingDiagonal(a->A, missing, d)); 2688 if (d) { 2689 PetscInt rstart; 2690 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 2691 *d += rstart; 2692 } 2693 PetscFunctionReturn(PETSC_SUCCESS); 2694 } 2695 2696 static PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A, PetscInt nblocks, const PetscInt *bsizes, PetscScalar *diag) 2697 { 2698 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2699 2700 PetscFunctionBegin; 2701 PetscCall(MatInvertVariableBlockDiagonal(a->A, nblocks, bsizes, diag)); 2702 PetscFunctionReturn(PETSC_SUCCESS); 2703 } 2704 2705 static PetscErrorCode MatEliminateZeros_MPIAIJ(Mat A, PetscBool keep) 2706 { 2707 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2708 2709 PetscFunctionBegin; 2710 PetscCall(MatEliminateZeros_SeqAIJ(a->A, keep)); // possibly keep zero diagonal coefficients 2711 PetscCall(MatEliminateZeros_SeqAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients 2712 PetscFunctionReturn(PETSC_SUCCESS); 2713 } 2714 2715 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2716 MatGetRow_MPIAIJ, 2717 MatRestoreRow_MPIAIJ, 2718 MatMult_MPIAIJ, 2719 /* 4*/ MatMultAdd_MPIAIJ, 2720 MatMultTranspose_MPIAIJ, 2721 MatMultTransposeAdd_MPIAIJ, 2722 NULL, 2723 NULL, 2724 NULL, 2725 /*10*/ NULL, 2726 NULL, 2727 NULL, 2728 MatSOR_MPIAIJ, 2729 MatTranspose_MPIAIJ, 2730 /*15*/ MatGetInfo_MPIAIJ, 2731 MatEqual_MPIAIJ, 2732 MatGetDiagonal_MPIAIJ, 2733 MatDiagonalScale_MPIAIJ, 2734 MatNorm_MPIAIJ, 2735 /*20*/ MatAssemblyBegin_MPIAIJ, 2736 MatAssemblyEnd_MPIAIJ, 2737 MatSetOption_MPIAIJ, 2738 MatZeroEntries_MPIAIJ, 2739 /*24*/ MatZeroRows_MPIAIJ, 2740 NULL, 2741 NULL, 2742 NULL, 2743 NULL, 2744 /*29*/ MatSetUp_MPI_Hash, 2745 NULL, 2746 NULL, 2747 MatGetDiagonalBlock_MPIAIJ, 2748 NULL, 2749 /*34*/ MatDuplicate_MPIAIJ, 2750 NULL, 2751 NULL, 2752 NULL, 2753 NULL, 2754 /*39*/ MatAXPY_MPIAIJ, 2755 MatCreateSubMatrices_MPIAIJ, 2756 MatIncreaseOverlap_MPIAIJ, 2757 MatGetValues_MPIAIJ, 2758 MatCopy_MPIAIJ, 2759 /*44*/ MatGetRowMax_MPIAIJ, 2760 MatScale_MPIAIJ, 2761 MatShift_MPIAIJ, 2762 MatDiagonalSet_MPIAIJ, 2763 MatZeroRowsColumns_MPIAIJ, 2764 /*49*/ MatSetRandom_MPIAIJ, 2765 MatGetRowIJ_MPIAIJ, 2766 MatRestoreRowIJ_MPIAIJ, 2767 NULL, 2768 NULL, 2769 /*54*/ MatFDColoringCreate_MPIXAIJ, 2770 NULL, 2771 MatSetUnfactored_MPIAIJ, 2772 MatPermute_MPIAIJ, 2773 NULL, 2774 /*59*/ MatCreateSubMatrix_MPIAIJ, 2775 MatDestroy_MPIAIJ, 2776 MatView_MPIAIJ, 2777 NULL, 2778 NULL, 2779 /*64*/ NULL, 2780 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2781 NULL, 2782 NULL, 2783 NULL, 2784 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2785 MatGetRowMinAbs_MPIAIJ, 2786 NULL, 2787 NULL, 2788 NULL, 2789 NULL, 2790 /*75*/ MatFDColoringApply_AIJ, 2791 MatSetFromOptions_MPIAIJ, 2792 NULL, 2793 NULL, 2794 MatFindZeroDiagonals_MPIAIJ, 2795 /*80*/ NULL, 2796 NULL, 2797 NULL, 2798 /*83*/ MatLoad_MPIAIJ, 2799 NULL, 2800 NULL, 2801 NULL, 2802 NULL, 2803 NULL, 2804 /*89*/ NULL, 2805 NULL, 2806 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2807 NULL, 2808 NULL, 2809 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2810 NULL, 2811 NULL, 2812 
NULL, 2813 MatBindToCPU_MPIAIJ, 2814 /*99*/ MatProductSetFromOptions_MPIAIJ, 2815 NULL, 2816 NULL, 2817 MatConjugate_MPIAIJ, 2818 NULL, 2819 /*104*/ MatSetValuesRow_MPIAIJ, 2820 MatRealPart_MPIAIJ, 2821 MatImaginaryPart_MPIAIJ, 2822 NULL, 2823 NULL, 2824 /*109*/ NULL, 2825 NULL, 2826 MatGetRowMin_MPIAIJ, 2827 NULL, 2828 MatMissingDiagonal_MPIAIJ, 2829 /*114*/ MatGetSeqNonzeroStructure_MPIAIJ, 2830 NULL, 2831 MatGetGhosts_MPIAIJ, 2832 NULL, 2833 NULL, 2834 /*119*/ MatMultDiagonalBlock_MPIAIJ, 2835 NULL, 2836 NULL, 2837 NULL, 2838 MatGetMultiProcBlock_MPIAIJ, 2839 /*124*/ MatFindNonzeroRows_MPIAIJ, 2840 MatGetColumnReductions_MPIAIJ, 2841 MatInvertBlockDiagonal_MPIAIJ, 2842 MatInvertVariableBlockDiagonal_MPIAIJ, 2843 MatCreateSubMatricesMPI_MPIAIJ, 2844 /*129*/ NULL, 2845 NULL, 2846 NULL, 2847 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2848 NULL, 2849 /*134*/ NULL, 2850 NULL, 2851 NULL, 2852 NULL, 2853 NULL, 2854 /*139*/ MatSetBlockSizes_MPIAIJ, 2855 NULL, 2856 NULL, 2857 MatFDColoringSetUp_MPIXAIJ, 2858 MatFindOffBlockDiagonalEntries_MPIAIJ, 2859 MatCreateMPIMatConcatenateSeqMat_MPIAIJ, 2860 /*145*/ NULL, 2861 NULL, 2862 NULL, 2863 MatCreateGraph_Simple_AIJ, 2864 NULL, 2865 /*150*/ NULL, 2866 MatEliminateZeros_MPIAIJ, 2867 MatGetRowSumAbs_MPIAIJ}; 2868 2869 static PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2870 { 2871 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2872 2873 PetscFunctionBegin; 2874 PetscCall(MatStoreValues(aij->A)); 2875 PetscCall(MatStoreValues(aij->B)); 2876 PetscFunctionReturn(PETSC_SUCCESS); 2877 } 2878 2879 static PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2880 { 2881 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2882 2883 PetscFunctionBegin; 2884 PetscCall(MatRetrieveValues(aij->A)); 2885 PetscCall(MatRetrieveValues(aij->B)); 2886 PetscFunctionReturn(PETSC_SUCCESS); 2887 } 2888 2889 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[]) 2890 { 2891 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2892 PetscMPIInt size; 2893 2894 PetscFunctionBegin; 2895 if (B->hash_active) { 2896 B->ops[0] = b->cops; 2897 B->hash_active = PETSC_FALSE; 2898 } 2899 PetscCall(PetscLayoutSetUp(B->rmap)); 2900 PetscCall(PetscLayoutSetUp(B->cmap)); 2901 2902 #if defined(PETSC_USE_CTABLE) 2903 PetscCall(PetscHMapIDestroy(&b->colmap)); 2904 #else 2905 PetscCall(PetscFree(b->colmap)); 2906 #endif 2907 PetscCall(PetscFree(b->garray)); 2908 PetscCall(VecDestroy(&b->lvec)); 2909 PetscCall(VecScatterDestroy(&b->Mvctx)); 2910 2911 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 2912 2913 MatSeqXAIJGetOptions_Private(b->B); 2914 PetscCall(MatDestroy(&b->B)); 2915 PetscCall(MatCreate(PETSC_COMM_SELF, &b->B)); 2916 PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? 
B->cmap->N : 0)); 2917 PetscCall(MatSetBlockSizesFromMats(b->B, B, B)); 2918 PetscCall(MatSetType(b->B, MATSEQAIJ)); 2919 MatSeqXAIJRestoreOptions_Private(b->B); 2920 2921 MatSeqXAIJGetOptions_Private(b->A); 2922 PetscCall(MatDestroy(&b->A)); 2923 PetscCall(MatCreate(PETSC_COMM_SELF, &b->A)); 2924 PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n)); 2925 PetscCall(MatSetBlockSizesFromMats(b->A, B, B)); 2926 PetscCall(MatSetType(b->A, MATSEQAIJ)); 2927 MatSeqXAIJRestoreOptions_Private(b->A); 2928 2929 PetscCall(MatSeqAIJSetPreallocation(b->A, d_nz, d_nnz)); 2930 PetscCall(MatSeqAIJSetPreallocation(b->B, o_nz, o_nnz)); 2931 B->preallocated = PETSC_TRUE; 2932 B->was_assembled = PETSC_FALSE; 2933 B->assembled = PETSC_FALSE; 2934 PetscFunctionReturn(PETSC_SUCCESS); 2935 } 2936 2937 static PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) 2938 { 2939 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2940 2941 PetscFunctionBegin; 2942 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 2943 PetscCall(PetscLayoutSetUp(B->rmap)); 2944 PetscCall(PetscLayoutSetUp(B->cmap)); 2945 2946 #if defined(PETSC_USE_CTABLE) 2947 PetscCall(PetscHMapIDestroy(&b->colmap)); 2948 #else 2949 PetscCall(PetscFree(b->colmap)); 2950 #endif 2951 PetscCall(PetscFree(b->garray)); 2952 PetscCall(VecDestroy(&b->lvec)); 2953 PetscCall(VecScatterDestroy(&b->Mvctx)); 2954 2955 PetscCall(MatResetPreallocation(b->A)); 2956 PetscCall(MatResetPreallocation(b->B)); 2957 B->preallocated = PETSC_TRUE; 2958 B->was_assembled = PETSC_FALSE; 2959 B->assembled = PETSC_FALSE; 2960 PetscFunctionReturn(PETSC_SUCCESS); 2961 } 2962 2963 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat) 2964 { 2965 Mat mat; 2966 Mat_MPIAIJ *a, *oldmat = (Mat_MPIAIJ *)matin->data; 2967 2968 PetscFunctionBegin; 2969 *newmat = NULL; 2970 PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat)); 2971 PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N)); 2972 PetscCall(MatSetBlockSizesFromMats(mat, matin, matin)); 2973 PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name)); 2974 a = (Mat_MPIAIJ *)mat->data; 2975 2976 mat->factortype = matin->factortype; 2977 mat->assembled = matin->assembled; 2978 mat->insertmode = NOT_SET_VALUES; 2979 2980 a->size = oldmat->size; 2981 a->rank = oldmat->rank; 2982 a->donotstash = oldmat->donotstash; 2983 a->roworiented = oldmat->roworiented; 2984 a->rowindices = NULL; 2985 a->rowvalues = NULL; 2986 a->getrowactive = PETSC_FALSE; 2987 2988 PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap)); 2989 PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap)); 2990 if (matin->hash_active) { 2991 PetscCall(MatSetUp(mat)); 2992 } else { 2993 mat->preallocated = matin->preallocated; 2994 if (oldmat->colmap) { 2995 #if defined(PETSC_USE_CTABLE) 2996 PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap)); 2997 #else 2998 PetscCall(PetscMalloc1(mat->cmap->N, &a->colmap)); 2999 PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, mat->cmap->N)); 3000 #endif 3001 } else a->colmap = NULL; 3002 if (oldmat->garray) { 3003 PetscInt len; 3004 len = oldmat->B->cmap->n; 3005 PetscCall(PetscMalloc1(len + 1, &a->garray)); 3006 if (len) PetscCall(PetscArraycpy(a->garray, oldmat->garray, len)); 3007 } else a->garray = NULL; 3008 3009 /* It may happen MatDuplicate is called with a non-assembled matrix 3010 In fact, MatDuplicate only requires the matrix to be preallocated 3011 This may happen inside a DMCreateMatrix_Shell */ 3012 if (oldmat->lvec) 
PetscCall(VecDuplicate(oldmat->lvec, &a->lvec)); 3013 if (oldmat->Mvctx) PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx)); 3014 PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A)); 3015 PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B)); 3016 } 3017 PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist)); 3018 *newmat = mat; 3019 PetscFunctionReturn(PETSC_SUCCESS); 3020 } 3021 3022 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3023 { 3024 PetscBool isbinary, ishdf5; 3025 3026 PetscFunctionBegin; 3027 PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1); 3028 PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2); 3029 /* force binary viewer to load .info file if it has not yet done so */ 3030 PetscCall(PetscViewerSetUp(viewer)); 3031 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 3032 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5)); 3033 if (isbinary) { 3034 PetscCall(MatLoad_MPIAIJ_Binary(newMat, viewer)); 3035 } else if (ishdf5) { 3036 #if defined(PETSC_HAVE_HDF5) 3037 PetscCall(MatLoad_AIJ_HDF5(newMat, viewer)); 3038 #else 3039 SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5"); 3040 #endif 3041 } else { 3042 SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name); 3043 } 3044 PetscFunctionReturn(PETSC_SUCCESS); 3045 } 3046 3047 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer) 3048 { 3049 PetscInt header[4], M, N, m, nz, rows, cols, sum, i; 3050 PetscInt *rowidxs, *colidxs; 3051 PetscScalar *matvals; 3052 3053 PetscFunctionBegin; 3054 PetscCall(PetscViewerSetUp(viewer)); 3055 3056 /* read in matrix header */ 3057 PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT)); 3058 PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file"); 3059 M = header[1]; 3060 N = header[2]; 3061 nz = header[3]; 3062 PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M); 3063 PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N); 3064 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIAIJ"); 3065 3066 /* set block sizes from the viewer's .info file */ 3067 PetscCall(MatLoad_Binary_BlockSizes(mat, viewer)); 3068 /* set global sizes if not set already */ 3069 if (mat->rmap->N < 0) mat->rmap->N = M; 3070 if (mat->cmap->N < 0) mat->cmap->N = N; 3071 PetscCall(PetscLayoutSetUp(mat->rmap)); 3072 PetscCall(PetscLayoutSetUp(mat->cmap)); 3073 3074 /* check if the matrix sizes are correct */ 3075 PetscCall(MatGetSize(mat, &rows, &cols)); 3076 PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols); 3077 3078 /* read in row lengths and build row indices */ 3079 PetscCall(MatGetLocalSize(mat, &m, NULL)); 3080 PetscCall(PetscMalloc1(m + 1, &rowidxs)); 3081 PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, 
PETSC_INT)); 3082 rowidxs[0] = 0; 3083 for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i]; 3084 if (nz != PETSC_MAX_INT) { 3085 PetscCall(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer))); 3086 PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum); 3087 } 3088 3089 /* read in column indices and matrix values */ 3090 PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals)); 3091 PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 3092 PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 3093 /* store matrix indices and values */ 3094 PetscCall(MatMPIAIJSetPreallocationCSR(mat, rowidxs, colidxs, matvals)); 3095 PetscCall(PetscFree(rowidxs)); 3096 PetscCall(PetscFree2(colidxs, matvals)); 3097 PetscFunctionReturn(PETSC_SUCCESS); 3098 } 3099 3100 /* Not scalable because of ISAllGather() unless getting all columns. */ 3101 static PetscErrorCode ISGetSeqIS_Private(Mat mat, IS iscol, IS *isseq) 3102 { 3103 IS iscol_local; 3104 PetscBool isstride; 3105 PetscMPIInt lisstride = 0, gisstride; 3106 3107 PetscFunctionBegin; 3108 /* check if we are grabbing all columns*/ 3109 PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &isstride)); 3110 3111 if (isstride) { 3112 PetscInt start, len, mstart, mlen; 3113 PetscCall(ISStrideGetInfo(iscol, &start, NULL)); 3114 PetscCall(ISGetLocalSize(iscol, &len)); 3115 PetscCall(MatGetOwnershipRangeColumn(mat, &mstart, &mlen)); 3116 if (mstart == start && mlen - mstart == len) lisstride = 1; 3117 } 3118 3119 PetscCall(MPIU_Allreduce(&lisstride, &gisstride, 1, MPI_INT, MPI_MIN, PetscObjectComm((PetscObject)mat))); 3120 if (gisstride) { 3121 PetscInt N; 3122 PetscCall(MatGetSize(mat, NULL, &N)); 3123 PetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &iscol_local)); 3124 PetscCall(ISSetIdentity(iscol_local)); 3125 PetscCall(PetscInfo(mat, "Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n")); 3126 } else { 3127 PetscInt cbs; 3128 PetscCall(ISGetBlockSize(iscol, &cbs)); 3129 PetscCall(ISAllGather(iscol, &iscol_local)); 3130 PetscCall(ISSetBlockSize(iscol_local, cbs)); 3131 } 3132 3133 *isseq = iscol_local; 3134 PetscFunctionReturn(PETSC_SUCCESS); 3135 } 3136 3137 /* 3138 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local 3139 (see MatCreateSubMatrix_MPIAIJ_nonscalable) 3140 3141 Input Parameters: 3142 + mat - matrix 3143 . isrow - parallel row index set; its local indices are a subset of local columns of `mat`, 3144 i.e., mat->rstart <= isrow[i] < mat->rend 3145 - iscol - parallel column index set; its local indices are a subset of local columns of `mat`, 3146 i.e., mat->cstart <= iscol[i] < mat->cend 3147 3148 Output Parameters: 3149 + isrow_d - sequential row index set for retrieving mat->A 3150 . iscol_d - sequential column index set for retrieving mat->A 3151 . 
iscol_o - sequential column index set for retrieving mat->B 3152 - garray - column map; garray[i] indicates global location of iscol_o[i] in `iscol` 3153 */ 3154 static PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat, IS isrow, IS iscol, IS *isrow_d, IS *iscol_d, IS *iscol_o, const PetscInt *garray[]) 3155 { 3156 Vec x, cmap; 3157 const PetscInt *is_idx; 3158 PetscScalar *xarray, *cmaparray; 3159 PetscInt ncols, isstart, *idx, m, rstart, *cmap1, count; 3160 Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data; 3161 Mat B = a->B; 3162 Vec lvec = a->lvec, lcmap; 3163 PetscInt i, cstart, cend, Bn = B->cmap->N; 3164 MPI_Comm comm; 3165 VecScatter Mvctx = a->Mvctx; 3166 3167 PetscFunctionBegin; 3168 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3169 PetscCall(ISGetLocalSize(iscol, &ncols)); 3170 3171 /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */ 3172 PetscCall(MatCreateVecs(mat, &x, NULL)); 3173 PetscCall(VecSet(x, -1.0)); 3174 PetscCall(VecDuplicate(x, &cmap)); 3175 PetscCall(VecSet(cmap, -1.0)); 3176 3177 /* Get start indices */ 3178 PetscCallMPI(MPI_Scan(&ncols, &isstart, 1, MPIU_INT, MPI_SUM, comm)); 3179 isstart -= ncols; 3180 PetscCall(MatGetOwnershipRangeColumn(mat, &cstart, &cend)); 3181 3182 PetscCall(ISGetIndices(iscol, &is_idx)); 3183 PetscCall(VecGetArray(x, &xarray)); 3184 PetscCall(VecGetArray(cmap, &cmaparray)); 3185 PetscCall(PetscMalloc1(ncols, &idx)); 3186 for (i = 0; i < ncols; i++) { 3187 xarray[is_idx[i] - cstart] = (PetscScalar)is_idx[i]; 3188 cmaparray[is_idx[i] - cstart] = i + isstart; /* global index of iscol[i] */ 3189 idx[i] = is_idx[i] - cstart; /* local index of iscol[i] */ 3190 } 3191 PetscCall(VecRestoreArray(x, &xarray)); 3192 PetscCall(VecRestoreArray(cmap, &cmaparray)); 3193 PetscCall(ISRestoreIndices(iscol, &is_idx)); 3194 3195 /* Get iscol_d */ 3196 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, iscol_d)); 3197 PetscCall(ISGetBlockSize(iscol, &i)); 3198 PetscCall(ISSetBlockSize(*iscol_d, i)); 3199 3200 /* Get isrow_d */ 3201 PetscCall(ISGetLocalSize(isrow, &m)); 3202 rstart = mat->rmap->rstart; 3203 PetscCall(PetscMalloc1(m, &idx)); 3204 PetscCall(ISGetIndices(isrow, &is_idx)); 3205 for (i = 0; i < m; i++) idx[i] = is_idx[i] - rstart; 3206 PetscCall(ISRestoreIndices(isrow, &is_idx)); 3207 3208 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idx, PETSC_OWN_POINTER, isrow_d)); 3209 PetscCall(ISGetBlockSize(isrow, &i)); 3210 PetscCall(ISSetBlockSize(*isrow_d, i)); 3211 3212 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3213 PetscCall(VecScatterBegin(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD)); 3214 PetscCall(VecScatterEnd(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD)); 3215 3216 PetscCall(VecDuplicate(lvec, &lcmap)); 3217 3218 PetscCall(VecScatterBegin(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD)); 3219 PetscCall(VecScatterEnd(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD)); 3220 3221 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3222 /* off-process column indices */ 3223 count = 0; 3224 PetscCall(PetscMalloc1(Bn, &idx)); 3225 PetscCall(PetscMalloc1(Bn, &cmap1)); 3226 3227 PetscCall(VecGetArray(lvec, &xarray)); 3228 PetscCall(VecGetArray(lcmap, &cmaparray)); 3229 for (i = 0; i < Bn; i++) { 3230 if (PetscRealPart(xarray[i]) > -1.0) { 3231 idx[count] = i; /* local column index in off-diagonal part B */ 3232 cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3233 
count++; 3234 } 3235 } 3236 PetscCall(VecRestoreArray(lvec, &xarray)); 3237 PetscCall(VecRestoreArray(lcmap, &cmaparray)); 3238 3239 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_COPY_VALUES, iscol_o)); 3240 /* cannot ensure iscol_o has same blocksize as iscol! */ 3241 3242 PetscCall(PetscFree(idx)); 3243 *garray = cmap1; 3244 3245 PetscCall(VecDestroy(&x)); 3246 PetscCall(VecDestroy(&cmap)); 3247 PetscCall(VecDestroy(&lcmap)); 3248 PetscFunctionReturn(PETSC_SUCCESS); 3249 } 3250 3251 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3252 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *submat) 3253 { 3254 Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data, *asub; 3255 Mat M = NULL; 3256 MPI_Comm comm; 3257 IS iscol_d, isrow_d, iscol_o; 3258 Mat Asub = NULL, Bsub = NULL; 3259 PetscInt n; 3260 3261 PetscFunctionBegin; 3262 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3263 3264 if (call == MAT_REUSE_MATRIX) { 3265 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3266 PetscCall(PetscObjectQuery((PetscObject)*submat, "isrow_d", (PetscObject *)&isrow_d)); 3267 PetscCheck(isrow_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "isrow_d passed in was not used before, cannot reuse"); 3268 3269 PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_d", (PetscObject *)&iscol_d)); 3270 PetscCheck(iscol_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_d passed in was not used before, cannot reuse"); 3271 3272 PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_o", (PetscObject *)&iscol_o)); 3273 PetscCheck(iscol_o, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_o passed in was not used before, cannot reuse"); 3274 3275 /* Update diagonal and off-diagonal portions of submat */ 3276 asub = (Mat_MPIAIJ *)(*submat)->data; 3277 PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->A)); 3278 PetscCall(ISGetLocalSize(iscol_o, &n)); 3279 if (n) PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->B)); 3280 PetscCall(MatAssemblyBegin(*submat, MAT_FINAL_ASSEMBLY)); 3281 PetscCall(MatAssemblyEnd(*submat, MAT_FINAL_ASSEMBLY)); 3282 3283 } else { /* call == MAT_INITIAL_MATRIX) */ 3284 const PetscInt *garray; 3285 PetscInt BsubN; 3286 3287 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) 
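garray[k] records, for each column kept in iscol_o (the retained off-diagonal columns of a->B), the position of that column within iscol, so the reuse path below can match the condensed off-diagonal columns back to the caller's column index set.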
*/ 3288 PetscCall(ISGetSeqIS_SameColDist_Private(mat, isrow, iscol, &isrow_d, &iscol_d, &iscol_o, &garray)); 3289 3290 /* Create local submatrices Asub and Bsub */ 3291 PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Asub)); 3292 PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Bsub)); 3293 3294 /* Create submatrix M */ 3295 PetscCall(MatCreateMPIAIJWithSeqAIJ(comm, Asub, Bsub, garray, &M)); 3296 3297 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3298 asub = (Mat_MPIAIJ *)M->data; 3299 3300 PetscCall(ISGetLocalSize(iscol_o, &BsubN)); 3301 n = asub->B->cmap->N; 3302 if (BsubN > n) { 3303 /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */ 3304 const PetscInt *idx; 3305 PetscInt i, j, *idx_new, *subgarray = asub->garray; 3306 PetscCall(PetscInfo(M, "submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n", n, BsubN)); 3307 3308 PetscCall(PetscMalloc1(n, &idx_new)); 3309 j = 0; 3310 PetscCall(ISGetIndices(iscol_o, &idx)); 3311 for (i = 0; i < n; i++) { 3312 if (j >= BsubN) break; 3313 while (subgarray[i] > garray[j]) j++; 3314 3315 if (subgarray[i] == garray[j]) { 3316 idx_new[i] = idx[j++]; 3317 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT, i, subgarray[i], j, garray[j]); 3318 } 3319 PetscCall(ISRestoreIndices(iscol_o, &idx)); 3320 3321 PetscCall(ISDestroy(&iscol_o)); 3322 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx_new, PETSC_OWN_POINTER, &iscol_o)); 3323 3324 } else if (BsubN < n) { 3325 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")", BsubN, asub->B->cmap->N); 3326 } 3327 3328 PetscCall(PetscFree(garray)); 3329 *submat = M; 3330 3331 /* Save isrow_d, iscol_d and iscol_o used in processor for next request */ 3332 PetscCall(PetscObjectCompose((PetscObject)M, "isrow_d", (PetscObject)isrow_d)); 3333 PetscCall(ISDestroy(&isrow_d)); 3334 3335 PetscCall(PetscObjectCompose((PetscObject)M, "iscol_d", (PetscObject)iscol_d)); 3336 PetscCall(ISDestroy(&iscol_d)); 3337 3338 PetscCall(PetscObjectCompose((PetscObject)M, "iscol_o", (PetscObject)iscol_o)); 3339 PetscCall(ISDestroy(&iscol_o)); 3340 } 3341 PetscFunctionReturn(PETSC_SUCCESS); 3342 } 3343 3344 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat) 3345 { 3346 IS iscol_local = NULL, isrow_d; 3347 PetscInt csize; 3348 PetscInt n, i, j, start, end; 3349 PetscBool sameRowDist = PETSC_FALSE, sameDist[2], tsameDist[2]; 3350 MPI_Comm comm; 3351 3352 PetscFunctionBegin; 3353 /* If isrow has same processor distribution as mat, 3354 call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */ 3355 if (call == MAT_REUSE_MATRIX) { 3356 PetscCall(PetscObjectQuery((PetscObject)*newmat, "isrow_d", (PetscObject *)&isrow_d)); 3357 if (isrow_d) { 3358 sameRowDist = PETSC_TRUE; 3359 tsameDist[1] = PETSC_TRUE; /* sameColDist */ 3360 } else { 3361 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_local)); 3362 if (iscol_local) { 3363 sameRowDist = PETSC_TRUE; 3364 tsameDist[1] = PETSC_FALSE; /* !sameColDist */ 3365 } 3366 } 3367 } else { 3368 /* Check if isrow has same processor distribution as mat */ 3369 sameDist[0] = PETSC_FALSE; 3370 
PetscCall(ISGetLocalSize(isrow, &n)); 3371 if (!n) { 3372 sameDist[0] = PETSC_TRUE; 3373 } else { 3374 PetscCall(ISGetMinMax(isrow, &i, &j)); 3375 PetscCall(MatGetOwnershipRange(mat, &start, &end)); 3376 if (i >= start && j < end) sameDist[0] = PETSC_TRUE; 3377 } 3378 3379 /* Check if iscol has same processor distribution as mat */ 3380 sameDist[1] = PETSC_FALSE; 3381 PetscCall(ISGetLocalSize(iscol, &n)); 3382 if (!n) { 3383 sameDist[1] = PETSC_TRUE; 3384 } else { 3385 PetscCall(ISGetMinMax(iscol, &i, &j)); 3386 PetscCall(MatGetOwnershipRangeColumn(mat, &start, &end)); 3387 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3388 } 3389 3390 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3391 PetscCall(MPIU_Allreduce(&sameDist, &tsameDist, 2, MPIU_BOOL, MPI_LAND, comm)); 3392 sameRowDist = tsameDist[0]; 3393 } 3394 3395 if (sameRowDist) { 3396 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3397 /* isrow and iscol have same processor distribution as mat */ 3398 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat, isrow, iscol, call, newmat)); 3399 PetscFunctionReturn(PETSC_SUCCESS); 3400 } else { /* sameRowDist */ 3401 /* isrow has same processor distribution as mat */ 3402 if (call == MAT_INITIAL_MATRIX) { 3403 PetscBool sorted; 3404 PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local)); 3405 PetscCall(ISGetLocalSize(iscol_local, &n)); /* local size of iscol_local = global columns of newmat */ 3406 PetscCall(ISGetSize(iscol, &i)); 3407 PetscCheck(n == i, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT, n, i); 3408 3409 PetscCall(ISSorted(iscol_local, &sorted)); 3410 if (sorted) { 3411 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3412 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, iscol_local, MAT_INITIAL_MATRIX, newmat)); 3413 PetscFunctionReturn(PETSC_SUCCESS); 3414 } 3415 } else { /* call == MAT_REUSE_MATRIX */ 3416 IS iscol_sub; 3417 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub)); 3418 if (iscol_sub) { 3419 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, NULL, call, newmat)); 3420 PetscFunctionReturn(PETSC_SUCCESS); 3421 } 3422 } 3423 } 3424 } 3425 3426 /* General case: iscol -> iscol_local which has global size of iscol */ 3427 if (call == MAT_REUSE_MATRIX) { 3428 PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local)); 3429 PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 3430 } else { 3431 if (!iscol_local) PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local)); 3432 } 3433 3434 PetscCall(ISGetLocalSize(iscol, &csize)); 3435 PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat, isrow, iscol_local, csize, call, newmat)); 3436 3437 if (call == MAT_INITIAL_MATRIX) { 3438 PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local)); 3439 PetscCall(ISDestroy(&iscol_local)); 3440 } 3441 PetscFunctionReturn(PETSC_SUCCESS); 3442 } 3443 3444 /*@C 3445 MatCreateMPIAIJWithSeqAIJ - creates a `MATMPIAIJ` matrix using `MATSEQAIJ` matrices that contain the "diagonal" 3446 and "off-diagonal" part of the matrix in CSR format. 3447 3448 Collective 3449 3450 Input Parameters: 3451 + comm - MPI communicator 3452 . A - "diagonal" portion of matrix 3453 . 
B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3454 - garray - global index of `B` columns 3455 3456 Output Parameter: 3457 . mat - the matrix, with input `A` as its local diagonal matrix 3458 3459 Level: advanced 3460 3461 Notes: 3462 See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3463 3464 `A` becomes part of output mat, `B` is destroyed by this routine. The user cannot use `A` and `B` anymore. 3465 3466 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateMPIAIJWithSplitArrays()` 3467 @*/ 3468 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm, Mat A, Mat B, const PetscInt garray[], Mat *mat) 3469 { 3470 Mat_MPIAIJ *maij; 3471 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data, *bnew; 3472 PetscInt *oi = b->i, *oj = b->j, i, nz, col; 3473 const PetscScalar *oa; 3474 Mat Bnew; 3475 PetscInt m, n, N; 3476 MatType mpi_mat_type; 3477 3478 PetscFunctionBegin; 3479 PetscCall(MatCreate(comm, mat)); 3480 PetscCall(MatGetSize(A, &m, &n)); 3481 PetscCheck(m == B->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Am %" PetscInt_FMT " != Bm %" PetscInt_FMT, m, B->rmap->N); 3482 PetscCheck(PetscAbs(A->rmap->bs) == PetscAbs(B->rmap->bs), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT, A->rmap->bs, B->rmap->bs); 3483 /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */ 3484 /* PetscCheck(A->cmap->bs == B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */ 3485 3486 /* Get global columns of mat */ 3487 PetscCall(MPIU_Allreduce(&n, &N, 1, MPIU_INT, MPI_SUM, comm)); 3488 3489 PetscCall(MatSetSizes(*mat, m, n, PETSC_DECIDE, N)); 3490 /* Determine the type of MPI matrix that should be created from the type of matrix A, which holds the "diagonal" portion. 
*/ 3491 PetscCall(MatGetMPIMatType_Private(A, &mpi_mat_type)); 3492 PetscCall(MatSetType(*mat, mpi_mat_type)); 3493 3494 if (A->rmap->bs > 1 || A->cmap->bs > 1) PetscCall(MatSetBlockSizes(*mat, A->rmap->bs, A->cmap->bs)); 3495 maij = (Mat_MPIAIJ *)(*mat)->data; 3496 3497 (*mat)->preallocated = PETSC_TRUE; 3498 3499 PetscCall(PetscLayoutSetUp((*mat)->rmap)); 3500 PetscCall(PetscLayoutSetUp((*mat)->cmap)); 3501 3502 /* Set A as diagonal portion of *mat */ 3503 maij->A = A; 3504 3505 nz = oi[m]; 3506 for (i = 0; i < nz; i++) { 3507 col = oj[i]; 3508 oj[i] = garray[col]; 3509 } 3510 3511 /* Set Bnew as off-diagonal portion of *mat */ 3512 PetscCall(MatSeqAIJGetArrayRead(B, &oa)); 3513 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, N, oi, oj, (PetscScalar *)oa, &Bnew)); 3514 PetscCall(MatSeqAIJRestoreArrayRead(B, &oa)); 3515 bnew = (Mat_SeqAIJ *)Bnew->data; 3516 bnew->maxnz = b->maxnz; /* allocated nonzeros of B */ 3517 maij->B = Bnew; 3518 3519 PetscCheck(B->rmap->N == Bnew->rmap->N, PETSC_COMM_SELF, PETSC_ERR_PLIB, "BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT, B->rmap->N, Bnew->rmap->N); 3520 3521 b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */ 3522 b->free_a = PETSC_FALSE; 3523 b->free_ij = PETSC_FALSE; 3524 PetscCall(MatDestroy(&B)); 3525 3526 bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */ 3527 bnew->free_a = PETSC_TRUE; 3528 bnew->free_ij = PETSC_TRUE; 3529 3530 /* condense columns of maij->B */ 3531 PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 3532 PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY)); 3533 PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY)); 3534 PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE)); 3535 PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 3536 PetscFunctionReturn(PETSC_SUCCESS); 3537 } 3538 3539 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat, PetscInt, const IS[], const IS[], MatReuse, PetscBool, Mat *); 3540 3541 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat, IS isrow, IS iscol, IS iscol_local, MatReuse call, Mat *newmat) 3542 { 3543 PetscInt i, m, n, rstart, row, rend, nz, j, bs, cbs; 3544 PetscInt *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal; 3545 Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data; 3546 Mat M, Msub, B = a->B; 3547 MatScalar *aa; 3548 Mat_SeqAIJ *aij; 3549 PetscInt *garray = a->garray, *colsub, Ncols; 3550 PetscInt count, Bn = B->cmap->N, cstart = mat->cmap->rstart, cend = mat->cmap->rend; 3551 IS iscol_sub, iscmap; 3552 const PetscInt *is_idx, *cmap; 3553 PetscBool allcolumns = PETSC_FALSE; 3554 MPI_Comm comm; 3555 3556 PetscFunctionBegin; 3557 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3558 if (call == MAT_REUSE_MATRIX) { 3559 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub)); 3560 PetscCheck(iscol_sub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "SubIScol passed in was not used before, cannot reuse"); 3561 PetscCall(ISGetLocalSize(iscol_sub, &count)); 3562 3563 PetscCall(PetscObjectQuery((PetscObject)*newmat, "Subcmap", (PetscObject *)&iscmap)); 3564 PetscCheck(iscmap, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Subcmap passed in was not used before, cannot reuse"); 3565 3566 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Msub)); 3567 PetscCheck(Msub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 3568 3569 
PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_REUSE_MATRIX, PETSC_FALSE, &Msub)); 3570 3571 } else { /* call == MAT_INITIAL_MATRIX) */ 3572 PetscBool flg; 3573 3574 PetscCall(ISGetLocalSize(iscol, &n)); 3575 PetscCall(ISGetSize(iscol, &Ncols)); 3576 3577 /* (1) iscol -> nonscalable iscol_local */ 3578 /* Check for special case: each processor gets entire matrix columns */ 3579 PetscCall(ISIdentity(iscol_local, &flg)); 3580 if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3581 PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 3582 if (allcolumns) { 3583 iscol_sub = iscol_local; 3584 PetscCall(PetscObjectReference((PetscObject)iscol_local)); 3585 PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &iscmap)); 3586 3587 } else { 3588 /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */ 3589 PetscInt *idx, *cmap1, k; 3590 PetscCall(PetscMalloc1(Ncols, &idx)); 3591 PetscCall(PetscMalloc1(Ncols, &cmap1)); 3592 PetscCall(ISGetIndices(iscol_local, &is_idx)); 3593 count = 0; 3594 k = 0; 3595 for (i = 0; i < Ncols; i++) { 3596 j = is_idx[i]; 3597 if (j >= cstart && j < cend) { 3598 /* diagonal part of mat */ 3599 idx[count] = j; 3600 cmap1[count++] = i; /* column index in submat */ 3601 } else if (Bn) { 3602 /* off-diagonal part of mat */ 3603 if (j == garray[k]) { 3604 idx[count] = j; 3605 cmap1[count++] = i; /* column index in submat */ 3606 } else if (j > garray[k]) { 3607 while (j > garray[k] && k < Bn - 1) k++; 3608 if (j == garray[k]) { 3609 idx[count] = j; 3610 cmap1[count++] = i; /* column index in submat */ 3611 } 3612 } 3613 } 3614 } 3615 PetscCall(ISRestoreIndices(iscol_local, &is_idx)); 3616 3617 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_OWN_POINTER, &iscol_sub)); 3618 PetscCall(ISGetBlockSize(iscol, &cbs)); 3619 PetscCall(ISSetBlockSize(iscol_sub, cbs)); 3620 3621 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local), count, cmap1, PETSC_OWN_POINTER, &iscmap)); 3622 } 3623 3624 /* (3) Create sequential Msub */ 3625 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_INITIAL_MATRIX, allcolumns, &Msub)); 3626 } 3627 3628 PetscCall(ISGetLocalSize(iscol_sub, &count)); 3629 aij = (Mat_SeqAIJ *)(Msub)->data; 3630 ii = aij->i; 3631 PetscCall(ISGetIndices(iscmap, &cmap)); 3632 3633 /* 3634 m - number of local rows 3635 Ncols - number of columns (same on all processors) 3636 rstart - first row in new global matrix generated 3637 */ 3638 PetscCall(MatGetSize(Msub, &m, NULL)); 3639 3640 if (call == MAT_INITIAL_MATRIX) { 3641 /* (4) Create parallel newmat */ 3642 PetscMPIInt rank, size; 3643 PetscInt csize; 3644 3645 PetscCallMPI(MPI_Comm_size(comm, &size)); 3646 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 3647 3648 /* 3649 Determine the number of non-zeros in the diagonal and off-diagonal 3650 portions of the matrix in order to do correct preallocation 3651 */ 3652 3653 /* first get start and end of "diagonal" columns */ 3654 PetscCall(ISGetLocalSize(iscol, &csize)); 3655 if (csize == PETSC_DECIDE) { 3656 PetscCall(ISGetSize(isrow, &mglobal)); 3657 if (mglobal == Ncols) { /* square matrix */ 3658 nlocal = m; 3659 } else { 3660 nlocal = Ncols / size + ((Ncols % size) > rank); 3661 } 3662 } else { 3663 nlocal = csize; 3664 } 3665 PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm)); 3666 rstart = rend - nlocal; 3667 PetscCheck(rank != size - 1 
|| rend == Ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, Ncols); 3668 3669 /* next, compute all the lengths */ 3670 jj = aij->j; 3671 PetscCall(PetscMalloc1(2 * m + 1, &dlens)); 3672 olens = dlens + m; 3673 for (i = 0; i < m; i++) { 3674 jend = ii[i + 1] - ii[i]; 3675 olen = 0; 3676 dlen = 0; 3677 for (j = 0; j < jend; j++) { 3678 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 3679 else dlen++; 3680 jj++; 3681 } 3682 olens[i] = olen; 3683 dlens[i] = dlen; 3684 } 3685 3686 PetscCall(ISGetBlockSize(isrow, &bs)); 3687 PetscCall(ISGetBlockSize(iscol, &cbs)); 3688 3689 PetscCall(MatCreate(comm, &M)); 3690 PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, Ncols)); 3691 PetscCall(MatSetBlockSizes(M, bs, cbs)); 3692 PetscCall(MatSetType(M, ((PetscObject)mat)->type_name)); 3693 PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens)); 3694 PetscCall(PetscFree(dlens)); 3695 3696 } else { /* call == MAT_REUSE_MATRIX */ 3697 M = *newmat; 3698 PetscCall(MatGetLocalSize(M, &i, NULL)); 3699 PetscCheck(i == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request"); 3700 PetscCall(MatZeroEntries(M)); 3701 /* 3702 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3703 rather than the slower MatSetValues(). 3704 */ 3705 M->was_assembled = PETSC_TRUE; 3706 M->assembled = PETSC_FALSE; 3707 } 3708 3709 /* (5) Set values of Msub to *newmat */ 3710 PetscCall(PetscMalloc1(count, &colsub)); 3711 PetscCall(MatGetOwnershipRange(M, &rstart, NULL)); 3712 3713 jj = aij->j; 3714 PetscCall(MatSeqAIJGetArrayRead(Msub, (const PetscScalar **)&aa)); 3715 for (i = 0; i < m; i++) { 3716 row = rstart + i; 3717 nz = ii[i + 1] - ii[i]; 3718 for (j = 0; j < nz; j++) colsub[j] = cmap[jj[j]]; 3719 PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, colsub, aa, INSERT_VALUES)); 3720 jj += nz; 3721 aa += nz; 3722 } 3723 PetscCall(MatSeqAIJRestoreArrayRead(Msub, (const PetscScalar **)&aa)); 3724 PetscCall(ISRestoreIndices(iscmap, &cmap)); 3725 3726 PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY)); 3727 PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY)); 3728 3729 PetscCall(PetscFree(colsub)); 3730 3731 /* save Msub, iscol_sub and iscmap used in processor for next request */ 3732 if (call == MAT_INITIAL_MATRIX) { 3733 *newmat = M; 3734 PetscCall(PetscObjectCompose((PetscObject)*newmat, "SubMatrix", (PetscObject)Msub)); 3735 PetscCall(MatDestroy(&Msub)); 3736 3737 PetscCall(PetscObjectCompose((PetscObject)*newmat, "SubIScol", (PetscObject)iscol_sub)); 3738 PetscCall(ISDestroy(&iscol_sub)); 3739 3740 PetscCall(PetscObjectCompose((PetscObject)*newmat, "Subcmap", (PetscObject)iscmap)); 3741 PetscCall(ISDestroy(&iscmap)); 3742 3743 if (iscol_local) { 3744 PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local)); 3745 PetscCall(ISDestroy(&iscol_local)); 3746 } 3747 } 3748 PetscFunctionReturn(PETSC_SUCCESS); 3749 } 3750 3751 /* 3752 Not great since it makes two copies of the submatrix, first an SeqAIJ 3753 in local and then by concatenating the local matrices the end result. 3754 Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3755 3756 This requires a sequential iscol with all indices. 
3757 */ 3758 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat) 3759 { 3760 PetscMPIInt rank, size; 3761 PetscInt i, m, n, rstart, row, rend, nz, *cwork, j, bs, cbs; 3762 PetscInt *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal; 3763 Mat M, Mreuse; 3764 MatScalar *aa, *vwork; 3765 MPI_Comm comm; 3766 Mat_SeqAIJ *aij; 3767 PetscBool colflag, allcolumns = PETSC_FALSE; 3768 3769 PetscFunctionBegin; 3770 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3771 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 3772 PetscCallMPI(MPI_Comm_size(comm, &size)); 3773 3774 /* Check for special case: each processor gets entire matrix columns */ 3775 PetscCall(ISIdentity(iscol, &colflag)); 3776 PetscCall(ISGetLocalSize(iscol, &n)); 3777 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3778 PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 3779 3780 if (call == MAT_REUSE_MATRIX) { 3781 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse)); 3782 PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 3783 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_REUSE_MATRIX, allcolumns, &Mreuse)); 3784 } else { 3785 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_INITIAL_MATRIX, allcolumns, &Mreuse)); 3786 } 3787 3788 /* 3789 m - number of local rows 3790 n - number of columns (same on all processors) 3791 rstart - first row in new global matrix generated 3792 */ 3793 PetscCall(MatGetSize(Mreuse, &m, &n)); 3794 PetscCall(MatGetBlockSizes(Mreuse, &bs, &cbs)); 3795 if (call == MAT_INITIAL_MATRIX) { 3796 aij = (Mat_SeqAIJ *)(Mreuse)->data; 3797 ii = aij->i; 3798 jj = aij->j; 3799 3800 /* 3801 Determine the number of non-zeros in the diagonal and off-diagonal 3802 portions of the matrix in order to do correct preallocation 3803 */ 3804 3805 /* first get start and end of "diagonal" columns */ 3806 if (csize == PETSC_DECIDE) { 3807 PetscCall(ISGetSize(isrow, &mglobal)); 3808 if (mglobal == n) { /* square matrix */ 3809 nlocal = m; 3810 } else { 3811 nlocal = n / size + ((n % size) > rank); 3812 } 3813 } else { 3814 nlocal = csize; 3815 } 3816 PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm)); 3817 rstart = rend - nlocal; 3818 PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n); 3819 3820 /* next, compute all the lengths */ 3821 PetscCall(PetscMalloc1(2 * m + 1, &dlens)); 3822 olens = dlens + m; 3823 for (i = 0; i < m; i++) { 3824 jend = ii[i + 1] - ii[i]; 3825 olen = 0; 3826 dlen = 0; 3827 for (j = 0; j < jend; j++) { 3828 if (*jj < rstart || *jj >= rend) olen++; 3829 else dlen++; 3830 jj++; 3831 } 3832 olens[i] = olen; 3833 dlens[i] = dlen; 3834 } 3835 PetscCall(MatCreate(comm, &M)); 3836 PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, n)); 3837 PetscCall(MatSetBlockSizes(M, bs, cbs)); 3838 PetscCall(MatSetType(M, ((PetscObject)mat)->type_name)); 3839 PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens)); 3840 PetscCall(PetscFree(dlens)); 3841 } else { 3842 PetscInt ml, nl; 3843 3844 M = *newmat; 3845 PetscCall(MatGetLocalSize(M, &ml, &nl)); 3846 PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as 
request"); 3847 PetscCall(MatZeroEntries(M)); 3848 /* 3849 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3850 rather than the slower MatSetValues(). 3851 */ 3852 M->was_assembled = PETSC_TRUE; 3853 M->assembled = PETSC_FALSE; 3854 } 3855 PetscCall(MatGetOwnershipRange(M, &rstart, &rend)); 3856 aij = (Mat_SeqAIJ *)(Mreuse)->data; 3857 ii = aij->i; 3858 jj = aij->j; 3859 3860 /* trigger copy to CPU if needed */ 3861 PetscCall(MatSeqAIJGetArrayRead(Mreuse, (const PetscScalar **)&aa)); 3862 for (i = 0; i < m; i++) { 3863 row = rstart + i; 3864 nz = ii[i + 1] - ii[i]; 3865 cwork = jj; 3866 jj = PetscSafePointerPlusOffset(jj, nz); 3867 vwork = aa; 3868 aa = PetscSafePointerPlusOffset(aa, nz); 3869 PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES)); 3870 } 3871 PetscCall(MatSeqAIJRestoreArrayRead(Mreuse, (const PetscScalar **)&aa)); 3872 3873 PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY)); 3874 PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY)); 3875 *newmat = M; 3876 3877 /* save submatrix used in processor for next request */ 3878 if (call == MAT_INITIAL_MATRIX) { 3879 PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse)); 3880 PetscCall(MatDestroy(&Mreuse)); 3881 } 3882 PetscFunctionReturn(PETSC_SUCCESS); 3883 } 3884 3885 static PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[]) 3886 { 3887 PetscInt m, cstart, cend, j, nnz, i, d, *ld; 3888 PetscInt *d_nnz, *o_nnz, nnz_max = 0, rstart, ii; 3889 const PetscInt *JJ; 3890 PetscBool nooffprocentries; 3891 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)B->data; 3892 3893 PetscFunctionBegin; 3894 PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Ii[0] must be 0 it is %" PetscInt_FMT, Ii[0]); 3895 3896 PetscCall(PetscLayoutSetUp(B->rmap)); 3897 PetscCall(PetscLayoutSetUp(B->cmap)); 3898 m = B->rmap->n; 3899 cstart = B->cmap->rstart; 3900 cend = B->cmap->rend; 3901 rstart = B->rmap->rstart; 3902 3903 PetscCall(PetscCalloc2(m, &d_nnz, m, &o_nnz)); 3904 3905 if (PetscDefined(USE_DEBUG)) { 3906 for (i = 0; i < m; i++) { 3907 nnz = Ii[i + 1] - Ii[i]; 3908 JJ = PetscSafePointerPlusOffset(J, Ii[i]); 3909 PetscCheck(nnz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns", i, nnz); 3910 PetscCheck(!nnz || !(JJ[0] < 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT, i, JJ[0]); 3911 PetscCheck(!nnz || !(JJ[nnz - 1] >= B->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")", i, JJ[nnz - 1], B->cmap->N); 3912 } 3913 } 3914 3915 for (i = 0; i < m; i++) { 3916 nnz = Ii[i + 1] - Ii[i]; 3917 JJ = PetscSafePointerPlusOffset(J, Ii[i]); 3918 nnz_max = PetscMax(nnz_max, nnz); 3919 d = 0; 3920 for (j = 0; j < nnz; j++) { 3921 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3922 } 3923 d_nnz[i] = d; 3924 o_nnz[i] = nnz - d; 3925 } 3926 PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz)); 3927 PetscCall(PetscFree2(d_nnz, o_nnz)); 3928 3929 for (i = 0; i < m; i++) { 3930 ii = i + rstart; 3931 PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, Ii[i + 1] - Ii[i], PetscSafePointerPlusOffset(J, Ii[i]), PetscSafePointerPlusOffset(v, Ii[i]), INSERT_VALUES)); 3932 } 3933 nooffprocentries = B->nooffprocentries; 3934 B->nooffprocentries = PETSC_TRUE; 3935 
PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 3936 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 3937 B->nooffprocentries = nooffprocentries; 3938 3939 /* count number of entries below block diagonal */ 3940 PetscCall(PetscFree(Aij->ld)); 3941 PetscCall(PetscCalloc1(m, &ld)); 3942 Aij->ld = ld; 3943 for (i = 0; i < m; i++) { 3944 nnz = Ii[i + 1] - Ii[i]; 3945 j = 0; 3946 while (j < nnz && J[j] < cstart) j++; 3947 ld[i] = j; 3948 if (J) J += nnz; 3949 } 3950 3951 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 3952 PetscFunctionReturn(PETSC_SUCCESS); 3953 } 3954 3955 /*@ 3956 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in `MATAIJ` format 3957 (the default parallel PETSc format). 3958 3959 Collective 3960 3961 Input Parameters: 3962 + B - the matrix 3963 . i - the indices into `j` for the start of each local row (indices start with zero) 3964 . j - the column indices for each local row (indices start with zero) 3965 - v - optional values in the matrix 3966 3967 Level: developer 3968 3969 Notes: 3970 The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc; 3971 thus you CANNOT change the matrix entries by changing the values of `v` after you have 3972 called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays. 3973 3974 The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array. 3975 3976 A convenience routine for this functionality is `MatCreateMPIAIJWithArrays()`. 3977 3978 You can update the matrix with new numerical values using `MatUpdateMPIAIJWithArrays()` after this call if the column indices in `j` are sorted. 3979 3980 If you do **not** use `MatUpdateMPIAIJWithArrays()`, the column indices in `j` do not need to be sorted. If you will use 3981 `MatUpdateMPIAIJWithArrays()`, the column indices **must** be sorted. 3982 3983 The format which is used for the sparse matrix input, is equivalent to a 3984 row-major ordering.. i.e for the following matrix, the input data expected is 3985 as shown 3986 .vb 3987 1 0 0 3988 2 0 3 P0 3989 ------- 3990 4 5 6 P1 3991 3992 Process0 [P0] rows_owned=[0,1] 3993 i = {0,1,3} [size = nrow+1 = 2+1] 3994 j = {0,0,2} [size = 3] 3995 v = {1,2,3} [size = 3] 3996 3997 Process1 [P1] rows_owned=[2] 3998 i = {0,3} [size = nrow+1 = 1+1] 3999 j = {0,1,2} [size = 3] 4000 v = {4,5,6} [size = 3] 4001 .ve 4002 4003 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`, 4004 `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()`, `MatCreateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4005 @*/ 4006 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B, const PetscInt i[], const PetscInt j[], const PetscScalar v[]) 4007 { 4008 PetscFunctionBegin; 4009 PetscTryMethod(B, "MatMPIAIJSetPreallocationCSR_C", (Mat, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, i, j, v)); 4010 PetscFunctionReturn(PETSC_SUCCESS); 4011 } 4012 4013 /*@C 4014 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in `MATMPIAIJ` format 4015 (the default parallel PETSc format). For good matrix assembly performance 4016 the user should preallocate the matrix storage by setting the parameters 4017 `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). 4018 4019 Collective 4020 4021 Input Parameters: 4022 + B - the matrix 4023 . 
d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4024 (same value is used for all local rows) 4025 . d_nnz - array containing the number of nonzeros in the various rows of the 4026 DIAGONAL portion of the local submatrix (possibly different for each row) 4027 or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `d_nz` is used to specify the nonzero structure. 4028 The size of this array is equal to the number of local rows, i.e 'm'. 4029 For matrices that will be factored, you must leave room for (and set) 4030 the diagonal entry even if it is zero. 4031 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4032 submatrix (same value is used for all local rows). 4033 - o_nnz - array containing the number of nonzeros in the various rows of the 4034 OFF-DIAGONAL portion of the local submatrix (possibly different for 4035 each row) or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `o_nz` is used to specify the nonzero 4036 structure. The size of this array is equal to the number 4037 of local rows, i.e 'm'. 4038 4039 Example Usage: 4040 Consider the following 8x8 matrix with 34 non-zero values, that is 4041 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4042 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4043 as follows 4044 4045 .vb 4046 1 2 0 | 0 3 0 | 0 4 4047 Proc0 0 5 6 | 7 0 0 | 8 0 4048 9 0 10 | 11 0 0 | 12 0 4049 ------------------------------------- 4050 13 0 14 | 15 16 17 | 0 0 4051 Proc1 0 18 0 | 19 20 21 | 0 0 4052 0 0 0 | 22 23 0 | 24 0 4053 ------------------------------------- 4054 Proc2 25 26 27 | 0 0 28 | 29 0 4055 30 0 0 | 31 32 33 | 0 34 4056 .ve 4057 4058 This can be represented as a collection of submatrices as 4059 .vb 4060 A B C 4061 D E F 4062 G H I 4063 .ve 4064 4065 Where the submatrices A,B,C are owned by proc0, D,E,F are 4066 owned by proc1, G,H,I are owned by proc2. 4067 4068 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4069 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4070 The 'M','N' parameters are 8,8, and have the same values on all procs. 4071 4072 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4073 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4074 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4075 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4076 part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ` 4077 matrix, ans [DF] as another `MATSEQAIJ` matrix. 4078 4079 When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are 4080 allocated for every row of the local diagonal submatrix, and `o_nz` 4081 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4082 One way to choose `d_nz` and `o_nz` is to use the max nonzerors per local 4083 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4084 In this case, the values of `d_nz`, `o_nz` are 4085 .vb 4086 proc0 dnz = 2, o_nz = 2 4087 proc1 dnz = 3, o_nz = 2 4088 proc2 dnz = 1, o_nz = 4 4089 .ve 4090 We are allocating `m`*(`d_nz`+`o_nz`) storage locations for every proc. This 4091 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4092 for proc3. i.e we are using 12+15+10=37 storage locations to store 4093 34 values. 4094 4095 When `d_nnz`, `o_nnz` parameters are specified, the storage is specified 4096 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 
4097 In the above case the values for `d_nnz`, `o_nnz` are 4098 .vb 4099 proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2] 4100 proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1] 4101 proc2 d_nnz = [1,1] and o_nnz = [4,4] 4102 .ve 4103 Here the space allocated is the sum of all the above values, i.e., 34, and 4104 hence the preallocation is perfect. 4105 4106 Level: intermediate 4107 4108 Notes: 4109 If the *_nnz parameter is given then the *_nz parameter is ignored. 4110 4111 The `MATAIJ` format, also called compressed row storage (CSR), is compatible with standard Fortran 4112 storage. The stored row and column indices begin with zero. 4113 See [Sparse Matrices](sec_matsparse) for details. 4114 4115 The parallel matrix is partitioned such that the first m0 rows belong to 4116 process 0, the next m1 rows belong to process 1, the next m2 rows belong 4117 to process 2, etc., where m0,m1,m2... are the input parameter 'm'. 4118 4119 The DIAGONAL portion of the local submatrix of a processor can be defined 4120 as the submatrix which is obtained by extracting the part corresponding to 4121 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 4122 first row that belongs to the processor, r2 is the last row belonging to 4123 this processor, and c1-c2 is the range of indices of the local part of a 4124 vector suitable for applying the matrix to. This is an m x n matrix. In the 4125 common case of a square matrix, the row and column ranges are the same and 4126 the DIAGONAL part is also square. The remaining portion of the local 4127 submatrix (m x N) constitutes the OFF-DIAGONAL portion. 4128 4129 If `o_nnz` and `d_nnz` are specified, then `o_nz` and `d_nz` are ignored. 4130 4131 You can call `MatGetInfo()` to get information on how effective the preallocation was; 4132 for example, the fields mallocs, nz_allocated, nz_used, and nz_unneeded. 4133 You can also run with the option `-info` and look for messages with the string 4134 malloc in them to see if additional memory allocation was needed. 4135 4136 .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MATMPIAIJ`, `MATAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`, 4137 `MatGetInfo()`, `PetscSplitOwnership()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4138 @*/ 4139 PetscErrorCode MatMPIAIJSetPreallocation(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[]) 4140 { 4141 PetscFunctionBegin; 4142 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 4143 PetscValidType(B, 1); 4144 PetscTryMethod(B, "MatMPIAIJSetPreallocation_C", (Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, d_nz, d_nnz, o_nz, o_nnz)); 4145 PetscFunctionReturn(PETSC_SUCCESS); 4146 } 4147 4148 /*@ 4149 MatCreateMPIAIJWithArrays - creates a `MATMPIAIJ` matrix using arrays that contain the local rows in standard 4150 CSR format. 4151 4152 Collective 4153 4154 Input Parameters: 4155 + comm - MPI communicator 4156 . m - number of local rows (Cannot be `PETSC_DECIDE`) 4157 . n - This value should be the same as the local size used in creating the 4158 x vector for the matrix-vector product $y = Ax$. (or `PETSC_DECIDE` to have 4159 calculated if `N` is given) For square matrices n is almost always `m`. 4160 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given) 4161 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given) 4162 .
i - row indices (of length m+1); that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 4163 . j - global column indices 4164 - a - optional matrix values 4165 4166 Output Parameter: 4167 . mat - the matrix 4168 4169 Level: intermediate 4170 4171 Notes: 4172 The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc; 4173 thus you CANNOT change the matrix entries by changing the values of `a[]` after you have 4174 called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays. 4175 4176 The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array. 4177 4178 Once you have created the matrix you can update it with new numerical values using `MatUpdateMPIAIJWithArray()` 4179 4180 If you do **not** use `MatUpdateMPIAIJWithArray()`, the column indices in `j` do not need to be sorted. If you will use 4181 `MatUpdateMPIAIJWithArrays()`, the column indices **must** be sorted. 4182 4183 The format which is used for the sparse matrix input, is equivalent to a 4184 row-major ordering, i.e., for the following matrix, the input data expected is 4185 as shown 4186 .vb 4187 1 0 0 4188 2 0 3 P0 4189 ------- 4190 4 5 6 P1 4191 4192 Process0 [P0] rows_owned=[0,1] 4193 i = {0,1,3} [size = nrow+1 = 2+1] 4194 j = {0,0,2} [size = 3] 4195 v = {1,2,3} [size = 3] 4196 4197 Process1 [P1] rows_owned=[2] 4198 i = {0,3} [size = nrow+1 = 1+1] 4199 j = {0,1,2} [size = 3] 4200 v = {4,5,6} [size = 3] 4201 .ve 4202 4203 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4204 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4205 @*/ 4206 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat) 4207 { 4208 PetscFunctionBegin; 4209 PetscCheck(!i || !i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 4210 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4211 PetscCall(MatCreate(comm, mat)); 4212 PetscCall(MatSetSizes(*mat, m, n, M, N)); 4213 /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */ 4214 PetscCall(MatSetType(*mat, MATMPIAIJ)); 4215 PetscCall(MatMPIAIJSetPreallocationCSR(*mat, i, j, a)); 4216 PetscFunctionReturn(PETSC_SUCCESS); 4217 } 4218 4219 /*@ 4220 MatUpdateMPIAIJWithArrays - updates a `MATMPIAIJ` matrix using arrays that contain in standard 4221 CSR format for the local rows. Only the numerical values are updated the other arrays must be identical to what was passed 4222 from `MatCreateMPIAIJWithArrays()` 4223 4224 Deprecated: Use `MatUpdateMPIAIJWithArray()` 4225 4226 Collective 4227 4228 Input Parameters: 4229 + mat - the matrix 4230 . m - number of local rows (Cannot be `PETSC_DECIDE`) 4231 . n - This value should be the same as the local size used in creating the 4232 x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have 4233 calculated if N is given) For square matrices n is almost always m. 4234 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 4235 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 4236 . 
Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix 4237 . J - column indices 4238 - v - matrix values 4239 4240 Level: deprecated 4241 4242 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4243 `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4244 @*/ 4245 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[]) 4246 { 4247 PetscInt nnz, i; 4248 PetscBool nooffprocentries; 4249 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)mat->data; 4250 Mat_SeqAIJ *Ad = (Mat_SeqAIJ *)Aij->A->data; 4251 PetscScalar *ad, *ao; 4252 PetscInt ldi, Iii, md; 4253 const PetscInt *Adi = Ad->i; 4254 PetscInt *ld = Aij->ld; 4255 4256 PetscFunctionBegin; 4257 PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 4258 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4259 PetscCheck(m == mat->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()"); 4260 PetscCheck(n == mat->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()"); 4261 4262 PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad)); 4263 PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao)); 4264 4265 for (i = 0; i < m; i++) { 4266 if (PetscDefined(USE_DEBUG)) { 4267 for (PetscInt j = Ii[i] + 1; j < Ii[i + 1]; ++j) { 4268 PetscCheck(J[j] >= J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is not sorted", j - Ii[i], J[j], i); 4269 PetscCheck(J[j] != J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is identical to previous entry", j - Ii[i], J[j], i); 4270 } 4271 } 4272 nnz = Ii[i + 1] - Ii[i]; 4273 Iii = Ii[i]; 4274 ldi = ld[i]; 4275 md = Adi[i + 1] - Adi[i]; 4276 PetscCall(PetscArraycpy(ao, v + Iii, ldi)); 4277 PetscCall(PetscArraycpy(ad, v + Iii + ldi, md)); 4278 PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md)); 4279 ad += md; 4280 ao += nnz - md; 4281 } 4282 nooffprocentries = mat->nooffprocentries; 4283 mat->nooffprocentries = PETSC_TRUE; 4284 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad)); 4285 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao)); 4286 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A)); 4287 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B)); 4288 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 4289 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 4290 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 4291 mat->nooffprocentries = nooffprocentries; 4292 PetscFunctionReturn(PETSC_SUCCESS); 4293 } 4294 4295 /*@ 4296 MatUpdateMPIAIJWithArray - updates an `MATMPIAIJ` matrix using an array that contains the nonzero values 4297 4298 Collective 4299 4300 Input Parameters: 4301 + mat - the matrix 4302 - v - matrix values, stored by row 4303 4304 Level: intermediate 4305 4306 Notes: 4307 The matrix must have been obtained with `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` 4308 4309 The 
column indices in the call to `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` must have been sorted for this call to work correctly 4310 4311 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4312 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4313 @*/ 4314 PetscErrorCode MatUpdateMPIAIJWithArray(Mat mat, const PetscScalar v[]) 4315 { 4316 PetscInt nnz, i, m; 4317 PetscBool nooffprocentries; 4318 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)mat->data; 4319 Mat_SeqAIJ *Ad = (Mat_SeqAIJ *)Aij->A->data; 4320 Mat_SeqAIJ *Ao = (Mat_SeqAIJ *)Aij->B->data; 4321 PetscScalar *ad, *ao; 4322 const PetscInt *Adi = Ad->i, *Adj = Ao->i; 4323 PetscInt ldi, Iii, md; 4324 PetscInt *ld = Aij->ld; 4325 4326 PetscFunctionBegin; 4327 m = mat->rmap->n; 4328 4329 PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad)); 4330 PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao)); 4331 Iii = 0; 4332 for (i = 0; i < m; i++) { 4333 nnz = Adi[i + 1] - Adi[i] + Adj[i + 1] - Adj[i]; 4334 ldi = ld[i]; 4335 md = Adi[i + 1] - Adi[i]; 4336 PetscCall(PetscArraycpy(ad, v + Iii + ldi, md)); 4337 ad += md; 4338 if (ao) { 4339 PetscCall(PetscArraycpy(ao, v + Iii, ldi)); 4340 PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md)); 4341 ao += nnz - md; 4342 } 4343 Iii += nnz; 4344 } 4345 nooffprocentries = mat->nooffprocentries; 4346 mat->nooffprocentries = PETSC_TRUE; 4347 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad)); 4348 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao)); 4349 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A)); 4350 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B)); 4351 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 4352 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 4353 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 4354 mat->nooffprocentries = nooffprocentries; 4355 PetscFunctionReturn(PETSC_SUCCESS); 4356 } 4357 4358 /*@C 4359 MatCreateAIJ - Creates a sparse parallel matrix in `MATAIJ` format 4360 (the default parallel PETSc format). For good matrix assembly performance 4361 the user should preallocate the matrix storage by setting the parameters 4362 `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). 4363 4364 Collective 4365 4366 Input Parameters: 4367 + comm - MPI communicator 4368 . m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given) 4369 This value should be the same as the local size used in creating the 4370 y vector for the matrix-vector product y = Ax. 4371 . n - This value should be the same as the local size used in creating the 4372 x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have 4373 calculated if N is given) For square matrices n is almost always m. 4374 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 4375 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 4376 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4377 (same value is used for all local rows) 4378 . d_nnz - array containing the number of nonzeros in the various rows of the 4379 DIAGONAL portion of the local submatrix (possibly different for each row) 4380 or `NULL`, if `d_nz` is used to specify the nonzero structure. 4381 The size of this array is equal to the number of local rows, i.e 'm'. 4382 . 
o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
4383          submatrix (same value is used for all local rows).
4384 - o_nnz - array containing the number of nonzeros in the various rows of the
4385          OFF-DIAGONAL portion of the local submatrix (possibly different for
4386          each row) or `NULL`, if `o_nz` is used to specify the nonzero
4387          structure. The size of this array is equal to the number
4388          of local rows, i.e., `m`.
4389 
4390   Output Parameter:
4391 . A - the matrix
4392 
4393   Options Database Keys:
4394 + -mat_no_inode - Do not use inodes
4395 . -mat_inode_limit <limit> - Sets inode limit (max limit=5)
4396 - -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in `MatMult()` of sparse parallel matrices.
4397         See viewer types in the manual page of `MatView()`. Of them, ascii_matlab, draw or binary cause the `VecScatter`
4398         to be viewed as a matrix. Entry (i,j) is the size of the message (in bytes) rank i sends to rank j in one `MatMult()` call.
4399 
4400   Level: intermediate
4401 
4402   Notes:
4403   It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
4404   MatXXXXSetPreallocation() paradigm instead of this routine directly.
4405   [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]
4406 
4407   If the *_nnz parameter is given then the *_nz parameter is ignored.
4408 
4409   The `m`,`n`,`M`,`N` parameters specify the size of the matrix, and its partitioning across
4410   processors, while `d_nz`,`d_nnz`,`o_nz`,`o_nnz` parameters specify the approximate
4411   storage requirements for this matrix.
4412 
4413   If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one
4414   processor then it must be used on all processors that share the object for
4415   that argument.
4416 
4417   If `m` and `n` are not `PETSC_DECIDE`, then the values determine the `PetscLayout` of the matrix and the ranges returned by
4418   `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, and `MatGetOwnershipRangesColumn()`.
4419 
4420   The user MUST specify either the local or global matrix dimensions
4421   (possibly both).
4422 
4423   The parallel matrix is partitioned across processors such that the
4424   first `m0` rows belong to process 0, the next `m1` rows belong to
4425   process 1, the next `m2` rows belong to process 2, etc., where
4426   `m0`, `m1`, `m2`... are the input parameter `m` on each MPI process. That is, each MPI process stores
4427   values corresponding to an [m x N] submatrix.
4428 
4429   The columns are logically partitioned with the first n0 columns belonging
4430   to the 0th partition, the next n1 columns belonging to the next
4431   partition, etc., where n0,n1,n2... are the input parameter `n`.
4432 
4433   The DIAGONAL portion of the local submatrix on any given processor
4434   is the submatrix corresponding to the rows and columns m,n
4435   owned by the given processor, i.e., the diagonal submatrix on
4436   process 0 is [m0 x n0], the diagonal submatrix on process 1 is [m1 x n1],
4437   etc. The remaining portion of the local submatrix [m x (N-n)]
4438   constitutes the OFF-DIAGONAL portion. The example below better
4439   illustrates this concept.
4440 
4441   For a square global matrix we define each processor's diagonal portion
4442   to be its local rows and the corresponding columns (a square submatrix);
4443   each processor's off-diagonal portion encompasses the remainder of the
4444   local matrix (a rectangular submatrix).
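  As a rough sketch (not a PETSc routine), the per-row counts `d_nnz[]` and `o_nnz[]` can be obtained by
  classifying every column index of the caller's own sparsity pattern (named rowptr[] and cols[] here purely
  for illustration) against the range [cstart, cend) of columns that this process will own:
.vb
  for (i = 0; i < m; i++) {
    d_nnz[i] = 0;
    o_nnz[i] = 0;
    for (j = rowptr[i]; j < rowptr[i + 1]; j++) {
      if (cols[j] >= cstart && cols[j] < cend) d_nnz[i]++; /* locally owned column: DIAGONAL block */
      else o_nnz[i]++;                                     /* column owned by another process: OFF-DIAGONAL block */
    }
  }
.ve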
4445 
4446   If `o_nnz`, `d_nnz` are specified, then `o_nz`, and `d_nz` are ignored.
4447 
4448   When calling this routine with a single process communicator, a matrix of
4449   type `MATSEQAIJ` is returned. If a matrix of type `MATMPIAIJ` is desired for this
4450   type of communicator, use the construction mechanism
4451 .vb
4452   MatCreate(..., &A);
4453   MatSetType(A, MATMPIAIJ);
4454   MatSetSizes(A, m, n, M, N);
4455   MatMPIAIJSetPreallocation(A, ...);
4456 .ve
4457 
4458   By default, this format uses inodes (identical nodes) when possible.
4459   We search for consecutive rows with the same nonzero structure, thereby
4460   reusing matrix information to achieve increased efficiency.
4461 
4462   Example Usage:
4463   Consider the following 8x8 matrix with 34 non-zero values, that is
4464   assembled across 3 processors. Let's assume that proc0 owns 3 rows,
4465   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
4466   as follows
4467 
4468 .vb
4469            1  2  0  |  0  3  0  |  0  4
4470    Proc0   0  5  6  |  7  0  0  |  8  0
4471            9  0 10  | 11  0  0  | 12  0
4472    -------------------------------------
4473           13  0 14  | 15 16 17  |  0  0
4474    Proc1   0 18  0  | 19 20 21  |  0  0
4475            0  0  0  | 22 23  0  | 24  0
4476    -------------------------------------
4477    Proc2  25 26 27  |  0  0 28  | 29  0
4478           30  0  0  | 31 32 33  |  0 34
4479 .ve
4480 
4481   This can be represented as a collection of submatrices as
4482 
4483 .vb
4484       A B C
4485       D E F
4486       G H I
4487 .ve
4488 
4489   Where the submatrices A,B,C are owned by proc0, D,E,F are
4490   owned by proc1, G,H,I are owned by proc2.
4491 
4492   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4493   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
4494   The 'M','N' parameters are 8,8, and have the same values on all procs.
4495 
4496   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
4497   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
4498   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
4499   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
4500   part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ`
4501   matrix, and [DF] as another `MATSEQAIJ` matrix.
4502 
4503   When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
4504   allocated for every row of the local diagonal submatrix, and `o_nz`
4505   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
4506   One way to choose `d_nz` and `o_nz` is to use the maximum number of nonzeros per
4507   local row in each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
4508   In this case, the values of `d_nz`,`o_nz` are
4509 .vb
4510    proc0  d_nz = 2, o_nz = 2
4511    proc1  d_nz = 3, o_nz = 2
4512    proc2  d_nz = 1, o_nz = 4
4513 .ve
4514   We are allocating m*(`d_nz`+`o_nz`) storage locations for every proc. This
4515   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
4516   for proc2, i.e., we are using 12+15+10=37 storage locations to store
4517   34 values.
4518 
4519   When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
4520   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4521   In the above case the values for d_nnz,o_nnz are
4522 .vb
4523    proc0  d_nnz = [2,2,2] and o_nnz = [2,2,2]
4524    proc1  d_nnz = [3,3,2] and o_nnz = [2,1,1]
4525    proc2  d_nnz = [1,1]   and o_nnz = [4,4]
4526 .ve
4527   Here the space allocated is the sum of all the above values, i.e., 34, and
4528   hence pre-allocation is perfect.
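  For instance, with the layout of the example above, rank 0 could create its share of the matrix as in the
  following sketch (error checking omitted; the other ranks pass their own local sizes and count arrays):
.vb
  PetscInt d_nnz[3] = {2, 2, 2}, o_nnz[3] = {2, 2, 2};
  Mat      C;

  MatCreateAIJ(PETSC_COMM_WORLD, 3, 3, 8, 8, 0, d_nnz, 0, o_nnz, &C);
.ve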
4529 4530 .seealso: [](ch_matrices), `Mat`, [Sparse Matrix Creation](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4531 `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()`, `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, 4532 `MatGetOwnershipRangesColumn()`, `PetscLayout` 4533 @*/ 4534 PetscErrorCode MatCreateAIJ(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A) 4535 { 4536 PetscMPIInt size; 4537 4538 PetscFunctionBegin; 4539 PetscCall(MatCreate(comm, A)); 4540 PetscCall(MatSetSizes(*A, m, n, M, N)); 4541 PetscCallMPI(MPI_Comm_size(comm, &size)); 4542 if (size > 1) { 4543 PetscCall(MatSetType(*A, MATMPIAIJ)); 4544 PetscCall(MatMPIAIJSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz)); 4545 } else { 4546 PetscCall(MatSetType(*A, MATSEQAIJ)); 4547 PetscCall(MatSeqAIJSetPreallocation(*A, d_nz, d_nnz)); 4548 } 4549 PetscFunctionReturn(PETSC_SUCCESS); 4550 } 4551 4552 /*MC 4553 MatMPIAIJGetSeqAIJF90 - Returns the local pieces of this distributed matrix 4554 4555 Synopsis: 4556 MatMPIAIJGetSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr) 4557 4558 Not Collective 4559 4560 Input Parameter: 4561 . A - the `MATMPIAIJ` matrix 4562 4563 Output Parameters: 4564 + Ad - the diagonal portion of the matrix 4565 . Ao - the off-diagonal portion of the matrix 4566 . colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix 4567 - ierr - error code 4568 4569 Level: advanced 4570 4571 Note: 4572 Use `MatMPIAIJRestoreSeqAIJF90()` when you no longer need access to the matrices and `colmap` 4573 4574 .seealso: [](ch_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJRestoreSeqAIJF90()` 4575 M*/ 4576 4577 /*MC 4578 MatMPIAIJRestoreSeqAIJF90 - call after `MatMPIAIJGetSeqAIJF90()` when you no longer need access to the matrices and `colmap` 4579 4580 Synopsis: 4581 MatMPIAIJRestoreSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr) 4582 4583 Not Collective 4584 4585 Input Parameters: 4586 + A - the `MATMPIAIJ` matrix 4587 . Ad - the diagonal portion of the matrix 4588 . Ao - the off-diagonal portion of the matrix 4589 . colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix 4590 - ierr - error code 4591 4592 Level: advanced 4593 4594 .seealso: [](ch_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJGetSeqAIJF90()` 4595 M*/ 4596 4597 /*@C 4598 MatMPIAIJGetSeqAIJ - Returns the local pieces of this distributed matrix 4599 4600 Not Collective 4601 4602 Input Parameter: 4603 . A - The `MATMPIAIJ` matrix 4604 4605 Output Parameters: 4606 + Ad - The local diagonal block as a `MATSEQAIJ` matrix 4607 . Ao - The local off-diagonal block as a `MATSEQAIJ` matrix 4608 - colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix 4609 4610 Level: intermediate 4611 4612 Note: 4613 The rows in `Ad` and `Ao` are in [0, Nr), where Nr is the number of local rows on this process. The columns 4614 in `Ad` are in [0, Nc) where Nc is the number of local columns. The columns are `Ao` are in [0, Nco), where Nco is 4615 the number of nonzero columns in the local off-diagonal piece of the matrix `A`. 
The array colmap maps these 4616 local column numbers to global column numbers in the original matrix. 4617 4618 Fortran Notes: 4619 `MatMPIAIJGetSeqAIJ()` Fortran binding is deprecated (since PETSc 3.19), use `MatMPIAIJGetSeqAIJF90()` 4620 4621 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJF90()`, `MatMPIAIJRestoreSeqAIJF90()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATSEQAIJ` 4622 @*/ 4623 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[]) 4624 { 4625 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 4626 PetscBool flg; 4627 4628 PetscFunctionBegin; 4629 PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &flg)); 4630 PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIAIJ matrix as input"); 4631 if (Ad) *Ad = a->A; 4632 if (Ao) *Ao = a->B; 4633 if (colmap) *colmap = a->garray; 4634 PetscFunctionReturn(PETSC_SUCCESS); 4635 } 4636 4637 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 4638 { 4639 PetscInt m, N, i, rstart, nnz, Ii; 4640 PetscInt *indx; 4641 PetscScalar *values; 4642 MatType rootType; 4643 4644 PetscFunctionBegin; 4645 PetscCall(MatGetSize(inmat, &m, &N)); 4646 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4647 PetscInt *dnz, *onz, sum, bs, cbs; 4648 4649 if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N)); 4650 /* Check sum(n) = N */ 4651 PetscCall(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm)); 4652 PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N); 4653 4654 PetscCallMPI(MPI_Scan(&m, &rstart, 1, MPIU_INT, MPI_SUM, comm)); 4655 rstart -= m; 4656 4657 MatPreallocateBegin(comm, m, n, dnz, onz); 4658 for (i = 0; i < m; i++) { 4659 PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, NULL)); 4660 PetscCall(MatPreallocateSet(i + rstart, nnz, indx, dnz, onz)); 4661 PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, NULL)); 4662 } 4663 4664 PetscCall(MatCreate(comm, outmat)); 4665 PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 4666 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 4667 PetscCall(MatSetBlockSizes(*outmat, bs, cbs)); 4668 PetscCall(MatGetRootType_Private(inmat, &rootType)); 4669 PetscCall(MatSetType(*outmat, rootType)); 4670 PetscCall(MatSeqAIJSetPreallocation(*outmat, 0, dnz)); 4671 PetscCall(MatMPIAIJSetPreallocation(*outmat, 0, dnz, 0, onz)); 4672 MatPreallocateEnd(dnz, onz); 4673 PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 4674 } 4675 4676 /* numeric phase */ 4677 PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL)); 4678 for (i = 0; i < m; i++) { 4679 PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, &values)); 4680 Ii = i + rstart; 4681 PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES)); 4682 PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, &values)); 4683 } 4684 PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY)); 4685 PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY)); 4686 PetscFunctionReturn(PETSC_SUCCESS); 4687 } 4688 4689 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data) 4690 { 4691 Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data; 4692 4693 PetscFunctionBegin; 4694 if (!merge) PetscFunctionReturn(PETSC_SUCCESS); 4695 PetscCall(PetscFree(merge->id_r)); 4696 PetscCall(PetscFree(merge->len_s)); 4697 
PetscCall(PetscFree(merge->len_r)); 4698 PetscCall(PetscFree(merge->bi)); 4699 PetscCall(PetscFree(merge->bj)); 4700 PetscCall(PetscFree(merge->buf_ri[0])); 4701 PetscCall(PetscFree(merge->buf_ri)); 4702 PetscCall(PetscFree(merge->buf_rj[0])); 4703 PetscCall(PetscFree(merge->buf_rj)); 4704 PetscCall(PetscFree(merge->coi)); 4705 PetscCall(PetscFree(merge->coj)); 4706 PetscCall(PetscFree(merge->owners_co)); 4707 PetscCall(PetscLayoutDestroy(&merge->rowmap)); 4708 PetscCall(PetscFree(merge)); 4709 PetscFunctionReturn(PETSC_SUCCESS); 4710 } 4711 4712 #include <../src/mat/utils/freespace.h> 4713 #include <petscbt.h> 4714 4715 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat, Mat mpimat) 4716 { 4717 MPI_Comm comm; 4718 Mat_SeqAIJ *a = (Mat_SeqAIJ *)seqmat->data; 4719 PetscMPIInt size, rank, taga, *len_s; 4720 PetscInt N = mpimat->cmap->N, i, j, *owners, *ai = a->i, *aj; 4721 PetscInt proc, m; 4722 PetscInt **buf_ri, **buf_rj; 4723 PetscInt k, anzi, *bj_i, *bi, *bj, arow, bnzi, nextaj; 4724 PetscInt nrows, **buf_ri_k, **nextrow, **nextai; 4725 MPI_Request *s_waits, *r_waits; 4726 MPI_Status *status; 4727 const MatScalar *aa, *a_a; 4728 MatScalar **abuf_r, *ba_i; 4729 Mat_Merge_SeqsToMPI *merge; 4730 PetscContainer container; 4731 4732 PetscFunctionBegin; 4733 PetscCall(PetscObjectGetComm((PetscObject)mpimat, &comm)); 4734 PetscCall(PetscLogEventBegin(MAT_Seqstompinum, seqmat, 0, 0, 0)); 4735 4736 PetscCallMPI(MPI_Comm_size(comm, &size)); 4737 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 4738 4739 PetscCall(PetscObjectQuery((PetscObject)mpimat, "MatMergeSeqsToMPI", (PetscObject *)&container)); 4740 PetscCheck(container, PetscObjectComm((PetscObject)mpimat), PETSC_ERR_PLIB, "Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic"); 4741 PetscCall(PetscContainerGetPointer(container, (void **)&merge)); 4742 PetscCall(MatSeqAIJGetArrayRead(seqmat, &a_a)); 4743 aa = a_a; 4744 4745 bi = merge->bi; 4746 bj = merge->bj; 4747 buf_ri = merge->buf_ri; 4748 buf_rj = merge->buf_rj; 4749 4750 PetscCall(PetscMalloc1(size, &status)); 4751 owners = merge->rowmap->range; 4752 len_s = merge->len_s; 4753 4754 /* send and recv matrix values */ 4755 PetscCall(PetscObjectGetNewTag((PetscObject)mpimat, &taga)); 4756 PetscCall(PetscPostIrecvScalar(comm, taga, merge->nrecv, merge->id_r, merge->len_r, &abuf_r, &r_waits)); 4757 4758 PetscCall(PetscMalloc1(merge->nsend + 1, &s_waits)); 4759 for (proc = 0, k = 0; proc < size; proc++) { 4760 if (!len_s[proc]) continue; 4761 i = owners[proc]; 4762 PetscCallMPI(MPI_Isend(aa + ai[i], len_s[proc], MPIU_MATSCALAR, proc, taga, comm, s_waits + k)); 4763 k++; 4764 } 4765 4766 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, r_waits, status)); 4767 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, s_waits, status)); 4768 PetscCall(PetscFree(status)); 4769 4770 PetscCall(PetscFree(s_waits)); 4771 PetscCall(PetscFree(r_waits)); 4772 4773 /* insert mat values of mpimat */ 4774 PetscCall(PetscMalloc1(N, &ba_i)); 4775 PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai)); 4776 4777 for (k = 0; k < merge->nrecv; k++) { 4778 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4779 nrows = *buf_ri_k[k]; 4780 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4781 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4782 } 4783 4784 /* set values of ba */ 4785 m = merge->rowmap->n; 4786 for (i = 0; i < m; i++) { 4787 arow = owners[rank] + i; 
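    /* arow is the global index of the i-th row owned by this rank in the merged matrix; this rank's own
       contribution from seqmat and every received contribution are accumulated into ba_i below, and the
       row is then inserted into mpimat with MatSetValues() */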
4788 bj_i = bj + bi[i]; /* col indices of the i-th row of mpimat */ 4789 bnzi = bi[i + 1] - bi[i]; 4790 PetscCall(PetscArrayzero(ba_i, bnzi)); 4791 4792 /* add local non-zero vals of this proc's seqmat into ba */ 4793 anzi = ai[arow + 1] - ai[arow]; 4794 aj = a->j + ai[arow]; 4795 aa = a_a + ai[arow]; 4796 nextaj = 0; 4797 for (j = 0; nextaj < anzi; j++) { 4798 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4799 ba_i[j] += aa[nextaj++]; 4800 } 4801 } 4802 4803 /* add received vals into ba */ 4804 for (k = 0; k < merge->nrecv; k++) { /* k-th received message */ 4805 /* i-th row */ 4806 if (i == *nextrow[k]) { 4807 anzi = *(nextai[k] + 1) - *nextai[k]; 4808 aj = buf_rj[k] + *nextai[k]; 4809 aa = abuf_r[k] + *nextai[k]; 4810 nextaj = 0; 4811 for (j = 0; nextaj < anzi; j++) { 4812 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4813 ba_i[j] += aa[nextaj++]; 4814 } 4815 } 4816 nextrow[k]++; 4817 nextai[k]++; 4818 } 4819 } 4820 PetscCall(MatSetValues(mpimat, 1, &arow, bnzi, bj_i, ba_i, INSERT_VALUES)); 4821 } 4822 PetscCall(MatSeqAIJRestoreArrayRead(seqmat, &a_a)); 4823 PetscCall(MatAssemblyBegin(mpimat, MAT_FINAL_ASSEMBLY)); 4824 PetscCall(MatAssemblyEnd(mpimat, MAT_FINAL_ASSEMBLY)); 4825 4826 PetscCall(PetscFree(abuf_r[0])); 4827 PetscCall(PetscFree(abuf_r)); 4828 PetscCall(PetscFree(ba_i)); 4829 PetscCall(PetscFree3(buf_ri_k, nextrow, nextai)); 4830 PetscCall(PetscLogEventEnd(MAT_Seqstompinum, seqmat, 0, 0, 0)); 4831 PetscFunctionReturn(PETSC_SUCCESS); 4832 } 4833 4834 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, Mat *mpimat) 4835 { 4836 Mat B_mpi; 4837 Mat_SeqAIJ *a = (Mat_SeqAIJ *)seqmat->data; 4838 PetscMPIInt size, rank, tagi, tagj, *len_s, *len_si, *len_ri; 4839 PetscInt **buf_rj, **buf_ri, **buf_ri_k; 4840 PetscInt M = seqmat->rmap->n, N = seqmat->cmap->n, i, *owners, *ai = a->i, *aj = a->j; 4841 PetscInt len, proc, *dnz, *onz, bs, cbs; 4842 PetscInt k, anzi, *bi, *bj, *lnk, nlnk, arow, bnzi; 4843 PetscInt nrows, *buf_s, *buf_si, *buf_si_i, **nextrow, **nextai; 4844 MPI_Request *si_waits, *sj_waits, *ri_waits, *rj_waits; 4845 MPI_Status *status; 4846 PetscFreeSpaceList free_space = NULL, current_space = NULL; 4847 PetscBT lnkbt; 4848 Mat_Merge_SeqsToMPI *merge; 4849 PetscContainer container; 4850 4851 PetscFunctionBegin; 4852 PetscCall(PetscLogEventBegin(MAT_Seqstompisym, seqmat, 0, 0, 0)); 4853 4854 /* make sure it is a PETSc comm */ 4855 PetscCall(PetscCommDuplicate(comm, &comm, NULL)); 4856 PetscCallMPI(MPI_Comm_size(comm, &size)); 4857 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 4858 4859 PetscCall(PetscNew(&merge)); 4860 PetscCall(PetscMalloc1(size, &status)); 4861 4862 /* determine row ownership */ 4863 PetscCall(PetscLayoutCreate(comm, &merge->rowmap)); 4864 PetscCall(PetscLayoutSetLocalSize(merge->rowmap, m)); 4865 PetscCall(PetscLayoutSetSize(merge->rowmap, M)); 4866 PetscCall(PetscLayoutSetBlockSize(merge->rowmap, 1)); 4867 PetscCall(PetscLayoutSetUp(merge->rowmap)); 4868 PetscCall(PetscMalloc1(size, &len_si)); 4869 PetscCall(PetscMalloc1(size, &merge->len_s)); 4870 4871 m = merge->rowmap->n; 4872 owners = merge->rowmap->range; 4873 4874 /* determine the number of messages to send, their lengths */ 4875 len_s = merge->len_s; 4876 4877 len = 0; /* length of buf_si[] */ 4878 merge->nsend = 0; 4879 for (proc = 0; proc < size; proc++) { 4880 len_si[proc] = 0; 4881 if (proc == rank) { 4882 len_s[proc] = 0; 4883 } else { 4884 len_si[proc] = owners[proc + 1] - owners[proc] + 1; 4885 len_s[proc] = ai[owners[proc + 1]] - 
ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4886 } 4887 if (len_s[proc]) { 4888 merge->nsend++; 4889 nrows = 0; 4890 for (i = owners[proc]; i < owners[proc + 1]; i++) { 4891 if (ai[i + 1] > ai[i]) nrows++; 4892 } 4893 len_si[proc] = 2 * (nrows + 1); 4894 len += len_si[proc]; 4895 } 4896 } 4897 4898 /* determine the number and length of messages to receive for ij-structure */ 4899 PetscCall(PetscGatherNumberOfMessages(comm, NULL, len_s, &merge->nrecv)); 4900 PetscCall(PetscGatherMessageLengths2(comm, merge->nsend, merge->nrecv, len_s, len_si, &merge->id_r, &merge->len_r, &len_ri)); 4901 4902 /* post the Irecv of j-structure */ 4903 PetscCall(PetscCommGetNewTag(comm, &tagj)); 4904 PetscCall(PetscPostIrecvInt(comm, tagj, merge->nrecv, merge->id_r, merge->len_r, &buf_rj, &rj_waits)); 4905 4906 /* post the Isend of j-structure */ 4907 PetscCall(PetscMalloc2(merge->nsend, &si_waits, merge->nsend, &sj_waits)); 4908 4909 for (proc = 0, k = 0; proc < size; proc++) { 4910 if (!len_s[proc]) continue; 4911 i = owners[proc]; 4912 PetscCallMPI(MPI_Isend(aj + ai[i], len_s[proc], MPIU_INT, proc, tagj, comm, sj_waits + k)); 4913 k++; 4914 } 4915 4916 /* receives and sends of j-structure are complete */ 4917 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, rj_waits, status)); 4918 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, sj_waits, status)); 4919 4920 /* send and recv i-structure */ 4921 PetscCall(PetscCommGetNewTag(comm, &tagi)); 4922 PetscCall(PetscPostIrecvInt(comm, tagi, merge->nrecv, merge->id_r, len_ri, &buf_ri, &ri_waits)); 4923 4924 PetscCall(PetscMalloc1(len + 1, &buf_s)); 4925 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4926 for (proc = 0, k = 0; proc < size; proc++) { 4927 if (!len_s[proc]) continue; 4928 /* form outgoing message for i-structure: 4929 buf_si[0]: nrows to be sent 4930 [1:nrows]: row index (global) 4931 [nrows+1:2*nrows+1]: i-structure index 4932 */ 4933 nrows = len_si[proc] / 2 - 1; 4934 buf_si_i = buf_si + nrows + 1; 4935 buf_si[0] = nrows; 4936 buf_si_i[0] = 0; 4937 nrows = 0; 4938 for (i = owners[proc]; i < owners[proc + 1]; i++) { 4939 anzi = ai[i + 1] - ai[i]; 4940 if (anzi) { 4941 buf_si_i[nrows + 1] = buf_si_i[nrows] + anzi; /* i-structure */ 4942 buf_si[nrows + 1] = i - owners[proc]; /* local row index */ 4943 nrows++; 4944 } 4945 } 4946 PetscCallMPI(MPI_Isend(buf_si, len_si[proc], MPIU_INT, proc, tagi, comm, si_waits + k)); 4947 k++; 4948 buf_si += len_si[proc]; 4949 } 4950 4951 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, ri_waits, status)); 4952 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, si_waits, status)); 4953 4954 PetscCall(PetscInfo(seqmat, "nsend: %d, nrecv: %d\n", merge->nsend, merge->nrecv)); 4955 for (i = 0; i < merge->nrecv; i++) PetscCall(PetscInfo(seqmat, "recv len_ri=%d, len_rj=%d from [%d]\n", len_ri[i], merge->len_r[i], merge->id_r[i])); 4956 4957 PetscCall(PetscFree(len_si)); 4958 PetscCall(PetscFree(len_ri)); 4959 PetscCall(PetscFree(rj_waits)); 4960 PetscCall(PetscFree2(si_waits, sj_waits)); 4961 PetscCall(PetscFree(ri_waits)); 4962 PetscCall(PetscFree(buf_s)); 4963 PetscCall(PetscFree(status)); 4964 4965 /* compute a local seq matrix in each processor */ 4966 /* allocate bi array and free space for accumulating nonzero column info */ 4967 PetscCall(PetscMalloc1(m + 1, &bi)); 4968 bi[0] = 0; 4969 4970 /* create and initialize a linked list */ 4971 nlnk = N + 1; 4972 PetscCall(PetscLLCreate(N, N, nlnk, lnk, lnkbt)); 4973 4974 /* initial FreeSpace size is 2*(num of 
local nnz(seqmat)) */ 4975 len = ai[owners[rank + 1]] - ai[owners[rank]]; 4976 PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2, len) + 1, &free_space)); 4977 4978 current_space = free_space; 4979 4980 /* determine symbolic info for each local row */ 4981 PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai)); 4982 4983 for (k = 0; k < merge->nrecv; k++) { 4984 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4985 nrows = *buf_ri_k[k]; 4986 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4987 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4988 } 4989 4990 MatPreallocateBegin(comm, m, n, dnz, onz); 4991 len = 0; 4992 for (i = 0; i < m; i++) { 4993 bnzi = 0; 4994 /* add local non-zero cols of this proc's seqmat into lnk */ 4995 arow = owners[rank] + i; 4996 anzi = ai[arow + 1] - ai[arow]; 4997 aj = a->j + ai[arow]; 4998 PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt)); 4999 bnzi += nlnk; 5000 /* add received col data into lnk */ 5001 for (k = 0; k < merge->nrecv; k++) { /* k-th received message */ 5002 if (i == *nextrow[k]) { /* i-th row */ 5003 anzi = *(nextai[k] + 1) - *nextai[k]; 5004 aj = buf_rj[k] + *nextai[k]; 5005 PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt)); 5006 bnzi += nlnk; 5007 nextrow[k]++; 5008 nextai[k]++; 5009 } 5010 } 5011 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 5012 5013 /* if free space is not available, make more free space */ 5014 if (current_space->local_remaining < bnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi, current_space->total_array_size), ¤t_space)); 5015 /* copy data into free space, then initialize lnk */ 5016 PetscCall(PetscLLClean(N, N, bnzi, lnk, current_space->array, lnkbt)); 5017 PetscCall(MatPreallocateSet(i + owners[rank], bnzi, current_space->array, dnz, onz)); 5018 5019 current_space->array += bnzi; 5020 current_space->local_used += bnzi; 5021 current_space->local_remaining -= bnzi; 5022 5023 bi[i + 1] = bi[i] + bnzi; 5024 } 5025 5026 PetscCall(PetscFree3(buf_ri_k, nextrow, nextai)); 5027 5028 PetscCall(PetscMalloc1(bi[m] + 1, &bj)); 5029 PetscCall(PetscFreeSpaceContiguous(&free_space, bj)); 5030 PetscCall(PetscLLDestroy(lnk, lnkbt)); 5031 5032 /* create symbolic parallel matrix B_mpi */ 5033 PetscCall(MatGetBlockSizes(seqmat, &bs, &cbs)); 5034 PetscCall(MatCreate(comm, &B_mpi)); 5035 if (n == PETSC_DECIDE) { 5036 PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, N)); 5037 } else { 5038 PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 5039 } 5040 PetscCall(MatSetBlockSizes(B_mpi, bs, cbs)); 5041 PetscCall(MatSetType(B_mpi, MATMPIAIJ)); 5042 PetscCall(MatMPIAIJSetPreallocation(B_mpi, 0, dnz, 0, onz)); 5043 MatPreallocateEnd(dnz, onz); 5044 PetscCall(MatSetOption(B_mpi, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE)); 5045 5046 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 5047 B_mpi->assembled = PETSC_FALSE; 5048 merge->bi = bi; 5049 merge->bj = bj; 5050 merge->buf_ri = buf_ri; 5051 merge->buf_rj = buf_rj; 5052 merge->coi = NULL; 5053 merge->coj = NULL; 5054 merge->owners_co = NULL; 5055 5056 PetscCall(PetscCommDestroy(&comm)); 5057 5058 /* attach the supporting struct to B_mpi for reuse */ 5059 PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container)); 5060 PetscCall(PetscContainerSetPointer(container, merge)); 5061 PetscCall(PetscContainerSetUserDestroy(container, 
MatDestroy_MPIAIJ_SeqsToMPI)); 5062 PetscCall(PetscObjectCompose((PetscObject)B_mpi, "MatMergeSeqsToMPI", (PetscObject)container)); 5063 PetscCall(PetscContainerDestroy(&container)); 5064 *mpimat = B_mpi; 5065 5066 PetscCall(PetscLogEventEnd(MAT_Seqstompisym, seqmat, 0, 0, 0)); 5067 PetscFunctionReturn(PETSC_SUCCESS); 5068 } 5069 5070 /*@ 5071 MatCreateMPIAIJSumSeqAIJ - Creates a `MATMPIAIJ` matrix by adding sequential 5072 matrices from each processor 5073 5074 Collective 5075 5076 Input Parameters: 5077 + comm - the communicators the parallel matrix will live on 5078 . seqmat - the input sequential matrices 5079 . m - number of local rows (or `PETSC_DECIDE`) 5080 . n - number of local columns (or `PETSC_DECIDE`) 5081 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5082 5083 Output Parameter: 5084 . mpimat - the parallel matrix generated 5085 5086 Level: advanced 5087 5088 Note: 5089 The dimensions of the sequential matrix in each processor MUST be the same. 5090 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 5091 destroyed when `mpimat` is destroyed. Call `PetscObjectQuery()` to access `seqmat`. 5092 5093 .seealso: [](ch_matrices), `Mat`, `MatCreateAIJ()` 5094 @*/ 5095 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, MatReuse scall, Mat *mpimat) 5096 { 5097 PetscMPIInt size; 5098 5099 PetscFunctionBegin; 5100 PetscCallMPI(MPI_Comm_size(comm, &size)); 5101 if (size == 1) { 5102 PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0)); 5103 if (scall == MAT_INITIAL_MATRIX) { 5104 PetscCall(MatDuplicate(seqmat, MAT_COPY_VALUES, mpimat)); 5105 } else { 5106 PetscCall(MatCopy(seqmat, *mpimat, SAME_NONZERO_PATTERN)); 5107 } 5108 PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0)); 5109 PetscFunctionReturn(PETSC_SUCCESS); 5110 } 5111 PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0)); 5112 if (scall == MAT_INITIAL_MATRIX) PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm, seqmat, m, n, mpimat)); 5113 PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat, *mpimat)); 5114 PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0)); 5115 PetscFunctionReturn(PETSC_SUCCESS); 5116 } 5117 5118 /*@ 5119 MatAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATAIJ` matrix. 5120 5121 Not Collective 5122 5123 Input Parameter: 5124 . A - the matrix 5125 5126 Output Parameter: 5127 . A_loc - the local sequential matrix generated 5128 5129 Level: developer 5130 5131 Notes: 5132 The matrix is created by taking `A`'s local rows and putting them into a sequential matrix 5133 with `mlocal` rows and `n` columns. Where `mlocal` is obtained with `MatGetLocalSize()` and 5134 `n` is the global column count obtained with `MatGetSize()` 5135 5136 In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix. 5137 5138 For parallel matrices this creates an entirely new matrix. If the matrix is sequential it merely increases the reference count. 
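  A typical call sequence is sketched below (`Aloc` is just a placeholder name for the output matrix)
.vb
  Mat Aloc;

  MatAIJGetLocalMat(A, &Aloc);
  /* ... use Aloc as an ordinary MATSEQAIJ matrix ... */
  MatDestroy(&Aloc);
.ve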
5139 
5140   Destroy the matrix with `MatDestroy()`
5141 
5142 .seealso: [](ch_matrices), `Mat`, `MatMPIAIJGetLocalMat()`
5143 @*/
5144 PetscErrorCode MatAIJGetLocalMat(Mat A, Mat *A_loc)
5145 {
5146   PetscBool mpi;
5147 
5148   PetscFunctionBegin;
5149   PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &mpi));
5150   if (mpi) {
5151     PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, A_loc));
5152   } else {
5153     *A_loc = A;
5154     PetscCall(PetscObjectReference((PetscObject)*A_loc));
5155   }
5156   PetscFunctionReturn(PETSC_SUCCESS);
5157 }
5158 
5159 /*@
5160   MatMPIAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix.
5161 
5162   Not Collective
5163 
5164   Input Parameters:
5165 + A - the matrix
5166 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`
5167 
5168   Output Parameter:
5169 . A_loc - the local sequential matrix generated
5170 
5171   Level: developer
5172 
5173   Notes:
5174   The matrix is created by taking all `A`'s local rows and putting them into a sequential
5175   matrix with `mlocal` rows and `n` columns. `mlocal` is the row count obtained with
5176   `MatGetLocalSize()` and `n` is the global column count obtained with `MatGetSize()`.
5177 
5178   In other words, this combines the two parts of a parallel `MATMPIAIJ` matrix on each process into a single matrix.
5179 
5180   When `A` is sequential and `MAT_INITIAL_MATRIX` is requested, the matrix returned is the diagonal part of `A` (which contains the entire matrix),
5181   with its reference count increased by one. Hence changing values of `A_loc` changes `A`. If `MAT_REUSE_MATRIX` is requested on a sequential matrix
5182   then `MatCopy`(Adiag,*`A_loc`,`SAME_NONZERO_PATTERN`) is called to fill `A_loc`. Thus one can preallocate the appropriate sequential matrix `A_loc`
5183   and then call this routine with `MAT_REUSE_MATRIX`. In this case, one can modify the values of `A_loc` without affecting the original sequential matrix.
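  For repeated extraction the local matrix can be created once and then refreshed in place, as in the
  following sketch (`Aloc` is just a placeholder name)
.vb
  Mat Aloc;

  MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &Aloc);
  /* ... the numerical values of A change, its nonzero pattern does not ... */
  MatMPIAIJGetLocalMat(A, MAT_REUSE_MATRIX, &Aloc);
  MatDestroy(&Aloc);
.ve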
5184 5185 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()` 5186 @*/ 5187 PetscErrorCode MatMPIAIJGetLocalMat(Mat A, MatReuse scall, Mat *A_loc) 5188 { 5189 Mat_MPIAIJ *mpimat = (Mat_MPIAIJ *)A->data; 5190 Mat_SeqAIJ *mat, *a, *b; 5191 PetscInt *ai, *aj, *bi, *bj, *cmap = mpimat->garray; 5192 const PetscScalar *aa, *ba, *aav, *bav; 5193 PetscScalar *ca, *cam; 5194 PetscMPIInt size; 5195 PetscInt am = A->rmap->n, i, j, k, cstart = A->cmap->rstart; 5196 PetscInt *ci, *cj, col, ncols_d, ncols_o, jo; 5197 PetscBool match; 5198 5199 PetscFunctionBegin; 5200 PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &match)); 5201 PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input"); 5202 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 5203 if (size == 1) { 5204 if (scall == MAT_INITIAL_MATRIX) { 5205 PetscCall(PetscObjectReference((PetscObject)mpimat->A)); 5206 *A_loc = mpimat->A; 5207 } else if (scall == MAT_REUSE_MATRIX) { 5208 PetscCall(MatCopy(mpimat->A, *A_loc, SAME_NONZERO_PATTERN)); 5209 } 5210 PetscFunctionReturn(PETSC_SUCCESS); 5211 } 5212 5213 PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0)); 5214 a = (Mat_SeqAIJ *)mpimat->A->data; 5215 b = (Mat_SeqAIJ *)mpimat->B->data; 5216 ai = a->i; 5217 aj = a->j; 5218 bi = b->i; 5219 bj = b->j; 5220 PetscCall(MatSeqAIJGetArrayRead(mpimat->A, &aav)); 5221 PetscCall(MatSeqAIJGetArrayRead(mpimat->B, &bav)); 5222 aa = aav; 5223 ba = bav; 5224 if (scall == MAT_INITIAL_MATRIX) { 5225 PetscCall(PetscMalloc1(1 + am, &ci)); 5226 ci[0] = 0; 5227 for (i = 0; i < am; i++) ci[i + 1] = ci[i] + (ai[i + 1] - ai[i]) + (bi[i + 1] - bi[i]); 5228 PetscCall(PetscMalloc1(1 + ci[am], &cj)); 5229 PetscCall(PetscMalloc1(1 + ci[am], &ca)); 5230 k = 0; 5231 for (i = 0; i < am; i++) { 5232 ncols_o = bi[i + 1] - bi[i]; 5233 ncols_d = ai[i + 1] - ai[i]; 5234 /* off-diagonal portion of A */ 5235 for (jo = 0; jo < ncols_o; jo++) { 5236 col = cmap[*bj]; 5237 if (col >= cstart) break; 5238 cj[k] = col; 5239 bj++; 5240 ca[k++] = *ba++; 5241 } 5242 /* diagonal portion of A */ 5243 for (j = 0; j < ncols_d; j++) { 5244 cj[k] = cstart + *aj++; 5245 ca[k++] = *aa++; 5246 } 5247 /* off-diagonal portion of A */ 5248 for (j = jo; j < ncols_o; j++) { 5249 cj[k] = cmap[*bj++]; 5250 ca[k++] = *ba++; 5251 } 5252 } 5253 /* put together the new matrix */ 5254 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, A->cmap->N, ci, cj, ca, A_loc)); 5255 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5256 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5257 mat = (Mat_SeqAIJ *)(*A_loc)->data; 5258 mat->free_a = PETSC_TRUE; 5259 mat->free_ij = PETSC_TRUE; 5260 mat->nonew = 0; 5261 } else if (scall == MAT_REUSE_MATRIX) { 5262 mat = (Mat_SeqAIJ *)(*A_loc)->data; 5263 ci = mat->i; 5264 cj = mat->j; 5265 PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &cam)); 5266 for (i = 0; i < am; i++) { 5267 /* off-diagonal portion of A */ 5268 ncols_o = bi[i + 1] - bi[i]; 5269 for (jo = 0; jo < ncols_o; jo++) { 5270 col = cmap[*bj]; 5271 if (col >= cstart) break; 5272 *cam++ = *ba++; 5273 bj++; 5274 } 5275 /* diagonal portion of A */ 5276 ncols_d = ai[i + 1] - ai[i]; 5277 for (j = 0; j < ncols_d; j++) *cam++ = *aa++; 5278 /* off-diagonal portion of A */ 5279 for (j = jo; j < ncols_o; j++) { 5280 *cam++ = *ba++; 5281 bj++; 5282 } 5283 } 5284 PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &cam)); 5285 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall); 5286 PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A, &aav)); 5287 PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B, &bav)); 5288 PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0)); 5289 PetscFunctionReturn(PETSC_SUCCESS); 5290 } 5291 5292 /*@ 5293 MatMPIAIJGetLocalMatMerge - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into a sequential matrix with 5294 mlocal rows and n columns. Where n is the sum of the number of columns of the diagonal and off-diagonal part 5295 5296 Not Collective 5297 5298 Input Parameters: 5299 + A - the matrix 5300 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5301 5302 Output Parameters: 5303 + glob - sequential `IS` with global indices associated with the columns of the local sequential matrix generated (can be `NULL`) 5304 - A_loc - the local sequential matrix generated 5305 5306 Level: developer 5307 5308 Note: 5309 This is different from `MatMPIAIJGetLocalMat()` since the first columns in the returning matrix are those associated with the diagonal 5310 part, then those associated with the off-diagonal part (in its local ordering) 5311 5312 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()` 5313 @*/ 5314 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A, MatReuse scall, IS *glob, Mat *A_loc) 5315 { 5316 Mat Ao, Ad; 5317 const PetscInt *cmap; 5318 PetscMPIInt size; 5319 PetscErrorCode (*f)(Mat, MatReuse, IS *, Mat *); 5320 5321 PetscFunctionBegin; 5322 PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &cmap)); 5323 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 5324 if (size == 1) { 5325 if (scall == MAT_INITIAL_MATRIX) { 5326 PetscCall(PetscObjectReference((PetscObject)Ad)); 5327 *A_loc = Ad; 5328 } else if (scall == MAT_REUSE_MATRIX) { 5329 PetscCall(MatCopy(Ad, *A_loc, SAME_NONZERO_PATTERN)); 5330 } 5331 if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad), Ad->cmap->n, Ad->cmap->rstart, 1, glob)); 5332 PetscFunctionReturn(PETSC_SUCCESS); 5333 } 5334 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJGetLocalMatMerge_C", &f)); 5335 PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0)); 5336 if (f) { 5337 PetscCall((*f)(A, scall, glob, A_loc)); 5338 } else { 5339 Mat_SeqAIJ *a = (Mat_SeqAIJ *)Ad->data; 5340 Mat_SeqAIJ *b = (Mat_SeqAIJ *)Ao->data; 5341 Mat_SeqAIJ *c; 5342 PetscInt *ai = a->i, *aj = a->j; 5343 PetscInt *bi = b->i, *bj = b->j; 5344 PetscInt *ci, *cj; 5345 const PetscScalar *aa, *ba; 5346 PetscScalar *ca; 5347 PetscInt i, j, am, dn, on; 5348 5349 
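    /* Default implementation: concatenate, row by row, the diagonal block Ad and the off-diagonal block Ao;
       the local column indices of Ao are shifted by dn (the number of diagonal columns) so the merged
       sequential matrix has dn + on columns */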
PetscCall(MatGetLocalSize(Ad, &am, &dn)); 5350 PetscCall(MatGetLocalSize(Ao, NULL, &on)); 5351 PetscCall(MatSeqAIJGetArrayRead(Ad, &aa)); 5352 PetscCall(MatSeqAIJGetArrayRead(Ao, &ba)); 5353 if (scall == MAT_INITIAL_MATRIX) { 5354 PetscInt k; 5355 PetscCall(PetscMalloc1(1 + am, &ci)); 5356 PetscCall(PetscMalloc1(ai[am] + bi[am], &cj)); 5357 PetscCall(PetscMalloc1(ai[am] + bi[am], &ca)); 5358 ci[0] = 0; 5359 for (i = 0, k = 0; i < am; i++) { 5360 const PetscInt ncols_o = bi[i + 1] - bi[i]; 5361 const PetscInt ncols_d = ai[i + 1] - ai[i]; 5362 ci[i + 1] = ci[i] + ncols_o + ncols_d; 5363 /* diagonal portion of A */ 5364 for (j = 0; j < ncols_d; j++, k++) { 5365 cj[k] = *aj++; 5366 ca[k] = *aa++; 5367 } 5368 /* off-diagonal portion of A */ 5369 for (j = 0; j < ncols_o; j++, k++) { 5370 cj[k] = dn + *bj++; 5371 ca[k] = *ba++; 5372 } 5373 } 5374 /* put together the new matrix */ 5375 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, dn + on, ci, cj, ca, A_loc)); 5376 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5377 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5378 c = (Mat_SeqAIJ *)(*A_loc)->data; 5379 c->free_a = PETSC_TRUE; 5380 c->free_ij = PETSC_TRUE; 5381 c->nonew = 0; 5382 PetscCall(MatSetType(*A_loc, ((PetscObject)Ad)->type_name)); 5383 } else if (scall == MAT_REUSE_MATRIX) { 5384 PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &ca)); 5385 for (i = 0; i < am; i++) { 5386 const PetscInt ncols_d = ai[i + 1] - ai[i]; 5387 const PetscInt ncols_o = bi[i + 1] - bi[i]; 5388 /* diagonal portion of A */ 5389 for (j = 0; j < ncols_d; j++) *ca++ = *aa++; 5390 /* off-diagonal portion of A */ 5391 for (j = 0; j < ncols_o; j++) *ca++ = *ba++; 5392 } 5393 PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &ca)); 5394 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall); 5395 PetscCall(MatSeqAIJRestoreArrayRead(Ad, &aa)); 5396 PetscCall(MatSeqAIJRestoreArrayRead(Ao, &aa)); 5397 if (glob) { 5398 PetscInt cst, *gidx; 5399 5400 PetscCall(MatGetOwnershipRangeColumn(A, &cst, NULL)); 5401 PetscCall(PetscMalloc1(dn + on, &gidx)); 5402 for (i = 0; i < dn; i++) gidx[i] = cst + i; 5403 for (i = 0; i < on; i++) gidx[i + dn] = cmap[i]; 5404 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad), dn + on, gidx, PETSC_OWN_POINTER, glob)); 5405 } 5406 } 5407 PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0)); 5408 PetscFunctionReturn(PETSC_SUCCESS); 5409 } 5410 5411 /*@C 5412 MatMPIAIJGetLocalMatCondensed - Creates a `MATSEQAIJ` matrix from an `MATMPIAIJ` matrix by taking all its local rows and NON-ZERO columns 5413 5414 Not Collective 5415 5416 Input Parameters: 5417 + A - the matrix 5418 . scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5419 . row - index set of rows to extract (or `NULL`) 5420 - col - index set of columns to extract (or `NULL`) 5421 5422 Output Parameter: 5423 . 
A_loc - the local sequential matrix generated 5424 5425 Level: developer 5426 5427 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()` 5428 @*/ 5429 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A, MatReuse scall, IS *row, IS *col, Mat *A_loc) 5430 { 5431 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5432 PetscInt i, start, end, ncols, nzA, nzB, *cmap, imark, *idx; 5433 IS isrowa, iscola; 5434 Mat *aloc; 5435 PetscBool match; 5436 5437 PetscFunctionBegin; 5438 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &match)); 5439 PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input"); 5440 PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed, A, 0, 0, 0)); 5441 if (!row) { 5442 start = A->rmap->rstart; 5443 end = A->rmap->rend; 5444 PetscCall(ISCreateStride(PETSC_COMM_SELF, end - start, start, 1, &isrowa)); 5445 } else { 5446 isrowa = *row; 5447 } 5448 if (!col) { 5449 start = A->cmap->rstart; 5450 cmap = a->garray; 5451 nzA = a->A->cmap->n; 5452 nzB = a->B->cmap->n; 5453 PetscCall(PetscMalloc1(nzA + nzB, &idx)); 5454 ncols = 0; 5455 for (i = 0; i < nzB; i++) { 5456 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5457 else break; 5458 } 5459 imark = i; 5460 for (i = 0; i < nzA; i++) idx[ncols++] = start + i; 5461 for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; 5462 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &iscola)); 5463 } else { 5464 iscola = *col; 5465 } 5466 if (scall != MAT_INITIAL_MATRIX) { 5467 PetscCall(PetscMalloc1(1, &aloc)); 5468 aloc[0] = *A_loc; 5469 } 5470 PetscCall(MatCreateSubMatrices(A, 1, &isrowa, &iscola, scall, &aloc)); 5471 if (!col) { /* attach global id of condensed columns */ 5472 PetscCall(PetscObjectCompose((PetscObject)aloc[0], "_petsc_GetLocalMatCondensed_iscol", (PetscObject)iscola)); 5473 } 5474 *A_loc = aloc[0]; 5475 PetscCall(PetscFree(aloc)); 5476 if (!row) PetscCall(ISDestroy(&isrowa)); 5477 if (!col) PetscCall(ISDestroy(&iscola)); 5478 PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed, A, 0, 0, 0)); 5479 PetscFunctionReturn(PETSC_SUCCESS); 5480 } 5481 5482 /* 5483 * Create a sequential AIJ matrix based on row indices. a whole column is extracted once a row is matched. 5484 * Row could be local or remote.The routine is designed to be scalable in memory so that nothing is based 5485 * on a global size. 
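 * The routine first uses a PetscSF over the requested rows to learn how many nonzero columns each row has,
 * then builds two more star forests (one for the diagonal and one for the off-diagonal block of P) that
 * broadcast the column indices (converted to global indices) and the numerical values to the requesting ranks.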
5486 * */ 5487 static PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P, IS rows, Mat *P_oth) 5488 { 5489 Mat_MPIAIJ *p = (Mat_MPIAIJ *)P->data; 5490 Mat_SeqAIJ *pd = (Mat_SeqAIJ *)p->A->data, *po = (Mat_SeqAIJ *)p->B->data, *p_oth; 5491 PetscInt plocalsize, nrows, *ilocal, *oilocal, i, lidx, *nrcols, *nlcols, ncol; 5492 PetscMPIInt owner; 5493 PetscSFNode *iremote, *oiremote; 5494 const PetscInt *lrowindices; 5495 PetscSF sf, osf; 5496 PetscInt pcstart, *roffsets, *loffsets, *pnnz, j; 5497 PetscInt ontotalcols, dntotalcols, ntotalcols, nout; 5498 MPI_Comm comm; 5499 ISLocalToGlobalMapping mapping; 5500 const PetscScalar *pd_a, *po_a; 5501 5502 PetscFunctionBegin; 5503 PetscCall(PetscObjectGetComm((PetscObject)P, &comm)); 5504 /* plocalsize is the number of roots 5505 * nrows is the number of leaves 5506 * */ 5507 PetscCall(MatGetLocalSize(P, &plocalsize, NULL)); 5508 PetscCall(ISGetLocalSize(rows, &nrows)); 5509 PetscCall(PetscCalloc1(nrows, &iremote)); 5510 PetscCall(ISGetIndices(rows, &lrowindices)); 5511 for (i = 0; i < nrows; i++) { 5512 /* Find a remote index and an owner for a row 5513 * The row could be local or remote 5514 * */ 5515 owner = 0; 5516 lidx = 0; 5517 PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, &lidx)); 5518 iremote[i].index = lidx; 5519 iremote[i].rank = owner; 5520 } 5521 /* Create SF to communicate how many nonzero columns for each row */ 5522 PetscCall(PetscSFCreate(comm, &sf)); 5523 /* SF will figure out the number of nonzero columns for each row, and their 5524 * offsets 5525 * */ 5526 PetscCall(PetscSFSetGraph(sf, plocalsize, nrows, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 5527 PetscCall(PetscSFSetFromOptions(sf)); 5528 PetscCall(PetscSFSetUp(sf)); 5529 5530 PetscCall(PetscCalloc1(2 * (plocalsize + 1), &roffsets)); 5531 PetscCall(PetscCalloc1(2 * plocalsize, &nrcols)); 5532 PetscCall(PetscCalloc1(nrows, &pnnz)); 5533 roffsets[0] = 0; 5534 roffsets[1] = 0; 5535 for (i = 0; i < plocalsize; i++) { 5536 /* diagonal */ 5537 nrcols[i * 2 + 0] = pd->i[i + 1] - pd->i[i]; 5538 /* off-diagonal */ 5539 nrcols[i * 2 + 1] = po->i[i + 1] - po->i[i]; 5540 /* compute offsets so that we relative location for each row */ 5541 roffsets[(i + 1) * 2 + 0] = roffsets[i * 2 + 0] + nrcols[i * 2 + 0]; 5542 roffsets[(i + 1) * 2 + 1] = roffsets[i * 2 + 1] + nrcols[i * 2 + 1]; 5543 } 5544 PetscCall(PetscCalloc1(2 * nrows, &nlcols)); 5545 PetscCall(PetscCalloc1(2 * nrows, &loffsets)); 5546 /* 'r' means root, and 'l' means leaf */ 5547 PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE)); 5548 PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE)); 5549 PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE)); 5550 PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE)); 5551 PetscCall(PetscSFDestroy(&sf)); 5552 PetscCall(PetscFree(roffsets)); 5553 PetscCall(PetscFree(nrcols)); 5554 dntotalcols = 0; 5555 ontotalcols = 0; 5556 ncol = 0; 5557 for (i = 0; i < nrows; i++) { 5558 pnnz[i] = nlcols[i * 2 + 0] + nlcols[i * 2 + 1]; 5559 ncol = PetscMax(pnnz[i], ncol); 5560 /* diagonal */ 5561 dntotalcols += nlcols[i * 2 + 0]; 5562 /* off-diagonal */ 5563 ontotalcols += nlcols[i * 2 + 1]; 5564 } 5565 /* We do not need to figure the right number of columns 5566 * since all the calculations will be done by going through the raw data 5567 * */ 5568 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, nrows, ncol, 0, pnnz, P_oth)); 5569 PetscCall(MatSetUp(*P_oth)); 5570 
PetscCall(PetscFree(pnnz)); 5571 p_oth = (Mat_SeqAIJ *)(*P_oth)->data; 5572 /* diagonal */ 5573 PetscCall(PetscCalloc1(dntotalcols, &iremote)); 5574 /* off-diagonal */ 5575 PetscCall(PetscCalloc1(ontotalcols, &oiremote)); 5576 /* diagonal */ 5577 PetscCall(PetscCalloc1(dntotalcols, &ilocal)); 5578 /* off-diagonal */ 5579 PetscCall(PetscCalloc1(ontotalcols, &oilocal)); 5580 dntotalcols = 0; 5581 ontotalcols = 0; 5582 ntotalcols = 0; 5583 for (i = 0; i < nrows; i++) { 5584 owner = 0; 5585 PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, NULL)); 5586 /* Set iremote for diag matrix */ 5587 for (j = 0; j < nlcols[i * 2 + 0]; j++) { 5588 iremote[dntotalcols].index = loffsets[i * 2 + 0] + j; 5589 iremote[dntotalcols].rank = owner; 5590 /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */ 5591 ilocal[dntotalcols++] = ntotalcols++; 5592 } 5593 /* off-diagonal */ 5594 for (j = 0; j < nlcols[i * 2 + 1]; j++) { 5595 oiremote[ontotalcols].index = loffsets[i * 2 + 1] + j; 5596 oiremote[ontotalcols].rank = owner; 5597 oilocal[ontotalcols++] = ntotalcols++; 5598 } 5599 } 5600 PetscCall(ISRestoreIndices(rows, &lrowindices)); 5601 PetscCall(PetscFree(loffsets)); 5602 PetscCall(PetscFree(nlcols)); 5603 PetscCall(PetscSFCreate(comm, &sf)); 5604 /* P serves as roots and P_oth is leaves 5605 * Diag matrix 5606 * */ 5607 PetscCall(PetscSFSetGraph(sf, pd->i[plocalsize], dntotalcols, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 5608 PetscCall(PetscSFSetFromOptions(sf)); 5609 PetscCall(PetscSFSetUp(sf)); 5610 5611 PetscCall(PetscSFCreate(comm, &osf)); 5612 /* off-diagonal */ 5613 PetscCall(PetscSFSetGraph(osf, po->i[plocalsize], ontotalcols, oilocal, PETSC_OWN_POINTER, oiremote, PETSC_OWN_POINTER)); 5614 PetscCall(PetscSFSetFromOptions(osf)); 5615 PetscCall(PetscSFSetUp(osf)); 5616 PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a)); 5617 PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a)); 5618 /* operate on the matrix internal data to save memory */ 5619 PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5620 PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5621 PetscCall(MatGetOwnershipRangeColumn(P, &pcstart, NULL)); 5622 /* Convert to global indices for diag matrix */ 5623 for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] += pcstart; 5624 PetscCall(PetscSFBcastBegin(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE)); 5625 /* We want P_oth store global indices */ 5626 PetscCall(ISLocalToGlobalMappingCreate(comm, 1, p->B->cmap->n, p->garray, PETSC_COPY_VALUES, &mapping)); 5627 /* Use memory scalable approach */ 5628 PetscCall(ISLocalToGlobalMappingSetType(mapping, ISLOCALTOGLOBALMAPPINGHASH)); 5629 PetscCall(ISLocalToGlobalMappingApply(mapping, po->i[plocalsize], po->j, po->j)); 5630 PetscCall(PetscSFBcastBegin(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE)); 5631 PetscCall(PetscSFBcastEnd(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE)); 5632 /* Convert back to local indices */ 5633 for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] -= pcstart; 5634 PetscCall(PetscSFBcastEnd(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE)); 5635 nout = 0; 5636 PetscCall(ISGlobalToLocalMappingApply(mapping, IS_GTOLM_DROP, po->i[plocalsize], po->j, &nout, po->j)); 5637 PetscCheck(nout == po->i[plocalsize], comm, PETSC_ERR_ARG_INCOMP, "n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ", po->i[plocalsize], nout); 5638 PetscCall(ISLocalToGlobalMappingDestroy(&mapping)); 5639 /* Exchange values */ 5640 PetscCall(PetscSFBcastEnd(sf, 
MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5641 PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5642 PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a)); 5643 PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a)); 5644 /* Stop PETSc from shrinking memory */ 5645 for (i = 0; i < nrows; i++) p_oth->ilen[i] = p_oth->imax[i]; 5646 PetscCall(MatAssemblyBegin(*P_oth, MAT_FINAL_ASSEMBLY)); 5647 PetscCall(MatAssemblyEnd(*P_oth, MAT_FINAL_ASSEMBLY)); 5648 /* Attach PetscSF objects to P_oth so that we can reuse it later */ 5649 PetscCall(PetscObjectCompose((PetscObject)*P_oth, "diagsf", (PetscObject)sf)); 5650 PetscCall(PetscObjectCompose((PetscObject)*P_oth, "offdiagsf", (PetscObject)osf)); 5651 PetscCall(PetscSFDestroy(&sf)); 5652 PetscCall(PetscSFDestroy(&osf)); 5653 PetscFunctionReturn(PETSC_SUCCESS); 5654 } 5655 5656 /* 5657 * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5658 * This supports MPIAIJ and MAIJ 5659 * */ 5660 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A, Mat P, PetscInt dof, MatReuse reuse, Mat *P_oth) 5661 { 5662 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *p = (Mat_MPIAIJ *)P->data; 5663 Mat_SeqAIJ *p_oth; 5664 IS rows, map; 5665 PetscHMapI hamp; 5666 PetscInt i, htsize, *rowindices, off, *mapping, key, count; 5667 MPI_Comm comm; 5668 PetscSF sf, osf; 5669 PetscBool has; 5670 5671 PetscFunctionBegin; 5672 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 5673 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, P, 0, 0)); 5674 /* If it is the first time, create an index set of off-diag nonzero columns of A, 5675 * and then create a submatrix (that often is an overlapping matrix) 5676 * */ 5677 if (reuse == MAT_INITIAL_MATRIX) { 5678 /* Use a hash table to figure out unique keys */ 5679 PetscCall(PetscHMapICreateWithSize(a->B->cmap->n, &hamp)); 5680 PetscCall(PetscCalloc1(a->B->cmap->n, &mapping)); 5681 count = 0; 5682 /* Assume that a->g is sorted, otherwise the following does not make sense */ 5683 for (i = 0; i < a->B->cmap->n; i++) { 5684 key = a->garray[i] / dof; 5685 PetscCall(PetscHMapIHas(hamp, key, &has)); 5686 if (!has) { 5687 mapping[i] = count; 5688 PetscCall(PetscHMapISet(hamp, key, count++)); 5689 } else { 5690 /* Current 'i' has the same value the previous step */ 5691 mapping[i] = count - 1; 5692 } 5693 } 5694 PetscCall(ISCreateGeneral(comm, a->B->cmap->n, mapping, PETSC_OWN_POINTER, &map)); 5695 PetscCall(PetscHMapIGetSize(hamp, &htsize)); 5696 PetscCheck(htsize == count, comm, PETSC_ERR_ARG_INCOMP, " Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT, htsize, count); 5697 PetscCall(PetscCalloc1(htsize, &rowindices)); 5698 off = 0; 5699 PetscCall(PetscHMapIGetKeys(hamp, &off, rowindices)); 5700 PetscCall(PetscHMapIDestroy(&hamp)); 5701 PetscCall(PetscSortInt(htsize, rowindices)); 5702 PetscCall(ISCreateGeneral(comm, htsize, rowindices, PETSC_OWN_POINTER, &rows)); 5703 /* In case, the matrix was already created but users want to recreate the matrix */ 5704 PetscCall(MatDestroy(P_oth)); 5705 PetscCall(MatCreateSeqSubMatrixWithRows_Private(P, rows, P_oth)); 5706 PetscCall(PetscObjectCompose((PetscObject)*P_oth, "aoffdiagtopothmapping", (PetscObject)map)); 5707 PetscCall(ISDestroy(&map)); 5708 PetscCall(ISDestroy(&rows)); 5709 } else if (reuse == MAT_REUSE_MATRIX) { 5710 /* If matrix was already created, we simply update values using SF objects 5711 * that as attached to the matrix earlier. 
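 * The star forests are recovered from P_oth with PetscObjectQuery() under the names "diagsf" and "offdiagsf"
 * and are used to broadcast the current values of P's diagonal and off-diagonal blocks into P_oth.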
5712 */ 5713 const PetscScalar *pd_a, *po_a; 5714 5715 PetscCall(PetscObjectQuery((PetscObject)*P_oth, "diagsf", (PetscObject *)&sf)); 5716 PetscCall(PetscObjectQuery((PetscObject)*P_oth, "offdiagsf", (PetscObject *)&osf)); 5717 PetscCheck(sf && osf, comm, PETSC_ERR_ARG_NULL, "Matrix is not initialized yet"); 5718 p_oth = (Mat_SeqAIJ *)(*P_oth)->data; 5719 /* Update values in place */ 5720 PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a)); 5721 PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a)); 5722 PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5723 PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5724 PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5725 PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5726 PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a)); 5727 PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a)); 5728 } else SETERRQ(comm, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown reuse type"); 5729 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, P, 0, 0)); 5730 PetscFunctionReturn(PETSC_SUCCESS); 5731 } 5732 5733 /*@C 5734 MatGetBrowsOfAcols - Returns `IS` that contain rows of `B` that equal to nonzero columns of local `A` 5735 5736 Collective 5737 5738 Input Parameters: 5739 + A - the first matrix in `MATMPIAIJ` format 5740 . B - the second matrix in `MATMPIAIJ` format 5741 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5742 5743 Output Parameters: 5744 + rowb - On input index sets of rows of B to extract (or `NULL`), modified on output 5745 . colb - On input index sets of columns of B to extract (or `NULL`), modified on output 5746 - B_seq - the sequential matrix generated 5747 5748 Level: developer 5749 5750 .seealso: `Mat`, `MATMPIAIJ`, `IS`, `MatReuse` 5751 @*/ 5752 PetscErrorCode MatGetBrowsOfAcols(Mat A, Mat B, MatReuse scall, IS *rowb, IS *colb, Mat *B_seq) 5753 { 5754 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5755 PetscInt *idx, i, start, ncols, nzA, nzB, *cmap, imark; 5756 IS isrowb, iscolb; 5757 Mat *bseq = NULL; 5758 5759 PetscFunctionBegin; 5760 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 5761 A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 5762 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols, A, B, 0, 0)); 5763 5764 if (scall == MAT_INITIAL_MATRIX) { 5765 start = A->cmap->rstart; 5766 cmap = a->garray; 5767 nzA = a->A->cmap->n; 5768 nzB = a->B->cmap->n; 5769 PetscCall(PetscMalloc1(nzA + nzB, &idx)); 5770 ncols = 0; 5771 for (i = 0; i < nzB; i++) { /* row < local row index */ 5772 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5773 else break; 5774 } 5775 imark = i; 5776 for (i = 0; i < nzA; i++) idx[ncols++] = start + i; /* local rows */ 5777 for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5778 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &isrowb)); 5779 PetscCall(ISCreateStride(PETSC_COMM_SELF, B->cmap->N, 0, 1, &iscolb)); 5780 } else { 5781 PetscCheck(rowb && colb, PETSC_COMM_SELF, PETSC_ERR_SUP, "IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5782 isrowb = *rowb; 5783 iscolb = *colb; 5784 PetscCall(PetscMalloc1(1, &bseq)); 5785 bseq[0] = *B_seq; 5786 } 5787 PetscCall(MatCreateSubMatrices(B, 1, &isrowb, &iscolb, scall, &bseq)); 5788 *B_seq = bseq[0]; 5789 PetscCall(PetscFree(bseq)); 
5790 if (!rowb) { 5791 PetscCall(ISDestroy(&isrowb)); 5792 } else { 5793 *rowb = isrowb; 5794 } 5795 if (!colb) { 5796 PetscCall(ISDestroy(&iscolb)); 5797 } else { 5798 *colb = iscolb; 5799 } 5800 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols, A, B, 0, 0)); 5801 PetscFunctionReturn(PETSC_SUCCESS); 5802 } 5803 5804 /* 5805 MatGetBrowsOfAoCols_MPIAIJ - Creates a `MATSEQAIJ` matrix by taking rows of B that equal to nonzero columns 5806 of the OFF-DIAGONAL portion of local A 5807 5808 Collective 5809 5810 Input Parameters: 5811 + A,B - the matrices in `MATMPIAIJ` format 5812 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5813 5814 Output Parameter: 5815 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5816 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5817 . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5818 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5819 5820 Developer Note: 5821 This directly accesses information inside the VecScatter associated with the matrix-vector product 5822 for this matrix. This is not desirable.. 5823 5824 Level: developer 5825 5826 */ 5827 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A, Mat B, MatReuse scall, PetscInt **startsj_s, PetscInt **startsj_r, MatScalar **bufa_ptr, Mat *B_oth) 5828 { 5829 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5830 Mat_SeqAIJ *b_oth; 5831 VecScatter ctx; 5832 MPI_Comm comm; 5833 const PetscMPIInt *rprocs, *sprocs; 5834 const PetscInt *srow, *rstarts, *sstarts; 5835 PetscInt *rowlen, *bufj, *bufJ, ncols = 0, aBn = a->B->cmap->n, row, *b_othi, *b_othj, *rvalues = NULL, *svalues = NULL, *cols, sbs, rbs; 5836 PetscInt i, j, k = 0, l, ll, nrecvs, nsends, nrows, *rstartsj = NULL, *sstartsj, len; 5837 PetscScalar *b_otha, *bufa, *bufA, *vals = NULL; 5838 MPI_Request *reqs = NULL, *rwaits = NULL, *swaits = NULL; 5839 PetscMPIInt size, tag, rank, nreqs; 5840 5841 PetscFunctionBegin; 5842 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 5843 PetscCallMPI(MPI_Comm_size(comm, &size)); 5844 5845 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 5846 A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 5847 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, B, 0, 0)); 5848 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 5849 5850 if (size == 1) { 5851 startsj_s = NULL; 5852 bufa_ptr = NULL; 5853 *B_oth = NULL; 5854 PetscFunctionReturn(PETSC_SUCCESS); 5855 } 5856 5857 ctx = a->Mvctx; 5858 tag = ((PetscObject)ctx)->tag; 5859 5860 PetscCall(VecScatterGetRemote_Private(ctx, PETSC_TRUE /*send*/, &nsends, &sstarts, &srow, &sprocs, &sbs)); 5861 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5862 PetscCall(VecScatterGetRemoteOrdered_Private(ctx, PETSC_FALSE /*recv*/, &nrecvs, &rstarts, NULL /*indices not needed*/, &rprocs, &rbs)); 5863 PetscCall(PetscMPIIntCast(nsends + nrecvs, &nreqs)); 5864 PetscCall(PetscMalloc1(nreqs, &reqs)); 5865 rwaits = reqs; 5866 swaits = PetscSafePointerPlusOffset(reqs, nrecvs); 5867 5868 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5869 if (scall == MAT_INITIAL_MATRIX) { 5870 /* i-array */ 5871 /* post receives */ 5872 if (nrecvs) PetscCall(PetscMalloc1(rbs * 
(rstarts[nrecvs] - rstarts[0]), &rvalues)); /* rstarts can be NULL when nrecvs=0 */ 5873 for (i = 0; i < nrecvs; i++) { 5874 rowlen = rvalues + rstarts[i] * rbs; 5875 nrows = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of indices to be received */ 5876 PetscCallMPI(MPI_Irecv(rowlen, nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i)); 5877 } 5878 5879 /* pack the outgoing message */ 5880 PetscCall(PetscMalloc2(nsends + 1, &sstartsj, nrecvs + 1, &rstartsj)); 5881 5882 sstartsj[0] = 0; 5883 rstartsj[0] = 0; 5884 len = 0; /* total length of j or a array to be sent */ 5885 if (nsends) { 5886 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5887 PetscCall(PetscMalloc1(sbs * (sstarts[nsends] - sstarts[0]), &svalues)); 5888 } 5889 for (i = 0; i < nsends; i++) { 5890 rowlen = svalues + (sstarts[i] - sstarts[0]) * sbs; 5891 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5892 for (j = 0; j < nrows; j++) { 5893 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5894 for (l = 0; l < sbs; l++) { 5895 PetscCall(MatGetRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); /* rowlength */ 5896 5897 rowlen[j * sbs + l] = ncols; 5898 5899 len += ncols; 5900 PetscCall(MatRestoreRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); 5901 } 5902 k++; 5903 } 5904 PetscCallMPI(MPI_Isend(rowlen, nrows * sbs, MPIU_INT, sprocs[i], tag, comm, swaits + i)); 5905 5906 sstartsj[i + 1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5907 } 5908 /* recvs and sends of i-array are completed */ 5909 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5910 PetscCall(PetscFree(svalues)); 5911 5912 /* allocate buffers for sending j and a arrays */ 5913 PetscCall(PetscMalloc1(len + 1, &bufj)); 5914 PetscCall(PetscMalloc1(len + 1, &bufa)); 5915 5916 /* create i-array of B_oth */ 5917 PetscCall(PetscMalloc1(aBn + 2, &b_othi)); 5918 5919 b_othi[0] = 0; 5920 len = 0; /* total length of j or a array to be received */ 5921 k = 0; 5922 for (i = 0; i < nrecvs; i++) { 5923 rowlen = rvalues + (rstarts[i] - rstarts[0]) * rbs; 5924 nrows = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of rows to be received */ 5925 for (j = 0; j < nrows; j++) { 5926 b_othi[k + 1] = b_othi[k] + rowlen[j]; 5927 PetscCall(PetscIntSumError(rowlen[j], len, &len)); 5928 k++; 5929 } 5930 rstartsj[i + 1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5931 } 5932 PetscCall(PetscFree(rvalues)); 5933 5934 /* allocate space for j and a arrays of B_oth */ 5935 PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_othj)); 5936 PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_otha)); 5937 5938 /* j-array */ 5939 /* post receives of j-array */ 5940 for (i = 0; i < nrecvs; i++) { 5941 nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */ 5942 PetscCallMPI(MPI_Irecv(b_othj + rstartsj[i], nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i)); 5943 } 5944 5945 /* pack the outgoing message j-array */ 5946 if (nsends) k = sstarts[0]; 5947 for (i = 0; i < nsends; i++) { 5948 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5949 bufJ = bufj + sstartsj[i]; 5950 for (j = 0; j < nrows; j++) { 5951 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5952 for (ll = 0; ll < sbs; ll++) { 5953 PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL)); 5954 for (l = 0; l < ncols; l++) *bufJ++ = cols[l]; 5955 PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL)); 5956 } 5957 } 5958 PetscCallMPI(MPI_Isend(bufj + sstartsj[i], sstartsj[i + 1] - sstartsj[i], 
MPIU_INT, sprocs[i], tag, comm, swaits + i)); 5959 } 5960 5961 /* recvs and sends of j-array are completed */ 5962 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5963 } else if (scall == MAT_REUSE_MATRIX) { 5964 sstartsj = *startsj_s; 5965 rstartsj = *startsj_r; 5966 bufa = *bufa_ptr; 5967 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 5968 PetscCall(MatSeqAIJGetArrayWrite(*B_oth, &b_otha)); 5969 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container"); 5970 5971 /* a-array */ 5972 /* post receives of a-array */ 5973 for (i = 0; i < nrecvs; i++) { 5974 nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */ 5975 PetscCallMPI(MPI_Irecv(b_otha + rstartsj[i], nrows, MPIU_SCALAR, rprocs[i], tag, comm, rwaits + i)); 5976 } 5977 5978 /* pack the outgoing message a-array */ 5979 if (nsends) k = sstarts[0]; 5980 for (i = 0; i < nsends; i++) { 5981 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5982 bufA = bufa + sstartsj[i]; 5983 for (j = 0; j < nrows; j++) { 5984 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5985 for (ll = 0; ll < sbs; ll++) { 5986 PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals)); 5987 for (l = 0; l < ncols; l++) *bufA++ = vals[l]; 5988 PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals)); 5989 } 5990 } 5991 PetscCallMPI(MPI_Isend(bufa + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_SCALAR, sprocs[i], tag, comm, swaits + i)); 5992 } 5993 /* recvs and sends of a-array are completed */ 5994 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5995 PetscCall(PetscFree(reqs)); 5996 5997 if (scall == MAT_INITIAL_MATRIX) { 5998 /* put together the new matrix */ 5999 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, aBn, B->cmap->N, b_othi, b_othj, b_otha, B_oth)); 6000 6001 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 6002 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 6003 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 6004 b_oth->free_a = PETSC_TRUE; 6005 b_oth->free_ij = PETSC_TRUE; 6006 b_oth->nonew = 0; 6007 6008 PetscCall(PetscFree(bufj)); 6009 if (!startsj_s || !bufa_ptr) { 6010 PetscCall(PetscFree2(sstartsj, rstartsj)); 6011 PetscCall(PetscFree(bufa)); 6012 } else { 6013 *startsj_s = sstartsj; 6014 *startsj_r = rstartsj; 6015 *bufa_ptr = bufa; 6016 } 6017 } else if (scall == MAT_REUSE_MATRIX) { 6018 PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth, &b_otha)); 6019 } 6020 6021 PetscCall(VecScatterRestoreRemote_Private(ctx, PETSC_TRUE, &nsends, &sstarts, &srow, &sprocs, &sbs)); 6022 PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx, PETSC_FALSE, &nrecvs, &rstarts, NULL, &rprocs, &rbs)); 6023 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, B, 0, 0)); 6024 PetscFunctionReturn(PETSC_SUCCESS); 6025 } 6026 6027 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat, MatType, MatReuse, Mat *); 6028 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat, MatType, MatReuse, Mat *); 6029 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat, MatType, MatReuse, Mat *); 6030 #if defined(PETSC_HAVE_MKL_SPARSE) 6031 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat, MatType, MatReuse, Mat *); 6032 #endif 6033 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat, MatType, MatReuse, Mat *); 6034 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *); 6035 #if defined(PETSC_HAVE_ELEMENTAL) 6036 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat, MatType, MatReuse, Mat *); 6037 #endif 6038 #if defined(PETSC_HAVE_SCALAPACK) 6039 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *); 6040 #endif 6041 #if defined(PETSC_HAVE_HYPRE) 6042 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *); 6043 #endif 6044 #if defined(PETSC_HAVE_CUDA) 6045 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat, MatType, MatReuse, Mat *); 6046 #endif 6047 #if defined(PETSC_HAVE_HIP) 6048 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJHIPSPARSE(Mat, MatType, MatReuse, Mat *); 6049 #endif 6050 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 6051 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat, MatType, MatReuse, Mat *); 6052 #endif 6053 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat, MatType, MatReuse, Mat *); 6054 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *); 6055 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat); 6056 6057 /* 6058 Computes (B'*A')' since computing B*A directly is untenable 6059 6060 n p p 6061 [ ] [ ] [ ] 6062 m [ A ] * n [ B ] = m [ C ] 6063 [ ] [ ] [ ] 6064 6065 */ 6066 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A, Mat B, Mat C) 6067 { 6068 Mat At, Bt, Ct; 6069 6070 PetscFunctionBegin; 6071 PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &At)); 6072 PetscCall(MatTranspose(B, MAT_INITIAL_MATRIX, &Bt)); 6073 PetscCall(MatMatMult(Bt, At, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &Ct)); 6074 PetscCall(MatDestroy(&At)); 6075 PetscCall(MatDestroy(&Bt)); 6076 PetscCall(MatTransposeSetPrecursor(Ct, C)); 6077 PetscCall(MatTranspose(Ct, MAT_REUSE_MATRIX, &C)); 6078 PetscCall(MatDestroy(&Ct)); 6079 PetscFunctionReturn(PETSC_SUCCESS); 6080 } 6081 6082 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A, Mat B, PetscReal fill, Mat C) 6083 { 6084 PetscBool cisdense; 6085 6086 PetscFunctionBegin; 6087 PetscCheck(A->cmap->n == B->rmap->n, PETSC_COMM_SELF,
PETSC_ERR_ARG_SIZ, "A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT, A->cmap->n, B->rmap->n); 6088 PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N)); 6089 PetscCall(MatSetBlockSizesFromMats(C, A, B)); 6090 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, MATMPIDENSEHIP, "")); 6091 if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 6092 PetscCall(MatSetUp(C)); 6093 6094 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 6095 PetscFunctionReturn(PETSC_SUCCESS); 6096 } 6097 6098 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C) 6099 { 6100 Mat_Product *product = C->product; 6101 Mat A = product->A, B = product->B; 6102 6103 PetscFunctionBegin; 6104 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 6105 A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 6106 C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ; 6107 C->ops->productsymbolic = MatProductSymbolic_AB; 6108 PetscFunctionReturn(PETSC_SUCCESS); 6109 } 6110 6111 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C) 6112 { 6113 Mat_Product *product = C->product; 6114 6115 PetscFunctionBegin; 6116 if (product->type == MATPRODUCT_AB) PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C)); 6117 PetscFunctionReturn(PETSC_SUCCESS); 6118 } 6119 6120 /* 6121 Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix 6122 6123 Input Parameters: 6124 6125 j1,rowBegin1,rowEnd1,jmap1: describe the first set of nonzeros (Set1) 6126 j2,rowBegin2,rowEnd2,jmap2: describe the second set of nonzeros (Set2) 6127 6128 mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat 6129 6130 For Set1, j1[] contains column indices of the nonzeros. 6131 For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k 6132 respectively (note rowEnd1[k] is not necessarily equal to rwoBegin1[k+1]). Indices in this range of j1[] are sorted, 6133 but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1. 6134 6135 Similar for Set2. 6136 6137 This routine merges the two sets of nonzeros row by row and removes repeats. 6138 6139 Output Parameters: (memory is allocated by the caller) 6140 6141 i[],j[]: the CSR of the merged matrix, which has m rows. 6142 imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix. 6143 imap2[]: similar to imap1[], but for Set2. 6144 Note we order nonzeros row-by-row and from left to right. 
6145 */ 6146 static PetscErrorCode MatMergeEntries_Internal(Mat mat, const PetscInt j1[], const PetscInt j2[], const PetscCount rowBegin1[], const PetscCount rowEnd1[], const PetscCount rowBegin2[], const PetscCount rowEnd2[], const PetscCount jmap1[], const PetscCount jmap2[], PetscCount imap1[], PetscCount imap2[], PetscInt i[], PetscInt j[]) 6147 { 6148 PetscInt r, m; /* Row index of mat */ 6149 PetscCount t, t1, t2, b1, e1, b2, e2; 6150 6151 PetscFunctionBegin; 6152 PetscCall(MatGetLocalSize(mat, &m, NULL)); 6153 t1 = t2 = t = 0; /* Count unique nonzeros in Set1, Set2 and the merged matrix, respectively */ 6154 i[0] = 0; 6155 for (r = 0; r < m; r++) { /* Do row by row merging */ 6156 b1 = rowBegin1[r]; 6157 e1 = rowEnd1[r]; 6158 b2 = rowBegin2[r]; 6159 e2 = rowEnd2[r]; 6160 while (b1 < e1 && b2 < e2) { 6161 if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */ 6162 j[t] = j1[b1]; 6163 imap1[t1] = t; 6164 imap2[t2] = t; 6165 b1 += jmap1[t1 + 1] - jmap1[t1]; /* Jump to next unique local nonzero */ 6166 b2 += jmap2[t2 + 1] - jmap2[t2]; /* Jump to next unique remote nonzero */ 6167 t1++; 6168 t2++; 6169 t++; 6170 } else if (j1[b1] < j2[b2]) { 6171 j[t] = j1[b1]; 6172 imap1[t1] = t; 6173 b1 += jmap1[t1 + 1] - jmap1[t1]; 6174 t1++; 6175 t++; 6176 } else { 6177 j[t] = j2[b2]; 6178 imap2[t2] = t; 6179 b2 += jmap2[t2 + 1] - jmap2[t2]; 6180 t2++; 6181 t++; 6182 } 6183 } 6184 /* Merge the remaining in either j1[] or j2[] */ 6185 while (b1 < e1) { 6186 j[t] = j1[b1]; 6187 imap1[t1] = t; 6188 b1 += jmap1[t1 + 1] - jmap1[t1]; 6189 t1++; 6190 t++; 6191 } 6192 while (b2 < e2) { 6193 j[t] = j2[b2]; 6194 imap2[t2] = t; 6195 b2 += jmap2[t2 + 1] - jmap2[t2]; 6196 t2++; 6197 t++; 6198 } 6199 i[r + 1] = t; 6200 } 6201 PetscFunctionReturn(PETSC_SUCCESS); 6202 } 6203 6204 /* 6205 Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block 6206 6207 Input Parameters: 6208 mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m. 6209 n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[] 6210 respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n. 6211 6212 i[] is already sorted, but within a row, j[] is not sorted and might have repeats. 6213 i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting. 6214 6215 Output Parameters: 6216 j[],perm[]: the routine needs to sort j[] within each row along with perm[]. 6217 rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller. 6218 They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block, 6219 and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block. 6220 6221 Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine. 6222 Atot: number of entries belonging to the diagonal block. 6223 Annz: number of unique nonzeros belonging to the diagonal block. 6224 Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count 6225 repeats (i.e., same 'i,j' pair). 6226 Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block.
More precisely, Ajmap[t+1] - Ajmap[t] 6227 is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0. 6228 6229 Atot: number of entries belonging to the diagonal block 6230 Annz: number of unique nonzeros belonging to the diagonal block. 6231 6232 Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block. 6233 6234 Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1(). 6235 */ 6236 static PetscErrorCode MatSplitEntries_Internal(Mat mat, PetscCount n, const PetscInt i[], PetscInt j[], PetscCount perm[], PetscCount rowBegin[], PetscCount rowMid[], PetscCount rowEnd[], PetscCount *Atot_, PetscCount **Aperm_, PetscCount *Annz_, PetscCount **Ajmap_, PetscCount *Btot_, PetscCount **Bperm_, PetscCount *Bnnz_, PetscCount **Bjmap_) 6237 { 6238 PetscInt cstart, cend, rstart, rend, row, col; 6239 PetscCount Atot = 0, Btot = 0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */ 6240 PetscCount Annz = 0, Bnnz = 0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */ 6241 PetscCount k, m, p, q, r, s, mid; 6242 PetscCount *Aperm, *Bperm, *Ajmap, *Bjmap; 6243 6244 PetscFunctionBegin; 6245 PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend)); 6246 PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend)); 6247 m = rend - rstart; 6248 6249 /* Skip negative rows */ 6250 for (k = 0; k < n; k++) 6251 if (i[k] >= 0) break; 6252 6253 /* Process [k,n): sort and partition each local row into diag and offdiag portions, 6254 fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz. 6255 */ 6256 while (k < n) { 6257 row = i[k]; 6258 /* Entries in [k,s) are in one row. Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */ 6259 for (s = k; s < n; s++) 6260 if (i[s] != row) break; 6261 6262 /* Shift diag columns to range of [-PETSC_MAX_INT, -1] */ 6263 for (p = k; p < s; p++) { 6264 if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_MAX_INT; 6265 else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index %" PetscInt_FMT " is out of range", j[p]); 6266 } 6267 PetscCall(PetscSortIntWithCountArray(s - k, j + k, perm + k)); 6268 PetscCall(PetscSortedIntUpperBound(j, k, s, -1, &mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */ 6269 rowBegin[row - rstart] = k; 6270 rowMid[row - rstart] = mid; 6271 rowEnd[row - rstart] = s; 6272 6273 /* Count nonzeros of this diag/offdiag row, which might have repeats */ 6274 Atot += mid - k; 6275 Btot += s - mid; 6276 6277 /* Count unique nonzeros of this diag row */ 6278 for (p = k; p < mid;) { 6279 col = j[p]; 6280 do { 6281 j[p] += PETSC_MAX_INT; /* Revert the modified diagonal indices */ 6282 p++; 6283 } while (p < mid && j[p] == col); 6284 Annz++; 6285 } 6286 6287 /* Count unique nonzeros of this offdiag row */ 6288 for (p = mid; p < s;) { 6289 col = j[p]; 6290 do { 6291 p++; 6292 } while (p < s && j[p] == col); 6293 Bnnz++; 6294 } 6295 k = s; 6296 } 6297 6298 /* Allocation according to Atot, Btot, Annz, Bnnz */ 6299 PetscCall(PetscMalloc1(Atot, &Aperm)); 6300 PetscCall(PetscMalloc1(Btot, &Bperm)); 6301 PetscCall(PetscMalloc1(Annz + 1, &Ajmap)); 6302 PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap)); 6303 6304 /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */ 6305 Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0; 6306 for (r = 0; r < m; r++) { 6307 k = rowBegin[r]; 6308 mid 
= rowMid[r]; 6309 s = rowEnd[r]; 6310 PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Aperm, Atot), PetscSafePointerPlusOffset(perm, k), mid - k)); 6311 PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Bperm, Btot), PetscSafePointerPlusOffset(perm, mid), s - mid)); 6312 Atot += mid - k; 6313 Btot += s - mid; 6314 6315 /* Scan column indices in this row and find out how many repeats each unique nonzero has */ 6316 for (p = k; p < mid;) { 6317 col = j[p]; 6318 q = p; 6319 do { 6320 p++; 6321 } while (p < mid && j[p] == col); 6322 Ajmap[Annz + 1] = Ajmap[Annz] + (p - q); 6323 Annz++; 6324 } 6325 6326 for (p = mid; p < s;) { 6327 col = j[p]; 6328 q = p; 6329 do { 6330 p++; 6331 } while (p < s && j[p] == col); 6332 Bjmap[Bnnz + 1] = Bjmap[Bnnz] + (p - q); 6333 Bnnz++; 6334 } 6335 } 6336 /* Output */ 6337 *Aperm_ = Aperm; 6338 *Annz_ = Annz; 6339 *Atot_ = Atot; 6340 *Ajmap_ = Ajmap; 6341 *Bperm_ = Bperm; 6342 *Bnnz_ = Bnnz; 6343 *Btot_ = Btot; 6344 *Bjmap_ = Bjmap; 6345 PetscFunctionReturn(PETSC_SUCCESS); 6346 } 6347 6348 /* 6349 Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix 6350 6351 Input Parameters: 6352 nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[] 6353 nnz: number of unique nonzeros in the merged matrix 6354 imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix 6355 jmap[nnz1+1]: i-th nonzero in the set has jmap[i+1] - jmap[i] repeats in the set 6356 6357 Output Parameter: (memory is allocated by the caller) 6358 jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set 6359 6360 Example: 6361 nnz1 = 4 6362 nnz = 6 6363 imap = [1,3,4,5] 6364 jmap = [0,3,5,6,7] 6365 then, 6366 jmap_new = [0,0,3,3,5,6,7] 6367 */ 6368 static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1, PetscCount nnz, const PetscCount imap[], const PetscCount jmap[], PetscCount jmap_new[]) 6369 { 6370 PetscCount k, p; 6371 6372 PetscFunctionBegin; 6373 jmap_new[0] = 0; 6374 p = nnz; /* p loops over jmap_new[] backwards */ 6375 for (k = nnz1 - 1; k >= 0; k--) { /* k loops over imap[] */ 6376 for (; p > imap[k]; p--) jmap_new[p] = jmap[k + 1]; 6377 } 6378 for (; p >= 0; p--) jmap_new[p] = jmap[0]; 6379 PetscFunctionReturn(PETSC_SUCCESS); 6380 } 6381 6382 static PetscErrorCode MatCOOStructDestroy_MPIAIJ(void *data) 6383 { 6384 MatCOOStruct_MPIAIJ *coo = (MatCOOStruct_MPIAIJ *)data; 6385 6386 PetscFunctionBegin; 6387 PetscCall(PetscSFDestroy(&coo->sf)); 6388 PetscCall(PetscFree(coo->Aperm1)); 6389 PetscCall(PetscFree(coo->Bperm1)); 6390 PetscCall(PetscFree(coo->Ajmap1)); 6391 PetscCall(PetscFree(coo->Bjmap1)); 6392 PetscCall(PetscFree(coo->Aimap2)); 6393 PetscCall(PetscFree(coo->Bimap2)); 6394 PetscCall(PetscFree(coo->Aperm2)); 6395 PetscCall(PetscFree(coo->Bperm2)); 6396 PetscCall(PetscFree(coo->Ajmap2)); 6397 PetscCall(PetscFree(coo->Bjmap2)); 6398 PetscCall(PetscFree(coo->Cperm1)); 6399 PetscCall(PetscFree2(coo->sendbuf, coo->recvbuf)); 6400 PetscCall(PetscFree(coo)); 6401 PetscFunctionReturn(PETSC_SUCCESS); 6402 } 6403 6404 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, PetscInt coo_i[], PetscInt coo_j[]) 6405 { 6406 MPI_Comm comm; 6407 PetscMPIInt rank, size; 6408 PetscInt m, n, M, N, rstart, rend, cstart, cend; /* Sizes, indices of row/col, therefore with type PetscInt */ 6409 PetscCount k, p, q, rem; /* Loop variables over coo arrays */ 6410 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)mat->data; 6411 PetscContainer container; 6412 MatCOOStruct_MPIAIJ 
*coo; 6413 6414 PetscFunctionBegin; 6415 PetscCall(PetscFree(mpiaij->garray)); 6416 PetscCall(VecDestroy(&mpiaij->lvec)); 6417 #if defined(PETSC_USE_CTABLE) 6418 PetscCall(PetscHMapIDestroy(&mpiaij->colmap)); 6419 #else 6420 PetscCall(PetscFree(mpiaij->colmap)); 6421 #endif 6422 PetscCall(VecScatterDestroy(&mpiaij->Mvctx)); 6423 mat->assembled = PETSC_FALSE; 6424 mat->was_assembled = PETSC_FALSE; 6425 6426 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 6427 PetscCallMPI(MPI_Comm_size(comm, &size)); 6428 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 6429 PetscCall(PetscLayoutSetUp(mat->rmap)); 6430 PetscCall(PetscLayoutSetUp(mat->cmap)); 6431 PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend)); 6432 PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend)); 6433 PetscCall(MatGetLocalSize(mat, &m, &n)); 6434 PetscCall(MatGetSize(mat, &M, &N)); 6435 6436 /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */ 6437 /* entries come first, then local rows, then remote rows. */ 6438 PetscCount n1 = coo_n, *perm1; 6439 PetscInt *i1 = coo_i, *j1 = coo_j; 6440 6441 PetscCall(PetscMalloc1(n1, &perm1)); 6442 for (k = 0; k < n1; k++) perm1[k] = k; 6443 6444 /* Manipulate indices so that entries with negative row or col indices will have smallest 6445 row indices, local entries will have greater but negative row indices, and remote entries 6446 will have positive row indices. 6447 */ 6448 for (k = 0; k < n1; k++) { 6449 if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_MIN_INT; /* e.g., -2^31, minimal to move them ahead */ 6450 else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_MAX_INT; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_MAX_INT, -1] */ 6451 else { 6452 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_USER_INPUT, "MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows"); 6453 if (mpiaij->donotstash) i1[k] = PETSC_MIN_INT; /* Ignore offproc entries as if they had negative indices */ 6454 } 6455 } 6456 6457 /* Sort by row; after that, [0,k) have ignored entries, [k,rem) have local rows and [rem,n1) have remote rows */ 6458 PetscCall(PetscSortIntWithIntCountArrayPair(n1, i1, j1, perm1)); 6459 6460 /* Advance k to the first entry we need to take care of */ 6461 for (k = 0; k < n1; k++) 6462 if (i1[k] > PETSC_MIN_INT) break; 6463 PetscInt i1start = k; 6464 6465 PetscCall(PetscSortedIntUpperBound(i1, k, n1, rend - 1 - PETSC_MAX_INT, &rem)); /* rem is upper bound of the last local row */ 6466 for (; k < rem; k++) i1[k] += PETSC_MAX_INT; /* Revert row indices of local rows*/ 6467 6468 /* Send remote rows to their owner */ 6469 /* Find which rows should be sent to which remote ranks*/ 6470 PetscInt nsend = 0; /* Number of MPI ranks to send data to */ 6471 PetscMPIInt *sendto; /* [nsend], storing remote ranks */ 6472 PetscInt *nentries; /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */ 6473 const PetscInt *ranges; 6474 PetscInt maxNsend = size >= 128 ? 
128 : size; /* Assume max 128 neighbors; realloc when needed */ 6475 6476 PetscCall(PetscLayoutGetRanges(mat->rmap, &ranges)); 6477 PetscCall(PetscMalloc2(maxNsend, &sendto, maxNsend, &nentries)); 6478 for (k = rem; k < n1;) { 6479 PetscMPIInt owner; 6480 PetscInt firstRow, lastRow; 6481 6482 /* Locate a row range */ 6483 firstRow = i1[k]; /* first row of this owner */ 6484 PetscCall(PetscLayoutFindOwner(mat->rmap, firstRow, &owner)); 6485 lastRow = ranges[owner + 1] - 1; /* last row of this owner */ 6486 6487 /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */ 6488 PetscCall(PetscSortedIntUpperBound(i1, k, n1, lastRow, &p)); 6489 6490 /* All entries in [k,p) belong to this remote owner */ 6491 if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */ 6492 PetscMPIInt *sendto2; 6493 PetscInt *nentries2; 6494 PetscInt maxNsend2 = (maxNsend <= size / 2) ? maxNsend * 2 : size; 6495 6496 PetscCall(PetscMalloc2(maxNsend2, &sendto2, maxNsend2, &nentries2)); 6497 PetscCall(PetscArraycpy(sendto2, sendto, maxNsend)); 6498 PetscCall(PetscArraycpy(nentries2, nentries, maxNsend)); 6499 PetscCall(PetscFree2(sendto, nentries)); 6500 sendto = sendto2; 6501 nentries = nentries2; 6502 maxNsend = maxNsend2; 6503 } 6504 sendto[nsend] = owner; 6505 nentries[nsend] = p - k; 6506 PetscCall(PetscCountCast(p - k, &nentries[nsend])); 6507 nsend++; 6508 k = p; 6509 } 6510 6511 /* Build 1st SF to know offsets on remote to send data */ 6512 PetscSF sf1; 6513 PetscInt nroots = 1, nroots2 = 0; 6514 PetscInt nleaves = nsend, nleaves2 = 0; 6515 PetscInt *offsets; 6516 PetscSFNode *iremote; 6517 6518 PetscCall(PetscSFCreate(comm, &sf1)); 6519 PetscCall(PetscMalloc1(nsend, &iremote)); 6520 PetscCall(PetscMalloc1(nsend, &offsets)); 6521 for (k = 0; k < nsend; k++) { 6522 iremote[k].rank = sendto[k]; 6523 iremote[k].index = 0; 6524 nleaves2 += nentries[k]; 6525 PetscCheck(nleaves2 >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF leaves is too large for PetscInt"); 6526 } 6527 PetscCall(PetscSFSetGraph(sf1, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 6528 PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1, MPIU_INT, PETSC_MEMTYPE_HOST, &nroots2 /*rootdata*/, PETSC_MEMTYPE_HOST, nentries /*leafdata*/, PETSC_MEMTYPE_HOST, offsets /*leafupdate*/, MPI_SUM)); 6529 PetscCall(PetscSFFetchAndOpEnd(sf1, MPIU_INT, &nroots2, nentries, offsets, MPI_SUM)); /* Should nroots2 overflow, we check offsets[] below */ 6530 PetscCall(PetscSFDestroy(&sf1)); 6531 PetscAssert(nleaves2 == n1 - rem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT, nleaves2, n1 - rem); 6532 6533 /* Build 2nd SF to send remote COOs to their owner */ 6534 PetscSF sf2; 6535 nroots = nroots2; 6536 nleaves = nleaves2; 6537 PetscCall(PetscSFCreate(comm, &sf2)); 6538 PetscCall(PetscSFSetFromOptions(sf2)); 6539 PetscCall(PetscMalloc1(nleaves, &iremote)); 6540 p = 0; 6541 for (k = 0; k < nsend; k++) { 6542 PetscCheck(offsets[k] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF roots is too large for PetscInt"); 6543 for (q = 0; q < nentries[k]; q++, p++) { 6544 iremote[p].rank = sendto[k]; 6545 iremote[p].index = offsets[k] + q; 6546 } 6547 } 6548 PetscCall(PetscSFSetGraph(sf2, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 6549 6550 /* Send the remote COOs to their owner */ 6551 PetscInt n2 = nroots, *i2, *j2; /* Buffers for received COOs from other ranks, along with a permutation array */ 6552
PetscCount *perm2; /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */ 6553 PetscCall(PetscMalloc3(n2, &i2, n2, &j2, n2, &perm2)); 6554 PetscAssert(rem == 0 || i1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null"); 6555 PetscAssert(rem == 0 || j1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null"); 6556 PetscInt *i1prem = i1 ? i1 + rem : NULL; /* silence ubsan warnings about pointer arithmetic on null pointer */ 6557 PetscInt *j1prem = j1 ? j1 + rem : NULL; 6558 PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, i1prem, PETSC_MEMTYPE_HOST, i2, MPI_REPLACE)); 6559 PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, i1prem, i2, MPI_REPLACE)); 6560 PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, j1prem, PETSC_MEMTYPE_HOST, j2, MPI_REPLACE)); 6561 PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, j1prem, j2, MPI_REPLACE)); 6562 6563 PetscCall(PetscFree(offsets)); 6564 PetscCall(PetscFree2(sendto, nentries)); 6565 6566 /* Sort received COOs by row along with the permutation array */ 6567 for (k = 0; k < n2; k++) perm2[k] = k; 6568 PetscCall(PetscSortIntWithIntCountArrayPair(n2, i2, j2, perm2)); 6569 6570 /* sf2 only sends contiguous leafdata to contiguous rootdata. We record the permutation which will be used to fill leafdata */ 6571 PetscCount *Cperm1; 6572 PetscAssert(rem == 0 || perm1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null"); 6573 PetscCount *perm1prem = perm1 ? perm1 + rem : NULL; 6574 PetscCall(PetscMalloc1(nleaves, &Cperm1)); 6575 PetscCall(PetscArraycpy(Cperm1, perm1prem, nleaves)); 6576 6577 /* Support for HYPRE matrices, kind of a hack. 6578 Swap min column with diagonal so that diagonal values will go first */ 6579 PetscBool hypre; 6580 const char *name; 6581 PetscCall(PetscObjectGetName((PetscObject)mat, &name)); 6582 PetscCall(PetscStrcmp("_internal_COO_mat_for_hypre", name, &hypre)); 6583 if (hypre) { 6584 PetscInt *minj; 6585 PetscBT hasdiag; 6586 6587 PetscCall(PetscBTCreate(m, &hasdiag)); 6588 PetscCall(PetscMalloc1(m, &minj)); 6589 for (k = 0; k < m; k++) minj[k] = PETSC_MAX_INT; 6590 for (k = i1start; k < rem; k++) { 6591 if (j1[k] < cstart || j1[k] >= cend) continue; 6592 const PetscInt rindex = i1[k] - rstart; 6593 if ((j1[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex)); 6594 minj[rindex] = PetscMin(minj[rindex], j1[k]); 6595 } 6596 for (k = 0; k < n2; k++) { 6597 if (j2[k] < cstart || j2[k] >= cend) continue; 6598 const PetscInt rindex = i2[k] - rstart; 6599 if ((j2[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex)); 6600 minj[rindex] = PetscMin(minj[rindex], j2[k]); 6601 } 6602 for (k = i1start; k < rem; k++) { 6603 const PetscInt rindex = i1[k] - rstart; 6604 if (j1[k] < cstart || j1[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue; 6605 if (j1[k] == minj[rindex]) j1[k] = i1[k] + (cstart - rstart); 6606 else if ((j1[k] - cstart) == rindex) j1[k] = minj[rindex]; 6607 } 6608 for (k = 0; k < n2; k++) { 6609 const PetscInt rindex = i2[k] - rstart; 6610 if (j2[k] < cstart || j2[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue; 6611 if (j2[k] == minj[rindex]) j2[k] = i2[k] + (cstart - rstart); 6612 else if ((j2[k] - cstart) == rindex) j2[k] = minj[rindex]; 6613 } 6614 PetscCall(PetscBTDestroy(&hasdiag)); 6615 PetscCall(PetscFree(minj)); 6616 } 6617 6618 /* Split local COOs and received COOs into diag/offdiag portions */ 6619 PetscCount 
*rowBegin1, *rowMid1, *rowEnd1; 6620 PetscCount *Ajmap1, *Aperm1, *Bjmap1, *Bperm1; 6621 PetscCount Annz1, Bnnz1, Atot1, Btot1; 6622 PetscCount *rowBegin2, *rowMid2, *rowEnd2; 6623 PetscCount *Ajmap2, *Aperm2, *Bjmap2, *Bperm2; 6624 PetscCount Annz2, Bnnz2, Atot2, Btot2; 6625 6626 PetscCall(PetscCalloc3(m, &rowBegin1, m, &rowMid1, m, &rowEnd1)); 6627 PetscCall(PetscCalloc3(m, &rowBegin2, m, &rowMid2, m, &rowEnd2)); 6628 PetscCall(MatSplitEntries_Internal(mat, rem, i1, j1, perm1, rowBegin1, rowMid1, rowEnd1, &Atot1, &Aperm1, &Annz1, &Ajmap1, &Btot1, &Bperm1, &Bnnz1, &Bjmap1)); 6629 PetscCall(MatSplitEntries_Internal(mat, n2, i2, j2, perm2, rowBegin2, rowMid2, rowEnd2, &Atot2, &Aperm2, &Annz2, &Ajmap2, &Btot2, &Bperm2, &Bnnz2, &Bjmap2)); 6630 6631 /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */ 6632 PetscInt *Ai, *Bi; 6633 PetscInt *Aj, *Bj; 6634 6635 PetscCall(PetscMalloc1(m + 1, &Ai)); 6636 PetscCall(PetscMalloc1(m + 1, &Bi)); 6637 PetscCall(PetscMalloc1(Annz1 + Annz2, &Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */ 6638 PetscCall(PetscMalloc1(Bnnz1 + Bnnz2, &Bj)); 6639 6640 PetscCount *Aimap1, *Bimap1, *Aimap2, *Bimap2; 6641 PetscCall(PetscMalloc1(Annz1, &Aimap1)); 6642 PetscCall(PetscMalloc1(Bnnz1, &Bimap1)); 6643 PetscCall(PetscMalloc1(Annz2, &Aimap2)); 6644 PetscCall(PetscMalloc1(Bnnz2, &Bimap2)); 6645 6646 PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowBegin1, rowMid1, rowBegin2, rowMid2, Ajmap1, Ajmap2, Aimap1, Aimap2, Ai, Aj)); 6647 PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowMid1, rowEnd1, rowMid2, rowEnd2, Bjmap1, Bjmap2, Bimap1, Bimap2, Bi, Bj)); 6648 6649 /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we */ 6650 /* expect nonzeros in A/B most likely have local contributing entries */ 6651 PetscInt Annz = Ai[m]; 6652 PetscInt Bnnz = Bi[m]; 6653 PetscCount *Ajmap1_new, *Bjmap1_new; 6654 6655 PetscCall(PetscMalloc1(Annz + 1, &Ajmap1_new)); 6656 PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap1_new)); 6657 6658 PetscCall(ExpandJmap_Internal(Annz1, Annz, Aimap1, Ajmap1, Ajmap1_new)); 6659 PetscCall(ExpandJmap_Internal(Bnnz1, Bnnz, Bimap1, Bjmap1, Bjmap1_new)); 6660 6661 PetscCall(PetscFree(Aimap1)); 6662 PetscCall(PetscFree(Ajmap1)); 6663 PetscCall(PetscFree(Bimap1)); 6664 PetscCall(PetscFree(Bjmap1)); 6665 PetscCall(PetscFree3(rowBegin1, rowMid1, rowEnd1)); 6666 PetscCall(PetscFree3(rowBegin2, rowMid2, rowEnd2)); 6667 PetscCall(PetscFree(perm1)); 6668 PetscCall(PetscFree3(i2, j2, perm2)); 6669 6670 Ajmap1 = Ajmap1_new; 6671 Bjmap1 = Bjmap1_new; 6672 6673 /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */ 6674 if (Annz < Annz1 + Annz2) { 6675 PetscInt *Aj_new; 6676 PetscCall(PetscMalloc1(Annz, &Aj_new)); 6677 PetscCall(PetscArraycpy(Aj_new, Aj, Annz)); 6678 PetscCall(PetscFree(Aj)); 6679 Aj = Aj_new; 6680 } 6681 6682 if (Bnnz < Bnnz1 + Bnnz2) { 6683 PetscInt *Bj_new; 6684 PetscCall(PetscMalloc1(Bnnz, &Bj_new)); 6685 PetscCall(PetscArraycpy(Bj_new, Bj, Bnnz)); 6686 PetscCall(PetscFree(Bj)); 6687 Bj = Bj_new; 6688 } 6689 6690 /* Create new submatrices for on-process and off-process coupling */ 6691 PetscScalar *Aa, *Ba; 6692 MatType rtype; 6693 Mat_SeqAIJ *a, *b; 6694 PetscObjectState state; 6695 PetscCall(PetscCalloc1(Annz, &Aa)); /* Zero matrix on device */ 6696 PetscCall(PetscCalloc1(Bnnz, &Ba)); 6697 /* make Aj[] local, i.e, based off the start column of the diagonal portion */ 6698 if (cstart) { 6699 for (k = 0; k < Annz; k++) Aj[k] -= cstart; 6700 } 
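/* At this point Ai/Aj and Bi/Bj hold the merged CSR patterns of the diagonal and off-diagonal blocks, and Aa/Ba are zeroed value arrays of matching length; the two sequential blocks of the MPIAIJ matrix are rebuilt from these arrays below */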
6701 6702 PetscCall(MatGetRootType_Private(mat, &rtype)); 6703 6704 MatSeqXAIJGetOptions_Private(mpiaij->A); 6705 PetscCall(MatDestroy(&mpiaij->A)); 6706 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, Ai, Aj, Aa, &mpiaij->A)); 6707 PetscCall(MatSetBlockSizesFromMats(mpiaij->A, mat, mat)); 6708 MatSeqXAIJRestoreOptions_Private(mpiaij->A); 6709 6710 MatSeqXAIJGetOptions_Private(mpiaij->B); 6711 PetscCall(MatDestroy(&mpiaij->B)); 6712 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, mat->cmap->N, Bi, Bj, Ba, &mpiaij->B)); 6713 PetscCall(MatSetBlockSizesFromMats(mpiaij->B, mat, mat)); 6714 MatSeqXAIJRestoreOptions_Private(mpiaij->B); 6715 6716 PetscCall(MatSetUpMultiply_MPIAIJ(mat)); 6717 mat->was_assembled = PETSC_TRUE; // was_assembled in effect means the Mvctx is built; doing so avoids redundant MatSetUpMultiply_MPIAIJ 6718 state = mpiaij->A->nonzerostate + mpiaij->B->nonzerostate; 6719 PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat))); 6720 6721 a = (Mat_SeqAIJ *)mpiaij->A->data; 6722 b = (Mat_SeqAIJ *)mpiaij->B->data; 6723 a->singlemalloc = b->singlemalloc = PETSC_FALSE; /* Let newmat own Ai,Aj,Aa,Bi,Bj,Ba */ 6724 a->free_a = b->free_a = PETSC_TRUE; 6725 a->free_ij = b->free_ij = PETSC_TRUE; 6726 6727 /* conversion must happen AFTER multiply setup */ 6728 PetscCall(MatConvert(mpiaij->A, rtype, MAT_INPLACE_MATRIX, &mpiaij->A)); 6729 PetscCall(MatConvert(mpiaij->B, rtype, MAT_INPLACE_MATRIX, &mpiaij->B)); 6730 PetscCall(VecDestroy(&mpiaij->lvec)); 6731 PetscCall(MatCreateVecs(mpiaij->B, &mpiaij->lvec, NULL)); 6732 6733 // Put the COO struct in a container and then attach that to the matrix 6734 PetscCall(PetscMalloc1(1, &coo)); 6735 coo->n = coo_n; 6736 coo->sf = sf2; 6737 coo->sendlen = nleaves; 6738 coo->recvlen = nroots; 6739 coo->Annz = Annz; 6740 coo->Bnnz = Bnnz; 6741 coo->Annz2 = Annz2; 6742 coo->Bnnz2 = Bnnz2; 6743 coo->Atot1 = Atot1; 6744 coo->Atot2 = Atot2; 6745 coo->Btot1 = Btot1; 6746 coo->Btot2 = Btot2; 6747 coo->Ajmap1 = Ajmap1; 6748 coo->Aperm1 = Aperm1; 6749 coo->Bjmap1 = Bjmap1; 6750 coo->Bperm1 = Bperm1; 6751 coo->Aimap2 = Aimap2; 6752 coo->Ajmap2 = Ajmap2; 6753 coo->Aperm2 = Aperm2; 6754 coo->Bimap2 = Bimap2; 6755 coo->Bjmap2 = Bjmap2; 6756 coo->Bperm2 = Bperm2; 6757 coo->Cperm1 = Cperm1; 6758 // Allocate in preallocation. 
If not used, it has zero cost on host 6759 PetscCall(PetscMalloc2(coo->sendlen, &coo->sendbuf, coo->recvlen, &coo->recvbuf)); 6760 PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container)); 6761 PetscCall(PetscContainerSetPointer(container, coo)); 6762 PetscCall(PetscContainerSetUserDestroy(container, MatCOOStructDestroy_MPIAIJ)); 6763 PetscCall(PetscObjectCompose((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject)container)); 6764 PetscCall(PetscContainerDestroy(&container)); 6765 PetscFunctionReturn(PETSC_SUCCESS); 6766 } 6767 6768 static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat, const PetscScalar v[], InsertMode imode) 6769 { 6770 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)mat->data; 6771 Mat A = mpiaij->A, B = mpiaij->B; 6772 PetscScalar *Aa, *Ba; 6773 PetscScalar *sendbuf, *recvbuf; 6774 const PetscCount *Ajmap1, *Ajmap2, *Aimap2; 6775 const PetscCount *Bjmap1, *Bjmap2, *Bimap2; 6776 const PetscCount *Aperm1, *Aperm2, *Bperm1, *Bperm2; 6777 const PetscCount *Cperm1; 6778 PetscContainer container; 6779 MatCOOStruct_MPIAIJ *coo; 6780 6781 PetscFunctionBegin; 6782 PetscCall(PetscObjectQuery((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject *)&container)); 6783 PetscCheck(container, PetscObjectComm((PetscObject)mat), PETSC_ERR_PLIB, "Not found MatCOOStruct on this matrix"); 6784 PetscCall(PetscContainerGetPointer(container, (void **)&coo)); 6785 sendbuf = coo->sendbuf; 6786 recvbuf = coo->recvbuf; 6787 Ajmap1 = coo->Ajmap1; 6788 Ajmap2 = coo->Ajmap2; 6789 Aimap2 = coo->Aimap2; 6790 Bjmap1 = coo->Bjmap1; 6791 Bjmap2 = coo->Bjmap2; 6792 Bimap2 = coo->Bimap2; 6793 Aperm1 = coo->Aperm1; 6794 Aperm2 = coo->Aperm2; 6795 Bperm1 = coo->Bperm1; 6796 Bperm2 = coo->Bperm2; 6797 Cperm1 = coo->Cperm1; 6798 6799 PetscCall(MatSeqAIJGetArray(A, &Aa)); /* Might read and write matrix values */ 6800 PetscCall(MatSeqAIJGetArray(B, &Ba)); 6801 6802 /* Pack entries to be sent to remote */ 6803 for (PetscCount i = 0; i < coo->sendlen; i++) sendbuf[i] = v[Cperm1[i]]; 6804 6805 /* Send remote entries to their owner and overlap the communication with local computation */ 6806 PetscCall(PetscSFReduceWithMemTypeBegin(coo->sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, sendbuf, PETSC_MEMTYPE_HOST, recvbuf, MPI_REPLACE)); 6807 /* Add local entries to A and B */ 6808 for (PetscCount i = 0; i < coo->Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */ 6809 PetscScalar sum = 0.0; /* Do partial summation first to improve numerical stability */ 6810 for (PetscCount k = Ajmap1[i]; k < Ajmap1[i + 1]; k++) sum += v[Aperm1[k]]; 6811 Aa[i] = (imode == INSERT_VALUES ? 0.0 : Aa[i]) + sum; 6812 } 6813 for (PetscCount i = 0; i < coo->Bnnz; i++) { 6814 PetscScalar sum = 0.0; 6815 for (PetscCount k = Bjmap1[i]; k < Bjmap1[i + 1]; k++) sum += v[Bperm1[k]]; 6816 Ba[i] = (imode == INSERT_VALUES ? 
0.0 : Ba[i]) + sum; 6817 } 6818 PetscCall(PetscSFReduceEnd(coo->sf, MPIU_SCALAR, sendbuf, recvbuf, MPI_REPLACE)); 6819 6820 /* Add received remote entries to A and B */ 6821 for (PetscCount i = 0; i < coo->Annz2; i++) { 6822 for (PetscCount k = Ajmap2[i]; k < Ajmap2[i + 1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]]; 6823 } 6824 for (PetscCount i = 0; i < coo->Bnnz2; i++) { 6825 for (PetscCount k = Bjmap2[i]; k < Bjmap2[i + 1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]]; 6826 } 6827 PetscCall(MatSeqAIJRestoreArray(A, &Aa)); 6828 PetscCall(MatSeqAIJRestoreArray(B, &Ba)); 6829 PetscFunctionReturn(PETSC_SUCCESS); 6830 } 6831 6832 /*MC 6833 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 6834 6835 Options Database Keys: 6836 . -mat_type mpiaij - sets the matrix type to `MATMPIAIJ` during a call to `MatSetFromOptions()` 6837 6838 Level: beginner 6839 6840 Notes: 6841 `MatSetValues()` may be called for this matrix type with a `NULL` argument for the numerical values, 6842 in this case the values associated with the rows and columns one passes in are set to zero 6843 in the matrix 6844 6845 `MatSetOptions`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this no 6846 space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored 6847 6848 .seealso: [](ch_matrices), `Mat`, `MATSEQAIJ`, `MATAIJ`, `MatCreateAIJ()` 6849 M*/ 6850 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 6851 { 6852 Mat_MPIAIJ *b; 6853 PetscMPIInt size; 6854 6855 PetscFunctionBegin; 6856 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 6857 6858 PetscCall(PetscNew(&b)); 6859 B->data = (void *)b; 6860 B->ops[0] = MatOps_Values; 6861 B->assembled = PETSC_FALSE; 6862 B->insertmode = NOT_SET_VALUES; 6863 b->size = size; 6864 6865 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank)); 6866 6867 /* build cache for off array entries formed */ 6868 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash)); 6869 6870 b->donotstash = PETSC_FALSE; 6871 b->colmap = NULL; 6872 b->garray = NULL; 6873 b->roworiented = PETSC_TRUE; 6874 6875 /* stuff used for matrix vector multiply */ 6876 b->lvec = NULL; 6877 b->Mvctx = NULL; 6878 6879 /* stuff for MatGetRow() */ 6880 b->rowindices = NULL; 6881 b->rowvalues = NULL; 6882 b->getrowactive = PETSC_FALSE; 6883 6884 /* flexible pointer used in CUSPARSE classes */ 6885 b->spptr = NULL; 6886 6887 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetUseScalableIncreaseOverlap_C", MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ)); 6888 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIAIJ)); 6889 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIAIJ)); 6890 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_MPIAIJ)); 6891 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocation_C", MatMPIAIJSetPreallocation_MPIAIJ)); 6892 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetPreallocation_C", MatResetPreallocation_MPIAIJ)); 6893 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocationCSR_C", MatMPIAIJSetPreallocationCSR_MPIAIJ)); 6894 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIAIJ)); 6895 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijperm_C", 
MatConvert_MPIAIJ_MPIAIJPERM)); 6896 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijsell_C", MatConvert_MPIAIJ_MPIAIJSELL)); 6897 #if defined(PETSC_HAVE_CUDA) 6898 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcusparse_C", MatConvert_MPIAIJ_MPIAIJCUSPARSE)); 6899 #endif 6900 #if defined(PETSC_HAVE_HIP) 6901 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijhipsparse_C", MatConvert_MPIAIJ_MPIAIJHIPSPARSE)); 6902 #endif 6903 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 6904 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijkokkos_C", MatConvert_MPIAIJ_MPIAIJKokkos)); 6905 #endif 6906 #if defined(PETSC_HAVE_MKL_SPARSE) 6907 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijmkl_C", MatConvert_MPIAIJ_MPIAIJMKL)); 6908 #endif 6909 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcrl_C", MatConvert_MPIAIJ_MPIAIJCRL)); 6910 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpibaij_C", MatConvert_MPIAIJ_MPIBAIJ)); 6911 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisbaij_C", MatConvert_MPIAIJ_MPISBAIJ)); 6912 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense)); 6913 #if defined(PETSC_HAVE_ELEMENTAL) 6914 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_elemental_C", MatConvert_MPIAIJ_Elemental)); 6915 #endif 6916 #if defined(PETSC_HAVE_SCALAPACK) 6917 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_scalapack_C", MatConvert_AIJ_ScaLAPACK)); 6918 #endif 6919 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_is_C", MatConvert_XAIJ_IS)); 6920 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisell_C", MatConvert_MPIAIJ_MPISELL)); 6921 #if defined(PETSC_HAVE_HYPRE) 6922 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_hypre_C", MatConvert_AIJ_HYPRE)); 6923 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", MatProductSetFromOptions_Transpose_AIJ_AIJ)); 6924 #endif 6925 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_is_mpiaij_C", MatProductSetFromOptions_IS_XAIJ)); 6926 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpiaij_C", MatProductSetFromOptions_MPIAIJ)); 6927 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetPreallocationCOO_C", MatSetPreallocationCOO_MPIAIJ)); 6928 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetValuesCOO_C", MatSetValuesCOO_MPIAIJ)); 6929 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIAIJ)); 6930 PetscFunctionReturn(PETSC_SUCCESS); 6931 } 6932 6933 /*@C 6934 MatCreateMPIAIJWithSplitArrays - creates a `MATMPIAIJ` matrix using arrays that contain the "diagonal" 6935 and "off-diagonal" part of the matrix in CSR format. 6936 6937 Collective 6938 6939 Input Parameters: 6940 + comm - MPI communicator 6941 . m - number of local rows (Cannot be `PETSC_DECIDE`) 6942 . n - This value should be the same as the local size used in creating the 6943 x vector for the matrix-vector product $y = Ax$. (or `PETSC_DECIDE` to have 6944 calculated if `N` is given) For square matrices `n` is almost always `m`. 6945 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given) 6946 . 
N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given) 6947 . i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 6948 . j - column indices, which must be local, i.e., based off the start column of the diagonal portion 6949 . a - matrix values 6950 . oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix 6951 . oj - column indices, which must be global, representing global columns in the `MATMPIAIJ` matrix 6952 - oa - matrix values 6953 6954 Output Parameter: 6955 . mat - the matrix 6956 6957 Level: advanced 6958 6959 Notes: 6960 The `i`, `j`, and `a` arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 6961 must free the arrays once the matrix has been destroyed and not before. 6962 6963 The `i` and `j` indices are 0 based 6964 6965 See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix 6966 6967 This sets local rows and cannot be used to set off-processor values. 6968 6969 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 6970 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 6971 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 6972 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 6973 keep track of the underlying array. Use `MatSetOption`(A,`MAT_NO_OFF_PROC_ENTRIES`,`PETSC_TRUE`) to disable all 6974 communication if it is known that only local entries will be set. 
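   A minimal illustrative sketch (not taken from a PETSc example): assembling a 4 x 4 tridiagonal matrix on two MPI processes, with two rows and two columns owned by each process. Shown here are the rank 0 arrays, whose "diagonal" block covers global columns 0-1 and whose only "off-diagonal" entry sits in global column 2 (rank 1 is analogous, with oj[] = {1}). Since the arrays are not copied, they must outlive the matrix.
.vb
   Mat         A;
   PetscInt    i[]  = {0, 2, 4}, j[]  = {0, 1, 0, 1};
   PetscScalar a[]  = {2, -1, -1, 2};
   PetscInt    oi[] = {0, 0, 1}, oj[] = {2};
   PetscScalar oa[] = {-1};

   PetscCall(MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD, 2, 2, 4, 4, i, j, a, oi, oj, oa, &A));
.ve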
6975 6976 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 6977 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()` 6978 @*/ 6979 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt i[], PetscInt j[], PetscScalar a[], PetscInt oi[], PetscInt oj[], PetscScalar oa[], Mat *mat) 6980 { 6981 Mat_MPIAIJ *maij; 6982 6983 PetscFunctionBegin; 6984 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 6985 PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 6986 PetscCheck(oi[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "oi (row indices) must start with 0"); 6987 PetscCall(MatCreate(comm, mat)); 6988 PetscCall(MatSetSizes(*mat, m, n, M, N)); 6989 PetscCall(MatSetType(*mat, MATMPIAIJ)); 6990 maij = (Mat_MPIAIJ *)(*mat)->data; 6991 6992 (*mat)->preallocated = PETSC_TRUE; 6993 6994 PetscCall(PetscLayoutSetUp((*mat)->rmap)); 6995 PetscCall(PetscLayoutSetUp((*mat)->cmap)); 6996 6997 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, i, j, a, &maij->A)); 6998 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, (*mat)->cmap->N, oi, oj, oa, &maij->B)); 6999 7000 PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 7001 PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY)); 7002 PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY)); 7003 PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE)); 7004 PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 7005 PetscFunctionReturn(PETSC_SUCCESS); 7006 } 7007 7008 typedef struct { 7009 Mat *mp; /* intermediate products */ 7010 PetscBool *mptmp; /* is the intermediate product temporary ? */ 7011 PetscInt cp; /* number of intermediate products */ 7012 7013 /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */ 7014 PetscInt *startsj_s, *startsj_r; 7015 PetscScalar *bufa; 7016 Mat P_oth; 7017 7018 /* may take advantage of merging product->B */ 7019 Mat Bloc; /* B-local by merging diag and off-diag */ 7020 7021 /* cusparse does not have support to split between symbolic and numeric phases. 7022 When api_user is true, we don't need to update the numerical values 7023 of the temporary storage */ 7024 PetscBool reusesym; 7025 7026 /* support for COO values insertion */ 7027 PetscScalar *coo_v, *coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */ 7028 PetscInt **own; /* own[i] points to address of on-process COO indices for Mat mp[i] */ 7029 PetscInt **off; /* off[i] points to address of off-process COO indices for Mat mp[i] */ 7030 PetscBool hasoffproc; /* if true, have off-process values insertion (i.e. 
AtB or PtAP) */ 7031 PetscSF sf; /* used for non-local values insertion and memory malloc */ 7032 PetscMemType mtype; 7033 7034 /* customization */ 7035 PetscBool abmerge; 7036 PetscBool P_oth_bind; 7037 } MatMatMPIAIJBACKEND; 7038 7039 static PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data) 7040 { 7041 MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND *)data; 7042 PetscInt i; 7043 7044 PetscFunctionBegin; 7045 PetscCall(PetscFree2(mmdata->startsj_s, mmdata->startsj_r)); 7046 PetscCall(PetscFree(mmdata->bufa)); 7047 PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_v)); 7048 PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_w)); 7049 PetscCall(MatDestroy(&mmdata->P_oth)); 7050 PetscCall(MatDestroy(&mmdata->Bloc)); 7051 PetscCall(PetscSFDestroy(&mmdata->sf)); 7052 for (i = 0; i < mmdata->cp; i++) PetscCall(MatDestroy(&mmdata->mp[i])); 7053 PetscCall(PetscFree2(mmdata->mp, mmdata->mptmp)); 7054 PetscCall(PetscFree(mmdata->own[0])); 7055 PetscCall(PetscFree(mmdata->own)); 7056 PetscCall(PetscFree(mmdata->off[0])); 7057 PetscCall(PetscFree(mmdata->off)); 7058 PetscCall(PetscFree(mmdata)); 7059 PetscFunctionReturn(PETSC_SUCCESS); 7060 } 7061 7062 /* Copy selected n entries with indices in idx[] of A to v[]. 7063 If idx is NULL, copy the whole data array of A to v[] 7064 */ 7065 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[]) 7066 { 7067 PetscErrorCode (*f)(Mat, PetscInt, const PetscInt[], PetscScalar[]); 7068 7069 PetscFunctionBegin; 7070 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJCopySubArray_C", &f)); 7071 if (f) { 7072 PetscCall((*f)(A, n, idx, v)); 7073 } else { 7074 const PetscScalar *vv; 7075 7076 PetscCall(MatSeqAIJGetArrayRead(A, &vv)); 7077 if (n && idx) { 7078 PetscScalar *w = v; 7079 const PetscInt *oi = idx; 7080 PetscInt j; 7081 7082 for (j = 0; j < n; j++) *w++ = vv[*oi++]; 7083 } else { 7084 PetscCall(PetscArraycpy(v, vv, n)); 7085 } 7086 PetscCall(MatSeqAIJRestoreArrayRead(A, &vv)); 7087 } 7088 PetscFunctionReturn(PETSC_SUCCESS); 7089 } 7090 7091 static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C) 7092 { 7093 MatMatMPIAIJBACKEND *mmdata; 7094 PetscInt i, n_d, n_o; 7095 7096 PetscFunctionBegin; 7097 MatCheckProduct(C, 1); 7098 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 7099 mmdata = (MatMatMPIAIJBACKEND *)C->product->data; 7100 if (!mmdata->reusesym) { /* update temporary matrices */ 7101 if (mmdata->P_oth) PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A, C->product->B, MAT_REUSE_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7102 if (mmdata->Bloc) PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B, MAT_REUSE_MATRIX, NULL, &mmdata->Bloc)); 7103 } 7104 mmdata->reusesym = PETSC_FALSE; 7105 7106 for (i = 0; i < mmdata->cp; i++) { 7107 PetscCheck(mmdata->mp[i]->ops->productnumeric, PetscObjectComm((PetscObject)mmdata->mp[i]), PETSC_ERR_PLIB, "Missing numeric op for %s", MatProductTypes[mmdata->mp[i]->product->type]); 7108 PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i])); 7109 } 7110 for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) { 7111 PetscInt noff = mmdata->off[i + 1] - mmdata->off[i]; 7112 7113 if (mmdata->mptmp[i]) continue; 7114 if (noff) { 7115 PetscInt nown = mmdata->own[i + 1] - mmdata->own[i]; 7116 7117 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], noff, mmdata->off[i], mmdata->coo_w + n_o)); 7118 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], nown, 
mmdata->own[i], mmdata->coo_v + n_d)); 7119 n_o += noff; 7120 n_d += nown; 7121 } else { 7122 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mmdata->mp[i]->data; 7123 7124 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], mm->nz, NULL, mmdata->coo_v + n_d)); 7125 n_d += mm->nz; 7126 } 7127 } 7128 if (mmdata->hasoffproc) { /* offprocess insertion */ 7129 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d)); 7130 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d)); 7131 } 7132 PetscCall(MatSetValuesCOO(C, mmdata->coo_v, INSERT_VALUES)); 7133 PetscFunctionReturn(PETSC_SUCCESS); 7134 } 7135 7136 /* Support for Pt * A, A * P, or Pt * A * P */ 7137 #define MAX_NUMBER_INTERMEDIATE 4 7138 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C) 7139 { 7140 Mat_Product *product = C->product; 7141 Mat A, P, mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */ 7142 Mat_MPIAIJ *a, *p; 7143 MatMatMPIAIJBACKEND *mmdata; 7144 ISLocalToGlobalMapping P_oth_l2g = NULL; 7145 IS glob = NULL; 7146 const char *prefix; 7147 char pprefix[256]; 7148 const PetscInt *globidx, *P_oth_idx; 7149 PetscInt i, j, cp, m, n, M, N, *coo_i, *coo_j; 7150 PetscCount ncoo, ncoo_d, ncoo_o, ncoo_oown; 7151 PetscInt cmapt[MAX_NUMBER_INTERMEDIATE], rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. */ 7152 /* type-0: consecutive, start from 0; type-1: consecutive with */ 7153 /* a base offset; type-2: sparse with a local to global map table */ 7154 const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE], *rmapa[MAX_NUMBER_INTERMEDIATE]; /* col/row local to global map array (table) for type-2 map type */ 7155 7156 MatProductType ptype; 7157 PetscBool mptmp[MAX_NUMBER_INTERMEDIATE], hasoffproc = PETSC_FALSE, iscuda, iship, iskokk; 7158 PetscMPIInt size; 7159 7160 PetscFunctionBegin; 7161 MatCheckProduct(C, 1); 7162 PetscCheck(!product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 7163 ptype = product->type; 7164 if (product->A->symmetric == PETSC_BOOL3_TRUE && ptype == MATPRODUCT_AtB) { 7165 ptype = MATPRODUCT_AB; 7166 product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE; 7167 } 7168 switch (ptype) { 7169 case MATPRODUCT_AB: 7170 A = product->A; 7171 P = product->B; 7172 m = A->rmap->n; 7173 n = P->cmap->n; 7174 M = A->rmap->N; 7175 N = P->cmap->N; 7176 hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */ 7177 break; 7178 case MATPRODUCT_AtB: 7179 P = product->A; 7180 A = product->B; 7181 m = P->cmap->n; 7182 n = A->cmap->n; 7183 M = P->cmap->N; 7184 N = A->cmap->N; 7185 hasoffproc = PETSC_TRUE; 7186 break; 7187 case MATPRODUCT_PtAP: 7188 A = product->A; 7189 P = product->B; 7190 m = P->cmap->n; 7191 n = P->cmap->n; 7192 M = P->cmap->N; 7193 N = P->cmap->N; 7194 hasoffproc = PETSC_TRUE; 7195 break; 7196 default: 7197 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]); 7198 } 7199 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C), &size)); 7200 if (size == 1) hasoffproc = PETSC_FALSE; 7201 7202 /* defaults */ 7203 for (i = 0; i < MAX_NUMBER_INTERMEDIATE; i++) { 7204 mp[i] = NULL; 7205 mptmp[i] = PETSC_FALSE; 7206 rmapt[i] = -1; 7207 cmapt[i] = -1; 7208 rmapa[i] = NULL; 7209 cmapa[i] = NULL; 7210 } 7211 7212 /* customization */ 7213 PetscCall(PetscNew(&mmdata)); 7214 mmdata->reusesym = product->api_user; 7215 if (ptype == MATPRODUCT_AB) { 7216 if (product->api_user) { 7217 
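      /* product requested through the user-level MatMatMult() API; expose the backend tuning options under the -matmatmult_ prefix */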
PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatMult", "Mat"); 7218 PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL)); 7219 PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7220 PetscOptionsEnd(); 7221 } else { 7222 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat"); 7223 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL)); 7224 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7225 PetscOptionsEnd(); 7226 } 7227 } else if (ptype == MATPRODUCT_PtAP) { 7228 if (product->api_user) { 7229 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatPtAP", "Mat"); 7230 PetscCall(PetscOptionsBool("-matptap_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7231 PetscOptionsEnd(); 7232 } else { 7233 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat"); 7234 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7235 PetscOptionsEnd(); 7236 } 7237 } 7238 a = (Mat_MPIAIJ *)A->data; 7239 p = (Mat_MPIAIJ *)P->data; 7240 PetscCall(MatSetSizes(C, m, n, M, N)); 7241 PetscCall(PetscLayoutSetUp(C->rmap)); 7242 PetscCall(PetscLayoutSetUp(C->cmap)); 7243 PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 7244 PetscCall(MatGetOptionsPrefix(C, &prefix)); 7245 7246 cp = 0; 7247 switch (ptype) { 7248 case MATPRODUCT_AB: /* A * P */ 7249 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7250 7251 /* A_diag * P_local (merged or not) */ 7252 if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */ 7253 /* P is product->B */ 7254 PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7255 PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp])); 7256 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7257 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7258 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7259 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7260 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7261 mp[cp]->product->api_user = product->api_user; 7262 PetscCall(MatProductSetFromOptions(mp[cp])); 7263 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7264 PetscCall(ISGetIndices(glob, &globidx)); 7265 rmapt[cp] = 1; 7266 cmapt[cp] = 2; 7267 cmapa[cp] = globidx; 7268 mptmp[cp] = PETSC_FALSE; 7269 cp++; 7270 } else { /* A_diag * P_diag and A_diag * P_off */ 7271 PetscCall(MatProductCreate(a->A, p->A, NULL, &mp[cp])); 7272 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7273 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7274 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7275 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7276 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7277 mp[cp]->product->api_user = 
product->api_user; 7278 PetscCall(MatProductSetFromOptions(mp[cp])); 7279 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7280 rmapt[cp] = 1; 7281 cmapt[cp] = 1; 7282 mptmp[cp] = PETSC_FALSE; 7283 cp++; 7284 PetscCall(MatProductCreate(a->A, p->B, NULL, &mp[cp])); 7285 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7286 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7287 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7288 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7289 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7290 mp[cp]->product->api_user = product->api_user; 7291 PetscCall(MatProductSetFromOptions(mp[cp])); 7292 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7293 rmapt[cp] = 1; 7294 cmapt[cp] = 2; 7295 cmapa[cp] = p->garray; 7296 mptmp[cp] = PETSC_FALSE; 7297 cp++; 7298 } 7299 7300 /* A_off * P_other */ 7301 if (mmdata->P_oth) { 7302 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); /* make P_oth use local col ids */ 7303 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx)); 7304 PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)a->B)->type_name)); 7305 PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind)); 7306 PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp])); 7307 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7308 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7309 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7310 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7311 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7312 mp[cp]->product->api_user = product->api_user; 7313 PetscCall(MatProductSetFromOptions(mp[cp])); 7314 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7315 rmapt[cp] = 1; 7316 cmapt[cp] = 2; 7317 cmapa[cp] = P_oth_idx; 7318 mptmp[cp] = PETSC_FALSE; 7319 cp++; 7320 } 7321 break; 7322 7323 case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */ 7324 /* A is product->B */ 7325 PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7326 if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */ 7327 PetscCall(MatProductCreate(mmdata->Bloc, mmdata->Bloc, NULL, &mp[cp])); 7328 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7329 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7330 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7331 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7332 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7333 mp[cp]->product->api_user = product->api_user; 7334 PetscCall(MatProductSetFromOptions(mp[cp])); 7335 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7336 PetscCall(ISGetIndices(glob, &globidx)); 7337 rmapt[cp] = 2; 7338 rmapa[cp] = globidx; 7339 cmapt[cp] = 2; 7340 cmapa[cp] = globidx; 7341 mptmp[cp] = PETSC_FALSE; 7342 cp++; 7343 } else { 7344 PetscCall(MatProductCreate(p->A, mmdata->Bloc, NULL, &mp[cp])); 7345 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7346 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7347 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7348 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7349 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7350 mp[cp]->product->api_user = product->api_user; 7351 PetscCall(MatProductSetFromOptions(mp[cp])); 7352 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7353 PetscCall(ISGetIndices(glob, &globidx)); 
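    /* P_diag^T * A_loc: the result rows already correspond to the locally owned rows of C (type-1 row map); the result columns are addressed through the global indices of the merged A_loc, i.e. glob (type-2 column map) */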
7354 rmapt[cp] = 1; 7355 cmapt[cp] = 2; 7356 cmapa[cp] = globidx; 7357 mptmp[cp] = PETSC_FALSE; 7358 cp++; 7359 PetscCall(MatProductCreate(p->B, mmdata->Bloc, NULL, &mp[cp])); 7360 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7361 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7362 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7363 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7364 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7365 mp[cp]->product->api_user = product->api_user; 7366 PetscCall(MatProductSetFromOptions(mp[cp])); 7367 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7368 rmapt[cp] = 2; 7369 rmapa[cp] = p->garray; 7370 cmapt[cp] = 2; 7371 cmapa[cp] = globidx; 7372 mptmp[cp] = PETSC_FALSE; 7373 cp++; 7374 } 7375 break; 7376 case MATPRODUCT_PtAP: 7377 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7378 /* P is product->B */ 7379 PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7380 PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp])); 7381 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_PtAP)); 7382 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7383 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7384 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7385 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7386 mp[cp]->product->api_user = product->api_user; 7387 PetscCall(MatProductSetFromOptions(mp[cp])); 7388 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7389 PetscCall(ISGetIndices(glob, &globidx)); 7390 rmapt[cp] = 2; 7391 rmapa[cp] = globidx; 7392 cmapt[cp] = 2; 7393 cmapa[cp] = globidx; 7394 mptmp[cp] = PETSC_FALSE; 7395 cp++; 7396 if (mmdata->P_oth) { 7397 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); 7398 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx)); 7399 PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)a->B)->type_name)); 7400 PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind)); 7401 PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp])); 7402 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7403 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7404 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7405 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7406 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7407 mp[cp]->product->api_user = product->api_user; 7408 PetscCall(MatProductSetFromOptions(mp[cp])); 7409 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7410 mptmp[cp] = PETSC_TRUE; 7411 cp++; 7412 PetscCall(MatProductCreate(mmdata->Bloc, mp[1], NULL, &mp[cp])); 7413 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7414 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7415 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7416 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7417 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7418 mp[cp]->product->api_user = product->api_user; 7419 PetscCall(MatProductSetFromOptions(mp[cp])); 7420 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7421 rmapt[cp] = 2; 7422 rmapa[cp] = globidx; 7423 cmapt[cp] = 2; 7424 cmapa[cp] = P_oth_idx; 7425 mptmp[cp] = PETSC_FALSE; 7426 cp++; 7427 } 7428 break; 7429 default: 7430 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", 
MatProductTypes[ptype]);
  }
  /* sanity check */
  if (size > 1)
    for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected offproc map type for product %" PetscInt_FMT, i);

  PetscCall(PetscMalloc2(cp, &mmdata->mp, cp, &mmdata->mptmp));
  for (i = 0; i < cp; i++) {
    mmdata->mp[i]    = mp[i];
    mmdata->mptmp[i] = mptmp[i];
  }
  mmdata->cp             = cp;
  C->product->data       = mmdata;
  C->product->destroy    = MatDestroy_MatMatMPIAIJBACKEND;
  C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND;

  /* memory type */
  mmdata->mtype = PETSC_MEMTYPE_HOST;
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iscuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, ""));
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iship, MATSEQAIJHIPSPARSE, MATMPIAIJHIPSPARSE, ""));
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iskokk, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, ""));
  if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA;
  else if (iship) mmdata->mtype = PETSC_MEMTYPE_HIP;
  else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS;

  /* prepare COO coordinates for values insertion */

  /* count the total nonzeros of the intermediate SeqAIJ matrices
     ncoo_d:    # of nonzeros of matrices that do not have off-process entries
     ncoo_o:    # of nonzeros (of matrices that might have off-process entries) that will be inserted on remote processes
     ncoo_oown: # of nonzeros (of matrices that might have off-process entries) that will be inserted locally
  */
  for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) {
    Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data;
    if (mptmp[cp]) continue;
    if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scattered to all processes (might include self) */
      const PetscInt *rmap = rmapa[cp];
      const PetscInt  mr   = mp[cp]->rmap->n;
      const PetscInt  rs   = C->rmap->rstart;
      const PetscInt  re   = C->rmap->rend;
      const PetscInt *ii   = mm->i;
      for (i = 0; i < mr; i++) {
        const PetscInt gr = rmap[i];
        const PetscInt nz = ii[i + 1] - ii[i];
        if (gr < rs || gr >= re) ncoo_o += nz; /* this row is off-process */
        else ncoo_oown += nz;                  /* this row is local */
      }
    } else ncoo_d += mm->nz;
  }

  /*
    ncoo: total number of nonzeros (including those inserted by remote processes) belonging to this process

    ncoo = ncoo_d + ncoo_oown + ncoo2, where ncoo2 is the number of nonzeros inserted on this process by other processes.

    off[0] points to one big index array, which is shared by off[1], off[2], .... Similarly for own[0].

    off[p]: points to the segment for matrix mp[p], storing the locations of nonzeros that mp[p] will insert on other processes
    own[p]: points to the segment for matrix mp[p], storing the locations of nonzeros that mp[p] will insert locally
    Thus off[p+1] - off[p] is the number of nonzeros that mp[p] will send to other processes.

    coo_i/j/v[]: [ncoo] row/col/val of the nonzeros belonging to this process.
    For example, coo_i[]: the first ncoo_d + ncoo_oown entries store the row indices of local nonzeros, and the remaining entries store the row indices of nonzeros this process will receive.
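
    An illustrative case (hypothetical, independent of the product type): with cp == 2 intermediate products where only mp[1] has rows that map off-process,
    off[1] == off[0] (mp[0] sends nothing), off[2] - off[1] is the number of nonzeros mp[1] sends to other processes,
    and the segment own[1]..own[2] lists the positions, within mp[1]'s CSR value array, of its nonzeros that stay local.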
7493 */ 7494 PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->off)); /* +1 to make a csr-like data structure */ 7495 PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->own)); 7496 7497 /* gather (i,j) of nonzeros inserted by remote procs */ 7498 if (hasoffproc) { 7499 PetscSF msf; 7500 PetscInt ncoo2, *coo_i2, *coo_j2; 7501 7502 PetscCall(PetscMalloc1(ncoo_o, &mmdata->off[0])); 7503 PetscCall(PetscMalloc1(ncoo_oown, &mmdata->own[0])); 7504 PetscCall(PetscMalloc2(ncoo_o, &coo_i, ncoo_o, &coo_j)); /* to collect (i,j) of entries to be sent to others */ 7505 7506 for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) { 7507 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7508 PetscInt *idxoff = mmdata->off[cp]; 7509 PetscInt *idxown = mmdata->own[cp]; 7510 if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */ 7511 const PetscInt *rmap = rmapa[cp]; 7512 const PetscInt *cmap = cmapa[cp]; 7513 const PetscInt *ii = mm->i; 7514 PetscInt *coi = coo_i + ncoo_o; 7515 PetscInt *coj = coo_j + ncoo_o; 7516 const PetscInt mr = mp[cp]->rmap->n; 7517 const PetscInt rs = C->rmap->rstart; 7518 const PetscInt re = C->rmap->rend; 7519 const PetscInt cs = C->cmap->rstart; 7520 for (i = 0; i < mr; i++) { 7521 const PetscInt *jj = mm->j + ii[i]; 7522 const PetscInt gr = rmap[i]; 7523 const PetscInt nz = ii[i + 1] - ii[i]; 7524 if (gr < rs || gr >= re) { /* this is an offproc row */ 7525 for (j = ii[i]; j < ii[i + 1]; j++) { 7526 *coi++ = gr; 7527 *idxoff++ = j; 7528 } 7529 if (!cmapt[cp]) { /* already global */ 7530 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7531 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7532 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7533 } else { /* offdiag */ 7534 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7535 } 7536 ncoo_o += nz; 7537 } else { /* this is a local row */ 7538 for (j = ii[i]; j < ii[i + 1]; j++) *idxown++ = j; 7539 } 7540 } 7541 } 7542 mmdata->off[cp + 1] = idxoff; 7543 mmdata->own[cp + 1] = idxown; 7544 } 7545 7546 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf)); 7547 PetscCall(PetscSFSetGraphLayout(mmdata->sf, C->rmap, ncoo_o /*nleaves*/, NULL /*ilocal*/, PETSC_OWN_POINTER, coo_i)); 7548 PetscCall(PetscSFGetMultiSF(mmdata->sf, &msf)); 7549 PetscCall(PetscSFGetGraph(msf, &ncoo2 /*nroots*/, NULL, NULL, NULL)); 7550 ncoo = ncoo_d + ncoo_oown + ncoo2; 7551 PetscCall(PetscMalloc2(ncoo, &coo_i2, ncoo, &coo_j2)); 7552 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */ 7553 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); 7554 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown)); 7555 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown)); 7556 PetscCall(PetscFree2(coo_i, coo_j)); 7557 /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */ 7558 PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo_o * sizeof(PetscScalar), (void **)&mmdata->coo_w)); 7559 coo_i = coo_i2; 7560 coo_j = coo_j2; 7561 } else { /* no offproc values insertion */ 7562 ncoo = ncoo_d; 7563 PetscCall(PetscMalloc2(ncoo, &coo_i, ncoo, &coo_j)); 7564 7565 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf)); 7566 PetscCall(PetscSFSetGraph(mmdata->sf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER)); 7567 PetscCall(PetscSFSetUp(mmdata->sf)); 7568 } 7569 mmdata->hasoffproc = hasoffproc; 7570 7571 /* gather (i,j) of nonzeros 
inserted locally */ 7572 for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) { 7573 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7574 PetscInt *coi = coo_i + ncoo_d; 7575 PetscInt *coj = coo_j + ncoo_d; 7576 const PetscInt *jj = mm->j; 7577 const PetscInt *ii = mm->i; 7578 const PetscInt *cmap = cmapa[cp]; 7579 const PetscInt *rmap = rmapa[cp]; 7580 const PetscInt mr = mp[cp]->rmap->n; 7581 const PetscInt rs = C->rmap->rstart; 7582 const PetscInt re = C->rmap->rend; 7583 const PetscInt cs = C->cmap->rstart; 7584 7585 if (mptmp[cp]) continue; 7586 if (rmapt[cp] == 1) { /* consecutive rows */ 7587 /* fill coo_i */ 7588 for (i = 0; i < mr; i++) { 7589 const PetscInt gr = i + rs; 7590 for (j = ii[i]; j < ii[i + 1]; j++) coi[j] = gr; 7591 } 7592 /* fill coo_j */ 7593 if (!cmapt[cp]) { /* type-0, already global */ 7594 PetscCall(PetscArraycpy(coj, jj, mm->nz)); 7595 } else if (cmapt[cp] == 1) { /* type-1, local to global for consecutive columns of C */ 7596 for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */ 7597 } else { /* type-2, local to global for sparse columns */ 7598 for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]]; 7599 } 7600 ncoo_d += mm->nz; 7601 } else if (rmapt[cp] == 2) { /* sparse rows */ 7602 for (i = 0; i < mr; i++) { 7603 const PetscInt *jj = mm->j + ii[i]; 7604 const PetscInt gr = rmap[i]; 7605 const PetscInt nz = ii[i + 1] - ii[i]; 7606 if (gr >= rs && gr < re) { /* local rows */ 7607 for (j = ii[i]; j < ii[i + 1]; j++) *coi++ = gr; 7608 if (!cmapt[cp]) { /* type-0, already global */ 7609 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7610 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7611 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7612 } else { /* type-2, local to global for sparse columns */ 7613 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7614 } 7615 ncoo_d += nz; 7616 } 7617 } 7618 } 7619 } 7620 if (glob) PetscCall(ISRestoreIndices(glob, &globidx)); 7621 PetscCall(ISDestroy(&glob)); 7622 if (P_oth_l2g) PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g, &P_oth_idx)); 7623 PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g)); 7624 /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */ 7625 PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo * sizeof(PetscScalar), (void **)&mmdata->coo_v)); 7626 7627 /* preallocate with COO data */ 7628 PetscCall(MatSetPreallocationCOO(C, ncoo, coo_i, coo_j)); 7629 PetscCall(PetscFree2(coo_i, coo_j)); 7630 PetscFunctionReturn(PETSC_SUCCESS); 7631 } 7632 7633 PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat) 7634 { 7635 Mat_Product *product = mat->product; 7636 #if defined(PETSC_HAVE_DEVICE) 7637 PetscBool match = PETSC_FALSE; 7638 PetscBool usecpu = PETSC_FALSE; 7639 #else 7640 PetscBool match = PETSC_TRUE; 7641 #endif 7642 7643 PetscFunctionBegin; 7644 MatCheckProduct(mat, 1); 7645 #if defined(PETSC_HAVE_DEVICE) 7646 if (!product->A->boundtocpu && !product->B->boundtocpu) PetscCall(PetscObjectTypeCompare((PetscObject)product->B, ((PetscObject)product->A)->type_name, &match)); 7647 if (match) { /* we can always fallback to the CPU if requested */ 7648 switch (product->type) { 7649 case MATPRODUCT_AB: 7650 if (product->api_user) { 7651 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatMatMult", "Mat"); 7652 PetscCall(PetscOptionsBool("-matmatmult_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL)); 7653 PetscOptionsEnd(); 7654 } else { 7655 
PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AB", "Mat"); 7656 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL)); 7657 PetscOptionsEnd(); 7658 } 7659 break; 7660 case MATPRODUCT_AtB: 7661 if (product->api_user) { 7662 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatTransposeMatMult", "Mat"); 7663 PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL)); 7664 PetscOptionsEnd(); 7665 } else { 7666 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AtB", "Mat"); 7667 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL)); 7668 PetscOptionsEnd(); 7669 } 7670 break; 7671 case MATPRODUCT_PtAP: 7672 if (product->api_user) { 7673 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatPtAP", "Mat"); 7674 PetscCall(PetscOptionsBool("-matptap_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL)); 7675 PetscOptionsEnd(); 7676 } else { 7677 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_PtAP", "Mat"); 7678 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL)); 7679 PetscOptionsEnd(); 7680 } 7681 break; 7682 default: 7683 break; 7684 } 7685 match = (PetscBool)!usecpu; 7686 } 7687 #endif 7688 if (match) { 7689 switch (product->type) { 7690 case MATPRODUCT_AB: 7691 case MATPRODUCT_AtB: 7692 case MATPRODUCT_PtAP: 7693 mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND; 7694 break; 7695 default: 7696 break; 7697 } 7698 } 7699 /* fallback to MPIAIJ ops */ 7700 if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat)); 7701 PetscFunctionReturn(PETSC_SUCCESS); 7702 } 7703 7704 /* 7705 Produces a set of block column indices of the matrix row, one for each block represented in the original row 7706 7707 n - the number of block indices in cc[] 7708 cc - the block indices (must be large enough to contain the indices) 7709 */ 7710 static inline PetscErrorCode MatCollapseRow(Mat Amat, PetscInt row, PetscInt bs, PetscInt *n, PetscInt *cc) 7711 { 7712 PetscInt cnt = -1, nidx, j; 7713 const PetscInt *idx; 7714 7715 PetscFunctionBegin; 7716 PetscCall(MatGetRow(Amat, row, &nidx, &idx, NULL)); 7717 if (nidx) { 7718 cnt = 0; 7719 cc[cnt] = idx[0] / bs; 7720 for (j = 1; j < nidx; j++) { 7721 if (cc[cnt] < idx[j] / bs) cc[++cnt] = idx[j] / bs; 7722 } 7723 } 7724 PetscCall(MatRestoreRow(Amat, row, &nidx, &idx, NULL)); 7725 *n = cnt + 1; 7726 PetscFunctionReturn(PETSC_SUCCESS); 7727 } 7728 7729 /* 7730 Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows 7731 7732 ncollapsed - the number of block indices 7733 collapsed - the block indices (must be large enough to contain the indices) 7734 */ 7735 static inline PetscErrorCode MatCollapseRows(Mat Amat, PetscInt start, PetscInt bs, PetscInt *w0, PetscInt *w1, PetscInt *w2, PetscInt *ncollapsed, PetscInt **collapsed) 7736 { 7737 PetscInt i, nprev, *cprev = w0, ncur = 0, *ccur = w1, *merged = w2, *cprevtmp; 7738 7739 PetscFunctionBegin; 7740 PetscCall(MatCollapseRow(Amat, start, bs, &nprev, cprev)); 7741 for (i = start + 1; i < start + bs; i++) { 7742 PetscCall(MatCollapseRow(Amat, i, bs, &ncur, ccur)); 
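    /* merge the block columns of row i into the running set (nprev, cprev), then swap the work buffers so cprev holds the merged result for the next iteration */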
    PetscCall(PetscMergeIntArray(nprev, cprev, ncur, ccur, &nprev, &merged));
    cprevtmp = cprev;
    cprev    = merged;
    merged   = cprevtmp;
  }
  *ncollapsed = nprev;
  if (collapsed) *collapsed = cprev;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatCreateGraph_Simple_AIJ - create a simple scalar matrix (graph) from a potentially blocked matrix

  Input Parameters:
+ Amat       - the matrix
. symmetrize - make the result symmetric
. scale      - scale with the diagonal
. filter     - filtering tolerance applied to the graph entries; a negative value means no filtering
. index_size - size of index[]; if 0, all rows/columns of a block contribute to the block norm
- index      - indices of the rows/columns within each block used to compute the block norm

  Output Parameter:
. a_Gmat - output scalar graph (entries are >= 0)
*/
PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, PetscReal filter, PetscInt index_size, PetscInt index[], Mat *a_Gmat)
{
  PetscInt  Istart, Iend, Ii, jj, kk, ncols, nloc, NN, MM, bs;
  MPI_Comm  comm;
  Mat       Gmat;
  PetscBool ismpiaij, isseqaij;
  Mat       a, b, c;
  MatType   jtype;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
  PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend));
  PetscCall(MatGetSize(Amat, &MM, &NN));
  PetscCall(MatGetBlockSize(Amat, &bs));
  nloc = (Iend - Istart) / bs;

  PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATSEQAIJ, &isseqaij));
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATMPIAIJ, &ismpiaij));
  PetscCheck(isseqaij || ismpiaij, comm, PETSC_ERR_USER, "Require (MPI)AIJ matrix type");

  /* TODO GPU: these calls are potentially expensive if the matrices are large and we want to use the GPU */
  /* A solution would be to provide a new API, MatAIJGetCollapsedAIJ, so that each class can supply a fast implementation */
  if (bs > 1) {
    PetscCall(MatGetType(Amat, &jtype));
    PetscCall(MatCreate(comm, &Gmat));
    PetscCall(MatSetType(Gmat, jtype));
    PetscCall(MatSetSizes(Gmat, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE));
    PetscCall(MatSetBlockSizes(Gmat, 1, 1));
    if (isseqaij || ((Mat_MPIAIJ *)Amat->data)->garray) {
      PetscInt  *d_nnz, *o_nnz;
      MatScalar *aa, val, *AA;
      PetscInt  *aj, *ai, *AJ, nc, nmax = 0;
      if (isseqaij) {
        a = Amat;
        b = NULL;
      } else {
        Mat_MPIAIJ *d = (Mat_MPIAIJ *)Amat->data;
        a = d->A;
        b = d->B;
      }
      PetscCall(PetscInfo(Amat, "New bs>1 Graph. nloc=%" PetscInt_FMT "\n", nloc));
      PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 0 : nloc, &o_nnz));
      for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
        PetscInt *nnz = (c == a) ?
d_nnz : o_nnz; 7810 const PetscInt *cols1, *cols2; 7811 for (PetscInt brow = 0, nc1, nc2, ok = 1; brow < nloc * bs; brow += bs) { // block rows 7812 PetscCall(MatGetRow(c, brow, &nc2, &cols2, NULL)); 7813 nnz[brow / bs] = nc2 / bs; 7814 if (nc2 % bs) ok = 0; 7815 if (nnz[brow / bs] > nmax) nmax = nnz[brow / bs]; 7816 for (PetscInt ii = 1; ii < bs; ii++) { // check for non-dense blocks 7817 PetscCall(MatGetRow(c, brow + ii, &nc1, &cols1, NULL)); 7818 if (nc1 != nc2) ok = 0; 7819 else { 7820 for (PetscInt jj = 0; jj < nc1 && ok == 1; jj++) { 7821 if (cols1[jj] != cols2[jj]) ok = 0; 7822 if (cols1[jj] % bs != jj % bs) ok = 0; 7823 } 7824 } 7825 PetscCall(MatRestoreRow(c, brow + ii, &nc1, &cols1, NULL)); 7826 } 7827 PetscCall(MatRestoreRow(c, brow, &nc2, &cols2, NULL)); 7828 if (!ok) { 7829 PetscCall(PetscFree2(d_nnz, o_nnz)); 7830 PetscCall(PetscInfo(Amat, "Found sparse blocks - revert to slow method\n")); 7831 goto old_bs; 7832 } 7833 } 7834 } 7835 PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz)); 7836 PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz)); 7837 PetscCall(PetscFree2(d_nnz, o_nnz)); 7838 PetscCall(PetscMalloc2(nmax, &AA, nmax, &AJ)); 7839 // diag 7840 for (PetscInt brow = 0, n, grow; brow < nloc * bs; brow += bs) { // block rows 7841 Mat_SeqAIJ *aseq = (Mat_SeqAIJ *)a->data; 7842 ai = aseq->i; 7843 n = ai[brow + 1] - ai[brow]; 7844 aj = aseq->j + ai[brow]; 7845 for (int k = 0; k < n; k += bs) { // block columns 7846 AJ[k / bs] = aj[k] / bs + Istart / bs; // diag starts at (Istart,Istart) 7847 val = 0; 7848 if (index_size == 0) { 7849 for (int ii = 0; ii < bs; ii++) { // rows in block 7850 aa = aseq->a + ai[brow + ii] + k; 7851 for (int jj = 0; jj < bs; jj++) { // columns in block 7852 val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm 7853 } 7854 } 7855 } else { // use (index,index) value if provided 7856 for (int iii = 0; iii < index_size; iii++) { // rows in block 7857 int ii = index[iii]; 7858 aa = aseq->a + ai[brow + ii] + k; 7859 for (int jjj = 0; jjj < index_size; jjj++) { // columns in block 7860 int jj = index[jjj]; 7861 val += PetscAbs(PetscRealPart(aa[jj])); 7862 } 7863 } 7864 } 7865 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%d) >= nmax (%d)", (int)(k / bs), (int)nmax); 7866 AA[k / bs] = val; 7867 } 7868 grow = Istart / bs + brow / bs; 7869 PetscCall(MatSetValues(Gmat, 1, &grow, n / bs, AJ, AA, ADD_VALUES)); 7870 } 7871 // off-diag 7872 if (ismpiaij) { 7873 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Amat->data; 7874 const PetscScalar *vals; 7875 const PetscInt *cols, *garray = aij->garray; 7876 PetscCheck(garray, PETSC_COMM_SELF, PETSC_ERR_USER, "No garray ?"); 7877 for (PetscInt brow = 0, grow; brow < nloc * bs; brow += bs) { // block rows 7878 PetscCall(MatGetRow(b, brow, &ncols, &cols, NULL)); 7879 for (int k = 0, cidx = 0; k < ncols; k += bs, cidx++) { 7880 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs >= nmax"); 7881 AA[k / bs] = 0; 7882 AJ[cidx] = garray[cols[k]] / bs; 7883 } 7884 nc = ncols / bs; 7885 PetscCall(MatRestoreRow(b, brow, &ncols, &cols, NULL)); 7886 if (index_size == 0) { 7887 for (int ii = 0; ii < bs; ii++) { // rows in block 7888 PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals)); 7889 for (int k = 0; k < ncols; k += bs) { 7890 for (int jj = 0; jj < bs; jj++) { // cols in block 7891 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%d) >= nmax (%d)", (int)(k / bs), (int)nmax); 7892 AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj])); 7893 } 7894 } 7895 PetscCall(MatRestoreRow(b, brow + ii, &ncols, 
&cols, &vals)); 7896 } 7897 } else { // use (index,index) value if provided 7898 for (int iii = 0; iii < index_size; iii++) { // rows in block 7899 int ii = index[iii]; 7900 PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals)); 7901 for (int k = 0; k < ncols; k += bs) { 7902 for (int jjj = 0; jjj < index_size; jjj++) { // cols in block 7903 int jj = index[jjj]; 7904 AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj])); 7905 } 7906 } 7907 PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals)); 7908 } 7909 } 7910 grow = Istart / bs + brow / bs; 7911 PetscCall(MatSetValues(Gmat, 1, &grow, nc, AJ, AA, ADD_VALUES)); 7912 } 7913 } 7914 PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY)); 7915 PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY)); 7916 PetscCall(PetscFree2(AA, AJ)); 7917 } else { 7918 const PetscScalar *vals; 7919 const PetscInt *idx; 7920 PetscInt *d_nnz, *o_nnz, *w0, *w1, *w2; 7921 old_bs: 7922 /* 7923 Determine the preallocation needed for the scalar matrix derived from the vector matrix. 7924 */ 7925 PetscCall(PetscInfo(Amat, "OLD bs>1 CreateGraph\n")); 7926 PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 0 : nloc, &o_nnz)); 7927 if (isseqaij) { 7928 PetscInt max_d_nnz; 7929 /* 7930 Determine exact preallocation count for (sequential) scalar matrix 7931 */ 7932 PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat, &max_d_nnz)); 7933 max_d_nnz = PetscMin(nloc, bs * max_d_nnz); 7934 PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2)); 7935 for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) PetscCall(MatCollapseRows(Amat, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL)); 7936 PetscCall(PetscFree3(w0, w1, w2)); 7937 } else if (ismpiaij) { 7938 Mat Daij, Oaij; 7939 const PetscInt *garray; 7940 PetscInt max_d_nnz; 7941 PetscCall(MatMPIAIJGetSeqAIJ(Amat, &Daij, &Oaij, &garray)); 7942 /* 7943 Determine exact preallocation count for diagonal block portion of scalar matrix 7944 */ 7945 PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij, &max_d_nnz)); 7946 max_d_nnz = PetscMin(nloc, bs * max_d_nnz); 7947 PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2)); 7948 for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) PetscCall(MatCollapseRows(Daij, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL)); 7949 PetscCall(PetscFree3(w0, w1, w2)); 7950 /* 7951 Over estimate (usually grossly over), preallocation count for off-diagonal portion of scalar matrix 7952 */ 7953 for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) { 7954 o_nnz[jj] = 0; 7955 for (kk = 0; kk < bs; kk++) { /* rows that get collapsed to a single row */ 7956 PetscCall(MatGetRow(Oaij, Ii + kk, &ncols, NULL, NULL)); 7957 o_nnz[jj] += ncols; 7958 PetscCall(MatRestoreRow(Oaij, Ii + kk, &ncols, NULL, NULL)); 7959 } 7960 if (o_nnz[jj] > (NN / bs - nloc)) o_nnz[jj] = NN / bs - nloc; 7961 } 7962 } else SETERRQ(comm, PETSC_ERR_USER, "Require AIJ matrix type"); 7963 /* get scalar copy (norms) of matrix */ 7964 PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz)); 7965 PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz)); 7966 PetscCall(PetscFree2(d_nnz, o_nnz)); 7967 for (Ii = Istart; Ii < Iend; Ii++) { 7968 PetscInt dest_row = Ii / bs; 7969 PetscCall(MatGetRow(Amat, Ii, &ncols, &idx, &vals)); 7970 for (jj = 0; jj < ncols; jj++) { 7971 PetscInt dest_col = idx[jj] / bs; 7972 PetscScalar sv = PetscAbs(PetscRealPart(vals[jj])); 7973 PetscCall(MatSetValues(Gmat, 1, &dest_row, 1, &dest_col, &sv, ADD_VALUES)); 7974 } 7975 PetscCall(MatRestoreRow(Amat, Ii, &ncols, &idx, &vals)); 7976 } 7977 PetscCall(MatAssemblyBegin(Gmat, 
MAT_FINAL_ASSEMBLY)); 7978 PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY)); 7979 } 7980 } else { 7981 if (symmetrize || filter >= 0 || scale) PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat)); 7982 else { 7983 Gmat = Amat; 7984 PetscCall(PetscObjectReference((PetscObject)Gmat)); 7985 } 7986 if (isseqaij) { 7987 a = Gmat; 7988 b = NULL; 7989 } else { 7990 Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data; 7991 a = d->A; 7992 b = d->B; 7993 } 7994 if (filter >= 0 || scale) { 7995 /* take absolute value of each entry */ 7996 for (c = a, kk = 0; c && kk < 2; c = b, kk++) { 7997 MatInfo info; 7998 PetscScalar *avals; 7999 PetscCall(MatGetInfo(c, MAT_LOCAL, &info)); 8000 PetscCall(MatSeqAIJGetArray(c, &avals)); 8001 for (int jj = 0; jj < info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]); 8002 PetscCall(MatSeqAIJRestoreArray(c, &avals)); 8003 } 8004 } 8005 } 8006 if (symmetrize) { 8007 PetscBool isset, issym; 8008 PetscCall(MatIsSymmetricKnown(Amat, &isset, &issym)); 8009 if (!isset || !issym) { 8010 Mat matTrans; 8011 PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans)); 8012 PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric == PETSC_BOOL3_TRUE ? SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN)); 8013 PetscCall(MatDestroy(&matTrans)); 8014 } 8015 PetscCall(MatSetOption(Gmat, MAT_SYMMETRIC, PETSC_TRUE)); 8016 } else if (Amat != Gmat) PetscCall(MatPropagateSymmetryOptions(Amat, Gmat)); 8017 if (scale) { 8018 /* scale c for all diagonal values = 1 or -1 */ 8019 Vec diag; 8020 PetscCall(MatCreateVecs(Gmat, &diag, NULL)); 8021 PetscCall(MatGetDiagonal(Gmat, diag)); 8022 PetscCall(VecReciprocal(diag)); 8023 PetscCall(VecSqrtAbs(diag)); 8024 PetscCall(MatDiagonalScale(Gmat, diag, diag)); 8025 PetscCall(VecDestroy(&diag)); 8026 } 8027 PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view")); 8028 8029 if (filter >= 0) { 8030 PetscCall(MatFilter(Gmat, filter, PETSC_TRUE, PETSC_TRUE)); 8031 PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_filter_graph_view")); 8032 } 8033 *a_Gmat = Gmat; 8034 PetscFunctionReturn(PETSC_SUCCESS); 8035 } 8036 8037 /* 8038 Special version for direct calls from Fortran 8039 */ 8040 8041 /* Change these macros so can be used in void function */ 8042 /* Identical to PetscCallVoid, except it assigns to *_ierr */ 8043 #undef PetscCall 8044 #define PetscCall(...) \ 8045 do { \ 8046 PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \ 8047 if (PetscUnlikely(ierr_msv_mpiaij)) { \ 8048 *_ierr = PetscError(PETSC_COMM_SELF, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr_msv_mpiaij, PETSC_ERROR_REPEAT, " "); \ 8049 return; \ 8050 } \ 8051 } while (0) 8052 8053 #undef SETERRQ 8054 #define SETERRQ(comm, ierr, ...) 
\ 8055 do { \ 8056 *_ierr = PetscError(comm, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr, PETSC_ERROR_INITIAL, __VA_ARGS__); \ 8057 return; \ 8058 } while (0) 8059 8060 #if defined(PETSC_HAVE_FORTRAN_CAPS) 8061 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 8062 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 8063 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 8064 #else 8065 #endif 8066 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat, PetscInt *mm, const PetscInt im[], PetscInt *mn, const PetscInt in[], const PetscScalar v[], InsertMode *maddv, PetscErrorCode *_ierr) 8067 { 8068 Mat mat = *mmat; 8069 PetscInt m = *mm, n = *mn; 8070 InsertMode addv = *maddv; 8071 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 8072 PetscScalar value; 8073 8074 MatCheckPreallocated(mat, 1); 8075 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 8076 else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values"); 8077 { 8078 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend; 8079 PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col; 8080 PetscBool roworiented = aij->roworiented; 8081 8082 /* Some Variables required in the macro */ 8083 Mat A = aij->A; 8084 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 8085 PetscInt *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j; 8086 MatScalar *aa; 8087 PetscBool ignorezeroentries = ((a->ignorezeroentries && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 8088 Mat B = aij->B; 8089 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 8090 PetscInt *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n; 8091 MatScalar *ba; 8092 /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we 8093 * cannot use "#if defined" inside a macro. 
*/ 8094 PETSC_UNUSED PetscBool inserted = PETSC_FALSE; 8095 8096 PetscInt *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2; 8097 PetscInt nonew = a->nonew; 8098 MatScalar *ap1, *ap2; 8099 8100 PetscFunctionBegin; 8101 PetscCall(MatSeqAIJGetArray(A, &aa)); 8102 PetscCall(MatSeqAIJGetArray(B, &ba)); 8103 for (i = 0; i < m; i++) { 8104 if (im[i] < 0) continue; 8105 PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1); 8106 if (im[i] >= rstart && im[i] < rend) { 8107 row = im[i] - rstart; 8108 lastcol1 = -1; 8109 rp1 = aj + ai[row]; 8110 ap1 = aa + ai[row]; 8111 rmax1 = aimax[row]; 8112 nrow1 = ailen[row]; 8113 low1 = 0; 8114 high1 = nrow1; 8115 lastcol2 = -1; 8116 rp2 = bj + bi[row]; 8117 ap2 = ba + bi[row]; 8118 rmax2 = bimax[row]; 8119 nrow2 = bilen[row]; 8120 low2 = 0; 8121 high2 = nrow2; 8122 8123 for (j = 0; j < n; j++) { 8124 if (roworiented) value = v[i * n + j]; 8125 else value = v[i + j * m]; 8126 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 8127 if (in[j] >= cstart && in[j] < cend) { 8128 col = in[j] - cstart; 8129 MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]); 8130 } else if (in[j] < 0) continue; 8131 else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) { 8132 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1); 8133 } else { 8134 if (mat->was_assembled) { 8135 if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 8136 #if defined(PETSC_USE_CTABLE) 8137 PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); 8138 col--; 8139 #else 8140 col = aij->colmap[in[j]] - 1; 8141 #endif 8142 if (col < 0 && !((Mat_SeqAIJ *)aij->A->data)->nonew) { 8143 PetscCall(MatDisAssemble_MPIAIJ(mat)); 8144 col = in[j]; 8145 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 8146 B = aij->B; 8147 b = (Mat_SeqAIJ *)B->data; 8148 bimax = b->imax; 8149 bi = b->i; 8150 bilen = b->ilen; 8151 bj = b->j; 8152 rp2 = bj + bi[row]; 8153 ap2 = ba + bi[row]; 8154 rmax2 = bimax[row]; 8155 nrow2 = bilen[row]; 8156 low2 = 0; 8157 high2 = nrow2; 8158 bm = aij->B->rmap->n; 8159 ba = b->a; 8160 inserted = PETSC_FALSE; 8161 } 8162 } else col = in[j]; 8163 MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]); 8164 } 8165 } 8166 } else if (!aij->donotstash) { 8167 if (roworiented) { 8168 PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 8169 } else { 8170 PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 8171 } 8172 } 8173 } 8174 PetscCall(MatSeqAIJRestoreArray(A, &aa)); 8175 PetscCall(MatSeqAIJRestoreArray(B, &ba)); 8176 } 8177 PetscFunctionReturnVoid(); 8178 } 8179 8180 /* Undefining these here since they were redefined from their original definition above! No 8181 * other PETSc functions should be defined past this point, as it is impossible to recover the 8182 * original definitions */ 8183 #undef PetscCall 8184 #undef SETERRQ 8185
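
/*
   Illustrative usage sketch (not part of the library, and not asserting this is the only path):
   the symbolic/numeric split implemented by MatProductSymbolic_MPIAIJBACKEND() and
   MatProductNumeric_MPIAIJBACKEND() is normally reached through the generic MatProduct API.
   Assuming A and P are already assembled MPIAIJ (or device AIJ) matrices, a PtAP product might
   be driven as follows:

     Mat C;
     PetscCall(MatProductCreate(A, P, NULL, &C));
     PetscCall(MatProductSetType(C, MATPRODUCT_PtAP));
     PetscCall(MatProductSetFromOptions(C));  // may select the backend symbolic routine above
     PetscCall(MatProductSymbolic(C));        // builds the COO pattern and the PetscSF for off-process insertion
     PetscCall(MatProductNumeric(C));         // fills the values through MatSetValuesCOO()
     // ... after A or P change numerically but keep the same nonzero structure:
     PetscCall(MatProductNumeric(C));
     PetscCall(MatDestroy(&C));

   Options handled above include -matptap_backend_pothbind and -matmatmult_backend_mergeB
   (or the -mat_product_algorithm_backend_* spellings when the product is not created through
   the user-level API), plus the *_backend_cpu options in MatProductSetFromOptions_MPIAIJBACKEND().
*/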