1 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 2 #include <petsc/private/vecimpl.h> 3 #include <petsc/private/sfimpl.h> 4 #include <petsc/private/isimpl.h> 5 #include <petscblaslapack.h> 6 #include <petscsf.h> 7 #include <petsc/private/hashmapi.h> 8 9 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 10 { 11 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 12 13 PetscFunctionBegin; 14 PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N)); 15 PetscCall(MatStashDestroy_Private(&mat->stash)); 16 PetscCall(VecDestroy(&aij->diag)); 17 PetscCall(MatDestroy(&aij->A)); 18 PetscCall(MatDestroy(&aij->B)); 19 #if defined(PETSC_USE_CTABLE) 20 PetscCall(PetscHMapIDestroy(&aij->colmap)); 21 #else 22 PetscCall(PetscFree(aij->colmap)); 23 #endif 24 PetscCall(PetscFree(aij->garray)); 25 PetscCall(VecDestroy(&aij->lvec)); 26 PetscCall(VecScatterDestroy(&aij->Mvctx)); 27 PetscCall(PetscFree2(aij->rowvalues, aij->rowindices)); 28 PetscCall(PetscFree(aij->ld)); 29 30 PetscCall(PetscFree(mat->data)); 31 32 /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */ 33 PetscCall(PetscObjectCompose((PetscObject)mat, "MatMergeSeqsToMPI", NULL)); 34 35 PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL)); 36 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL)); 37 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL)); 38 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatIsTranspose_C", NULL)); 39 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocation_C", NULL)); 40 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetPreallocation_C", NULL)); 41 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocationCSR_C", NULL)); 42 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL)); 43 PetscCall(PetscObjectComposeFunction((PetscObject)mat, 
"MatConvert_mpiaij_mpibaij_C", NULL)); 44 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisbaij_C", NULL)); 45 #if defined(PETSC_HAVE_CUDA) 46 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcusparse_C", NULL)); 47 #endif 48 #if defined(PETSC_HAVE_HIP) 49 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijhipsparse_C", NULL)); 50 #endif 51 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 52 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijkokkos_C", NULL)); 53 #endif 54 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL)); 55 #if defined(PETSC_HAVE_ELEMENTAL) 56 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_elemental_C", NULL)); 57 #endif 58 #if defined(PETSC_HAVE_SCALAPACK) 59 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_scalapack_C", NULL)); 60 #endif 61 #if defined(PETSC_HAVE_HYPRE) 62 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_hypre_C", NULL)); 63 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", NULL)); 64 #endif 65 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL)); 66 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_is_mpiaij_C", NULL)); 67 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpiaij_C", NULL)); 68 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetUseScalableIncreaseOverlap_C", NULL)); 69 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijperm_C", NULL)); 70 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijsell_C", NULL)); 71 #if defined(PETSC_HAVE_MKL_SPARSE) 72 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijmkl_C", NULL)); 73 #endif 74 
PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcrl_C", NULL)); 75 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL)); 76 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisell_C", NULL)); 77 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetPreallocationCOO_C", NULL)); 78 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetValuesCOO_C", NULL)); 79 PetscFunctionReturn(PETSC_SUCCESS); 80 } 81 82 /* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and MatAssemblyEnd_MPI_Hash() */ 83 #define TYPE AIJ 84 #define TYPE_AIJ 85 #include "../src/mat/impls/aij/mpi/mpihashmat.h" 86 #undef TYPE 87 #undef TYPE_AIJ 88 89 static PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) 90 { 91 Mat B; 92 93 PetscFunctionBegin; 94 PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &B)); 95 PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject)B)); 96 PetscCall(MatGetRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done)); 97 PetscCall(MatDestroy(&B)); 98 PetscFunctionReturn(PETSC_SUCCESS); 99 } 100 101 static PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) 102 { 103 Mat B; 104 105 PetscFunctionBegin; 106 PetscCall(PetscObjectQuery((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject *)&B)); 107 PetscCall(MatRestoreRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done)); 108 PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", NULL)); 109 PetscFunctionReturn(PETSC_SUCCESS); 110 } 111 112 /*MC 113 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 

   This matrix type is identical to `MATSEQAIJ` when constructed with a single process communicator,
   and `MATMPIAIJ` otherwise.  As a result, for single process communicators,
   `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Key:
. -mat_type aij - sets the matrix type to `MATAIJ` during a call to `MatSetFromOptions()`

   Level: beginner

   Developer Note:
   Subclasses include `MATAIJCUSPARSE`, `MATAIJPERM`, `MATAIJSELL`, `MATAIJMKL`, `MATAIJCRL`, `MATAIJKOKKOS`, and also automatically switches over to use inodes when
   enough exist.

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateAIJ()`, `MatCreateSeqAIJ()`
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to `MATSEQAIJCRL` when constructed with a single process communicator,
   and `MATMPIAIJCRL` otherwise.  As a result, for single process communicators,
   `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
   for communicators controlling multiple processes.  It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Key:
.
-mat_type aijcrl - sets the matrix type to `MATMPIAIJCRL` during a call to `MatSetFromOptions()` 144 145 Level: beginner 146 147 .seealso: [](ch_matrices), `Mat`, `MatCreateMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL` 148 M*/ 149 150 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A, PetscBool flg) 151 { 152 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 153 154 PetscFunctionBegin; 155 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) || defined(PETSC_HAVE_VIENNACL) 156 A->boundtocpu = flg; 157 #endif 158 if (a->A) PetscCall(MatBindToCPU(a->A, flg)); 159 if (a->B) PetscCall(MatBindToCPU(a->B, flg)); 160 161 /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products. 162 * This maybe seems a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors 163 * to differ from the parent matrix. */ 164 if (a->lvec) PetscCall(VecBindToCPU(a->lvec, flg)); 165 if (a->diag) PetscCall(VecBindToCPU(a->diag, flg)); 166 PetscFunctionReturn(PETSC_SUCCESS); 167 } 168 169 static PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs) 170 { 171 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data; 172 173 PetscFunctionBegin; 174 if (mat->A) { 175 PetscCall(MatSetBlockSizes(mat->A, rbs, cbs)); 176 PetscCall(MatSetBlockSizes(mat->B, rbs, 1)); 177 } 178 PetscFunctionReturn(PETSC_SUCCESS); 179 } 180 181 static PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M, IS *keptrows) 182 { 183 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data; 184 Mat_SeqAIJ *a = (Mat_SeqAIJ *)mat->A->data; 185 Mat_SeqAIJ *b = (Mat_SeqAIJ *)mat->B->data; 186 const PetscInt *ia, *ib; 187 const MatScalar *aa, *bb, *aav, *bav; 188 PetscInt na, nb, i, j, *rows, cnt = 0, n0rows; 189 PetscInt m = M->rmap->n, rstart = M->rmap->rstart; 190 191 PetscFunctionBegin; 192 *keptrows = NULL; 193 194 ia = a->i; 195 ib = b->i; 196 PetscCall(MatSeqAIJGetArrayRead(mat->A, &aav)); 197 
PetscCall(MatSeqAIJGetArrayRead(mat->B, &bav)); 198 for (i = 0; i < m; i++) { 199 na = ia[i + 1] - ia[i]; 200 nb = ib[i + 1] - ib[i]; 201 if (!na && !nb) { 202 cnt++; 203 goto ok1; 204 } 205 aa = aav + ia[i]; 206 for (j = 0; j < na; j++) { 207 if (aa[j] != 0.0) goto ok1; 208 } 209 bb = PetscSafePointerPlusOffset(bav, ib[i]); 210 for (j = 0; j < nb; j++) { 211 if (bb[j] != 0.0) goto ok1; 212 } 213 cnt++; 214 ok1:; 215 } 216 PetscCall(MPIU_Allreduce(&cnt, &n0rows, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)M))); 217 if (!n0rows) { 218 PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav)); 219 PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav)); 220 PetscFunctionReturn(PETSC_SUCCESS); 221 } 222 PetscCall(PetscMalloc1(M->rmap->n - cnt, &rows)); 223 cnt = 0; 224 for (i = 0; i < m; i++) { 225 na = ia[i + 1] - ia[i]; 226 nb = ib[i + 1] - ib[i]; 227 if (!na && !nb) continue; 228 aa = aav + ia[i]; 229 for (j = 0; j < na; j++) { 230 if (aa[j] != 0.0) { 231 rows[cnt++] = rstart + i; 232 goto ok2; 233 } 234 } 235 bb = PetscSafePointerPlusOffset(bav, ib[i]); 236 for (j = 0; j < nb; j++) { 237 if (bb[j] != 0.0) { 238 rows[cnt++] = rstart + i; 239 goto ok2; 240 } 241 } 242 ok2:; 243 } 244 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), cnt, rows, PETSC_OWN_POINTER, keptrows)); 245 PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav)); 246 PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav)); 247 PetscFunctionReturn(PETSC_SUCCESS); 248 } 249 250 static PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y, Vec D, InsertMode is) 251 { 252 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Y->data; 253 PetscBool cong; 254 255 PetscFunctionBegin; 256 PetscCall(MatHasCongruentLayouts(Y, &cong)); 257 if (Y->assembled && cong) { 258 PetscCall(MatDiagonalSet(aij->A, D, is)); 259 } else { 260 PetscCall(MatDiagonalSet_Default(Y, D, is)); 261 } 262 PetscFunctionReturn(PETSC_SUCCESS); 263 } 264 265 static PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M, IS *zrows) 266 { 267 Mat_MPIAIJ *aij = (Mat_MPIAIJ 
*)M->data; 268 PetscInt i, rstart, nrows, *rows; 269 270 PetscFunctionBegin; 271 *zrows = NULL; 272 PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A, &nrows, &rows)); 273 PetscCall(MatGetOwnershipRange(M, &rstart, NULL)); 274 for (i = 0; i < nrows; i++) rows[i] += rstart; 275 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), nrows, rows, PETSC_OWN_POINTER, zrows)); 276 PetscFunctionReturn(PETSC_SUCCESS); 277 } 278 279 static PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A, PetscInt type, PetscReal *reductions) 280 { 281 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)A->data; 282 PetscInt i, m, n, *garray = aij->garray; 283 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ *)aij->A->data; 284 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ *)aij->B->data; 285 PetscReal *work; 286 const PetscScalar *dummy; 287 288 PetscFunctionBegin; 289 PetscCall(MatGetSize(A, &m, &n)); 290 PetscCall(PetscCalloc1(n, &work)); 291 PetscCall(MatSeqAIJGetArrayRead(aij->A, &dummy)); 292 PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &dummy)); 293 PetscCall(MatSeqAIJGetArrayRead(aij->B, &dummy)); 294 PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &dummy)); 295 if (type == NORM_2) { 296 for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i] * a_aij->a[i]); 297 for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i] * b_aij->a[i]); 298 } else if (type == NORM_1) { 299 for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 300 for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 301 } else if (type == NORM_INFINITY) { 302 for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 303 for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]), 
work[garray[b_aij->j[i]]]); 304 } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) { 305 for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]); 306 for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]); 307 } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) { 308 for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]); 309 for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]); 310 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type"); 311 if (type == NORM_INFINITY) { 312 PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A))); 313 } else { 314 PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A))); 315 } 316 PetscCall(PetscFree(work)); 317 if (type == NORM_2) { 318 for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]); 319 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 320 for (i = 0; i < n; i++) reductions[i] /= m; 321 } 322 PetscFunctionReturn(PETSC_SUCCESS); 323 } 324 325 static PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A, IS *is) 326 { 327 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 328 IS sis, gis; 329 const PetscInt *isis, *igis; 330 PetscInt n, *iis, nsis, ngis, rstart, i; 331 332 PetscFunctionBegin; 333 PetscCall(MatFindOffBlockDiagonalEntries(a->A, &sis)); 334 PetscCall(MatFindNonzeroRows(a->B, &gis)); 335 PetscCall(ISGetSize(gis, &ngis)); 336 PetscCall(ISGetSize(sis, &nsis)); 337 PetscCall(ISGetIndices(sis, &isis)); 338 PetscCall(ISGetIndices(gis, &igis)); 339 340 PetscCall(PetscMalloc1(ngis + nsis, &iis)); 341 PetscCall(PetscArraycpy(iis, igis, ngis)); 342 
PetscCall(PetscArraycpy(iis + ngis, isis, nsis)); 343 n = ngis + nsis; 344 PetscCall(PetscSortRemoveDupsInt(&n, iis)); 345 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 346 for (i = 0; i < n; i++) iis[i] += rstart; 347 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A), n, iis, PETSC_OWN_POINTER, is)); 348 349 PetscCall(ISRestoreIndices(sis, &isis)); 350 PetscCall(ISRestoreIndices(gis, &igis)); 351 PetscCall(ISDestroy(&sis)); 352 PetscCall(ISDestroy(&gis)); 353 PetscFunctionReturn(PETSC_SUCCESS); 354 } 355 356 /* 357 Local utility routine that creates a mapping from the global column 358 number to the local number in the off-diagonal part of the local 359 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 360 a slightly higher hash table cost; without it it is not scalable (each processor 361 has an order N integer array but is fast to access. 362 */ 363 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 364 { 365 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 366 PetscInt n = aij->B->cmap->n, i; 367 368 PetscFunctionBegin; 369 PetscCheck(!n || aij->garray, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MPIAIJ Matrix was assembled but is missing garray"); 370 #if defined(PETSC_USE_CTABLE) 371 PetscCall(PetscHMapICreateWithSize(n, &aij->colmap)); 372 for (i = 0; i < n; i++) PetscCall(PetscHMapISet(aij->colmap, aij->garray[i] + 1, i + 1)); 373 #else 374 PetscCall(PetscCalloc1(mat->cmap->N + 1, &aij->colmap)); 375 for (i = 0; i < n; i++) aij->colmap[aij->garray[i]] = i + 1; 376 #endif 377 PetscFunctionReturn(PETSC_SUCCESS); 378 } 379 380 #define MatSetValues_SeqAIJ_A_Private(row, col, value, addv, orow, ocol) \ 381 do { \ 382 if (col <= lastcol1) low1 = 0; \ 383 else high1 = nrow1; \ 384 lastcol1 = col; \ 385 while (high1 - low1 > 5) { \ 386 t = (low1 + high1) / 2; \ 387 if (rp1[t] > col) high1 = t; \ 388 else low1 = t; \ 389 } \ 390 for (_i = low1; _i < high1; _i++) { \ 391 if (rp1[_i] > col) break; \ 392 if (rp1[_i] == col) { \ 393 if (addv == 
ADD_VALUES) { \ 394 ap1[_i] += value; \ 395 /* Not sure LogFlops will slow dow the code or not */ \ 396 (void)PetscLogFlops(1.0); \ 397 } else ap1[_i] = value; \ 398 goto a_noinsert; \ 399 } \ 400 } \ 401 if (value == 0.0 && ignorezeroentries && row != col) { \ 402 low1 = 0; \ 403 high1 = nrow1; \ 404 goto a_noinsert; \ 405 } \ 406 if (nonew == 1) { \ 407 low1 = 0; \ 408 high1 = nrow1; \ 409 goto a_noinsert; \ 410 } \ 411 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 412 MatSeqXAIJReallocateAIJ(A, am, 1, nrow1, row, col, rmax1, aa, ai, aj, rp1, ap1, aimax, nonew, MatScalar); \ 413 N = nrow1++ - 1; \ 414 a->nz++; \ 415 high1++; \ 416 /* shift up all the later entries in this row */ \ 417 PetscCall(PetscArraymove(rp1 + _i + 1, rp1 + _i, N - _i + 1)); \ 418 PetscCall(PetscArraymove(ap1 + _i + 1, ap1 + _i, N - _i + 1)); \ 419 rp1[_i] = col; \ 420 ap1[_i] = value; \ 421 A->nonzerostate++; \ 422 a_noinsert:; \ 423 ailen[row] = nrow1; \ 424 } while (0) 425 426 #define MatSetValues_SeqAIJ_B_Private(row, col, value, addv, orow, ocol) \ 427 do { \ 428 if (col <= lastcol2) low2 = 0; \ 429 else high2 = nrow2; \ 430 lastcol2 = col; \ 431 while (high2 - low2 > 5) { \ 432 t = (low2 + high2) / 2; \ 433 if (rp2[t] > col) high2 = t; \ 434 else low2 = t; \ 435 } \ 436 for (_i = low2; _i < high2; _i++) { \ 437 if (rp2[_i] > col) break; \ 438 if (rp2[_i] == col) { \ 439 if (addv == ADD_VALUES) { \ 440 ap2[_i] += value; \ 441 (void)PetscLogFlops(1.0); \ 442 } else ap2[_i] = value; \ 443 goto b_noinsert; \ 444 } \ 445 } \ 446 if (value == 0.0 && ignorezeroentries) { \ 447 low2 = 0; \ 448 high2 = nrow2; \ 449 goto b_noinsert; \ 450 } \ 451 if (nonew == 1) { \ 452 low2 = 0; \ 453 high2 = nrow2; \ 454 goto b_noinsert; \ 455 } \ 456 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" 
PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 457 MatSeqXAIJReallocateAIJ(B, bm, 1, nrow2, row, col, rmax2, ba, bi, bj, rp2, ap2, bimax, nonew, MatScalar); \ 458 N = nrow2++ - 1; \ 459 b->nz++; \ 460 high2++; \ 461 /* shift up all the later entries in this row */ \ 462 PetscCall(PetscArraymove(rp2 + _i + 1, rp2 + _i, N - _i + 1)); \ 463 PetscCall(PetscArraymove(ap2 + _i + 1, ap2 + _i, N - _i + 1)); \ 464 rp2[_i] = col; \ 465 ap2[_i] = value; \ 466 B->nonzerostate++; \ 467 b_noinsert:; \ 468 bilen[row] = nrow2; \ 469 } while (0) 470 471 static PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A, PetscInt row, const PetscScalar v[]) 472 { 473 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 474 Mat_SeqAIJ *a = (Mat_SeqAIJ *)mat->A->data, *b = (Mat_SeqAIJ *)mat->B->data; 475 PetscInt l, *garray = mat->garray, diag; 476 PetscScalar *aa, *ba; 477 478 PetscFunctionBegin; 479 /* code only works for square matrices A */ 480 481 /* find size of row to the left of the diagonal part */ 482 PetscCall(MatGetOwnershipRange(A, &diag, NULL)); 483 row = row - diag; 484 for (l = 0; l < b->i[row + 1] - b->i[row]; l++) { 485 if (garray[b->j[b->i[row] + l]] > diag) break; 486 } 487 if (l) { 488 PetscCall(MatSeqAIJGetArray(mat->B, &ba)); 489 PetscCall(PetscArraycpy(ba + b->i[row], v, l)); 490 PetscCall(MatSeqAIJRestoreArray(mat->B, &ba)); 491 } 492 493 /* diagonal part */ 494 if (a->i[row + 1] - a->i[row]) { 495 PetscCall(MatSeqAIJGetArray(mat->A, &aa)); 496 PetscCall(PetscArraycpy(aa + a->i[row], v + l, (a->i[row + 1] - a->i[row]))); 497 PetscCall(MatSeqAIJRestoreArray(mat->A, &aa)); 498 } 499 500 /* right of diagonal part */ 501 if (b->i[row + 1] - b->i[row] - l) { 502 PetscCall(MatSeqAIJGetArray(mat->B, &ba)); 503 PetscCall(PetscArraycpy(ba + b->i[row] + l, v + l + a->i[row + 1] - a->i[row], b->i[row + 1] - b->i[row] - l)); 504 PetscCall(MatSeqAIJRestoreArray(mat->B, &ba)); 505 } 506 PetscFunctionReturn(PETSC_SUCCESS); 507 } 508 509 PetscErrorCode MatSetValues_MPIAIJ(Mat mat, 
PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) 510 { 511 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 512 PetscScalar value = 0.0; 513 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend; 514 PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col; 515 PetscBool roworiented = aij->roworiented; 516 517 /* Some Variables required in the macro */ 518 Mat A = aij->A; 519 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 520 PetscInt *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j; 521 PetscBool ignorezeroentries = a->ignorezeroentries; 522 Mat B = aij->B; 523 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 524 PetscInt *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n; 525 MatScalar *aa, *ba; 526 PetscInt *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2; 527 PetscInt nonew; 528 MatScalar *ap1, *ap2; 529 530 PetscFunctionBegin; 531 PetscCall(MatSeqAIJGetArray(A, &aa)); 532 PetscCall(MatSeqAIJGetArray(B, &ba)); 533 for (i = 0; i < m; i++) { 534 if (im[i] < 0) continue; 535 PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1); 536 if (im[i] >= rstart && im[i] < rend) { 537 row = im[i] - rstart; 538 lastcol1 = -1; 539 rp1 = PetscSafePointerPlusOffset(aj, ai[row]); 540 ap1 = PetscSafePointerPlusOffset(aa, ai[row]); 541 rmax1 = aimax[row]; 542 nrow1 = ailen[row]; 543 low1 = 0; 544 high1 = nrow1; 545 lastcol2 = -1; 546 rp2 = PetscSafePointerPlusOffset(bj, bi[row]); 547 ap2 = PetscSafePointerPlusOffset(ba, bi[row]); 548 rmax2 = bimax[row]; 549 nrow2 = bilen[row]; 550 low2 = 0; 551 high2 = nrow2; 552 553 for (j = 0; j < n; j++) { 554 if (v) value = roworiented ? 
v[i * n + j] : v[i + j * m]; 555 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 556 if (in[j] >= cstart && in[j] < cend) { 557 col = in[j] - cstart; 558 nonew = a->nonew; 559 MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]); 560 } else if (in[j] < 0) { 561 continue; 562 } else { 563 PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1); 564 if (mat->was_assembled) { 565 if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 566 #if defined(PETSC_USE_CTABLE) 567 PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); /* map global col ids to local ones */ 568 col--; 569 #else 570 col = aij->colmap[in[j]] - 1; 571 #endif 572 if (col < 0 && !((Mat_SeqAIJ *)aij->B->data)->nonew) { /* col < 0 means in[j] is a new col for B */ 573 PetscCall(MatDisAssemble_MPIAIJ(mat)); /* Change aij->B from reduced/local format to expanded/global format */ 574 col = in[j]; 575 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 576 B = aij->B; 577 b = (Mat_SeqAIJ *)B->data; 578 bimax = b->imax; 579 bi = b->i; 580 bilen = b->ilen; 581 bj = b->j; 582 ba = b->a; 583 rp2 = bj + bi[row]; 584 ap2 = ba + bi[row]; 585 rmax2 = bimax[row]; 586 nrow2 = bilen[row]; 587 low2 = 0; 588 high2 = nrow2; 589 bm = aij->B->rmap->n; 590 ba = b->a; 591 } else if (col < 0 && !(ignorezeroentries && value == 0.0)) { 592 if (1 == ((Mat_SeqAIJ *)aij->B->data)->nonew) { 593 PetscCall(PetscInfo(mat, "Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n", (double)PetscRealPart(value), im[i], in[j])); 594 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]); 595 } 596 } else col = in[j]; 597 nonew = b->nonew; 598 
MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]); 599 } 600 } 601 } else { 602 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]); 603 if (!aij->donotstash) { 604 mat->assembled = PETSC_FALSE; 605 if (roworiented) { 606 PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i * n), (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 607 } else { 608 PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i), m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 609 } 610 } 611 } 612 } 613 PetscCall(MatSeqAIJRestoreArray(A, &aa)); /* aa, bb might have been free'd due to reallocation above. But we don't access them here */ 614 PetscCall(MatSeqAIJRestoreArray(B, &ba)); 615 PetscFunctionReturn(PETSC_SUCCESS); 616 } 617 618 /* 619 This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix. 620 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 621 No off-processor parts off the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE. 
622 */ 623 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[]) 624 { 625 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 626 Mat A = aij->A; /* diagonal part of the matrix */ 627 Mat B = aij->B; /* off-diagonal part of the matrix */ 628 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 629 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 630 PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, col; 631 PetscInt *ailen = a->ilen, *aj = a->j; 632 PetscInt *bilen = b->ilen, *bj = b->j; 633 PetscInt am = aij->A->rmap->n, j; 634 PetscInt diag_so_far = 0, dnz; 635 PetscInt offd_so_far = 0, onz; 636 637 PetscFunctionBegin; 638 /* Iterate over all rows of the matrix */ 639 for (j = 0; j < am; j++) { 640 dnz = onz = 0; 641 /* Iterate over all non-zero columns of the current row */ 642 for (col = mat_i[j]; col < mat_i[j + 1]; col++) { 643 /* If column is in the diagonal */ 644 if (mat_j[col] >= cstart && mat_j[col] < cend) { 645 aj[diag_so_far++] = mat_j[col] - cstart; 646 dnz++; 647 } else { /* off-diagonal entries */ 648 bj[offd_so_far++] = mat_j[col]; 649 onz++; 650 } 651 } 652 ailen[j] = dnz; 653 bilen[j] = onz; 654 } 655 PetscFunctionReturn(PETSC_SUCCESS); 656 } 657 658 /* 659 This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix. 660 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 661 No off-processor parts off the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ. 662 Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart; 663 would not be true and the more complex MatSetValues_MPIAIJ has to be used. 
664 */ 665 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[], const PetscScalar mat_a[]) 666 { 667 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 668 Mat A = aij->A; /* diagonal part of the matrix */ 669 Mat B = aij->B; /* off-diagonal part of the matrix */ 670 Mat_SeqAIJ *aijd = (Mat_SeqAIJ *)aij->A->data, *aijo = (Mat_SeqAIJ *)aij->B->data; 671 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 672 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 673 PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend; 674 PetscInt *ailen = a->ilen, *aj = a->j; 675 PetscInt *bilen = b->ilen, *bj = b->j; 676 PetscInt am = aij->A->rmap->n, j; 677 PetscInt *full_diag_i = aijd->i, *full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point. */ 678 PetscInt col, dnz_row, onz_row, rowstart_diag, rowstart_offd; 679 PetscScalar *aa = a->a, *ba = b->a; 680 681 PetscFunctionBegin; 682 /* Iterate over all rows of the matrix */ 683 for (j = 0; j < am; j++) { 684 dnz_row = onz_row = 0; 685 rowstart_offd = full_offd_i[j]; 686 rowstart_diag = full_diag_i[j]; 687 /* Iterate over all non-zero columns of the current row */ 688 for (col = mat_i[j]; col < mat_i[j + 1]; col++) { 689 /* If column is in the diagonal */ 690 if (mat_j[col] >= cstart && mat_j[col] < cend) { 691 aj[rowstart_diag + dnz_row] = mat_j[col] - cstart; 692 aa[rowstart_diag + dnz_row] = mat_a[col]; 693 dnz_row++; 694 } else { /* off-diagonal entries */ 695 bj[rowstart_offd + onz_row] = mat_j[col]; 696 ba[rowstart_offd + onz_row] = mat_a[col]; 697 onz_row++; 698 } 699 } 700 ailen[j] = dnz_row; 701 bilen[j] = onz_row; 702 } 703 PetscFunctionReturn(PETSC_SUCCESS); 704 } 705 706 static PetscErrorCode MatGetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[]) 707 { 708 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 709 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend; 710 
PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col; 711 712 PetscFunctionBegin; 713 for (i = 0; i < m; i++) { 714 if (idxm[i] < 0) continue; /* negative row */ 715 PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1); 716 PetscCheck(idxm[i] >= rstart && idxm[i] < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported, row requested %" PetscInt_FMT " range [%" PetscInt_FMT " %" PetscInt_FMT ")", idxm[i], rstart, rend); 717 row = idxm[i] - rstart; 718 for (j = 0; j < n; j++) { 719 if (idxn[j] < 0) continue; /* negative column */ 720 PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1); 721 if (idxn[j] >= cstart && idxn[j] < cend) { 722 col = idxn[j] - cstart; 723 PetscCall(MatGetValues(aij->A, 1, &row, 1, &col, v + i * n + j)); 724 } else { 725 if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 726 #if defined(PETSC_USE_CTABLE) 727 PetscCall(PetscHMapIGetWithDefault(aij->colmap, idxn[j] + 1, 0, &col)); 728 col--; 729 #else 730 col = aij->colmap[idxn[j]] - 1; 731 #endif 732 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v + i * n + j) = 0.0; 733 else PetscCall(MatGetValues(aij->B, 1, &row, 1, &col, v + i * n + j)); 734 } 735 } 736 } 737 PetscFunctionReturn(PETSC_SUCCESS); 738 } 739 740 static PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat, MatAssemblyType mode) 741 { 742 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 743 PetscInt nstash, reallocs; 744 745 PetscFunctionBegin; 746 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS); 747 748 PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range)); 749 PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs)); 750 PetscCall(PetscInfo(aij->A, "Stash has %" PetscInt_FMT " entries, uses %" 
PetscInt_FMT " mallocs.\n", nstash, reallocs)); 751 PetscFunctionReturn(PETSC_SUCCESS); 752 } 753 754 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat, MatAssemblyType mode) 755 { 756 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 757 PetscMPIInt n; 758 PetscInt i, j, rstart, ncols, flg; 759 PetscInt *row, *col; 760 PetscBool other_disassembled; 761 PetscScalar *val; 762 763 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 764 765 PetscFunctionBegin; 766 if (!aij->donotstash && !mat->nooffprocentries) { 767 while (1) { 768 PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg)); 769 if (!flg) break; 770 771 for (i = 0; i < n;) { 772 /* Now identify the consecutive vals belonging to the same row */ 773 for (j = i, rstart = row[j]; j < n; j++) { 774 if (row[j] != rstart) break; 775 } 776 if (j < n) ncols = j - i; 777 else ncols = n - i; 778 /* Now assemble all these values with a single function call */ 779 PetscCall(MatSetValues_MPIAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode)); 780 i = j; 781 } 782 } 783 PetscCall(MatStashScatterEnd_Private(&mat->stash)); 784 } 785 #if defined(PETSC_HAVE_DEVICE) 786 if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU; 787 /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */ 788 if (mat->boundtocpu) { 789 PetscCall(MatBindToCPU(aij->A, PETSC_TRUE)); 790 PetscCall(MatBindToCPU(aij->B, PETSC_TRUE)); 791 } 792 #endif 793 PetscCall(MatAssemblyBegin(aij->A, mode)); 794 PetscCall(MatAssemblyEnd(aij->A, mode)); 795 796 /* determine if any processor has disassembled, if so we must 797 also disassemble ourself, in order that we may reassemble. 
*/ 798 /* 799 if nonzero structure of submatrix B cannot change then we know that 800 no processor disassembled thus we can skip this stuff 801 */ 802 if (!((Mat_SeqAIJ *)aij->B->data)->nonew) { 803 PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 804 if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globally it does not */ 805 PetscCall(MatDisAssemble_MPIAIJ(mat)); 806 } 807 } 808 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIAIJ(mat)); 809 PetscCall(MatSetOption(aij->B, MAT_USE_INODES, PETSC_FALSE)); 810 #if defined(PETSC_HAVE_DEVICE) 811 if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU; 812 #endif 813 PetscCall(MatAssemblyBegin(aij->B, mode)); 814 PetscCall(MatAssemblyEnd(aij->B, mode)); 815 816 PetscCall(PetscFree2(aij->rowvalues, aij->rowindices)); 817 818 aij->rowvalues = NULL; 819 820 PetscCall(VecDestroy(&aij->diag)); 821 822 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 823 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ *)aij->A->data)->nonew) { 824 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 825 PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat))); 826 } 827 #if defined(PETSC_HAVE_DEVICE) 828 mat->offloadmask = PETSC_OFFLOAD_BOTH; 829 #endif 830 PetscFunctionReturn(PETSC_SUCCESS); 831 } 832 833 static PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 834 { 835 Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data; 836 837 PetscFunctionBegin; 838 PetscCall(MatZeroEntries(l->A)); 839 PetscCall(MatZeroEntries(l->B)); 840 PetscFunctionReturn(PETSC_SUCCESS); 841 } 842 843 static PetscErrorCode MatZeroRows_MPIAIJ(Mat A, PetscInt N, const 
PetscInt rows[], PetscScalar diag, Vec x, Vec b) 844 { 845 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 846 PetscInt *lrows; 847 PetscInt r, len; 848 PetscBool cong; 849 850 PetscFunctionBegin; 851 /* get locally owned rows */ 852 PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows)); 853 PetscCall(MatHasCongruentLayouts(A, &cong)); 854 /* fix right-hand side if needed */ 855 if (x && b) { 856 const PetscScalar *xx; 857 PetscScalar *bb; 858 859 PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout"); 860 PetscCall(VecGetArrayRead(x, &xx)); 861 PetscCall(VecGetArray(b, &bb)); 862 for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]]; 863 PetscCall(VecRestoreArrayRead(x, &xx)); 864 PetscCall(VecRestoreArray(b, &bb)); 865 } 866 867 if (diag != 0.0 && cong) { 868 PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL)); 869 PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 870 } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */ 871 Mat_SeqAIJ *aijA = (Mat_SeqAIJ *)mat->A->data; 872 Mat_SeqAIJ *aijB = (Mat_SeqAIJ *)mat->B->data; 873 PetscInt nnwA, nnwB; 874 PetscBool nnzA, nnzB; 875 876 nnwA = aijA->nonew; 877 nnwB = aijB->nonew; 878 nnzA = aijA->keepnonzeropattern; 879 nnzB = aijB->keepnonzeropattern; 880 if (!nnzA) { 881 PetscCall(PetscInfo(mat->A, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n")); 882 aijA->nonew = 0; 883 } 884 if (!nnzB) { 885 PetscCall(PetscInfo(mat->B, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n")); 886 aijB->nonew = 0; 887 } 888 /* Must zero here before the next loop */ 889 PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL)); 890 PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 891 for (r = 0; r < len; ++r) { 892 const PetscInt row = lrows[r] + 
A->rmap->rstart; 893 if (row >= A->cmap->N) continue; 894 PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES)); 895 } 896 aijA->nonew = nnwA; 897 aijB->nonew = nnwB; 898 } else { 899 PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL)); 900 PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 901 } 902 PetscCall(PetscFree(lrows)); 903 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 904 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 905 906 /* only change matrix nonzero state if pattern was allowed to be changed */ 907 if (!((Mat_SeqAIJ *)mat->A->data)->keepnonzeropattern || !((Mat_SeqAIJ *)mat->A->data)->nonew) { 908 PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate; 909 PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A))); 910 } 911 PetscFunctionReturn(PETSC_SUCCESS); 912 } 913 914 static PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 915 { 916 Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data; 917 PetscMPIInt n = A->rmap->n; 918 PetscInt i, j, r, m, len = 0; 919 PetscInt *lrows, *owners = A->rmap->range; 920 PetscMPIInt p = 0; 921 PetscSFNode *rrows; 922 PetscSF sf; 923 const PetscScalar *xx; 924 PetscScalar *bb, *mask, *aij_a; 925 Vec xmask, lmask; 926 Mat_SeqAIJ *aij = (Mat_SeqAIJ *)l->B->data; 927 const PetscInt *aj, *ii, *ridx; 928 PetscScalar *aa; 929 930 PetscFunctionBegin; 931 /* Create SF where leaves are input rows and roots are owned rows */ 932 PetscCall(PetscMalloc1(n, &lrows)); 933 for (r = 0; r < n; ++r) lrows[r] = -1; 934 PetscCall(PetscMalloc1(N, &rrows)); 935 for (r = 0; r < N; ++r) { 936 const PetscInt idx = rows[r]; 937 PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N); 938 if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this 
row too */ 939 PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p)); 940 } 941 rrows[r].rank = p; 942 rrows[r].index = rows[r] - owners[p]; 943 } 944 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 945 PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER)); 946 /* Collect flags for rows to be zeroed */ 947 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR)); 948 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR)); 949 PetscCall(PetscSFDestroy(&sf)); 950 /* Compress and put in row numbers */ 951 for (r = 0; r < n; ++r) 952 if (lrows[r] >= 0) lrows[len++] = r; 953 /* zero diagonal part of matrix */ 954 PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b)); 955 /* handle off-diagonal part of matrix */ 956 PetscCall(MatCreateVecs(A, &xmask, NULL)); 957 PetscCall(VecDuplicate(l->lvec, &lmask)); 958 PetscCall(VecGetArray(xmask, &bb)); 959 for (i = 0; i < len; i++) bb[lrows[i]] = 1; 960 PetscCall(VecRestoreArray(xmask, &bb)); 961 PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD)); 962 PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD)); 963 PetscCall(VecDestroy(&xmask)); 964 if (x && b) { /* this code is buggy when the row and column layout don't match */ 965 PetscBool cong; 966 967 PetscCall(MatHasCongruentLayouts(A, &cong)); 968 PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout"); 969 PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD)); 970 PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD)); 971 PetscCall(VecGetArrayRead(l->lvec, &xx)); 972 PetscCall(VecGetArray(b, &bb)); 973 } 974 PetscCall(VecGetArray(lmask, &mask)); 975 /* remove zeroed rows of off-diagonal matrix */ 976 PetscCall(MatSeqAIJGetArray(l->B, &aij_a)); 977 ii = aij->i; 978 for (i = 0; i < len; i++) 
PetscCall(PetscArrayzero(PetscSafePointerPlusOffset(aij_a, ii[lrows[i]]), ii[lrows[i] + 1] - ii[lrows[i]])); 979 /* loop over all elements of off process part of matrix zeroing removed columns*/ 980 if (aij->compressedrow.use) { 981 m = aij->compressedrow.nrows; 982 ii = aij->compressedrow.i; 983 ridx = aij->compressedrow.rindex; 984 for (i = 0; i < m; i++) { 985 n = ii[i + 1] - ii[i]; 986 aj = aij->j + ii[i]; 987 aa = aij_a + ii[i]; 988 989 for (j = 0; j < n; j++) { 990 if (PetscAbsScalar(mask[*aj])) { 991 if (b) bb[*ridx] -= *aa * xx[*aj]; 992 *aa = 0.0; 993 } 994 aa++; 995 aj++; 996 } 997 ridx++; 998 } 999 } else { /* do not use compressed row format */ 1000 m = l->B->rmap->n; 1001 for (i = 0; i < m; i++) { 1002 n = ii[i + 1] - ii[i]; 1003 aj = aij->j + ii[i]; 1004 aa = aij_a + ii[i]; 1005 for (j = 0; j < n; j++) { 1006 if (PetscAbsScalar(mask[*aj])) { 1007 if (b) bb[i] -= *aa * xx[*aj]; 1008 *aa = 0.0; 1009 } 1010 aa++; 1011 aj++; 1012 } 1013 } 1014 } 1015 if (x && b) { 1016 PetscCall(VecRestoreArray(b, &bb)); 1017 PetscCall(VecRestoreArrayRead(l->lvec, &xx)); 1018 } 1019 PetscCall(MatSeqAIJRestoreArray(l->B, &aij_a)); 1020 PetscCall(VecRestoreArray(lmask, &mask)); 1021 PetscCall(VecDestroy(&lmask)); 1022 PetscCall(PetscFree(lrows)); 1023 1024 /* only change matrix nonzero state if pattern was allowed to be changed */ 1025 if (!((Mat_SeqAIJ *)l->A->data)->nonew) { 1026 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1027 PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A))); 1028 } 1029 PetscFunctionReturn(PETSC_SUCCESS); 1030 } 1031 1032 static PetscErrorCode MatMult_MPIAIJ(Mat A, Vec xx, Vec yy) 1033 { 1034 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1035 PetscInt nt; 1036 VecScatter Mvctx = a->Mvctx; 1037 1038 PetscFunctionBegin; 1039 PetscCall(VecGetLocalSize(xx, &nt)); 1040 PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") and 
xx (%" PetscInt_FMT ")", A->cmap->n, nt); 1041 PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1042 PetscUseTypeMethod(a->A, mult, xx, yy); 1043 PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1044 PetscUseTypeMethod(a->B, multadd, a->lvec, yy, yy); 1045 PetscFunctionReturn(PETSC_SUCCESS); 1046 } 1047 1048 static PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A, Vec bb, Vec xx) 1049 { 1050 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1051 1052 PetscFunctionBegin; 1053 PetscCall(MatMultDiagonalBlock(a->A, bb, xx)); 1054 PetscFunctionReturn(PETSC_SUCCESS); 1055 } 1056 1057 static PetscErrorCode MatMultAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz) 1058 { 1059 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1060 VecScatter Mvctx = a->Mvctx; 1061 1062 PetscFunctionBegin; 1063 PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1064 PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz)); 1065 PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1066 PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz)); 1067 PetscFunctionReturn(PETSC_SUCCESS); 1068 } 1069 1070 static PetscErrorCode MatMultTranspose_MPIAIJ(Mat A, Vec xx, Vec yy) 1071 { 1072 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1073 1074 PetscFunctionBegin; 1075 /* do nondiagonal part */ 1076 PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec)); 1077 /* do local part */ 1078 PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy)); 1079 /* add partial results together */ 1080 PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE)); 1081 PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE)); 1082 PetscFunctionReturn(PETSC_SUCCESS); 1083 } 1084 1085 static PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat, Mat Bmat, PetscReal tol, PetscBool *f) 1086 { 1087 MPI_Comm comm; 1088 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)Amat->data, *Bij = (Mat_MPIAIJ *)Bmat->data; 1089 Mat Adia = Aij->A, Bdia = 
Bij->A, Aoff, Boff, *Aoffs, *Boffs; 1090 IS Me, Notme; 1091 PetscInt M, N, first, last, *notme, i; 1092 PetscBool lf; 1093 PetscMPIInt size; 1094 1095 PetscFunctionBegin; 1096 /* Easy test: symmetric diagonal block */ 1097 PetscCall(MatIsTranspose(Adia, Bdia, tol, &lf)); 1098 PetscCall(MPIU_Allreduce(&lf, f, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)Amat))); 1099 if (!*f) PetscFunctionReturn(PETSC_SUCCESS); 1100 PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm)); 1101 PetscCallMPI(MPI_Comm_size(comm, &size)); 1102 if (size == 1) PetscFunctionReturn(PETSC_SUCCESS); 1103 1104 /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */ 1105 PetscCall(MatGetSize(Amat, &M, &N)); 1106 PetscCall(MatGetOwnershipRange(Amat, &first, &last)); 1107 PetscCall(PetscMalloc1(N - last + first, ¬me)); 1108 for (i = 0; i < first; i++) notme[i] = i; 1109 for (i = last; i < M; i++) notme[i - last + first] = i; 1110 PetscCall(ISCreateGeneral(MPI_COMM_SELF, N - last + first, notme, PETSC_COPY_VALUES, &Notme)); 1111 PetscCall(ISCreateStride(MPI_COMM_SELF, last - first, first, 1, &Me)); 1112 PetscCall(MatCreateSubMatrices(Amat, 1, &Me, &Notme, MAT_INITIAL_MATRIX, &Aoffs)); 1113 Aoff = Aoffs[0]; 1114 PetscCall(MatCreateSubMatrices(Bmat, 1, &Notme, &Me, MAT_INITIAL_MATRIX, &Boffs)); 1115 Boff = Boffs[0]; 1116 PetscCall(MatIsTranspose(Aoff, Boff, tol, f)); 1117 PetscCall(MatDestroyMatrices(1, &Aoffs)); 1118 PetscCall(MatDestroyMatrices(1, &Boffs)); 1119 PetscCall(ISDestroy(&Me)); 1120 PetscCall(ISDestroy(&Notme)); 1121 PetscCall(PetscFree(notme)); 1122 PetscFunctionReturn(PETSC_SUCCESS); 1123 } 1124 1125 static PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz) 1126 { 1127 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1128 1129 PetscFunctionBegin; 1130 /* do nondiagonal part */ 1131 PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec)); 1132 /* do local part */ 1133 PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz)); 1134 /* add partial 
results together */ 1135 PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE)); 1136 PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE)); 1137 PetscFunctionReturn(PETSC_SUCCESS); 1138 } 1139 1140 /* 1141 This only works correctly for square matrices where the subblock A->A is the 1142 diagonal block 1143 */ 1144 static PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A, Vec v) 1145 { 1146 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1147 1148 PetscFunctionBegin; 1149 PetscCheck(A->rmap->N == A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block"); 1150 PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "row partition must equal col partition"); 1151 PetscCall(MatGetDiagonal(a->A, v)); 1152 PetscFunctionReturn(PETSC_SUCCESS); 1153 } 1154 1155 static PetscErrorCode MatScale_MPIAIJ(Mat A, PetscScalar aa) 1156 { 1157 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1158 1159 PetscFunctionBegin; 1160 PetscCall(MatScale(a->A, aa)); 1161 PetscCall(MatScale(a->B, aa)); 1162 PetscFunctionReturn(PETSC_SUCCESS); 1163 } 1164 1165 static PetscErrorCode MatView_MPIAIJ_Binary(Mat mat, PetscViewer viewer) 1166 { 1167 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1168 Mat_SeqAIJ *A = (Mat_SeqAIJ *)aij->A->data; 1169 Mat_SeqAIJ *B = (Mat_SeqAIJ *)aij->B->data; 1170 const PetscInt *garray = aij->garray; 1171 const PetscScalar *aa, *ba; 1172 PetscInt header[4], M, N, m, rs, cs, cnt, i, ja, jb; 1173 PetscInt64 nz, hnz; 1174 PetscInt *rowlens; 1175 PetscInt *colidxs; 1176 PetscScalar *matvals; 1177 PetscMPIInt rank; 1178 1179 PetscFunctionBegin; 1180 PetscCall(PetscViewerSetUp(viewer)); 1181 1182 M = mat->rmap->N; 1183 N = mat->cmap->N; 1184 m = mat->rmap->n; 1185 rs = mat->rmap->rstart; 1186 cs = mat->cmap->rstart; 1187 nz = A->nz + B->nz; 1188 1189 /* write matrix header */ 1190 header[0] = MAT_FILE_CLASSID; 1191 header[1] = M; 1192 
header[2] = N; 1193 PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_INT64, MPI_SUM, 0, PetscObjectComm((PetscObject)mat))); 1194 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 1195 if (rank == 0) { 1196 if (hnz > PETSC_MAX_INT) header[3] = PETSC_MAX_INT; 1197 else header[3] = (PetscInt)hnz; 1198 } 1199 PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT)); 1200 1201 /* fill in and store row lengths */ 1202 PetscCall(PetscMalloc1(m, &rowlens)); 1203 for (i = 0; i < m; i++) rowlens[i] = A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i]; 1204 PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT)); 1205 PetscCall(PetscFree(rowlens)); 1206 1207 /* fill in and store column indices */ 1208 PetscCall(PetscMalloc1(nz, &colidxs)); 1209 for (cnt = 0, i = 0; i < m; i++) { 1210 for (jb = B->i[i]; jb < B->i[i + 1]; jb++) { 1211 if (garray[B->j[jb]] > cs) break; 1212 colidxs[cnt++] = garray[B->j[jb]]; 1213 } 1214 for (ja = A->i[i]; ja < A->i[i + 1]; ja++) colidxs[cnt++] = A->j[ja] + cs; 1215 for (; jb < B->i[i + 1]; jb++) colidxs[cnt++] = garray[B->j[jb]]; 1216 } 1217 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz); 1218 PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 1219 PetscCall(PetscFree(colidxs)); 1220 1221 /* fill in and store nonzero values */ 1222 PetscCall(MatSeqAIJGetArrayRead(aij->A, &aa)); 1223 PetscCall(MatSeqAIJGetArrayRead(aij->B, &ba)); 1224 PetscCall(PetscMalloc1(nz, &matvals)); 1225 for (cnt = 0, i = 0; i < m; i++) { 1226 for (jb = B->i[i]; jb < B->i[i + 1]; jb++) { 1227 if (garray[B->j[jb]] > cs) break; 1228 matvals[cnt++] = ba[jb]; 1229 } 1230 for (ja = A->i[i]; ja < A->i[i + 1]; ja++) matvals[cnt++] = aa[ja]; 1231 for (; jb < B->i[i + 1]; jb++) matvals[cnt++] = ba[jb]; 1232 } 1233 PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &aa)); 1234 
PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &ba)); 1235 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz); 1236 PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 1237 PetscCall(PetscFree(matvals)); 1238 1239 /* write block size option to the viewer's .info file */ 1240 PetscCall(MatView_Binary_BlockSizes(mat, viewer)); 1241 PetscFunctionReturn(PETSC_SUCCESS); 1242 } 1243 1244 #include <petscdraw.h> 1245 static PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer) 1246 { 1247 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1248 PetscMPIInt rank = aij->rank, size = aij->size; 1249 PetscBool isdraw, iascii, isbinary; 1250 PetscViewer sviewer; 1251 PetscViewerFormat format; 1252 1253 PetscFunctionBegin; 1254 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1255 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 1256 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1257 if (iascii) { 1258 PetscCall(PetscViewerGetFormat(viewer, &format)); 1259 if (format == PETSC_VIEWER_LOAD_BALANCE) { 1260 PetscInt i, nmax = 0, nmin = PETSC_MAX_INT, navg = 0, *nz, nzlocal = ((Mat_SeqAIJ *)aij->A->data)->nz + ((Mat_SeqAIJ *)aij->B->data)->nz; 1261 PetscCall(PetscMalloc1(size, &nz)); 1262 PetscCallMPI(MPI_Allgather(&nzlocal, 1, MPIU_INT, nz, 1, MPIU_INT, PetscObjectComm((PetscObject)mat))); 1263 for (i = 0; i < (PetscInt)size; i++) { 1264 nmax = PetscMax(nmax, nz[i]); 1265 nmin = PetscMin(nmin, nz[i]); 1266 navg += nz[i]; 1267 } 1268 PetscCall(PetscFree(nz)); 1269 navg = navg / size; 1270 PetscCall(PetscViewerASCIIPrintf(viewer, "Load Balance - Nonzeros: Min %" PetscInt_FMT " avg %" PetscInt_FMT " max %" PetscInt_FMT "\n", nmin, navg, nmax)); 1271 PetscFunctionReturn(PETSC_SUCCESS); 1272 } 1273 
PetscCall(PetscViewerGetFormat(viewer, &format)); 1274 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1275 MatInfo info; 1276 PetscInt *inodes = NULL; 1277 1278 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 1279 PetscCall(MatGetInfo(mat, MAT_LOCAL, &info)); 1280 PetscCall(MatInodeGetInodeSizes(aij->A, NULL, &inodes, NULL)); 1281 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 1282 if (!inodes) { 1283 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, 1284 (double)info.memory)); 1285 } else { 1286 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, 1287 (double)info.memory)); 1288 } 1289 PetscCall(MatGetInfo(aij->A, MAT_LOCAL, &info)); 1290 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used)); 1291 PetscCall(MatGetInfo(aij->B, MAT_LOCAL, &info)); 1292 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used)); 1293 PetscCall(PetscViewerFlush(viewer)); 1294 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 1295 PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n")); 1296 PetscCall(VecScatterView(aij->Mvctx, viewer)); 1297 PetscFunctionReturn(PETSC_SUCCESS); 1298 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1299 PetscInt inodecount, inodelimit, *inodes; 1300 PetscCall(MatInodeGetInodeSizes(aij->A, &inodecount, &inodes, &inodelimit)); 1301 if (inodes) { 1302 PetscCall(PetscViewerASCIIPrintf(viewer, "using I-node (on process 0) routines: found %" 
PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n", inodecount, inodelimit)); 1303 } else { 1304 PetscCall(PetscViewerASCIIPrintf(viewer, "not using I-node (on process 0) routines\n")); 1305 } 1306 PetscFunctionReturn(PETSC_SUCCESS); 1307 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1308 PetscFunctionReturn(PETSC_SUCCESS); 1309 } 1310 } else if (isbinary) { 1311 if (size == 1) { 1312 PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name)); 1313 PetscCall(MatView(aij->A, viewer)); 1314 } else { 1315 PetscCall(MatView_MPIAIJ_Binary(mat, viewer)); 1316 } 1317 PetscFunctionReturn(PETSC_SUCCESS); 1318 } else if (iascii && size == 1) { 1319 PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name)); 1320 PetscCall(MatView(aij->A, viewer)); 1321 PetscFunctionReturn(PETSC_SUCCESS); 1322 } else if (isdraw) { 1323 PetscDraw draw; 1324 PetscBool isnull; 1325 PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); 1326 PetscCall(PetscDrawIsNull(draw, &isnull)); 1327 if (isnull) PetscFunctionReturn(PETSC_SUCCESS); 1328 } 1329 1330 { /* assemble the entire matrix onto first processor */ 1331 Mat A = NULL, Av; 1332 IS isrow, iscol; 1333 1334 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->rmap->N : 0, 0, 1, &isrow)); 1335 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->cmap->N : 0, 0, 1, &iscol)); 1336 PetscCall(MatCreateSubMatrix(mat, isrow, iscol, MAT_INITIAL_MATRIX, &A)); 1337 PetscCall(MatMPIAIJGetSeqAIJ(A, &Av, NULL, NULL)); 1338 /* The commented code uses MatCreateSubMatrices instead */ 1339 /* 1340 Mat *AA, A = NULL, Av; 1341 IS isrow,iscol; 1342 1343 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow)); 1344 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? 
mat->cmap->N : 0,0,1,&iscol)); 1345 PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA)); 1346 if (rank == 0) { 1347 PetscCall(PetscObjectReference((PetscObject)AA[0])); 1348 A = AA[0]; 1349 Av = AA[0]; 1350 } 1351 PetscCall(MatDestroySubMatrices(1,&AA)); 1352 */ 1353 PetscCall(ISDestroy(&iscol)); 1354 PetscCall(ISDestroy(&isrow)); 1355 /* 1356 Everyone has to call to draw the matrix since the graphics waits are 1357 synchronized across all processors that share the PetscDraw object 1358 */ 1359 PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 1360 if (rank == 0) { 1361 if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)Av, ((PetscObject)mat)->name)); 1362 PetscCall(MatView_SeqAIJ(Av, sviewer)); 1363 } 1364 PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 1365 PetscCall(MatDestroy(&A)); 1366 } 1367 PetscFunctionReturn(PETSC_SUCCESS); 1368 } 1369 1370 PetscErrorCode MatView_MPIAIJ(Mat mat, PetscViewer viewer) 1371 { 1372 PetscBool iascii, isdraw, issocket, isbinary; 1373 1374 PetscFunctionBegin; 1375 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 1376 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1377 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1378 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket)); 1379 if (iascii || isdraw || isbinary || issocket) PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat, viewer)); 1380 PetscFunctionReturn(PETSC_SUCCESS); 1381 } 1382 1383 static PetscErrorCode MatSOR_MPIAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) 1384 { 1385 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data; 1386 Vec bb1 = NULL; 1387 PetscBool hasop; 1388 1389 PetscFunctionBegin; 1390 if (flag == SOR_APPLY_UPPER) { 1391 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, 
fshift, lits, 1, xx)); 1392 PetscFunctionReturn(PETSC_SUCCESS); 1393 } 1394 1395 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) PetscCall(VecDuplicate(bb, &bb1)); 1396 1397 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1398 if (flag & SOR_ZERO_INITIAL_GUESS) { 1399 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1400 its--; 1401 } 1402 1403 while (its--) { 1404 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1405 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1406 1407 /* update rhs: bb1 = bb - B*x */ 1408 PetscCall(VecScale(mat->lvec, -1.0)); 1409 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 1410 1411 /* local sweep */ 1412 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx)); 1413 } 1414 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1415 if (flag & SOR_ZERO_INITIAL_GUESS) { 1416 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1417 its--; 1418 } 1419 while (its--) { 1420 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1421 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1422 1423 /* update rhs: bb1 = bb - B*x */ 1424 PetscCall(VecScale(mat->lvec, -1.0)); 1425 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 1426 1427 /* local sweep */ 1428 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx)); 1429 } 1430 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1431 if (flag & SOR_ZERO_INITIAL_GUESS) { 1432 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1433 its--; 1434 } 1435 while (its--) { 1436 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1437 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1438 
1439 /* update rhs: bb1 = bb - B*x */ 1440 PetscCall(VecScale(mat->lvec, -1.0)); 1441 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 1442 1443 /* local sweep */ 1444 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx)); 1445 } 1446 } else if (flag & SOR_EISENSTAT) { 1447 Vec xx1; 1448 1449 PetscCall(VecDuplicate(bb, &xx1)); 1450 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx)); 1451 1452 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1453 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1454 if (!mat->diag) { 1455 PetscCall(MatCreateVecs(matin, &mat->diag, NULL)); 1456 PetscCall(MatGetDiagonal(matin, mat->diag)); 1457 } 1458 PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop)); 1459 if (hasop) { 1460 PetscCall(MatMultDiagonalBlock(matin, xx, bb1)); 1461 } else { 1462 PetscCall(VecPointwiseMult(bb1, mat->diag, xx)); 1463 } 1464 PetscCall(VecAYPX(bb1, (omega - 2.0) / omega, bb)); 1465 1466 PetscCall(MatMultAdd(mat->B, mat->lvec, bb1, bb1)); 1467 1468 /* local sweep */ 1469 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1)); 1470 PetscCall(VecAXPY(xx, 1.0, xx1)); 1471 PetscCall(VecDestroy(&xx1)); 1472 } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel SOR not supported"); 1473 1474 PetscCall(VecDestroy(&bb1)); 1475 1476 matin->factorerrortype = mat->A->factorerrortype; 1477 PetscFunctionReturn(PETSC_SUCCESS); 1478 } 1479 1480 static PetscErrorCode MatPermute_MPIAIJ(Mat A, IS rowp, IS colp, Mat *B) 1481 { 1482 Mat aA, aB, Aperm; 1483 const PetscInt *rwant, *cwant, *gcols, *ai, *bi, *aj, *bj; 1484 PetscScalar *aa, *ba; 1485 PetscInt i, j, m, n, ng, anz, bnz, *dnnz, *onnz, *tdnnz, *tonnz, *rdest, *cdest, *work, *gcdest; 1486 
PetscSF rowsf, sf; 1487 IS parcolp = NULL; 1488 PetscBool done; 1489 1490 PetscFunctionBegin; 1491 PetscCall(MatGetLocalSize(A, &m, &n)); 1492 PetscCall(ISGetIndices(rowp, &rwant)); 1493 PetscCall(ISGetIndices(colp, &cwant)); 1494 PetscCall(PetscMalloc3(PetscMax(m, n), &work, m, &rdest, n, &cdest)); 1495 1496 /* Invert row permutation to find out where my rows should go */ 1497 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &rowsf)); 1498 PetscCall(PetscSFSetGraphLayout(rowsf, A->rmap, A->rmap->n, NULL, PETSC_OWN_POINTER, rwant)); 1499 PetscCall(PetscSFSetFromOptions(rowsf)); 1500 for (i = 0; i < m; i++) work[i] = A->rmap->rstart + i; 1501 PetscCall(PetscSFReduceBegin(rowsf, MPIU_INT, work, rdest, MPI_REPLACE)); 1502 PetscCall(PetscSFReduceEnd(rowsf, MPIU_INT, work, rdest, MPI_REPLACE)); 1503 1504 /* Invert column permutation to find out where my columns should go */ 1505 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1506 PetscCall(PetscSFSetGraphLayout(sf, A->cmap, A->cmap->n, NULL, PETSC_OWN_POINTER, cwant)); 1507 PetscCall(PetscSFSetFromOptions(sf)); 1508 for (i = 0; i < n; i++) work[i] = A->cmap->rstart + i; 1509 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, work, cdest, MPI_REPLACE)); 1510 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, work, cdest, MPI_REPLACE)); 1511 PetscCall(PetscSFDestroy(&sf)); 1512 1513 PetscCall(ISRestoreIndices(rowp, &rwant)); 1514 PetscCall(ISRestoreIndices(colp, &cwant)); 1515 PetscCall(MatMPIAIJGetSeqAIJ(A, &aA, &aB, &gcols)); 1516 1517 /* Find out where my gcols should go */ 1518 PetscCall(MatGetSize(aB, NULL, &ng)); 1519 PetscCall(PetscMalloc1(ng, &gcdest)); 1520 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1521 PetscCall(PetscSFSetGraphLayout(sf, A->cmap, ng, NULL, PETSC_OWN_POINTER, gcols)); 1522 PetscCall(PetscSFSetFromOptions(sf)); 1523 PetscCall(PetscSFBcastBegin(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE)); 1524 PetscCall(PetscSFBcastEnd(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE)); 1525 
PetscCall(PetscSFDestroy(&sf)); 1526 1527 PetscCall(PetscCalloc4(m, &dnnz, m, &onnz, m, &tdnnz, m, &tonnz)); 1528 PetscCall(MatGetRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done)); 1529 PetscCall(MatGetRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done)); 1530 for (i = 0; i < m; i++) { 1531 PetscInt row = rdest[i]; 1532 PetscMPIInt rowner; 1533 PetscCall(PetscLayoutFindOwner(A->rmap, row, &rowner)); 1534 for (j = ai[i]; j < ai[i + 1]; j++) { 1535 PetscInt col = cdest[aj[j]]; 1536 PetscMPIInt cowner; 1537 PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); /* Could build an index for the columns to eliminate this search */ 1538 if (rowner == cowner) dnnz[i]++; 1539 else onnz[i]++; 1540 } 1541 for (j = bi[i]; j < bi[i + 1]; j++) { 1542 PetscInt col = gcdest[bj[j]]; 1543 PetscMPIInt cowner; 1544 PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); 1545 if (rowner == cowner) dnnz[i]++; 1546 else onnz[i]++; 1547 } 1548 } 1549 PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE)); 1550 PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE)); 1551 PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE)); 1552 PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE)); 1553 PetscCall(PetscSFDestroy(&rowsf)); 1554 1555 PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A), A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N, 0, tdnnz, 0, tonnz, &Aperm)); 1556 PetscCall(MatSeqAIJGetArray(aA, &aa)); 1557 PetscCall(MatSeqAIJGetArray(aB, &ba)); 1558 for (i = 0; i < m; i++) { 1559 PetscInt *acols = dnnz, *bcols = onnz; /* Repurpose now-unneeded arrays */ 1560 PetscInt j0, rowlen; 1561 rowlen = ai[i + 1] - ai[i]; 1562 for (j0 = j = 0; j < rowlen; j0 = j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1563 for (; j < PetscMin(rowlen, j0 + m); j++) acols[j - j0] = cdest[aj[ai[i] + j]]; 1564 PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, acols, aa + ai[i] + j0, 
INSERT_VALUES)); 1565 } 1566 rowlen = bi[i + 1] - bi[i]; 1567 for (j0 = j = 0; j < rowlen; j0 = j) { 1568 for (; j < PetscMin(rowlen, j0 + m); j++) bcols[j - j0] = gcdest[bj[bi[i] + j]]; 1569 PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, bcols, ba + bi[i] + j0, INSERT_VALUES)); 1570 } 1571 } 1572 PetscCall(MatAssemblyBegin(Aperm, MAT_FINAL_ASSEMBLY)); 1573 PetscCall(MatAssemblyEnd(Aperm, MAT_FINAL_ASSEMBLY)); 1574 PetscCall(MatRestoreRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done)); 1575 PetscCall(MatRestoreRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done)); 1576 PetscCall(MatSeqAIJRestoreArray(aA, &aa)); 1577 PetscCall(MatSeqAIJRestoreArray(aB, &ba)); 1578 PetscCall(PetscFree4(dnnz, onnz, tdnnz, tonnz)); 1579 PetscCall(PetscFree3(work, rdest, cdest)); 1580 PetscCall(PetscFree(gcdest)); 1581 if (parcolp) PetscCall(ISDestroy(&colp)); 1582 *B = Aperm; 1583 PetscFunctionReturn(PETSC_SUCCESS); 1584 } 1585 1586 static PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[]) 1587 { 1588 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1589 1590 PetscFunctionBegin; 1591 PetscCall(MatGetSize(aij->B, NULL, nghosts)); 1592 if (ghosts) *ghosts = aij->garray; 1593 PetscFunctionReturn(PETSC_SUCCESS); 1594 } 1595 1596 static PetscErrorCode MatGetInfo_MPIAIJ(Mat matin, MatInfoType flag, MatInfo *info) 1597 { 1598 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data; 1599 Mat A = mat->A, B = mat->B; 1600 PetscLogDouble isend[5], irecv[5]; 1601 1602 PetscFunctionBegin; 1603 info->block_size = 1.0; 1604 PetscCall(MatGetInfo(A, MAT_LOCAL, info)); 1605 1606 isend[0] = info->nz_used; 1607 isend[1] = info->nz_allocated; 1608 isend[2] = info->nz_unneeded; 1609 isend[3] = info->memory; 1610 isend[4] = info->mallocs; 1611 1612 PetscCall(MatGetInfo(B, MAT_LOCAL, info)); 1613 1614 isend[0] += info->nz_used; 1615 isend[1] += info->nz_allocated; 1616 isend[2] += info->nz_unneeded; 1617 isend[3] += info->memory; 1618 isend[4] += info->mallocs; 
1619 if (flag == MAT_LOCAL) { 1620 info->nz_used = isend[0]; 1621 info->nz_allocated = isend[1]; 1622 info->nz_unneeded = isend[2]; 1623 info->memory = isend[3]; 1624 info->mallocs = isend[4]; 1625 } else if (flag == MAT_GLOBAL_MAX) { 1626 PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin))); 1627 1628 info->nz_used = irecv[0]; 1629 info->nz_allocated = irecv[1]; 1630 info->nz_unneeded = irecv[2]; 1631 info->memory = irecv[3]; 1632 info->mallocs = irecv[4]; 1633 } else if (flag == MAT_GLOBAL_SUM) { 1634 PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin))); 1635 1636 info->nz_used = irecv[0]; 1637 info->nz_allocated = irecv[1]; 1638 info->nz_unneeded = irecv[2]; 1639 info->memory = irecv[3]; 1640 info->mallocs = irecv[4]; 1641 } 1642 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1643 info->fill_ratio_needed = 0; 1644 info->factor_mallocs = 0; 1645 PetscFunctionReturn(PETSC_SUCCESS); 1646 } 1647 1648 PetscErrorCode MatSetOption_MPIAIJ(Mat A, MatOption op, PetscBool flg) 1649 { 1650 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1651 1652 PetscFunctionBegin; 1653 switch (op) { 1654 case MAT_NEW_NONZERO_LOCATIONS: 1655 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1656 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1657 case MAT_KEEP_NONZERO_PATTERN: 1658 case MAT_NEW_NONZERO_LOCATION_ERR: 1659 case MAT_USE_INODES: 1660 case MAT_IGNORE_ZERO_ENTRIES: 1661 case MAT_FORM_EXPLICIT_TRANSPOSE: 1662 MatCheckPreallocated(A, 1); 1663 PetscCall(MatSetOption(a->A, op, flg)); 1664 PetscCall(MatSetOption(a->B, op, flg)); 1665 break; 1666 case MAT_ROW_ORIENTED: 1667 MatCheckPreallocated(A, 1); 1668 a->roworiented = flg; 1669 1670 PetscCall(MatSetOption(a->A, op, flg)); 1671 PetscCall(MatSetOption(a->B, op, flg)); 1672 break; 1673 case MAT_FORCE_DIAGONAL_ENTRIES: 1674 case MAT_SORTED_FULL: 1675 PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); 1676 break; 1677 case 
MAT_IGNORE_OFF_PROC_ENTRIES: 1678 a->donotstash = flg; 1679 break; 1680 /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */ 1681 case MAT_SPD: 1682 case MAT_SYMMETRIC: 1683 case MAT_STRUCTURALLY_SYMMETRIC: 1684 case MAT_HERMITIAN: 1685 case MAT_SYMMETRY_ETERNAL: 1686 case MAT_STRUCTURAL_SYMMETRY_ETERNAL: 1687 case MAT_SPD_ETERNAL: 1688 /* if the diagonal matrix is square it inherits some of the properties above */ 1689 break; 1690 case MAT_SUBMAT_SINGLEIS: 1691 A->submat_singleis = flg; 1692 break; 1693 case MAT_STRUCTURE_ONLY: 1694 /* The option is handled directly by MatSetOption() */ 1695 break; 1696 default: 1697 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op); 1698 } 1699 PetscFunctionReturn(PETSC_SUCCESS); 1700 } 1701 1702 PetscErrorCode MatGetRow_MPIAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1703 { 1704 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data; 1705 PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p; 1706 PetscInt i, *cworkA, *cworkB, **pcA, **pcB, cstart = matin->cmap->rstart; 1707 PetscInt nztot, nzA, nzB, lrow, rstart = matin->rmap->rstart, rend = matin->rmap->rend; 1708 PetscInt *cmap, *idx_p; 1709 1710 PetscFunctionBegin; 1711 PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active"); 1712 mat->getrowactive = PETSC_TRUE; 1713 1714 if (!mat->rowvalues && (idx || v)) { 1715 /* 1716 allocate enough space to hold information from the longest row. 
1717 */ 1718 Mat_SeqAIJ *Aa = (Mat_SeqAIJ *)mat->A->data, *Ba = (Mat_SeqAIJ *)mat->B->data; 1719 PetscInt max = 1, tmp; 1720 for (i = 0; i < matin->rmap->n; i++) { 1721 tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i]; 1722 if (max < tmp) max = tmp; 1723 } 1724 PetscCall(PetscMalloc2(max, &mat->rowvalues, max, &mat->rowindices)); 1725 } 1726 1727 PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows"); 1728 lrow = row - rstart; 1729 1730 pvA = &vworkA; 1731 pcA = &cworkA; 1732 pvB = &vworkB; 1733 pcB = &cworkB; 1734 if (!v) { 1735 pvA = NULL; 1736 pvB = NULL; 1737 } 1738 if (!idx) { 1739 pcA = NULL; 1740 if (!v) pcB = NULL; 1741 } 1742 PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA)); 1743 PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB)); 1744 nztot = nzA + nzB; 1745 1746 cmap = mat->garray; 1747 if (v || idx) { 1748 if (nztot) { 1749 /* Sort by increasing column numbers, assuming A and B already sorted */ 1750 PetscInt imark = -1; 1751 if (v) { 1752 *v = v_p = mat->rowvalues; 1753 for (i = 0; i < nzB; i++) { 1754 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1755 else break; 1756 } 1757 imark = i; 1758 for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i]; 1759 for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i]; 1760 } 1761 if (idx) { 1762 *idx = idx_p = mat->rowindices; 1763 if (imark > -1) { 1764 for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i]]; 1765 } else { 1766 for (i = 0; i < nzB; i++) { 1767 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1768 else break; 1769 } 1770 imark = i; 1771 } 1772 for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart + cworkA[i]; 1773 for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i]]; 1774 } 1775 } else { 1776 if (idx) *idx = NULL; 1777 if (v) *v = NULL; 1778 } 1779 } 1780 *nz = nztot; 1781 PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA)); 1782 PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, 
&nzB, pcB, pvB)); 1783 PetscFunctionReturn(PETSC_SUCCESS); 1784 } 1785 1786 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1787 { 1788 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1789 1790 PetscFunctionBegin; 1791 PetscCheck(aij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first"); 1792 aij->getrowactive = PETSC_FALSE; 1793 PetscFunctionReturn(PETSC_SUCCESS); 1794 } 1795 1796 static PetscErrorCode MatNorm_MPIAIJ(Mat mat, NormType type, PetscReal *norm) 1797 { 1798 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1799 Mat_SeqAIJ *amat = (Mat_SeqAIJ *)aij->A->data, *bmat = (Mat_SeqAIJ *)aij->B->data; 1800 PetscInt i, j, cstart = mat->cmap->rstart; 1801 PetscReal sum = 0.0; 1802 const MatScalar *v, *amata, *bmata; 1803 1804 PetscFunctionBegin; 1805 if (aij->size == 1) { 1806 PetscCall(MatNorm(aij->A, type, norm)); 1807 } else { 1808 PetscCall(MatSeqAIJGetArrayRead(aij->A, &amata)); 1809 PetscCall(MatSeqAIJGetArrayRead(aij->B, &bmata)); 1810 if (type == NORM_FROBENIUS) { 1811 v = amata; 1812 for (i = 0; i < amat->nz; i++) { 1813 sum += PetscRealPart(PetscConj(*v) * (*v)); 1814 v++; 1815 } 1816 v = bmata; 1817 for (i = 0; i < bmat->nz; i++) { 1818 sum += PetscRealPart(PetscConj(*v) * (*v)); 1819 v++; 1820 } 1821 PetscCall(MPIU_Allreduce(&sum, norm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 1822 *norm = PetscSqrtReal(*norm); 1823 PetscCall(PetscLogFlops(2.0 * amat->nz + 2.0 * bmat->nz)); 1824 } else if (type == NORM_1) { /* max column norm */ 1825 PetscReal *tmp, *tmp2; 1826 PetscInt *jj, *garray = aij->garray; 1827 PetscCall(PetscCalloc1(mat->cmap->N + 1, &tmp)); 1828 PetscCall(PetscMalloc1(mat->cmap->N + 1, &tmp2)); 1829 *norm = 0.0; 1830 v = amata; 1831 jj = amat->j; 1832 for (j = 0; j < amat->nz; j++) { 1833 tmp[cstart + *jj++] += PetscAbsScalar(*v); 1834 v++; 1835 } 1836 v = bmata; 1837 jj = bmat->j; 1838 for (j = 0; j < bmat->nz; j++) { 1839 
tmp[garray[*jj++]] += PetscAbsScalar(*v); 1840 v++; 1841 } 1842 PetscCall(MPIU_Allreduce(tmp, tmp2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 1843 for (j = 0; j < mat->cmap->N; j++) { 1844 if (tmp2[j] > *norm) *norm = tmp2[j]; 1845 } 1846 PetscCall(PetscFree(tmp)); 1847 PetscCall(PetscFree(tmp2)); 1848 PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0))); 1849 } else if (type == NORM_INFINITY) { /* max row norm */ 1850 PetscReal ntemp = 0.0; 1851 for (j = 0; j < aij->A->rmap->n; j++) { 1852 v = PetscSafePointerPlusOffset(amata, amat->i[j]); 1853 sum = 0.0; 1854 for (i = 0; i < amat->i[j + 1] - amat->i[j]; i++) { 1855 sum += PetscAbsScalar(*v); 1856 v++; 1857 } 1858 v = PetscSafePointerPlusOffset(bmata, bmat->i[j]); 1859 for (i = 0; i < bmat->i[j + 1] - bmat->i[j]; i++) { 1860 sum += PetscAbsScalar(*v); 1861 v++; 1862 } 1863 if (sum > ntemp) ntemp = sum; 1864 } 1865 PetscCall(MPIU_Allreduce(&ntemp, norm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat))); 1866 PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0))); 1867 } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for two norm"); 1868 PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &amata)); 1869 PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &bmata)); 1870 } 1871 PetscFunctionReturn(PETSC_SUCCESS); 1872 } 1873 1874 static PetscErrorCode MatTranspose_MPIAIJ(Mat A, MatReuse reuse, Mat *matout) 1875 { 1876 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *b; 1877 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *)a->A->data, *Bloc = (Mat_SeqAIJ *)a->B->data, *sub_B_diag; 1878 PetscInt M = A->rmap->N, N = A->cmap->N, ma, na, mb, nb, row, *cols, *cols_tmp, *B_diag_ilen, i, ncol, A_diag_ncol; 1879 const PetscInt *ai, *aj, *bi, *bj, *B_diag_i; 1880 Mat B, A_diag, *B_diag; 1881 const MatScalar *pbv, *bv; 1882 1883 PetscFunctionBegin; 1884 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout)); 1885 ma = A->rmap->n; 1886 na = 
A->cmap->n; 1887 mb = a->B->rmap->n; 1888 nb = a->B->cmap->n; 1889 ai = Aloc->i; 1890 aj = Aloc->j; 1891 bi = Bloc->i; 1892 bj = Bloc->j; 1893 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1894 PetscInt *d_nnz, *g_nnz, *o_nnz; 1895 PetscSFNode *oloc; 1896 PETSC_UNUSED PetscSF sf; 1897 1898 PetscCall(PetscMalloc4(na, &d_nnz, na, &o_nnz, nb, &g_nnz, nb, &oloc)); 1899 /* compute d_nnz for preallocation */ 1900 PetscCall(PetscArrayzero(d_nnz, na)); 1901 for (i = 0; i < ai[ma]; i++) d_nnz[aj[i]]++; 1902 /* compute local off-diagonal contributions */ 1903 PetscCall(PetscArrayzero(g_nnz, nb)); 1904 for (i = 0; i < bi[ma]; i++) g_nnz[bj[i]]++; 1905 /* map those to global */ 1906 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1907 PetscCall(PetscSFSetGraphLayout(sf, A->cmap, nb, NULL, PETSC_USE_POINTER, a->garray)); 1908 PetscCall(PetscSFSetFromOptions(sf)); 1909 PetscCall(PetscArrayzero(o_nnz, na)); 1910 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM)); 1911 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM)); 1912 PetscCall(PetscSFDestroy(&sf)); 1913 1914 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 1915 PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M)); 1916 PetscCall(MatSetBlockSizes(B, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs))); 1917 PetscCall(MatSetType(B, ((PetscObject)A)->type_name)); 1918 PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz)); 1919 PetscCall(PetscFree4(d_nnz, o_nnz, g_nnz, oloc)); 1920 } else { 1921 B = *matout; 1922 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 1923 } 1924 1925 b = (Mat_MPIAIJ *)B->data; 1926 A_diag = a->A; 1927 B_diag = &b->A; 1928 sub_B_diag = (Mat_SeqAIJ *)(*B_diag)->data; 1929 A_diag_ncol = A_diag->cmap->N; 1930 B_diag_ilen = sub_B_diag->ilen; 1931 B_diag_i = sub_B_diag->i; 1932 1933 /* Set ilen for diagonal of B */ 1934 for (i = 0; i < A_diag_ncol; i++) B_diag_ilen[i] = B_diag_i[i + 1] - B_diag_i[i]; 1935 1936 /* 
Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done 1937 very quickly (=without using MatSetValues), because all writes are local. */ 1938 PetscCall(MatTransposeSetPrecursor(A_diag, *B_diag)); 1939 PetscCall(MatTranspose(A_diag, MAT_REUSE_MATRIX, B_diag)); 1940 1941 /* copy over the B part */ 1942 PetscCall(PetscMalloc1(bi[mb], &cols)); 1943 PetscCall(MatSeqAIJGetArrayRead(a->B, &bv)); 1944 pbv = bv; 1945 row = A->rmap->rstart; 1946 for (i = 0; i < bi[mb]; i++) cols[i] = a->garray[bj[i]]; 1947 cols_tmp = cols; 1948 for (i = 0; i < mb; i++) { 1949 ncol = bi[i + 1] - bi[i]; 1950 PetscCall(MatSetValues(B, ncol, cols_tmp, 1, &row, pbv, INSERT_VALUES)); 1951 row++; 1952 if (pbv) pbv += ncol; 1953 if (cols_tmp) cols_tmp += ncol; 1954 } 1955 PetscCall(PetscFree(cols)); 1956 PetscCall(MatSeqAIJRestoreArrayRead(a->B, &bv)); 1957 1958 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 1959 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 1960 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1961 *matout = B; 1962 } else { 1963 PetscCall(MatHeaderMerge(A, &B)); 1964 } 1965 PetscFunctionReturn(PETSC_SUCCESS); 1966 } 1967 1968 static PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat, Vec ll, Vec rr) 1969 { 1970 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1971 Mat a = aij->A, b = aij->B; 1972 PetscInt s1, s2, s3; 1973 1974 PetscFunctionBegin; 1975 PetscCall(MatGetLocalSize(mat, &s2, &s3)); 1976 if (rr) { 1977 PetscCall(VecGetLocalSize(rr, &s1)); 1978 PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size"); 1979 /* Overlap communication with computation. 
*/ 1980 PetscCall(VecScatterBegin(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1981 } 1982 if (ll) { 1983 PetscCall(VecGetLocalSize(ll, &s1)); 1984 PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size"); 1985 PetscUseTypeMethod(b, diagonalscale, ll, NULL); 1986 } 1987 /* scale the diagonal block */ 1988 PetscUseTypeMethod(a, diagonalscale, ll, rr); 1989 1990 if (rr) { 1991 /* Do a scatter end and then right scale the off-diagonal block */ 1992 PetscCall(VecScatterEnd(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1993 PetscUseTypeMethod(b, diagonalscale, NULL, aij->lvec); 1994 } 1995 PetscFunctionReturn(PETSC_SUCCESS); 1996 } 1997 1998 static PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 1999 { 2000 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2001 2002 PetscFunctionBegin; 2003 PetscCall(MatSetUnfactored(a->A)); 2004 PetscFunctionReturn(PETSC_SUCCESS); 2005 } 2006 2007 static PetscErrorCode MatEqual_MPIAIJ(Mat A, Mat B, PetscBool *flag) 2008 { 2009 Mat_MPIAIJ *matB = (Mat_MPIAIJ *)B->data, *matA = (Mat_MPIAIJ *)A->data; 2010 Mat a, b, c, d; 2011 PetscBool flg; 2012 2013 PetscFunctionBegin; 2014 a = matA->A; 2015 b = matA->B; 2016 c = matB->A; 2017 d = matB->B; 2018 2019 PetscCall(MatEqual(a, c, &flg)); 2020 if (flg) PetscCall(MatEqual(b, d, &flg)); 2021 PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A))); 2022 PetscFunctionReturn(PETSC_SUCCESS); 2023 } 2024 2025 static PetscErrorCode MatCopy_MPIAIJ(Mat A, Mat B, MatStructure str) 2026 { 2027 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2028 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2029 2030 PetscFunctionBegin; 2031 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
*/ 2032 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2033 /* because of the column compression in the off-processor part of the matrix a->B, 2034 the number of columns in a->B and b->B may be different, hence we cannot call 2035 the MatCopy() directly on the two parts. If need be, we can provide a more 2036 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2037 then copying the submatrices */ 2038 PetscCall(MatCopy_Basic(A, B, str)); 2039 } else { 2040 PetscCall(MatCopy(a->A, b->A, str)); 2041 PetscCall(MatCopy(a->B, b->B, str)); 2042 } 2043 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 2044 PetscFunctionReturn(PETSC_SUCCESS); 2045 } 2046 2047 /* 2048 Computes the number of nonzeros per row needed for preallocation when X and Y 2049 have different nonzero structure. 2050 */ 2051 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m, const PetscInt *xi, const PetscInt *xj, const PetscInt *xltog, const PetscInt *yi, const PetscInt *yj, const PetscInt *yltog, PetscInt *nnz) 2052 { 2053 PetscInt i, j, k, nzx, nzy; 2054 2055 PetscFunctionBegin; 2056 /* Set the number of nonzeros in the new matrix */ 2057 for (i = 0; i < m; i++) { 2058 const PetscInt *xjj = PetscSafePointerPlusOffset(xj, xi[i]), *yjj = PetscSafePointerPlusOffset(yj, yi[i]); 2059 nzx = xi[i + 1] - xi[i]; 2060 nzy = yi[i + 1] - yi[i]; 2061 nnz[i] = 0; 2062 for (j = 0, k = 0; j < nzx; j++) { /* Point in X */ 2063 for (; k < nzy && yltog[yjj[k]] < xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2064 if (k < nzy && yltog[yjj[k]] == xltog[xjj[j]]) k++; /* Skip duplicate */ 2065 nnz[i]++; 2066 } 2067 for (; k < nzy; k++) nnz[i]++; 2068 } 2069 PetscFunctionReturn(PETSC_SUCCESS); 2070 } 2071 2072 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2073 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz) 2074 { 
2075 PetscInt m = Y->rmap->N; 2076 Mat_SeqAIJ *x = (Mat_SeqAIJ *)X->data; 2077 Mat_SeqAIJ *y = (Mat_SeqAIJ *)Y->data; 2078 2079 PetscFunctionBegin; 2080 PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz)); 2081 PetscFunctionReturn(PETSC_SUCCESS); 2082 } 2083 2084 static PetscErrorCode MatAXPY_MPIAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str) 2085 { 2086 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data, *yy = (Mat_MPIAIJ *)Y->data; 2087 2088 PetscFunctionBegin; 2089 if (str == SAME_NONZERO_PATTERN) { 2090 PetscCall(MatAXPY(yy->A, a, xx->A, str)); 2091 PetscCall(MatAXPY(yy->B, a, xx->B, str)); 2092 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2093 PetscCall(MatAXPY_Basic(Y, a, X, str)); 2094 } else { 2095 Mat B; 2096 PetscInt *nnz_d, *nnz_o; 2097 2098 PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d)); 2099 PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o)); 2100 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B)); 2101 PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name)); 2102 PetscCall(MatSetLayouts(B, Y->rmap, Y->cmap)); 2103 PetscCall(MatSetType(B, ((PetscObject)Y)->type_name)); 2104 PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A, xx->A, nnz_d)); 2105 PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o)); 2106 PetscCall(MatMPIAIJSetPreallocation(B, 0, nnz_d, 0, nnz_o)); 2107 PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str)); 2108 PetscCall(MatHeaderMerge(Y, &B)); 2109 PetscCall(PetscFree(nnz_d)); 2110 PetscCall(PetscFree(nnz_o)); 2111 } 2112 PetscFunctionReturn(PETSC_SUCCESS); 2113 } 2114 2115 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat); 2116 2117 static PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2118 { 2119 PetscFunctionBegin; 2120 if (PetscDefined(USE_COMPLEX)) { 2121 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2122 2123 PetscCall(MatConjugate_SeqAIJ(aij->A)); 2124 PetscCall(MatConjugate_SeqAIJ(aij->B)); 2125 } 2126 
PetscFunctionReturn(PETSC_SUCCESS); 2127 } 2128 2129 static PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2130 { 2131 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2132 2133 PetscFunctionBegin; 2134 PetscCall(MatRealPart(a->A)); 2135 PetscCall(MatRealPart(a->B)); 2136 PetscFunctionReturn(PETSC_SUCCESS); 2137 } 2138 2139 static PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2140 { 2141 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2142 2143 PetscFunctionBegin; 2144 PetscCall(MatImaginaryPart(a->A)); 2145 PetscCall(MatImaginaryPart(a->B)); 2146 PetscFunctionReturn(PETSC_SUCCESS); 2147 } 2148 2149 static PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2150 { 2151 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2152 PetscInt i, *idxb = NULL, m = A->rmap->n; 2153 PetscScalar *va, *vv; 2154 Vec vB, vA; 2155 const PetscScalar *vb; 2156 2157 PetscFunctionBegin; 2158 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vA)); 2159 PetscCall(MatGetRowMaxAbs(a->A, vA, idx)); 2160 2161 PetscCall(VecGetArrayWrite(vA, &va)); 2162 if (idx) { 2163 for (i = 0; i < m; i++) { 2164 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2165 } 2166 } 2167 2168 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vB)); 2169 PetscCall(PetscMalloc1(m, &idxb)); 2170 PetscCall(MatGetRowMaxAbs(a->B, vB, idxb)); 2171 2172 PetscCall(VecGetArrayWrite(v, &vv)); 2173 PetscCall(VecGetArrayRead(vB, &vb)); 2174 for (i = 0; i < m; i++) { 2175 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2176 vv[i] = vb[i]; 2177 if (idx) idx[i] = a->garray[idxb[i]]; 2178 } else { 2179 vv[i] = va[i]; 2180 if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) idx[i] = a->garray[idxb[i]]; 2181 } 2182 } 2183 PetscCall(VecRestoreArrayWrite(vA, &vv)); 2184 PetscCall(VecRestoreArrayWrite(vA, &va)); 2185 PetscCall(VecRestoreArrayRead(vB, &vb)); 2186 PetscCall(PetscFree(idxb)); 2187 PetscCall(VecDestroy(&vA)); 2188 PetscCall(VecDestroy(&vB)); 2189 PetscFunctionReturn(PETSC_SUCCESS); 2190 } 2191 
2192 static PetscErrorCode MatGetRowSumAbs_MPIAIJ(Mat A, Vec v) 2193 { 2194 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2195 PetscInt m = A->rmap->n; 2196 Vec vB, vA; 2197 2198 PetscFunctionBegin; 2199 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vA)); 2200 PetscCall(MatGetRowSumAbs(a->A, vA)); 2201 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vB)); 2202 PetscCall(MatGetRowSumAbs(a->B, vB)); 2203 PetscCall(VecAXPY(vA, 1.0, vB)); 2204 PetscCall(VecDestroy(&vB)); 2205 PetscCall(VecCopy(vA, v)); 2206 PetscCall(VecDestroy(&vA)); 2207 PetscFunctionReturn(PETSC_SUCCESS); 2208 } 2209 2210 static PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2211 { 2212 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 2213 PetscInt m = A->rmap->n, n = A->cmap->n; 2214 PetscInt cstart = A->cmap->rstart, cend = A->cmap->rend; 2215 PetscInt *cmap = mat->garray; 2216 PetscInt *diagIdx, *offdiagIdx; 2217 Vec diagV, offdiagV; 2218 PetscScalar *a, *diagA, *offdiagA; 2219 const PetscScalar *ba, *bav; 2220 PetscInt r, j, col, ncols, *bi, *bj; 2221 Mat B = mat->B; 2222 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 2223 2224 PetscFunctionBegin; 2225 /* When a process holds entire A and other processes have no entry */ 2226 if (A->cmap->N == n) { 2227 PetscCall(VecGetArrayWrite(v, &diagA)); 2228 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV)); 2229 PetscCall(MatGetRowMinAbs(mat->A, diagV, idx)); 2230 PetscCall(VecDestroy(&diagV)); 2231 PetscCall(VecRestoreArrayWrite(v, &diagA)); 2232 PetscFunctionReturn(PETSC_SUCCESS); 2233 } else if (n == 0) { 2234 if (m) { 2235 PetscCall(VecGetArrayWrite(v, &a)); 2236 for (r = 0; r < m; r++) { 2237 a[r] = 0.0; 2238 if (idx) idx[r] = -1; 2239 } 2240 PetscCall(VecRestoreArrayWrite(v, &a)); 2241 } 2242 PetscFunctionReturn(PETSC_SUCCESS); 2243 } 2244 2245 PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx)); 2246 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2247 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2248 
PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx)); 2249 2250 /* Get offdiagIdx[] for implicit 0.0 */ 2251 PetscCall(MatSeqAIJGetArrayRead(B, &bav)); 2252 ba = bav; 2253 bi = b->i; 2254 bj = b->j; 2255 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2256 for (r = 0; r < m; r++) { 2257 ncols = bi[r + 1] - bi[r]; 2258 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2259 offdiagA[r] = *ba; 2260 offdiagIdx[r] = cmap[0]; 2261 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2262 offdiagA[r] = 0.0; 2263 2264 /* Find first hole in the cmap */ 2265 for (j = 0; j < ncols; j++) { 2266 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2267 if (col > j && j < cstart) { 2268 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2269 break; 2270 } else if (col > j + n && j >= cstart) { 2271 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2272 break; 2273 } 2274 } 2275 if (j == ncols && ncols < A->cmap->N - n) { 2276 /* a hole is outside compressed Bcols */ 2277 if (ncols == 0) { 2278 if (cstart) { 2279 offdiagIdx[r] = 0; 2280 } else offdiagIdx[r] = cend; 2281 } else { /* ncols > 0 */ 2282 offdiagIdx[r] = cmap[ncols - 1] + 1; 2283 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2284 } 2285 } 2286 } 2287 2288 for (j = 0; j < ncols; j++) { 2289 if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) { 2290 offdiagA[r] = *ba; 2291 offdiagIdx[r] = cmap[*bj]; 2292 } 2293 ba++; 2294 bj++; 2295 } 2296 } 2297 2298 PetscCall(VecGetArrayWrite(v, &a)); 2299 PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA)); 2300 for (r = 0; r < m; ++r) { 2301 if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) { 2302 a[r] = diagA[r]; 2303 if (idx) idx[r] = cstart + diagIdx[r]; 2304 } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) { 2305 a[r] = diagA[r]; 2306 if (idx) { 2307 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2308 idx[r] = cstart + diagIdx[r]; 2309 } else idx[r] = 
offdiagIdx[r]; 2310 } 2311 } else { 2312 a[r] = offdiagA[r]; 2313 if (idx) idx[r] = offdiagIdx[r]; 2314 } 2315 } 2316 PetscCall(MatSeqAIJRestoreArrayRead(B, &bav)); 2317 PetscCall(VecRestoreArrayWrite(v, &a)); 2318 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA)); 2319 PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 2320 PetscCall(VecDestroy(&diagV)); 2321 PetscCall(VecDestroy(&offdiagV)); 2322 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2323 PetscFunctionReturn(PETSC_SUCCESS); 2324 } 2325 2326 static PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2327 { 2328 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 2329 PetscInt m = A->rmap->n, n = A->cmap->n; 2330 PetscInt cstart = A->cmap->rstart, cend = A->cmap->rend; 2331 PetscInt *cmap = mat->garray; 2332 PetscInt *diagIdx, *offdiagIdx; 2333 Vec diagV, offdiagV; 2334 PetscScalar *a, *diagA, *offdiagA; 2335 const PetscScalar *ba, *bav; 2336 PetscInt r, j, col, ncols, *bi, *bj; 2337 Mat B = mat->B; 2338 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 2339 2340 PetscFunctionBegin; 2341 /* When a process holds entire A and other processes have no entry */ 2342 if (A->cmap->N == n) { 2343 PetscCall(VecGetArrayWrite(v, &diagA)); 2344 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV)); 2345 PetscCall(MatGetRowMin(mat->A, diagV, idx)); 2346 PetscCall(VecDestroy(&diagV)); 2347 PetscCall(VecRestoreArrayWrite(v, &diagA)); 2348 PetscFunctionReturn(PETSC_SUCCESS); 2349 } else if (n == 0) { 2350 if (m) { 2351 PetscCall(VecGetArrayWrite(v, &a)); 2352 for (r = 0; r < m; r++) { 2353 a[r] = PETSC_MAX_REAL; 2354 if (idx) idx[r] = -1; 2355 } 2356 PetscCall(VecRestoreArrayWrite(v, &a)); 2357 } 2358 PetscFunctionReturn(PETSC_SUCCESS); 2359 } 2360 2361 PetscCall(PetscCalloc2(m, &diagIdx, m, &offdiagIdx)); 2362 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2363 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2364 PetscCall(MatGetRowMin(mat->A, diagV, diagIdx)); 2365 2366 /* Get 
offdiagIdx[] for implicit 0.0 */ 2367 PetscCall(MatSeqAIJGetArrayRead(B, &bav)); 2368 ba = bav; 2369 bi = b->i; 2370 bj = b->j; 2371 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2372 for (r = 0; r < m; r++) { 2373 ncols = bi[r + 1] - bi[r]; 2374 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2375 offdiagA[r] = *ba; 2376 offdiagIdx[r] = cmap[0]; 2377 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2378 offdiagA[r] = 0.0; 2379 2380 /* Find first hole in the cmap */ 2381 for (j = 0; j < ncols; j++) { 2382 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2383 if (col > j && j < cstart) { 2384 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2385 break; 2386 } else if (col > j + n && j >= cstart) { 2387 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2388 break; 2389 } 2390 } 2391 if (j == ncols && ncols < A->cmap->N - n) { 2392 /* a hole is outside compressed Bcols */ 2393 if (ncols == 0) { 2394 if (cstart) { 2395 offdiagIdx[r] = 0; 2396 } else offdiagIdx[r] = cend; 2397 } else { /* ncols > 0 */ 2398 offdiagIdx[r] = cmap[ncols - 1] + 1; 2399 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2400 } 2401 } 2402 } 2403 2404 for (j = 0; j < ncols; j++) { 2405 if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) { 2406 offdiagA[r] = *ba; 2407 offdiagIdx[r] = cmap[*bj]; 2408 } 2409 ba++; 2410 bj++; 2411 } 2412 } 2413 2414 PetscCall(VecGetArrayWrite(v, &a)); 2415 PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA)); 2416 for (r = 0; r < m; ++r) { 2417 if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) { 2418 a[r] = diagA[r]; 2419 if (idx) idx[r] = cstart + diagIdx[r]; 2420 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2421 a[r] = diagA[r]; 2422 if (idx) { 2423 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2424 idx[r] = cstart + diagIdx[r]; 2425 } else idx[r] = offdiagIdx[r]; 2426 } 2427 } else { 2428 a[r] = offdiagA[r]; 2429 if (idx) idx[r] = 
offdiagIdx[r]; 2430 } 2431 } 2432 PetscCall(MatSeqAIJRestoreArrayRead(B, &bav)); 2433 PetscCall(VecRestoreArrayWrite(v, &a)); 2434 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA)); 2435 PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 2436 PetscCall(VecDestroy(&diagV)); 2437 PetscCall(VecDestroy(&offdiagV)); 2438 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2439 PetscFunctionReturn(PETSC_SUCCESS); 2440 } 2441 2442 static PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2443 { 2444 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 2445 PetscInt m = A->rmap->n, n = A->cmap->n; 2446 PetscInt cstart = A->cmap->rstart, cend = A->cmap->rend; 2447 PetscInt *cmap = mat->garray; 2448 PetscInt *diagIdx, *offdiagIdx; 2449 Vec diagV, offdiagV; 2450 PetscScalar *a, *diagA, *offdiagA; 2451 const PetscScalar *ba, *bav; 2452 PetscInt r, j, col, ncols, *bi, *bj; 2453 Mat B = mat->B; 2454 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 2455 2456 PetscFunctionBegin; 2457 /* When a process holds entire A and other processes have no entry */ 2458 if (A->cmap->N == n) { 2459 PetscCall(VecGetArrayWrite(v, &diagA)); 2460 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV)); 2461 PetscCall(MatGetRowMax(mat->A, diagV, idx)); 2462 PetscCall(VecDestroy(&diagV)); 2463 PetscCall(VecRestoreArrayWrite(v, &diagA)); 2464 PetscFunctionReturn(PETSC_SUCCESS); 2465 } else if (n == 0) { 2466 if (m) { 2467 PetscCall(VecGetArrayWrite(v, &a)); 2468 for (r = 0; r < m; r++) { 2469 a[r] = PETSC_MIN_REAL; 2470 if (idx) idx[r] = -1; 2471 } 2472 PetscCall(VecRestoreArrayWrite(v, &a)); 2473 } 2474 PetscFunctionReturn(PETSC_SUCCESS); 2475 } 2476 2477 PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx)); 2478 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2479 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2480 PetscCall(MatGetRowMax(mat->A, diagV, diagIdx)); 2481 2482 /* Get offdiagIdx[] for implicit 0.0 */ 2483 PetscCall(MatSeqAIJGetArrayRead(B, &bav)); 2484 
ba = bav; 2485 bi = b->i; 2486 bj = b->j; 2487 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2488 for (r = 0; r < m; r++) { 2489 ncols = bi[r + 1] - bi[r]; 2490 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2491 offdiagA[r] = *ba; 2492 offdiagIdx[r] = cmap[0]; 2493 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2494 offdiagA[r] = 0.0; 2495 2496 /* Find first hole in the cmap */ 2497 for (j = 0; j < ncols; j++) { 2498 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2499 if (col > j && j < cstart) { 2500 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2501 break; 2502 } else if (col > j + n && j >= cstart) { 2503 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2504 break; 2505 } 2506 } 2507 if (j == ncols && ncols < A->cmap->N - n) { 2508 /* a hole is outside compressed Bcols */ 2509 if (ncols == 0) { 2510 if (cstart) { 2511 offdiagIdx[r] = 0; 2512 } else offdiagIdx[r] = cend; 2513 } else { /* ncols > 0 */ 2514 offdiagIdx[r] = cmap[ncols - 1] + 1; 2515 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2516 } 2517 } 2518 } 2519 2520 for (j = 0; j < ncols; j++) { 2521 if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) { 2522 offdiagA[r] = *ba; 2523 offdiagIdx[r] = cmap[*bj]; 2524 } 2525 ba++; 2526 bj++; 2527 } 2528 } 2529 2530 PetscCall(VecGetArrayWrite(v, &a)); 2531 PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA)); 2532 for (r = 0; r < m; ++r) { 2533 if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) { 2534 a[r] = diagA[r]; 2535 if (idx) idx[r] = cstart + diagIdx[r]; 2536 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2537 a[r] = diagA[r]; 2538 if (idx) { 2539 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2540 idx[r] = cstart + diagIdx[r]; 2541 } else idx[r] = offdiagIdx[r]; 2542 } 2543 } else { 2544 a[r] = offdiagA[r]; 2545 if (idx) idx[r] = offdiagIdx[r]; 2546 } 2547 } 2548 PetscCall(MatSeqAIJRestoreArrayRead(B, &bav)); 2549 
PetscCall(VecRestoreArrayWrite(v, &a)); 2550 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA)); 2551 PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 2552 PetscCall(VecDestroy(&diagV)); 2553 PetscCall(VecDestroy(&offdiagV)); 2554 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2555 PetscFunctionReturn(PETSC_SUCCESS); 2556 } 2557 2558 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat, Mat *newmat) 2559 { 2560 Mat *dummy; 2561 2562 PetscFunctionBegin; 2563 PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat, MAT_DO_NOT_GET_VALUES, MAT_INITIAL_MATRIX, &dummy)); 2564 *newmat = *dummy; 2565 PetscCall(PetscFree(dummy)); 2566 PetscFunctionReturn(PETSC_SUCCESS); 2567 } 2568 2569 static PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A, const PetscScalar **values) 2570 { 2571 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2572 2573 PetscFunctionBegin; 2574 PetscCall(MatInvertBlockDiagonal(a->A, values)); 2575 A->factorerrortype = a->A->factorerrortype; 2576 PetscFunctionReturn(PETSC_SUCCESS); 2577 } 2578 2579 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x, PetscRandom rctx) 2580 { 2581 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)x->data; 2582 2583 PetscFunctionBegin; 2584 PetscCheck(x->assembled || x->preallocated, PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed"); 2585 PetscCall(MatSetRandom(aij->A, rctx)); 2586 if (x->assembled) { 2587 PetscCall(MatSetRandom(aij->B, rctx)); 2588 } else { 2589 PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B, x->cmap->rstart, x->cmap->rend, rctx)); 2590 } 2591 PetscCall(MatAssemblyBegin(x, MAT_FINAL_ASSEMBLY)); 2592 PetscCall(MatAssemblyEnd(x, MAT_FINAL_ASSEMBLY)); 2593 PetscFunctionReturn(PETSC_SUCCESS); 2594 } 2595 2596 static PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A, PetscBool sc) 2597 { 2598 PetscFunctionBegin; 2599 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2600 else 
A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2601 PetscFunctionReturn(PETSC_SUCCESS); 2602 } 2603 2604 /*@ 2605 MatMPIAIJGetNumberNonzeros - gets the number of nonzeros in the matrix on this MPI rank 2606 2607 Not Collective 2608 2609 Input Parameter: 2610 . A - the matrix 2611 2612 Output Parameter: 2613 . nz - the number of nonzeros 2614 2615 Level: advanced 2616 2617 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ` 2618 @*/ 2619 PetscErrorCode MatMPIAIJGetNumberNonzeros(Mat A, PetscCount *nz) 2620 { 2621 Mat_MPIAIJ *maij = (Mat_MPIAIJ *)A->data; 2622 Mat_SeqAIJ *aaij = (Mat_SeqAIJ *)maij->A->data, *baij = (Mat_SeqAIJ *)maij->B->data; 2623 PetscBool isaij; 2624 2625 PetscFunctionBegin; 2626 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIAIJ, &isaij)); 2627 PetscCheck(isaij, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)A)->type_name); 2628 *nz = aaij->i[A->rmap->n] + baij->i[A->rmap->n]; 2629 PetscFunctionReturn(PETSC_SUCCESS); 2630 } 2631 2632 /*@ 2633 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2634 2635 Collective 2636 2637 Input Parameters: 2638 + A - the matrix 2639 - sc - `PETSC_TRUE` indicates use the scalable algorithm (default is not to use the scalable algorithm) 2640 2641 Level: advanced 2642 2643 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ` 2644 @*/ 2645 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A, PetscBool sc) 2646 { 2647 PetscFunctionBegin; 2648 PetscTryMethod(A, "MatMPIAIJSetUseScalableIncreaseOverlap_C", (Mat, PetscBool), (A, sc)); 2649 PetscFunctionReturn(PETSC_SUCCESS); 2650 } 2651 2652 PetscErrorCode MatSetFromOptions_MPIAIJ(Mat A, PetscOptionItems *PetscOptionsObject) 2653 { 2654 PetscBool sc = PETSC_FALSE, flg; 2655 2656 PetscFunctionBegin; 2657 PetscOptionsHeadBegin(PetscOptionsObject, "MPIAIJ options"); 2658 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2659 
PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable", "Use a scalable algorithm to compute the overlap", "MatIncreaseOverlap", sc, &sc, &flg)); 2660 if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, sc)); 2661 PetscOptionsHeadEnd(); 2662 PetscFunctionReturn(PETSC_SUCCESS); 2663 } 2664 2665 static PetscErrorCode MatShift_MPIAIJ(Mat Y, PetscScalar a) 2666 { 2667 Mat_MPIAIJ *maij = (Mat_MPIAIJ *)Y->data; 2668 Mat_SeqAIJ *aij = (Mat_SeqAIJ *)maij->A->data; 2669 2670 PetscFunctionBegin; 2671 if (!Y->preallocated) { 2672 PetscCall(MatMPIAIJSetPreallocation(Y, 1, NULL, 0, NULL)); 2673 } else if (!aij->nz) { /* It does not matter if diagonals of Y only partially lie in maij->A. We just need an estimated preallocation. */ 2674 PetscInt nonew = aij->nonew; 2675 PetscCall(MatSeqAIJSetPreallocation(maij->A, 1, NULL)); 2676 aij->nonew = nonew; 2677 } 2678 PetscCall(MatShift_Basic(Y, a)); 2679 PetscFunctionReturn(PETSC_SUCCESS); 2680 } 2681 2682 static PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A, PetscBool *missing, PetscInt *d) 2683 { 2684 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2685 2686 PetscFunctionBegin; 2687 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices"); 2688 PetscCall(MatMissingDiagonal(a->A, missing, d)); 2689 if (d) { 2690 PetscInt rstart; 2691 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 2692 *d += rstart; 2693 } 2694 PetscFunctionReturn(PETSC_SUCCESS); 2695 } 2696 2697 static PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A, PetscInt nblocks, const PetscInt *bsizes, PetscScalar *diag) 2698 { 2699 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2700 2701 PetscFunctionBegin; 2702 PetscCall(MatInvertVariableBlockDiagonal(a->A, nblocks, bsizes, diag)); 2703 PetscFunctionReturn(PETSC_SUCCESS); 2704 } 2705 2706 static PetscErrorCode MatEliminateZeros_MPIAIJ(Mat A, PetscBool keep) 2707 { 2708 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2709 2710 PetscFunctionBegin; 2711 
PetscCall(MatEliminateZeros_SeqAIJ(a->A, keep));        // possibly keep zero diagonal coefficients
  PetscCall(MatEliminateZeros_SeqAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Operation table for this matrix type. Entries are positional (the numeric comments mark the slot
  index); NULL means this file provides no implementation for that slot.
*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
  MatGetRow_MPIAIJ,
  MatRestoreRow_MPIAIJ,
  MatMult_MPIAIJ,
  /* 4*/ MatMultAdd_MPIAIJ,
  MatMultTranspose_MPIAIJ,
  MatMultTransposeAdd_MPIAIJ,
  NULL,
  NULL,
  NULL,
  /*10*/ NULL,
  NULL,
  NULL,
  MatSOR_MPIAIJ,
  MatTranspose_MPIAIJ,
  /*15*/ MatGetInfo_MPIAIJ,
  MatEqual_MPIAIJ,
  MatGetDiagonal_MPIAIJ,
  MatDiagonalScale_MPIAIJ,
  MatNorm_MPIAIJ,
  /*20*/ MatAssemblyBegin_MPIAIJ,
  MatAssemblyEnd_MPIAIJ,
  MatSetOption_MPIAIJ,
  MatZeroEntries_MPIAIJ,
  /*24*/ MatZeroRows_MPIAIJ,
  NULL,
  NULL,
  NULL,
  NULL,
  /*29*/ MatSetUp_MPI_Hash,
  NULL,
  NULL,
  MatGetDiagonalBlock_MPIAIJ,
  NULL,
  /*34*/ MatDuplicate_MPIAIJ,
  NULL,
  NULL,
  NULL,
  NULL,
  /*39*/ MatAXPY_MPIAIJ,
  MatCreateSubMatrices_MPIAIJ,
  MatIncreaseOverlap_MPIAIJ,
  MatGetValues_MPIAIJ,
  MatCopy_MPIAIJ,
  /*44*/ MatGetRowMax_MPIAIJ,
  MatScale_MPIAIJ,
  MatShift_MPIAIJ,
  MatDiagonalSet_MPIAIJ,
  MatZeroRowsColumns_MPIAIJ,
  /*49*/ MatSetRandom_MPIAIJ,
  MatGetRowIJ_MPIAIJ,
  MatRestoreRowIJ_MPIAIJ,
  NULL,
  NULL,
  /*54*/ MatFDColoringCreate_MPIXAIJ,
  NULL,
  MatSetUnfactored_MPIAIJ,
  MatPermute_MPIAIJ,
  NULL,
  /*59*/ MatCreateSubMatrix_MPIAIJ,
  MatDestroy_MPIAIJ,
  MatView_MPIAIJ,
  NULL,
  NULL,
  /*64*/ NULL,
  MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
  NULL,
  NULL,
  NULL,
  /*69*/ MatGetRowMaxAbs_MPIAIJ,
  MatGetRowMinAbs_MPIAIJ,
  NULL,
  NULL,
  NULL,
  NULL,
  /*75*/ MatFDColoringApply_AIJ,
  MatSetFromOptions_MPIAIJ,
  NULL,
  NULL,
  MatFindZeroDiagonals_MPIAIJ,
  /*80*/ NULL,
  NULL,
  NULL,
  /*83*/ MatLoad_MPIAIJ,
  NULL,
  NULL,
  NULL,
  NULL,
  NULL,
  /*89*/ NULL,
  NULL,
  MatMatMultNumeric_MPIAIJ_MPIAIJ,
  NULL,
  NULL,
  /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
  NULL,
  NULL,
  NULL,
  MatBindToCPU_MPIAIJ,
  /*99*/ MatProductSetFromOptions_MPIAIJ,
  NULL,
  NULL,
  MatConjugate_MPIAIJ,
  NULL,
  /*104*/ MatSetValuesRow_MPIAIJ,
  MatRealPart_MPIAIJ,
  MatImaginaryPart_MPIAIJ,
  NULL,
  NULL,
  /*109*/ NULL,
  NULL,
  MatGetRowMin_MPIAIJ,
  NULL,
  MatMissingDiagonal_MPIAIJ,
  /*114*/ MatGetSeqNonzeroStructure_MPIAIJ,
  NULL,
  MatGetGhosts_MPIAIJ,
  NULL,
  NULL,
  /*119*/ MatMultDiagonalBlock_MPIAIJ,
  NULL,
  NULL,
  NULL,
  MatGetMultiProcBlock_MPIAIJ,
  /*124*/ MatFindNonzeroRows_MPIAIJ,
  MatGetColumnReductions_MPIAIJ,
  MatInvertBlockDiagonal_MPIAIJ,
  MatInvertVariableBlockDiagonal_MPIAIJ,
  MatCreateSubMatricesMPI_MPIAIJ,
  /*129*/ NULL,
  NULL,
  NULL,
  MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
  NULL,
  /*134*/ NULL,
  NULL,
  NULL,
  NULL,
  NULL,
  /*139*/ MatSetBlockSizes_MPIAIJ,
  NULL,
  NULL,
  MatFDColoringSetUp_MPIXAIJ,
  MatFindOffBlockDiagonalEntries_MPIAIJ,
  MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
  /*145*/ NULL,
  NULL,
  NULL,
  MatCreateGraph_Simple_AIJ,
  NULL,
  /*150*/ NULL,
  MatEliminateZeros_MPIAIJ,
  MatGetRowSumAbs_MPIAIJ};

/* MatStoreValues_MPIAIJ - call MatStoreValues() on the diagonal block, then on the off-diagonal block */
static PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatStoreValues(aij->A));
  PetscCall(MatStoreValues(aij->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatRetrieveValues_MPIAIJ - call MatRetrieveValues() on the diagonal block, then on the off-diagonal block */
static PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

PetscFunctionBegin; 2885 PetscCall(MatRetrieveValues(aij->A)); 2886 PetscCall(MatRetrieveValues(aij->B)); 2887 PetscFunctionReturn(PETSC_SUCCESS); 2888 } 2889 2890 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[]) 2891 { 2892 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2893 PetscMPIInt size; 2894 2895 PetscFunctionBegin; 2896 if (B->hash_active) { 2897 B->ops[0] = b->cops; 2898 B->hash_active = PETSC_FALSE; 2899 } 2900 PetscCall(PetscLayoutSetUp(B->rmap)); 2901 PetscCall(PetscLayoutSetUp(B->cmap)); 2902 2903 #if defined(PETSC_USE_CTABLE) 2904 PetscCall(PetscHMapIDestroy(&b->colmap)); 2905 #else 2906 PetscCall(PetscFree(b->colmap)); 2907 #endif 2908 PetscCall(PetscFree(b->garray)); 2909 PetscCall(VecDestroy(&b->lvec)); 2910 PetscCall(VecScatterDestroy(&b->Mvctx)); 2911 2912 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 2913 2914 MatSeqXAIJGetOptions_Private(b->B); 2915 PetscCall(MatDestroy(&b->B)); 2916 PetscCall(MatCreate(PETSC_COMM_SELF, &b->B)); 2917 PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? 
B->cmap->N : 0)); 2918 PetscCall(MatSetBlockSizesFromMats(b->B, B, B)); 2919 PetscCall(MatSetType(b->B, MATSEQAIJ)); 2920 MatSeqXAIJRestoreOptions_Private(b->B); 2921 2922 MatSeqXAIJGetOptions_Private(b->A); 2923 PetscCall(MatDestroy(&b->A)); 2924 PetscCall(MatCreate(PETSC_COMM_SELF, &b->A)); 2925 PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n)); 2926 PetscCall(MatSetBlockSizesFromMats(b->A, B, B)); 2927 PetscCall(MatSetType(b->A, MATSEQAIJ)); 2928 MatSeqXAIJRestoreOptions_Private(b->A); 2929 2930 PetscCall(MatSeqAIJSetPreallocation(b->A, d_nz, d_nnz)); 2931 PetscCall(MatSeqAIJSetPreallocation(b->B, o_nz, o_nnz)); 2932 B->preallocated = PETSC_TRUE; 2933 B->was_assembled = PETSC_FALSE; 2934 B->assembled = PETSC_FALSE; 2935 PetscFunctionReturn(PETSC_SUCCESS); 2936 } 2937 2938 static PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) 2939 { 2940 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2941 2942 PetscFunctionBegin; 2943 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 2944 PetscCall(PetscLayoutSetUp(B->rmap)); 2945 PetscCall(PetscLayoutSetUp(B->cmap)); 2946 2947 #if defined(PETSC_USE_CTABLE) 2948 PetscCall(PetscHMapIDestroy(&b->colmap)); 2949 #else 2950 PetscCall(PetscFree(b->colmap)); 2951 #endif 2952 PetscCall(PetscFree(b->garray)); 2953 PetscCall(VecDestroy(&b->lvec)); 2954 PetscCall(VecScatterDestroy(&b->Mvctx)); 2955 2956 PetscCall(MatResetPreallocation(b->A)); 2957 PetscCall(MatResetPreallocation(b->B)); 2958 B->preallocated = PETSC_TRUE; 2959 B->was_assembled = PETSC_FALSE; 2960 B->assembled = PETSC_FALSE; 2961 PetscFunctionReturn(PETSC_SUCCESS); 2962 } 2963 2964 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat) 2965 { 2966 Mat mat; 2967 Mat_MPIAIJ *a, *oldmat = (Mat_MPIAIJ *)matin->data; 2968 2969 PetscFunctionBegin; 2970 *newmat = NULL; 2971 PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat)); 2972 PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, 
matin->cmap->N)); 2973 PetscCall(MatSetBlockSizesFromMats(mat, matin, matin)); 2974 PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name)); 2975 a = (Mat_MPIAIJ *)mat->data; 2976 2977 mat->factortype = matin->factortype; 2978 mat->assembled = matin->assembled; 2979 mat->insertmode = NOT_SET_VALUES; 2980 2981 a->size = oldmat->size; 2982 a->rank = oldmat->rank; 2983 a->donotstash = oldmat->donotstash; 2984 a->roworiented = oldmat->roworiented; 2985 a->rowindices = NULL; 2986 a->rowvalues = NULL; 2987 a->getrowactive = PETSC_FALSE; 2988 2989 PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap)); 2990 PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap)); 2991 if (matin->hash_active) { 2992 PetscCall(MatSetUp(mat)); 2993 } else { 2994 mat->preallocated = matin->preallocated; 2995 if (oldmat->colmap) { 2996 #if defined(PETSC_USE_CTABLE) 2997 PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap)); 2998 #else 2999 PetscCall(PetscMalloc1(mat->cmap->N, &a->colmap)); 3000 PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, mat->cmap->N)); 3001 #endif 3002 } else a->colmap = NULL; 3003 if (oldmat->garray) { 3004 PetscInt len; 3005 len = oldmat->B->cmap->n; 3006 PetscCall(PetscMalloc1(len + 1, &a->garray)); 3007 if (len) PetscCall(PetscArraycpy(a->garray, oldmat->garray, len)); 3008 } else a->garray = NULL; 3009 3010 /* It may happen MatDuplicate is called with a non-assembled matrix 3011 In fact, MatDuplicate only requires the matrix to be preallocated 3012 This may happen inside a DMCreateMatrix_Shell */ 3013 if (oldmat->lvec) PetscCall(VecDuplicate(oldmat->lvec, &a->lvec)); 3014 if (oldmat->Mvctx) PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx)); 3015 PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A)); 3016 PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B)); 3017 } 3018 PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist)); 3019 *newmat = mat; 3020 PetscFunctionReturn(PETSC_SUCCESS); 3021 } 3022 3023 
/*
  MatLoad_MPIAIJ - load newMat from a binary or HDF5 viewer.

  Any other viewer type is rejected with PETSC_ERR_SUP, and so is an HDF5 viewer when PETSc was
  built without HDF5.
*/
PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
{
  PetscBool is_binary, is_hdf5;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
  PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
  /* force binary viewer to load .info file if it has not yet done so */
  PetscCall(PetscViewerSetUp(viewer));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &is_binary));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &is_hdf5));
  PetscCheck(is_binary || is_hdf5, PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
  if (is_binary) PetscCall(MatLoad_MPIAIJ_Binary(newMat, viewer));
  else {
#if defined(PETSC_HAVE_HDF5)
    PetscCall(MatLoad_AIJ_HDF5(newMat, viewer));
#else
    SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
#endif
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatLoad_MPIAIJ_Binary - read a matrix from a binary viewer into mat.

  The data is read in this order: a 4-integer header (class id, rows, columns, nonzero count), the
  row lengths, the column indices, then the values. The row lengths are turned into CSR row offsets
  in place before the indices and values are read.
*/
PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
{
  PetscInt     header[4], M, N, m, nz, rows, cols, sum, i;
  PetscInt    *rowidxs, *colidxs;
  PetscScalar *matvals;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  /* read in matrix header */
  PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
  PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
  M  = header[1];
  N  = header[2];
  nz = header[3];
  PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
  PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
  PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIAIJ");

  /* set block sizes from the viewer's .info file */
  PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
  /* set global sizes if not set already */
  if (mat->rmap->N < 0) mat->rmap->N = M;
  if (mat->cmap->N < 0) mat->cmap->N = N;
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));

  /* check if the matrix sizes are correct */
  PetscCall(MatGetSize(mat, &rows, &cols));
  PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);

  /* read in row lengths and build row indices */
  PetscCall(MatGetLocalSize(mat, &m, NULL));
  PetscCall(PetscMalloc1(m + 1, &rowidxs));
  PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT));
  rowidxs[0] = 0;
  /* prefix sum: rowidxs[k] becomes the offset of local row k, rowidxs[m] the local nonzero count */
  for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
  /* the consistency check against the header is skipped when the header holds PETSC_MAX_INT */
  if (nz != PETSC_MAX_INT) {
    PetscCall(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer)));
    PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);
  }

  /* read in column indices and matrix values */
  PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals));
  PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
  PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
  /* store matrix indices and values */
PetscCall(MatMPIAIJSetPreallocationCSR(mat, rowidxs, colidxs, matvals)); 3096 PetscCall(PetscFree(rowidxs)); 3097 PetscCall(PetscFree2(colidxs, matvals)); 3098 PetscFunctionReturn(PETSC_SUCCESS); 3099 } 3100 3101 /* Not scalable because of ISAllGather() unless getting all columns. */ 3102 static PetscErrorCode ISGetSeqIS_Private(Mat mat, IS iscol, IS *isseq) 3103 { 3104 IS iscol_local; 3105 PetscBool isstride; 3106 PetscMPIInt lisstride = 0, gisstride; 3107 3108 PetscFunctionBegin; 3109 /* check if we are grabbing all columns*/ 3110 PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &isstride)); 3111 3112 if (isstride) { 3113 PetscInt start, len, mstart, mlen; 3114 PetscCall(ISStrideGetInfo(iscol, &start, NULL)); 3115 PetscCall(ISGetLocalSize(iscol, &len)); 3116 PetscCall(MatGetOwnershipRangeColumn(mat, &mstart, &mlen)); 3117 if (mstart == start && mlen - mstart == len) lisstride = 1; 3118 } 3119 3120 PetscCall(MPIU_Allreduce(&lisstride, &gisstride, 1, MPI_INT, MPI_MIN, PetscObjectComm((PetscObject)mat))); 3121 if (gisstride) { 3122 PetscInt N; 3123 PetscCall(MatGetSize(mat, NULL, &N)); 3124 PetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &iscol_local)); 3125 PetscCall(ISSetIdentity(iscol_local)); 3126 PetscCall(PetscInfo(mat, "Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n")); 3127 } else { 3128 PetscInt cbs; 3129 PetscCall(ISGetBlockSize(iscol, &cbs)); 3130 PetscCall(ISAllGather(iscol, &iscol_local)); 3131 PetscCall(ISSetBlockSize(iscol_local, cbs)); 3132 } 3133 3134 *isseq = iscol_local; 3135 PetscFunctionReturn(PETSC_SUCCESS); 3136 } 3137 3138 /* 3139 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local 3140 (see MatCreateSubMatrix_MPIAIJ_nonscalable) 3141 3142 Input Parameters: 3143 + mat - matrix 3144 . 
isrow - parallel row index set; its local indices are a subset of local columns of `mat`, 3145 i.e., mat->rstart <= isrow[i] < mat->rend 3146 - iscol - parallel column index set; its local indices are a subset of local columns of `mat`, 3147 i.e., mat->cstart <= iscol[i] < mat->cend 3148 3149 Output Parameters: 3150 + isrow_d - sequential row index set for retrieving mat->A 3151 . iscol_d - sequential column index set for retrieving mat->A 3152 . iscol_o - sequential column index set for retrieving mat->B 3153 - garray - column map; garray[i] indicates global location of iscol_o[i] in `iscol` 3154 */ 3155 static PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat, IS isrow, IS iscol, IS *isrow_d, IS *iscol_d, IS *iscol_o, const PetscInt *garray[]) 3156 { 3157 Vec x, cmap; 3158 const PetscInt *is_idx; 3159 PetscScalar *xarray, *cmaparray; 3160 PetscInt ncols, isstart, *idx, m, rstart, *cmap1, count; 3161 Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data; 3162 Mat B = a->B; 3163 Vec lvec = a->lvec, lcmap; 3164 PetscInt i, cstart, cend, Bn = B->cmap->N; 3165 MPI_Comm comm; 3166 VecScatter Mvctx = a->Mvctx; 3167 3168 PetscFunctionBegin; 3169 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3170 PetscCall(ISGetLocalSize(iscol, &ncols)); 3171 3172 /* (1) iscol is a sub-column vector of mat, pad it with '-1.' 
to form a full vector x */ 3173 PetscCall(MatCreateVecs(mat, &x, NULL)); 3174 PetscCall(VecSet(x, -1.0)); 3175 PetscCall(VecDuplicate(x, &cmap)); 3176 PetscCall(VecSet(cmap, -1.0)); 3177 3178 /* Get start indices */ 3179 PetscCallMPI(MPI_Scan(&ncols, &isstart, 1, MPIU_INT, MPI_SUM, comm)); 3180 isstart -= ncols; 3181 PetscCall(MatGetOwnershipRangeColumn(mat, &cstart, &cend)); 3182 3183 PetscCall(ISGetIndices(iscol, &is_idx)); 3184 PetscCall(VecGetArray(x, &xarray)); 3185 PetscCall(VecGetArray(cmap, &cmaparray)); 3186 PetscCall(PetscMalloc1(ncols, &idx)); 3187 for (i = 0; i < ncols; i++) { 3188 xarray[is_idx[i] - cstart] = (PetscScalar)is_idx[i]; 3189 cmaparray[is_idx[i] - cstart] = i + isstart; /* global index of iscol[i] */ 3190 idx[i] = is_idx[i] - cstart; /* local index of iscol[i] */ 3191 } 3192 PetscCall(VecRestoreArray(x, &xarray)); 3193 PetscCall(VecRestoreArray(cmap, &cmaparray)); 3194 PetscCall(ISRestoreIndices(iscol, &is_idx)); 3195 3196 /* Get iscol_d */ 3197 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, iscol_d)); 3198 PetscCall(ISGetBlockSize(iscol, &i)); 3199 PetscCall(ISSetBlockSize(*iscol_d, i)); 3200 3201 /* Get isrow_d */ 3202 PetscCall(ISGetLocalSize(isrow, &m)); 3203 rstart = mat->rmap->rstart; 3204 PetscCall(PetscMalloc1(m, &idx)); 3205 PetscCall(ISGetIndices(isrow, &is_idx)); 3206 for (i = 0; i < m; i++) idx[i] = is_idx[i] - rstart; 3207 PetscCall(ISRestoreIndices(isrow, &is_idx)); 3208 3209 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idx, PETSC_OWN_POINTER, isrow_d)); 3210 PetscCall(ISGetBlockSize(isrow, &i)); 3211 PetscCall(ISSetBlockSize(*isrow_d, i)); 3212 3213 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3214 PetscCall(VecScatterBegin(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD)); 3215 PetscCall(VecScatterEnd(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD)); 3216 3217 PetscCall(VecDuplicate(lvec, &lcmap)); 3218 3219 PetscCall(VecScatterBegin(Mvctx, 
cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD)); 3220 PetscCall(VecScatterEnd(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD)); 3221 3222 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3223 /* off-process column indices */ 3224 count = 0; 3225 PetscCall(PetscMalloc1(Bn, &idx)); 3226 PetscCall(PetscMalloc1(Bn, &cmap1)); 3227 3228 PetscCall(VecGetArray(lvec, &xarray)); 3229 PetscCall(VecGetArray(lcmap, &cmaparray)); 3230 for (i = 0; i < Bn; i++) { 3231 if (PetscRealPart(xarray[i]) > -1.0) { 3232 idx[count] = i; /* local column index in off-diagonal part B */ 3233 cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3234 count++; 3235 } 3236 } 3237 PetscCall(VecRestoreArray(lvec, &xarray)); 3238 PetscCall(VecRestoreArray(lcmap, &cmaparray)); 3239 3240 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_COPY_VALUES, iscol_o)); 3241 /* cannot ensure iscol_o has same blocksize as iscol! */ 3242 3243 PetscCall(PetscFree(idx)); 3244 *garray = cmap1; 3245 3246 PetscCall(VecDestroy(&x)); 3247 PetscCall(VecDestroy(&cmap)); 3248 PetscCall(VecDestroy(&lcmap)); 3249 PetscFunctionReturn(PETSC_SUCCESS); 3250 } 3251 3252 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3253 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *submat) 3254 { 3255 Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data, *asub; 3256 Mat M = NULL; 3257 MPI_Comm comm; 3258 IS iscol_d, isrow_d, iscol_o; 3259 Mat Asub = NULL, Bsub = NULL; 3260 PetscInt n; 3261 3262 PetscFunctionBegin; 3263 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3264 3265 if (call == MAT_REUSE_MATRIX) { 3266 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3267 PetscCall(PetscObjectQuery((PetscObject)*submat, "isrow_d", (PetscObject *)&isrow_d)); 3268 PetscCheck(isrow_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "isrow_d passed in was not used before, cannot 
reuse"); 3269 3270 PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_d", (PetscObject *)&iscol_d)); 3271 PetscCheck(iscol_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_d passed in was not used before, cannot reuse"); 3272 3273 PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_o", (PetscObject *)&iscol_o)); 3274 PetscCheck(iscol_o, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_o passed in was not used before, cannot reuse"); 3275 3276 /* Update diagonal and off-diagonal portions of submat */ 3277 asub = (Mat_MPIAIJ *)(*submat)->data; 3278 PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->A)); 3279 PetscCall(ISGetLocalSize(iscol_o, &n)); 3280 if (n) PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->B)); 3281 PetscCall(MatAssemblyBegin(*submat, MAT_FINAL_ASSEMBLY)); 3282 PetscCall(MatAssemblyEnd(*submat, MAT_FINAL_ASSEMBLY)); 3283 3284 } else { /* call == MAT_INITIAL_MATRIX) */ 3285 const PetscInt *garray; 3286 PetscInt BsubN; 3287 3288 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) 
*/ 3289 PetscCall(ISGetSeqIS_SameColDist_Private(mat, isrow, iscol, &isrow_d, &iscol_d, &iscol_o, &garray)); 3290 3291 /* Create local submatrices Asub and Bsub */ 3292 PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Asub)); 3293 PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Bsub)); 3294 3295 /* Create submatrix M */ 3296 PetscCall(MatCreateMPIAIJWithSeqAIJ(comm, Asub, Bsub, garray, &M)); 3297 3298 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3299 asub = (Mat_MPIAIJ *)M->data; 3300 3301 PetscCall(ISGetLocalSize(iscol_o, &BsubN)); 3302 n = asub->B->cmap->N; 3303 if (BsubN > n) { 3304 /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */ 3305 const PetscInt *idx; 3306 PetscInt i, j, *idx_new, *subgarray = asub->garray; 3307 PetscCall(PetscInfo(M, "submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n", n, BsubN)); 3308 3309 PetscCall(PetscMalloc1(n, &idx_new)); 3310 j = 0; 3311 PetscCall(ISGetIndices(iscol_o, &idx)); 3312 for (i = 0; i < n; i++) { 3313 if (j >= BsubN) break; 3314 while (subgarray[i] > garray[j]) j++; 3315 3316 if (subgarray[i] == garray[j]) { 3317 idx_new[i] = idx[j++]; 3318 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT, i, subgarray[i], j, garray[j]); 3319 } 3320 PetscCall(ISRestoreIndices(iscol_o, &idx)); 3321 3322 PetscCall(ISDestroy(&iscol_o)); 3323 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx_new, PETSC_OWN_POINTER, &iscol_o)); 3324 3325 } else if (BsubN < n) { 3326 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")", BsubN, asub->B->cmap->N); 3327 } 3328 3329 PetscCall(PetscFree(garray)); 3330 *submat = M; 3331 3332 /* Save isrow_d, 
iscol_d and iscol_o used in processor for next request */ 3333 PetscCall(PetscObjectCompose((PetscObject)M, "isrow_d", (PetscObject)isrow_d)); 3334 PetscCall(ISDestroy(&isrow_d)); 3335 3336 PetscCall(PetscObjectCompose((PetscObject)M, "iscol_d", (PetscObject)iscol_d)); 3337 PetscCall(ISDestroy(&iscol_d)); 3338 3339 PetscCall(PetscObjectCompose((PetscObject)M, "iscol_o", (PetscObject)iscol_o)); 3340 PetscCall(ISDestroy(&iscol_o)); 3341 } 3342 PetscFunctionReturn(PETSC_SUCCESS); 3343 } 3344 3345 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat) 3346 { 3347 IS iscol_local = NULL, isrow_d; 3348 PetscInt csize; 3349 PetscInt n, i, j, start, end; 3350 PetscBool sameRowDist = PETSC_FALSE, sameDist[2], tsameDist[2]; 3351 MPI_Comm comm; 3352 3353 PetscFunctionBegin; 3354 /* If isrow has same processor distribution as mat, 3355 call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */ 3356 if (call == MAT_REUSE_MATRIX) { 3357 PetscCall(PetscObjectQuery((PetscObject)*newmat, "isrow_d", (PetscObject *)&isrow_d)); 3358 if (isrow_d) { 3359 sameRowDist = PETSC_TRUE; 3360 tsameDist[1] = PETSC_TRUE; /* sameColDist */ 3361 } else { 3362 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_local)); 3363 if (iscol_local) { 3364 sameRowDist = PETSC_TRUE; 3365 tsameDist[1] = PETSC_FALSE; /* !sameColDist */ 3366 } 3367 } 3368 } else { 3369 /* Check if isrow has same processor distribution as mat */ 3370 sameDist[0] = PETSC_FALSE; 3371 PetscCall(ISGetLocalSize(isrow, &n)); 3372 if (!n) { 3373 sameDist[0] = PETSC_TRUE; 3374 } else { 3375 PetscCall(ISGetMinMax(isrow, &i, &j)); 3376 PetscCall(MatGetOwnershipRange(mat, &start, &end)); 3377 if (i >= start && j < end) sameDist[0] = PETSC_TRUE; 3378 } 3379 3380 /* Check if iscol has same processor distribution as mat */ 3381 sameDist[1] = PETSC_FALSE; 3382 PetscCall(ISGetLocalSize(iscol, &n)); 3383 if (!n) { 3384 sameDist[1] 
= PETSC_TRUE; 3385 } else { 3386 PetscCall(ISGetMinMax(iscol, &i, &j)); 3387 PetscCall(MatGetOwnershipRangeColumn(mat, &start, &end)); 3388 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3389 } 3390 3391 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3392 PetscCall(MPIU_Allreduce(&sameDist, &tsameDist, 2, MPIU_BOOL, MPI_LAND, comm)); 3393 sameRowDist = tsameDist[0]; 3394 } 3395 3396 if (sameRowDist) { 3397 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3398 /* isrow and iscol have same processor distribution as mat */ 3399 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat, isrow, iscol, call, newmat)); 3400 PetscFunctionReturn(PETSC_SUCCESS); 3401 } else { /* sameRowDist */ 3402 /* isrow has same processor distribution as mat */ 3403 if (call == MAT_INITIAL_MATRIX) { 3404 PetscBool sorted; 3405 PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local)); 3406 PetscCall(ISGetLocalSize(iscol_local, &n)); /* local size of iscol_local = global columns of newmat */ 3407 PetscCall(ISGetSize(iscol, &i)); 3408 PetscCheck(n == i, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT, n, i); 3409 3410 PetscCall(ISSorted(iscol_local, &sorted)); 3411 if (sorted) { 3412 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3413 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, iscol_local, MAT_INITIAL_MATRIX, newmat)); 3414 PetscFunctionReturn(PETSC_SUCCESS); 3415 } 3416 } else { /* call == MAT_REUSE_MATRIX */ 3417 IS iscol_sub; 3418 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub)); 3419 if (iscol_sub) { 3420 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, NULL, call, newmat)); 3421 PetscFunctionReturn(PETSC_SUCCESS); 3422 } 3423 } 3424 } 3425 } 3426 3427 /* General case: iscol -> iscol_local which has global size of iscol */ 3428 if (call == MAT_REUSE_MATRIX) { 3429 
PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local)); 3430 PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 3431 } else { 3432 if (!iscol_local) PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local)); 3433 } 3434 3435 PetscCall(ISGetLocalSize(iscol, &csize)); 3436 PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat, isrow, iscol_local, csize, call, newmat)); 3437 3438 if (call == MAT_INITIAL_MATRIX) { 3439 PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local)); 3440 PetscCall(ISDestroy(&iscol_local)); 3441 } 3442 PetscFunctionReturn(PETSC_SUCCESS); 3443 } 3444 3445 /*@C 3446 MatCreateMPIAIJWithSeqAIJ - creates a `MATMPIAIJ` matrix using `MATSEQAIJ` matrices that contain the "diagonal" 3447 and "off-diagonal" part of the matrix in CSR format. 3448 3449 Collective 3450 3451 Input Parameters: 3452 + comm - MPI communicator 3453 . A - "diagonal" portion of matrix 3454 . B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3455 - garray - global index of `B` columns 3456 3457 Output Parameter: 3458 . mat - the matrix, with input `A` as its local diagonal matrix 3459 3460 Level: advanced 3461 3462 Notes: 3463 See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3464 3465 `A` becomes part of output mat, `B` is destroyed by this routine. The user cannot use `A` and `B` anymore. 
3466 3467 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateMPIAIJWithSplitArrays()` 3468 @*/ 3469 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm, Mat A, Mat B, const PetscInt garray[], Mat *mat) 3470 { 3471 Mat_MPIAIJ *maij; 3472 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data, *bnew; 3473 PetscInt *oi = b->i, *oj = b->j, i, nz, col; 3474 const PetscScalar *oa; 3475 Mat Bnew; 3476 PetscInt m, n, N; 3477 MatType mpi_mat_type; 3478 3479 PetscFunctionBegin; 3480 PetscCall(MatCreate(comm, mat)); 3481 PetscCall(MatGetSize(A, &m, &n)); 3482 PetscCheck(m == B->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Am %" PetscInt_FMT " != Bm %" PetscInt_FMT, m, B->rmap->N); 3483 PetscCheck(PetscAbs(A->rmap->bs) == PetscAbs(B->rmap->bs), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT, A->rmap->bs, B->rmap->bs); 3484 /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */ 3485 /* PetscCheck(A->cmap->bs == B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */ 3486 3487 /* Get global columns of mat */ 3488 PetscCall(MPIU_Allreduce(&n, &N, 1, MPIU_INT, MPI_SUM, comm)); 3489 3490 PetscCall(MatSetSizes(*mat, m, n, PETSC_DECIDE, N)); 3491 /* Determine the type of MPI matrix that should be created from the type of matrix A, which holds the "diagonal" portion. 
*/ 3492 PetscCall(MatGetMPIMatType_Private(A, &mpi_mat_type)); 3493 PetscCall(MatSetType(*mat, mpi_mat_type)); 3494 3495 if (A->rmap->bs > 1 || A->cmap->bs > 1) PetscCall(MatSetBlockSizes(*mat, A->rmap->bs, A->cmap->bs)); 3496 maij = (Mat_MPIAIJ *)(*mat)->data; 3497 3498 (*mat)->preallocated = PETSC_TRUE; 3499 3500 PetscCall(PetscLayoutSetUp((*mat)->rmap)); 3501 PetscCall(PetscLayoutSetUp((*mat)->cmap)); 3502 3503 /* Set A as diagonal portion of *mat */ 3504 maij->A = A; 3505 3506 nz = oi[m]; 3507 for (i = 0; i < nz; i++) { 3508 col = oj[i]; 3509 oj[i] = garray[col]; 3510 } 3511 3512 /* Set Bnew as off-diagonal portion of *mat */ 3513 PetscCall(MatSeqAIJGetArrayRead(B, &oa)); 3514 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, N, oi, oj, (PetscScalar *)oa, &Bnew)); 3515 PetscCall(MatSeqAIJRestoreArrayRead(B, &oa)); 3516 bnew = (Mat_SeqAIJ *)Bnew->data; 3517 bnew->maxnz = b->maxnz; /* allocated nonzeros of B */ 3518 maij->B = Bnew; 3519 3520 PetscCheck(B->rmap->N == Bnew->rmap->N, PETSC_COMM_SELF, PETSC_ERR_PLIB, "BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT, B->rmap->N, Bnew->rmap->N); 3521 3522 b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */ 3523 b->free_a = PETSC_FALSE; 3524 b->free_ij = PETSC_FALSE; 3525 PetscCall(MatDestroy(&B)); 3526 3527 bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */ 3528 bnew->free_a = PETSC_TRUE; 3529 bnew->free_ij = PETSC_TRUE; 3530 3531 /* condense columns of maij->B */ 3532 PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 3533 PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY)); 3534 PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY)); 3535 PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE)); 3536 PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 3537 PetscFunctionReturn(PETSC_SUCCESS); 3538 } 3539 3540 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat, PetscInt, const IS[], const IS[], MatReuse, 
PetscBool, Mat *); 3541 3542 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat, IS isrow, IS iscol, IS iscol_local, MatReuse call, Mat *newmat) 3543 { 3544 PetscInt i, m, n, rstart, row, rend, nz, j, bs, cbs; 3545 PetscInt *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal; 3546 Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data; 3547 Mat M, Msub, B = a->B; 3548 MatScalar *aa; 3549 Mat_SeqAIJ *aij; 3550 PetscInt *garray = a->garray, *colsub, Ncols; 3551 PetscInt count, Bn = B->cmap->N, cstart = mat->cmap->rstart, cend = mat->cmap->rend; 3552 IS iscol_sub, iscmap; 3553 const PetscInt *is_idx, *cmap; 3554 PetscBool allcolumns = PETSC_FALSE; 3555 MPI_Comm comm; 3556 3557 PetscFunctionBegin; 3558 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3559 if (call == MAT_REUSE_MATRIX) { 3560 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub)); 3561 PetscCheck(iscol_sub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "SubIScol passed in was not used before, cannot reuse"); 3562 PetscCall(ISGetLocalSize(iscol_sub, &count)); 3563 3564 PetscCall(PetscObjectQuery((PetscObject)*newmat, "Subcmap", (PetscObject *)&iscmap)); 3565 PetscCheck(iscmap, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Subcmap passed in was not used before, cannot reuse"); 3566 3567 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Msub)); 3568 PetscCheck(Msub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 3569 3570 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_REUSE_MATRIX, PETSC_FALSE, &Msub)); 3571 3572 } else { /* call == MAT_INITIAL_MATRIX) */ 3573 PetscBool flg; 3574 3575 PetscCall(ISGetLocalSize(iscol, &n)); 3576 PetscCall(ISGetSize(iscol, &Ncols)); 3577 3578 /* (1) iscol -> nonscalable iscol_local */ 3579 /* Check for special case: each processor gets entire matrix columns */ 3580 PetscCall(ISIdentity(iscol_local, &flg)); 3581 if (flg 
&& n == mat->cmap->N) allcolumns = PETSC_TRUE; 3582 PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 3583 if (allcolumns) { 3584 iscol_sub = iscol_local; 3585 PetscCall(PetscObjectReference((PetscObject)iscol_local)); 3586 PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &iscmap)); 3587 3588 } else { 3589 /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */ 3590 PetscInt *idx, *cmap1, k; 3591 PetscCall(PetscMalloc1(Ncols, &idx)); 3592 PetscCall(PetscMalloc1(Ncols, &cmap1)); 3593 PetscCall(ISGetIndices(iscol_local, &is_idx)); 3594 count = 0; 3595 k = 0; 3596 for (i = 0; i < Ncols; i++) { 3597 j = is_idx[i]; 3598 if (j >= cstart && j < cend) { 3599 /* diagonal part of mat */ 3600 idx[count] = j; 3601 cmap1[count++] = i; /* column index in submat */ 3602 } else if (Bn) { 3603 /* off-diagonal part of mat */ 3604 if (j == garray[k]) { 3605 idx[count] = j; 3606 cmap1[count++] = i; /* column index in submat */ 3607 } else if (j > garray[k]) { 3608 while (j > garray[k] && k < Bn - 1) k++; 3609 if (j == garray[k]) { 3610 idx[count] = j; 3611 cmap1[count++] = i; /* column index in submat */ 3612 } 3613 } 3614 } 3615 } 3616 PetscCall(ISRestoreIndices(iscol_local, &is_idx)); 3617 3618 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_OWN_POINTER, &iscol_sub)); 3619 PetscCall(ISGetBlockSize(iscol, &cbs)); 3620 PetscCall(ISSetBlockSize(iscol_sub, cbs)); 3621 3622 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local), count, cmap1, PETSC_OWN_POINTER, &iscmap)); 3623 } 3624 3625 /* (3) Create sequential Msub */ 3626 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_INITIAL_MATRIX, allcolumns, &Msub)); 3627 } 3628 3629 PetscCall(ISGetLocalSize(iscol_sub, &count)); 3630 aij = (Mat_SeqAIJ *)(Msub)->data; 3631 ii = aij->i; 3632 PetscCall(ISGetIndices(iscmap, &cmap)); 3633 3634 /* 3635 
m - number of local rows 3636 Ncols - number of columns (same on all processors) 3637 rstart - first row in new global matrix generated 3638 */ 3639 PetscCall(MatGetSize(Msub, &m, NULL)); 3640 3641 if (call == MAT_INITIAL_MATRIX) { 3642 /* (4) Create parallel newmat */ 3643 PetscMPIInt rank, size; 3644 PetscInt csize; 3645 3646 PetscCallMPI(MPI_Comm_size(comm, &size)); 3647 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 3648 3649 /* 3650 Determine the number of non-zeros in the diagonal and off-diagonal 3651 portions of the matrix in order to do correct preallocation 3652 */ 3653 3654 /* first get start and end of "diagonal" columns */ 3655 PetscCall(ISGetLocalSize(iscol, &csize)); 3656 if (csize == PETSC_DECIDE) { 3657 PetscCall(ISGetSize(isrow, &mglobal)); 3658 if (mglobal == Ncols) { /* square matrix */ 3659 nlocal = m; 3660 } else { 3661 nlocal = Ncols / size + ((Ncols % size) > rank); 3662 } 3663 } else { 3664 nlocal = csize; 3665 } 3666 PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm)); 3667 rstart = rend - nlocal; 3668 PetscCheck(rank != size - 1 || rend == Ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, Ncols); 3669 3670 /* next, compute all the lengths */ 3671 jj = aij->j; 3672 PetscCall(PetscMalloc1(2 * m + 1, &dlens)); 3673 olens = dlens + m; 3674 for (i = 0; i < m; i++) { 3675 jend = ii[i + 1] - ii[i]; 3676 olen = 0; 3677 dlen = 0; 3678 for (j = 0; j < jend; j++) { 3679 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 3680 else dlen++; 3681 jj++; 3682 } 3683 olens[i] = olen; 3684 dlens[i] = dlen; 3685 } 3686 3687 PetscCall(ISGetBlockSize(isrow, &bs)); 3688 PetscCall(ISGetBlockSize(iscol, &cbs)); 3689 3690 PetscCall(MatCreate(comm, &M)); 3691 PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, Ncols)); 3692 PetscCall(MatSetBlockSizes(M, bs, cbs)); 3693 PetscCall(MatSetType(M, ((PetscObject)mat)->type_name)); 3694 
PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens)); 3695 PetscCall(PetscFree(dlens)); 3696 3697 } else { /* call == MAT_REUSE_MATRIX */ 3698 M = *newmat; 3699 PetscCall(MatGetLocalSize(M, &i, NULL)); 3700 PetscCheck(i == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request"); 3701 PetscCall(MatZeroEntries(M)); 3702 /* 3703 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3704 rather than the slower MatSetValues(). 3705 */ 3706 M->was_assembled = PETSC_TRUE; 3707 M->assembled = PETSC_FALSE; 3708 } 3709 3710 /* (5) Set values of Msub to *newmat */ 3711 PetscCall(PetscMalloc1(count, &colsub)); 3712 PetscCall(MatGetOwnershipRange(M, &rstart, NULL)); 3713 3714 jj = aij->j; 3715 PetscCall(MatSeqAIJGetArrayRead(Msub, (const PetscScalar **)&aa)); 3716 for (i = 0; i < m; i++) { 3717 row = rstart + i; 3718 nz = ii[i + 1] - ii[i]; 3719 for (j = 0; j < nz; j++) colsub[j] = cmap[jj[j]]; 3720 PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, colsub, aa, INSERT_VALUES)); 3721 jj += nz; 3722 aa += nz; 3723 } 3724 PetscCall(MatSeqAIJRestoreArrayRead(Msub, (const PetscScalar **)&aa)); 3725 PetscCall(ISRestoreIndices(iscmap, &cmap)); 3726 3727 PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY)); 3728 PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY)); 3729 3730 PetscCall(PetscFree(colsub)); 3731 3732 /* save Msub, iscol_sub and iscmap used in processor for next request */ 3733 if (call == MAT_INITIAL_MATRIX) { 3734 *newmat = M; 3735 PetscCall(PetscObjectCompose((PetscObject)*newmat, "SubMatrix", (PetscObject)Msub)); 3736 PetscCall(MatDestroy(&Msub)); 3737 3738 PetscCall(PetscObjectCompose((PetscObject)*newmat, "SubIScol", (PetscObject)iscol_sub)); 3739 PetscCall(ISDestroy(&iscol_sub)); 3740 3741 PetscCall(PetscObjectCompose((PetscObject)*newmat, "Subcmap", (PetscObject)iscmap)); 3742 PetscCall(ISDestroy(&iscmap)); 3743 3744 if (iscol_local) { 3745 PetscCall(PetscObjectCompose((PetscObject)*newmat, 
"ISAllGather", (PetscObject)iscol_local)); 3746 PetscCall(ISDestroy(&iscol_local)); 3747 } 3748 } 3749 PetscFunctionReturn(PETSC_SUCCESS); 3750 } 3751 3752 /* 3753 Not great since it makes two copies of the submatrix, first an SeqAIJ 3754 in local and then by concatenating the local matrices the end result. 3755 Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3756 3757 This requires a sequential iscol with all indices. 3758 */ 3759 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat) 3760 { 3761 PetscMPIInt rank, size; 3762 PetscInt i, m, n, rstart, row, rend, nz, *cwork, j, bs, cbs; 3763 PetscInt *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal; 3764 Mat M, Mreuse; 3765 MatScalar *aa, *vwork; 3766 MPI_Comm comm; 3767 Mat_SeqAIJ *aij; 3768 PetscBool colflag, allcolumns = PETSC_FALSE; 3769 3770 PetscFunctionBegin; 3771 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3772 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 3773 PetscCallMPI(MPI_Comm_size(comm, &size)); 3774 3775 /* Check for special case: each processor gets entire matrix columns */ 3776 PetscCall(ISIdentity(iscol, &colflag)); 3777 PetscCall(ISGetLocalSize(iscol, &n)); 3778 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3779 PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 3780 3781 if (call == MAT_REUSE_MATRIX) { 3782 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse)); 3783 PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 3784 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_REUSE_MATRIX, allcolumns, &Mreuse)); 3785 } else { 3786 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_INITIAL_MATRIX, allcolumns, &Mreuse)); 3787 } 3788 3789 /* 3790 m - number of 
local rows 3791 n - number of columns (same on all processors) 3792 rstart - first row in new global matrix generated 3793 */ 3794 PetscCall(MatGetSize(Mreuse, &m, &n)); 3795 PetscCall(MatGetBlockSizes(Mreuse, &bs, &cbs)); 3796 if (call == MAT_INITIAL_MATRIX) { 3797 aij = (Mat_SeqAIJ *)(Mreuse)->data; 3798 ii = aij->i; 3799 jj = aij->j; 3800 3801 /* 3802 Determine the number of non-zeros in the diagonal and off-diagonal 3803 portions of the matrix in order to do correct preallocation 3804 */ 3805 3806 /* first get start and end of "diagonal" columns */ 3807 if (csize == PETSC_DECIDE) { 3808 PetscCall(ISGetSize(isrow, &mglobal)); 3809 if (mglobal == n) { /* square matrix */ 3810 nlocal = m; 3811 } else { 3812 nlocal = n / size + ((n % size) > rank); 3813 } 3814 } else { 3815 nlocal = csize; 3816 } 3817 PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm)); 3818 rstart = rend - nlocal; 3819 PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n); 3820 3821 /* next, compute all the lengths */ 3822 PetscCall(PetscMalloc1(2 * m + 1, &dlens)); 3823 olens = dlens + m; 3824 for (i = 0; i < m; i++) { 3825 jend = ii[i + 1] - ii[i]; 3826 olen = 0; 3827 dlen = 0; 3828 for (j = 0; j < jend; j++) { 3829 if (*jj < rstart || *jj >= rend) olen++; 3830 else dlen++; 3831 jj++; 3832 } 3833 olens[i] = olen; 3834 dlens[i] = dlen; 3835 } 3836 PetscCall(MatCreate(comm, &M)); 3837 PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, n)); 3838 PetscCall(MatSetBlockSizes(M, bs, cbs)); 3839 PetscCall(MatSetType(M, ((PetscObject)mat)->type_name)); 3840 PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens)); 3841 PetscCall(PetscFree(dlens)); 3842 } else { 3843 PetscInt ml, nl; 3844 3845 M = *newmat; 3846 PetscCall(MatGetLocalSize(M, &ml, &nl)); 3847 PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as 
request"); 3848 PetscCall(MatZeroEntries(M)); 3849 /* 3850 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3851 rather than the slower MatSetValues(). 3852 */ 3853 M->was_assembled = PETSC_TRUE; 3854 M->assembled = PETSC_FALSE; 3855 } 3856 PetscCall(MatGetOwnershipRange(M, &rstart, &rend)); 3857 aij = (Mat_SeqAIJ *)(Mreuse)->data; 3858 ii = aij->i; 3859 jj = aij->j; 3860 3861 /* trigger copy to CPU if needed */ 3862 PetscCall(MatSeqAIJGetArrayRead(Mreuse, (const PetscScalar **)&aa)); 3863 for (i = 0; i < m; i++) { 3864 row = rstart + i; 3865 nz = ii[i + 1] - ii[i]; 3866 cwork = jj; 3867 jj = PetscSafePointerPlusOffset(jj, nz); 3868 vwork = aa; 3869 aa = PetscSafePointerPlusOffset(aa, nz); 3870 PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES)); 3871 } 3872 PetscCall(MatSeqAIJRestoreArrayRead(Mreuse, (const PetscScalar **)&aa)); 3873 3874 PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY)); 3875 PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY)); 3876 *newmat = M; 3877 3878 /* save submatrix used in processor for next request */ 3879 if (call == MAT_INITIAL_MATRIX) { 3880 PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse)); 3881 PetscCall(MatDestroy(&Mreuse)); 3882 } 3883 PetscFunctionReturn(PETSC_SUCCESS); 3884 } 3885 3886 static PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[]) 3887 { 3888 PetscInt m, cstart, cend, j, nnz, i, d, *ld; 3889 PetscInt *d_nnz, *o_nnz, nnz_max = 0, rstart, ii; 3890 const PetscInt *JJ; 3891 PetscBool nooffprocentries; 3892 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)B->data; 3893 3894 PetscFunctionBegin; 3895 PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Ii[0] must be 0 it is %" PetscInt_FMT, Ii[0]); 3896 3897 PetscCall(PetscLayoutSetUp(B->rmap)); 3898 PetscCall(PetscLayoutSetUp(B->cmap)); 3899 m = B->rmap->n; 3900 cstart = B->cmap->rstart; 3901 cend = B->cmap->rend; 3902 
rstart = B->rmap->rstart; 3903 3904 PetscCall(PetscCalloc2(m, &d_nnz, m, &o_nnz)); 3905 3906 if (PetscDefined(USE_DEBUG)) { 3907 for (i = 0; i < m; i++) { 3908 nnz = Ii[i + 1] - Ii[i]; 3909 JJ = PetscSafePointerPlusOffset(J, Ii[i]); 3910 PetscCheck(nnz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns", i, nnz); 3911 PetscCheck(!nnz || !(JJ[0] < 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT, i, JJ[0]); 3912 PetscCheck(!nnz || !(JJ[nnz - 1] >= B->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")", i, JJ[nnz - 1], B->cmap->N); 3913 } 3914 } 3915 3916 for (i = 0; i < m; i++) { 3917 nnz = Ii[i + 1] - Ii[i]; 3918 JJ = PetscSafePointerPlusOffset(J, Ii[i]); 3919 nnz_max = PetscMax(nnz_max, nnz); 3920 d = 0; 3921 for (j = 0; j < nnz; j++) { 3922 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3923 } 3924 d_nnz[i] = d; 3925 o_nnz[i] = nnz - d; 3926 } 3927 PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz)); 3928 PetscCall(PetscFree2(d_nnz, o_nnz)); 3929 3930 for (i = 0; i < m; i++) { 3931 ii = i + rstart; 3932 PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, Ii[i + 1] - Ii[i], PetscSafePointerPlusOffset(J, Ii[i]), PetscSafePointerPlusOffset(v, Ii[i]), INSERT_VALUES)); 3933 } 3934 nooffprocentries = B->nooffprocentries; 3935 B->nooffprocentries = PETSC_TRUE; 3936 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 3937 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 3938 B->nooffprocentries = nooffprocentries; 3939 3940 /* count number of entries below block diagonal */ 3941 PetscCall(PetscFree(Aij->ld)); 3942 PetscCall(PetscCalloc1(m, &ld)); 3943 Aij->ld = ld; 3944 for (i = 0; i < m; i++) { 3945 nnz = Ii[i + 1] - Ii[i]; 3946 j = 0; 3947 while (j < nnz && J[j] < cstart) j++; 3948 ld[i] = j; 3949 if (J) J += nnz; 3950 } 
3951 3952 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 3953 PetscFunctionReturn(PETSC_SUCCESS); 3954 } 3955 3956 /*@ 3957 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in `MATAIJ` format 3958 (the default parallel PETSc format). 3959 3960 Collective 3961 3962 Input Parameters: 3963 + B - the matrix 3964 . i - the indices into `j` for the start of each local row (indices start with zero) 3965 . j - the column indices for each local row (indices start with zero) 3966 - v - optional values in the matrix 3967 3968 Level: developer 3969 3970 Notes: 3971 The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc; 3972 thus you CANNOT change the matrix entries by changing the values of `v` after you have 3973 called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays. 3974 3975 The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array. 3976 3977 A convenience routine for this functionality is `MatCreateMPIAIJWithArrays()`. 3978 3979 You can update the matrix with new numerical values using `MatUpdateMPIAIJWithArrays()` after this call if the column indices in `j` are sorted. 3980 3981 If you do **not** use `MatUpdateMPIAIJWithArrays()`, the column indices in `j` do not need to be sorted. If you will use 3982 `MatUpdateMPIAIJWithArrays()`, the column indices **must** be sorted. 3983 3984 The format which is used for the sparse matrix input, is equivalent to a 3985 row-major ordering.. 
i.e for the following matrix, the input data expected is 3986 as shown 3987 .vb 3988 1 0 0 3989 2 0 3 P0 3990 ------- 3991 4 5 6 P1 3992 3993 Process0 [P0] rows_owned=[0,1] 3994 i = {0,1,3} [size = nrow+1 = 2+1] 3995 j = {0,0,2} [size = 3] 3996 v = {1,2,3} [size = 3] 3997 3998 Process1 [P1] rows_owned=[2] 3999 i = {0,3} [size = nrow+1 = 1+1] 4000 j = {0,1,2} [size = 3] 4001 v = {4,5,6} [size = 3] 4002 .ve 4003 4004 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`, 4005 `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()`, `MatCreateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4006 @*/ 4007 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B, const PetscInt i[], const PetscInt j[], const PetscScalar v[]) 4008 { 4009 PetscFunctionBegin; 4010 PetscTryMethod(B, "MatMPIAIJSetPreallocationCSR_C", (Mat, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, i, j, v)); 4011 PetscFunctionReturn(PETSC_SUCCESS); 4012 } 4013 4014 /*@C 4015 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in `MATMPIAIJ` format 4016 (the default parallel PETSc format). For good matrix assembly performance 4017 the user should preallocate the matrix storage by setting the parameters 4018 `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). 4019 4020 Collective 4021 4022 Input Parameters: 4023 + B - the matrix 4024 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4025 (same value is used for all local rows) 4026 . d_nnz - array containing the number of nonzeros in the various rows of the 4027 DIAGONAL portion of the local submatrix (possibly different for each row) 4028 or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `d_nz` is used to specify the nonzero structure. 4029 The size of this array is equal to the number of local rows, i.e 'm'. 
4030 For matrices that will be factored, you must leave room for (and set) 4031 the diagonal entry even if it is zero. 4032 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4033 submatrix (same value is used for all local rows). 4034 - o_nnz - array containing the number of nonzeros in the various rows of the 4035 OFF-DIAGONAL portion of the local submatrix (possibly different for 4036 each row) or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `o_nz` is used to specify the nonzero 4037 structure. The size of this array is equal to the number 4038 of local rows, i.e 'm'. 4039 4040 Example Usage: 4041 Consider the following 8x8 matrix with 34 non-zero values, that is 4042 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4043 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4044 as follows 4045 4046 .vb 4047 1 2 0 | 0 3 0 | 0 4 4048 Proc0 0 5 6 | 7 0 0 | 8 0 4049 9 0 10 | 11 0 0 | 12 0 4050 ------------------------------------- 4051 13 0 14 | 15 16 17 | 0 0 4052 Proc1 0 18 0 | 19 20 21 | 0 0 4053 0 0 0 | 22 23 0 | 24 0 4054 ------------------------------------- 4055 Proc2 25 26 27 | 0 0 28 | 29 0 4056 30 0 0 | 31 32 33 | 0 34 4057 .ve 4058 4059 This can be represented as a collection of submatrices as 4060 .vb 4061 A B C 4062 D E F 4063 G H I 4064 .ve 4065 4066 Where the submatrices A,B,C are owned by proc0, D,E,F are 4067 owned by proc1, G,H,I are owned by proc2. 4068 4069 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4070 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4071 The 'M','N' parameters are 8,8, and have the same values on all procs. 4072 4073 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4074 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4075 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4076 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4077 part as `MATSEQAIJ` matrices. 
For example, proc1 will store [E] as a `MATSEQAIJ` 4078 matrix, ans [DF] as another `MATSEQAIJ` matrix. 4079 4080 When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are 4081 allocated for every row of the local diagonal submatrix, and `o_nz` 4082 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4083 One way to choose `d_nz` and `o_nz` is to use the max nonzerors per local 4084 rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4085 In this case, the values of `d_nz`, `o_nz` are 4086 .vb 4087 proc0 dnz = 2, o_nz = 2 4088 proc1 dnz = 3, o_nz = 2 4089 proc2 dnz = 1, o_nz = 4 4090 .ve 4091 We are allocating `m`*(`d_nz`+`o_nz`) storage locations for every proc. This 4092 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4093 for proc3. i.e we are using 12+15+10=37 storage locations to store 4094 34 values. 4095 4096 When `d_nnz`, `o_nnz` parameters are specified, the storage is specified 4097 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4098 In the above case the values for `d_nnz`, `o_nnz` are 4099 .vb 4100 proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2] 4101 proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1] 4102 proc2 d_nnz = [1,1] and o_nnz = [4,4] 4103 .ve 4104 Here the space allocated is sum of all the above values i.e 34, and 4105 hence pre-allocation is perfect. 4106 4107 Level: intermediate 4108 4109 Notes: 4110 If the *_nnz parameter is given then the *_nz parameter is ignored 4111 4112 The `MATAIJ` format, also called compressed row storage (CSR), is compatible with standard Fortran 4113 storage. The stored row and column indices begin with zero. 4114 See [Sparse Matrices](sec_matsparse) for details. 4115 4116 The parallel matrix is partitioned such that the first m0 rows belong to 4117 process 0, the next m1 rows belong to process 1, the next m2 rows belong 4118 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 
4119 4120 The DIAGONAL portion of the local submatrix of a processor can be defined 4121 as the submatrix which is obtained by extraction the part corresponding to 4122 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 4123 first row that belongs to the processor, r2 is the last row belonging to 4124 the this processor, and c1-c2 is range of indices of the local part of a 4125 vector suitable for applying the matrix to. This is an mxn matrix. In the 4126 common case of a square matrix, the row and column ranges are the same and 4127 the DIAGONAL part is also square. The remaining portion of the local 4128 submatrix (mxN) constitute the OFF-DIAGONAL portion. 4129 4130 If `o_nnz` and `d_nnz` are specified, then `o_nz` and `d_nz` are ignored. 4131 4132 You can call `MatGetInfo()` to get information on how effective the preallocation was; 4133 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 4134 You can also run with the option `-info` and look for messages with the string 4135 malloc in them to see if additional memory allocation was needed. 4136 4137 .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MATMPIAIJ`, `MATAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`, 4138 `MatGetInfo()`, `PetscSplitOwnership()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4139 @*/ 4140 PetscErrorCode MatMPIAIJSetPreallocation(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[]) 4141 { 4142 PetscFunctionBegin; 4143 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 4144 PetscValidType(B, 1); 4145 PetscTryMethod(B, "MatMPIAIJSetPreallocation_C", (Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, d_nz, d_nnz, o_nz, o_nnz)); 4146 PetscFunctionReturn(PETSC_SUCCESS); 4147 } 4148 4149 /*@ 4150 MatCreateMPIAIJWithArrays - creates a `MATMPIAIJ` matrix using arrays that contain in standard 4151 CSR format for the local rows. 
4152 4153 Collective 4154 4155 Input Parameters: 4156 + comm - MPI communicator 4157 . m - number of local rows (Cannot be `PETSC_DECIDE`) 4158 . n - This value should be the same as the local size used in creating the 4159 x vector for the matrix-vector product $ y = Ax$. (or `PETSC_DECIDE` to have 4160 calculated if `N` is given) For square matrices n is almost always `m`. 4161 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given) 4162 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given) 4163 . i - row indices (of length m+1); that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 4164 . j - global column indices 4165 - a - optional matrix values 4166 4167 Output Parameter: 4168 . mat - the matrix 4169 4170 Level: intermediate 4171 4172 Notes: 4173 The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc; 4174 thus you CANNOT change the matrix entries by changing the values of `a[]` after you have 4175 called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays. 4176 4177 The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array. 4178 4179 Once you have created the matrix you can update it with new numerical values using `MatUpdateMPIAIJWithArray()` 4180 4181 If you do **not** use `MatUpdateMPIAIJWithArray()`, the column indices in `j` do not need to be sorted. If you will use 4182 `MatUpdateMPIAIJWithArrays()`, the column indices **must** be sorted. 
4183 4184 The format which is used for the sparse matrix input, is equivalent to a 4185 row-major ordering, i.e., for the following matrix, the input data expected is 4186 as shown 4187 .vb 4188 1 0 0 4189 2 0 3 P0 4190 ------- 4191 4 5 6 P1 4192 4193 Process0 [P0] rows_owned=[0,1] 4194 i = {0,1,3} [size = nrow+1 = 2+1] 4195 j = {0,0,2} [size = 3] 4196 v = {1,2,3} [size = 3] 4197 4198 Process1 [P1] rows_owned=[2] 4199 i = {0,3} [size = nrow+1 = 1+1] 4200 j = {0,1,2} [size = 3] 4201 v = {4,5,6} [size = 3] 4202 .ve 4203 4204 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4205 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4206 @*/ 4207 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat) 4208 { 4209 PetscFunctionBegin; 4210 PetscCheck(!i || !i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 4211 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4212 PetscCall(MatCreate(comm, mat)); 4213 PetscCall(MatSetSizes(*mat, m, n, M, N)); 4214 /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */ 4215 PetscCall(MatSetType(*mat, MATMPIAIJ)); 4216 PetscCall(MatMPIAIJSetPreallocationCSR(*mat, i, j, a)); 4217 PetscFunctionReturn(PETSC_SUCCESS); 4218 } 4219 4220 /*@ 4221 MatUpdateMPIAIJWithArrays - updates a `MATMPIAIJ` matrix using arrays that contain in standard 4222 CSR format for the local rows. 
Only the numerical values are updated the other arrays must be identical to what was passed 4223 from `MatCreateMPIAIJWithArrays()` 4224 4225 Deprecated: Use `MatUpdateMPIAIJWithArray()` 4226 4227 Collective 4228 4229 Input Parameters: 4230 + mat - the matrix 4231 . m - number of local rows (Cannot be `PETSC_DECIDE`) 4232 . n - This value should be the same as the local size used in creating the 4233 x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have 4234 calculated if N is given) For square matrices n is almost always m. 4235 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 4236 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 4237 . Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix 4238 . J - column indices 4239 - v - matrix values 4240 4241 Level: deprecated 4242 4243 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4244 `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4245 @*/ 4246 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[]) 4247 { 4248 PetscInt nnz, i; 4249 PetscBool nooffprocentries; 4250 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)mat->data; 4251 Mat_SeqAIJ *Ad = (Mat_SeqAIJ *)Aij->A->data; 4252 PetscScalar *ad, *ao; 4253 PetscInt ldi, Iii, md; 4254 const PetscInt *Adi = Ad->i; 4255 PetscInt *ld = Aij->ld; 4256 4257 PetscFunctionBegin; 4258 PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 4259 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4260 PetscCheck(m == mat->rmap->n, 
PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()"); 4261 PetscCheck(n == mat->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()"); 4262 4263 PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad)); 4264 PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao)); 4265 4266 for (i = 0; i < m; i++) { 4267 if (PetscDefined(USE_DEBUG)) { 4268 for (PetscInt j = Ii[i] + 1; j < Ii[i + 1]; ++j) { 4269 PetscCheck(J[j] >= J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is not sorted", j - Ii[i], J[j], i); 4270 PetscCheck(J[j] != J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is identical to previous entry", j - Ii[i], J[j], i); 4271 } 4272 } 4273 nnz = Ii[i + 1] - Ii[i]; 4274 Iii = Ii[i]; 4275 ldi = ld[i]; 4276 md = Adi[i + 1] - Adi[i]; 4277 PetscCall(PetscArraycpy(ao, v + Iii, ldi)); 4278 PetscCall(PetscArraycpy(ad, v + Iii + ldi, md)); 4279 PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md)); 4280 ad += md; 4281 ao += nnz - md; 4282 } 4283 nooffprocentries = mat->nooffprocentries; 4284 mat->nooffprocentries = PETSC_TRUE; 4285 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad)); 4286 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao)); 4287 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A)); 4288 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B)); 4289 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 4290 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 4291 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 4292 mat->nooffprocentries = nooffprocentries; 4293 PetscFunctionReturn(PETSC_SUCCESS); 4294 } 4295 4296 /*@ 4297 MatUpdateMPIAIJWithArray - updates an `MATMPIAIJ` matrix using an array that contains the 
nonzero values 4298 4299 Collective 4300 4301 Input Parameters: 4302 + mat - the matrix 4303 - v - matrix values, stored by row 4304 4305 Level: intermediate 4306 4307 Notes: 4308 The matrix must have been obtained with `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` 4309 4310 The column indices in the call to `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` must have been sorted for this call to work correctly 4311 4312 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4313 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4314 @*/ 4315 PetscErrorCode MatUpdateMPIAIJWithArray(Mat mat, const PetscScalar v[]) 4316 { 4317 PetscInt nnz, i, m; 4318 PetscBool nooffprocentries; 4319 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)mat->data; 4320 Mat_SeqAIJ *Ad = (Mat_SeqAIJ *)Aij->A->data; 4321 Mat_SeqAIJ *Ao = (Mat_SeqAIJ *)Aij->B->data; 4322 PetscScalar *ad, *ao; 4323 const PetscInt *Adi = Ad->i, *Adj = Ao->i; 4324 PetscInt ldi, Iii, md; 4325 PetscInt *ld = Aij->ld; 4326 4327 PetscFunctionBegin; 4328 m = mat->rmap->n; 4329 4330 PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad)); 4331 PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao)); 4332 Iii = 0; 4333 for (i = 0; i < m; i++) { 4334 nnz = Adi[i + 1] - Adi[i] + Adj[i + 1] - Adj[i]; 4335 ldi = ld[i]; 4336 md = Adi[i + 1] - Adi[i]; 4337 PetscCall(PetscArraycpy(ad, v + Iii + ldi, md)); 4338 ad += md; 4339 if (ao) { 4340 PetscCall(PetscArraycpy(ao, v + Iii, ldi)); 4341 PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md)); 4342 ao += nnz - md; 4343 } 4344 Iii += nnz; 4345 } 4346 nooffprocentries = mat->nooffprocentries; 4347 mat->nooffprocentries = PETSC_TRUE; 4348 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad)); 4349 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao)); 4350 
PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A)); 4351 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B)); 4352 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 4353 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 4354 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 4355 mat->nooffprocentries = nooffprocentries; 4356 PetscFunctionReturn(PETSC_SUCCESS); 4357 } 4358 4359 /*@C 4360 MatCreateAIJ - Creates a sparse parallel matrix in `MATAIJ` format 4361 (the default parallel PETSc format). For good matrix assembly performance 4362 the user should preallocate the matrix storage by setting the parameters 4363 `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). 4364 4365 Collective 4366 4367 Input Parameters: 4368 + comm - MPI communicator 4369 . m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given) 4370 This value should be the same as the local size used in creating the 4371 y vector for the matrix-vector product y = Ax. 4372 . n - This value should be the same as the local size used in creating the 4373 x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have 4374 calculated if N is given) For square matrices n is almost always m. 4375 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 4376 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 4377 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4378 (same value is used for all local rows) 4379 . d_nnz - array containing the number of nonzeros in the various rows of the 4380 DIAGONAL portion of the local submatrix (possibly different for each row) 4381 or `NULL`, if `d_nz` is used to specify the nonzero structure. 4382 The size of this array is equal to the number of local rows, i.e 'm'. 4383 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4384 submatrix (same value is used for all local rows). 
4385 - o_nnz - array containing the number of nonzeros in the various rows of the 4386 OFF-DIAGONAL portion of the local submatrix (possibly different for 4387 each row) or `NULL`, if `o_nz` is used to specify the nonzero 4388 structure. The size of this array is equal to the number 4389 of local rows, i.e 'm'. 4390 4391 Output Parameter: 4392 . A - the matrix 4393 4394 Options Database Keys: 4395 + -mat_no_inode - Do not use inodes 4396 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4397 - -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in `MatMult()` of sparse parallel matrices. 4398 See viewer types in manual of `MatView()`. Of them, ascii_matlab, draw or binary cause the `VecScatter` 4399 to be viewed as a matrix. Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one `MatMult()` call. 4400 4401 Level: intermediate 4402 4403 Notes: 4404 It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, 4405 MatXXXXSetPreallocation() paradigm instead of this routine directly. 4406 [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`] 4407 4408 If the *_nnz parameter is given then the *_nz parameter is ignored 4409 4410 The `m`,`n`,`M`,`N` parameters specify the size of the matrix, and its partitioning across 4411 processors, while `d_nz`,`d_nnz`,`o_nz`,`o_nnz` parameters specify the approximate 4412 storage requirements for this matrix. 4413 4414 If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one 4415 processor than it must be used on all processors that share the object for 4416 that argument. 4417 4418 If `m` and `n` are not `PETSC_DECIDE`, then the values determine the `PetscLayout` of the matrix and the ranges returned by 4419 `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, and `MatGetOwnershipRangesColumn()`. 
4420 4421 The user MUST specify either the local or global matrix dimensions 4422 (possibly both). 4423 4424 The parallel matrix is partitioned across processors such that the 4425 first `m0` rows belong to process 0, the next `m1` rows belong to 4426 process 1, the next `m2` rows belong to process 2, etc., where 4427 `m0`, `m1`, `m2`... are the input parameter `m` on each MPI process. I.e., each MPI process stores 4428 values corresponding to [m x N] submatrix. 4429 4430 The columns are logically partitioned with the n0 columns belonging 4431 to 0th partition, the next n1 columns belonging to the next 4432 partition etc.. where n0,n1,n2... are the input parameter 'n'. 4433 4434 The DIAGONAL portion of the local submatrix on any given processor 4435 is the submatrix corresponding to the rows and columns m,n 4436 corresponding to the given processor. i.e diagonal matrix on 4437 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4438 etc. The remaining portion of the local submatrix [m x (N-n)] 4439 constitute the OFF-DIAGONAL portion. The example below better 4440 illustrates this concept. 4441 4442 For a square global matrix we define each processor's diagonal portion 4443 to be its local rows and the corresponding columns (a square submatrix); 4444 each processor's off-diagonal portion encompasses the remainder of the 4445 local matrix (a rectangular submatrix). 4446 4447 If `o_nnz`, `d_nnz` are specified, then `o_nz`, and `d_nz` are ignored. 4448 4449 When calling this routine with a single process communicator, a matrix of 4450 type `MATSEQAIJ` is returned. If a matrix of type `MATMPIAIJ` is desired for this 4451 type of communicator, use the construction mechanism 4452 .vb 4453 MatCreate(..., &A); 4454 MatSetType(A, MATMPIAIJ); 4455 MatSetSizes(A, m, n, M, N); 4456 MatMPIAIJSetPreallocation(A, ...); 4457 .ve 4458 4459 By default, this format uses inodes (identical nodes) when possible. 
4460 We search for consecutive rows with the same nonzero structure, thereby 4461 reusing matrix information to achieve increased efficiency. 4462 4463 Example Usage: 4464 Consider the following 8x8 matrix with 34 non-zero values, that is 4465 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4466 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4467 as follows 4468 4469 .vb 4470 1 2 0 | 0 3 0 | 0 4 4471 Proc0 0 5 6 | 7 0 0 | 8 0 4472 9 0 10 | 11 0 0 | 12 0 4473 ------------------------------------- 4474 13 0 14 | 15 16 17 | 0 0 4475 Proc1 0 18 0 | 19 20 21 | 0 0 4476 0 0 0 | 22 23 0 | 24 0 4477 ------------------------------------- 4478 Proc2 25 26 27 | 0 0 28 | 29 0 4479 30 0 0 | 31 32 33 | 0 34 4480 .ve 4481 4482 This can be represented as a collection of submatrices as 4483 4484 .vb 4485 A B C 4486 D E F 4487 G H I 4488 .ve 4489 4490 Where the submatrices A,B,C are owned by proc0, D,E,F are 4491 owned by proc1, G,H,I are owned by proc2. 4492 4493 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4494 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4495 The 'M','N' parameters are 8,8, and have the same values on all procs. 4496 4497 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4498 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4499 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4500 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4501 part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ` 4502 matrix, ans [DF] as another SeqAIJ matrix. 4503 4504 When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are 4505 allocated for every row of the local diagonal submatrix, and `o_nz` 4506 storage locations are allocated for every row of the OFF-DIAGONAL submat. 
  One way to choose `d_nz` and `o_nz` is to use the max nonzeros per local
  rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
  In this case, the values of `d_nz`,`o_nz` are
.vb
     proc0  d_nz = 2, o_nz = 2
     proc1  d_nz = 3, o_nz = 2
     proc2  d_nz = 1, o_nz = 4
.ve
  We are allocating m*(`d_nz`+`o_nz`) storage locations for every proc. This
  translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
  for proc2. i.e we are using 12+15+10=37 storage locations to store
  34 values.

  When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
  for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
  In the above case the values for d_nnz,o_nnz are
.vb
     proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2 d_nnz = [1,1]   and o_nnz = [4,4]
.ve
  Here the space allocated is sum of all the above values i.e 34, and
  hence pre-allocation is perfect.
4530 4531 .seealso: [](ch_matrices), `Mat`, [Sparse Matrix Creation](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4532 `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()`, `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, 4533 `MatGetOwnershipRangesColumn()`, `PetscLayout` 4534 @*/ 4535 PetscErrorCode MatCreateAIJ(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A) 4536 { 4537 PetscMPIInt size; 4538 4539 PetscFunctionBegin; 4540 PetscCall(MatCreate(comm, A)); 4541 PetscCall(MatSetSizes(*A, m, n, M, N)); 4542 PetscCallMPI(MPI_Comm_size(comm, &size)); 4543 if (size > 1) { 4544 PetscCall(MatSetType(*A, MATMPIAIJ)); 4545 PetscCall(MatMPIAIJSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz)); 4546 } else { 4547 PetscCall(MatSetType(*A, MATSEQAIJ)); 4548 PetscCall(MatSeqAIJSetPreallocation(*A, d_nz, d_nnz)); 4549 } 4550 PetscFunctionReturn(PETSC_SUCCESS); 4551 } 4552 4553 /*MC 4554 MatMPIAIJGetSeqAIJF90 - Returns the local pieces of this distributed matrix 4555 4556 Synopsis: 4557 MatMPIAIJGetSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr) 4558 4559 Not Collective 4560 4561 Input Parameter: 4562 . A - the `MATMPIAIJ` matrix 4563 4564 Output Parameters: 4565 + Ad - the diagonal portion of the matrix 4566 . Ao - the off-diagonal portion of the matrix 4567 . 
colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix 4568 - ierr - error code 4569 4570 Level: advanced 4571 4572 Note: 4573 Use `MatMPIAIJRestoreSeqAIJF90()` when you no longer need access to the matrices and `colmap` 4574 4575 .seealso: [](ch_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJRestoreSeqAIJF90()` 4576 M*/ 4577 4578 /*MC 4579 MatMPIAIJRestoreSeqAIJF90 - call after `MatMPIAIJGetSeqAIJF90()` when you no longer need access to the matrices and `colmap` 4580 4581 Synopsis: 4582 MatMPIAIJRestoreSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr) 4583 4584 Not Collective 4585 4586 Input Parameters: 4587 + A - the `MATMPIAIJ` matrix 4588 . Ad - the diagonal portion of the matrix 4589 . Ao - the off-diagonal portion of the matrix 4590 . colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix 4591 - ierr - error code 4592 4593 Level: advanced 4594 4595 .seealso: [](ch_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJGetSeqAIJF90()` 4596 M*/ 4597 4598 /*@C 4599 MatMPIAIJGetSeqAIJ - Returns the local pieces of this distributed matrix 4600 4601 Not Collective 4602 4603 Input Parameter: 4604 . A - The `MATMPIAIJ` matrix 4605 4606 Output Parameters: 4607 + Ad - The local diagonal block as a `MATSEQAIJ` matrix 4608 . Ao - The local off-diagonal block as a `MATSEQAIJ` matrix 4609 - colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix 4610 4611 Level: intermediate 4612 4613 Note: 4614 The rows in `Ad` and `Ao` are in [0, Nr), where Nr is the number of local rows on this process. The columns 4615 in `Ad` are in [0, Nc) where Nc is the number of local columns. The columns are `Ao` are in [0, Nco), where Nco is 4616 the number of nonzero columns in the local off-diagonal piece of the matrix `A`. 
The array colmap maps these 4617 local column numbers to global column numbers in the original matrix. 4618 4619 Fortran Notes: 4620 `MatMPIAIJGetSeqAIJ()` Fortran binding is deprecated (since PETSc 3.19), use `MatMPIAIJGetSeqAIJF90()` 4621 4622 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJF90()`, `MatMPIAIJRestoreSeqAIJF90()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATSEQAIJ` 4623 @*/ 4624 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[]) 4625 { 4626 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 4627 PetscBool flg; 4628 4629 PetscFunctionBegin; 4630 PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &flg)); 4631 PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIAIJ matrix as input"); 4632 if (Ad) *Ad = a->A; 4633 if (Ao) *Ao = a->B; 4634 if (colmap) *colmap = a->garray; 4635 PetscFunctionReturn(PETSC_SUCCESS); 4636 } 4637 4638 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 4639 { 4640 PetscInt m, N, i, rstart, nnz, Ii; 4641 PetscInt *indx; 4642 PetscScalar *values; 4643 MatType rootType; 4644 4645 PetscFunctionBegin; 4646 PetscCall(MatGetSize(inmat, &m, &N)); 4647 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4648 PetscInt *dnz, *onz, sum, bs, cbs; 4649 4650 if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N)); 4651 /* Check sum(n) = N */ 4652 PetscCall(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm)); 4653 PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N); 4654 4655 PetscCallMPI(MPI_Scan(&m, &rstart, 1, MPIU_INT, MPI_SUM, comm)); 4656 rstart -= m; 4657 4658 MatPreallocateBegin(comm, m, n, dnz, onz); 4659 for (i = 0; i < m; i++) { 4660 PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, NULL)); 4661 
PetscCall(MatPreallocateSet(i + rstart, nnz, indx, dnz, onz)); 4662 PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, NULL)); 4663 } 4664 4665 PetscCall(MatCreate(comm, outmat)); 4666 PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 4667 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 4668 PetscCall(MatSetBlockSizes(*outmat, bs, cbs)); 4669 PetscCall(MatGetRootType_Private(inmat, &rootType)); 4670 PetscCall(MatSetType(*outmat, rootType)); 4671 PetscCall(MatSeqAIJSetPreallocation(*outmat, 0, dnz)); 4672 PetscCall(MatMPIAIJSetPreallocation(*outmat, 0, dnz, 0, onz)); 4673 MatPreallocateEnd(dnz, onz); 4674 PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 4675 } 4676 4677 /* numeric phase */ 4678 PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL)); 4679 for (i = 0; i < m; i++) { 4680 PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, &values)); 4681 Ii = i + rstart; 4682 PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES)); 4683 PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, &values)); 4684 } 4685 PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY)); 4686 PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY)); 4687 PetscFunctionReturn(PETSC_SUCCESS); 4688 } 4689 4690 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data) 4691 { 4692 Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data; 4693 4694 PetscFunctionBegin; 4695 if (!merge) PetscFunctionReturn(PETSC_SUCCESS); 4696 PetscCall(PetscFree(merge->id_r)); 4697 PetscCall(PetscFree(merge->len_s)); 4698 PetscCall(PetscFree(merge->len_r)); 4699 PetscCall(PetscFree(merge->bi)); 4700 PetscCall(PetscFree(merge->bj)); 4701 PetscCall(PetscFree(merge->buf_ri[0])); 4702 PetscCall(PetscFree(merge->buf_ri)); 4703 PetscCall(PetscFree(merge->buf_rj[0])); 4704 PetscCall(PetscFree(merge->buf_rj)); 4705 PetscCall(PetscFree(merge->coi)); 4706 PetscCall(PetscFree(merge->coj)); 4707 PetscCall(PetscFree(merge->owners_co)); 4708 
PetscCall(PetscLayoutDestroy(&merge->rowmap)); 4709 PetscCall(PetscFree(merge)); 4710 PetscFunctionReturn(PETSC_SUCCESS); 4711 } 4712 4713 #include <../src/mat/utils/freespace.h> 4714 #include <petscbt.h> 4715 4716 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat, Mat mpimat) 4717 { 4718 MPI_Comm comm; 4719 Mat_SeqAIJ *a = (Mat_SeqAIJ *)seqmat->data; 4720 PetscMPIInt size, rank, taga, *len_s; 4721 PetscInt N = mpimat->cmap->N, i, j, *owners, *ai = a->i, *aj; 4722 PetscInt proc, m; 4723 PetscInt **buf_ri, **buf_rj; 4724 PetscInt k, anzi, *bj_i, *bi, *bj, arow, bnzi, nextaj; 4725 PetscInt nrows, **buf_ri_k, **nextrow, **nextai; 4726 MPI_Request *s_waits, *r_waits; 4727 MPI_Status *status; 4728 const MatScalar *aa, *a_a; 4729 MatScalar **abuf_r, *ba_i; 4730 Mat_Merge_SeqsToMPI *merge; 4731 PetscContainer container; 4732 4733 PetscFunctionBegin; 4734 PetscCall(PetscObjectGetComm((PetscObject)mpimat, &comm)); 4735 PetscCall(PetscLogEventBegin(MAT_Seqstompinum, seqmat, 0, 0, 0)); 4736 4737 PetscCallMPI(MPI_Comm_size(comm, &size)); 4738 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 4739 4740 PetscCall(PetscObjectQuery((PetscObject)mpimat, "MatMergeSeqsToMPI", (PetscObject *)&container)); 4741 PetscCheck(container, PetscObjectComm((PetscObject)mpimat), PETSC_ERR_PLIB, "Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic"); 4742 PetscCall(PetscContainerGetPointer(container, (void **)&merge)); 4743 PetscCall(MatSeqAIJGetArrayRead(seqmat, &a_a)); 4744 aa = a_a; 4745 4746 bi = merge->bi; 4747 bj = merge->bj; 4748 buf_ri = merge->buf_ri; 4749 buf_rj = merge->buf_rj; 4750 4751 PetscCall(PetscMalloc1(size, &status)); 4752 owners = merge->rowmap->range; 4753 len_s = merge->len_s; 4754 4755 /* send and recv matrix values */ 4756 PetscCall(PetscObjectGetNewTag((PetscObject)mpimat, &taga)); 4757 PetscCall(PetscPostIrecvScalar(comm, taga, merge->nrecv, merge->id_r, merge->len_r, &abuf_r, &r_waits)); 4758 4759 PetscCall(PetscMalloc1(merge->nsend + 1, &s_waits)); 4760 for (proc = 
0, k = 0; proc < size; proc++) { 4761 if (!len_s[proc]) continue; 4762 i = owners[proc]; 4763 PetscCallMPI(MPI_Isend(aa + ai[i], len_s[proc], MPIU_MATSCALAR, proc, taga, comm, s_waits + k)); 4764 k++; 4765 } 4766 4767 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, r_waits, status)); 4768 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, s_waits, status)); 4769 PetscCall(PetscFree(status)); 4770 4771 PetscCall(PetscFree(s_waits)); 4772 PetscCall(PetscFree(r_waits)); 4773 4774 /* insert mat values of mpimat */ 4775 PetscCall(PetscMalloc1(N, &ba_i)); 4776 PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai)); 4777 4778 for (k = 0; k < merge->nrecv; k++) { 4779 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4780 nrows = *buf_ri_k[k]; 4781 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4782 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4783 } 4784 4785 /* set values of ba */ 4786 m = merge->rowmap->n; 4787 for (i = 0; i < m; i++) { 4788 arow = owners[rank] + i; 4789 bj_i = bj + bi[i]; /* col indices of the i-th row of mpimat */ 4790 bnzi = bi[i + 1] - bi[i]; 4791 PetscCall(PetscArrayzero(ba_i, bnzi)); 4792 4793 /* add local non-zero vals of this proc's seqmat into ba */ 4794 anzi = ai[arow + 1] - ai[arow]; 4795 aj = a->j + ai[arow]; 4796 aa = a_a + ai[arow]; 4797 nextaj = 0; 4798 for (j = 0; nextaj < anzi; j++) { 4799 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4800 ba_i[j] += aa[nextaj++]; 4801 } 4802 } 4803 4804 /* add received vals into ba */ 4805 for (k = 0; k < merge->nrecv; k++) { /* k-th received message */ 4806 /* i-th row */ 4807 if (i == *nextrow[k]) { 4808 anzi = *(nextai[k] + 1) - *nextai[k]; 4809 aj = buf_rj[k] + *nextai[k]; 4810 aa = abuf_r[k] + *nextai[k]; 4811 nextaj = 0; 4812 for (j = 0; nextaj < anzi; j++) { 4813 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4814 ba_i[j] 
+= aa[nextaj++]; 4815 } 4816 } 4817 nextrow[k]++; 4818 nextai[k]++; 4819 } 4820 } 4821 PetscCall(MatSetValues(mpimat, 1, &arow, bnzi, bj_i, ba_i, INSERT_VALUES)); 4822 } 4823 PetscCall(MatSeqAIJRestoreArrayRead(seqmat, &a_a)); 4824 PetscCall(MatAssemblyBegin(mpimat, MAT_FINAL_ASSEMBLY)); 4825 PetscCall(MatAssemblyEnd(mpimat, MAT_FINAL_ASSEMBLY)); 4826 4827 PetscCall(PetscFree(abuf_r[0])); 4828 PetscCall(PetscFree(abuf_r)); 4829 PetscCall(PetscFree(ba_i)); 4830 PetscCall(PetscFree3(buf_ri_k, nextrow, nextai)); 4831 PetscCall(PetscLogEventEnd(MAT_Seqstompinum, seqmat, 0, 0, 0)); 4832 PetscFunctionReturn(PETSC_SUCCESS); 4833 } 4834 4835 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, Mat *mpimat) 4836 { 4837 Mat B_mpi; 4838 Mat_SeqAIJ *a = (Mat_SeqAIJ *)seqmat->data; 4839 PetscMPIInt size, rank, tagi, tagj, *len_s, *len_si, *len_ri; 4840 PetscInt **buf_rj, **buf_ri, **buf_ri_k; 4841 PetscInt M = seqmat->rmap->n, N = seqmat->cmap->n, i, *owners, *ai = a->i, *aj = a->j; 4842 PetscInt len, proc, *dnz, *onz, bs, cbs; 4843 PetscInt k, anzi, *bi, *bj, *lnk, nlnk, arow, bnzi; 4844 PetscInt nrows, *buf_s, *buf_si, *buf_si_i, **nextrow, **nextai; 4845 MPI_Request *si_waits, *sj_waits, *ri_waits, *rj_waits; 4846 MPI_Status *status; 4847 PetscFreeSpaceList free_space = NULL, current_space = NULL; 4848 PetscBT lnkbt; 4849 Mat_Merge_SeqsToMPI *merge; 4850 PetscContainer container; 4851 4852 PetscFunctionBegin; 4853 PetscCall(PetscLogEventBegin(MAT_Seqstompisym, seqmat, 0, 0, 0)); 4854 4855 /* make sure it is a PETSc comm */ 4856 PetscCall(PetscCommDuplicate(comm, &comm, NULL)); 4857 PetscCallMPI(MPI_Comm_size(comm, &size)); 4858 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 4859 4860 PetscCall(PetscNew(&merge)); 4861 PetscCall(PetscMalloc1(size, &status)); 4862 4863 /* determine row ownership */ 4864 PetscCall(PetscLayoutCreate(comm, &merge->rowmap)); 4865 PetscCall(PetscLayoutSetLocalSize(merge->rowmap, m)); 4866 
PetscCall(PetscLayoutSetSize(merge->rowmap, M)); 4867 PetscCall(PetscLayoutSetBlockSize(merge->rowmap, 1)); 4868 PetscCall(PetscLayoutSetUp(merge->rowmap)); 4869 PetscCall(PetscMalloc1(size, &len_si)); 4870 PetscCall(PetscMalloc1(size, &merge->len_s)); 4871 4872 m = merge->rowmap->n; 4873 owners = merge->rowmap->range; 4874 4875 /* determine the number of messages to send, their lengths */ 4876 len_s = merge->len_s; 4877 4878 len = 0; /* length of buf_si[] */ 4879 merge->nsend = 0; 4880 for (proc = 0; proc < size; proc++) { 4881 len_si[proc] = 0; 4882 if (proc == rank) { 4883 len_s[proc] = 0; 4884 } else { 4885 len_si[proc] = owners[proc + 1] - owners[proc] + 1; 4886 len_s[proc] = ai[owners[proc + 1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4887 } 4888 if (len_s[proc]) { 4889 merge->nsend++; 4890 nrows = 0; 4891 for (i = owners[proc]; i < owners[proc + 1]; i++) { 4892 if (ai[i + 1] > ai[i]) nrows++; 4893 } 4894 len_si[proc] = 2 * (nrows + 1); 4895 len += len_si[proc]; 4896 } 4897 } 4898 4899 /* determine the number and length of messages to receive for ij-structure */ 4900 PetscCall(PetscGatherNumberOfMessages(comm, NULL, len_s, &merge->nrecv)); 4901 PetscCall(PetscGatherMessageLengths2(comm, merge->nsend, merge->nrecv, len_s, len_si, &merge->id_r, &merge->len_r, &len_ri)); 4902 4903 /* post the Irecv of j-structure */ 4904 PetscCall(PetscCommGetNewTag(comm, &tagj)); 4905 PetscCall(PetscPostIrecvInt(comm, tagj, merge->nrecv, merge->id_r, merge->len_r, &buf_rj, &rj_waits)); 4906 4907 /* post the Isend of j-structure */ 4908 PetscCall(PetscMalloc2(merge->nsend, &si_waits, merge->nsend, &sj_waits)); 4909 4910 for (proc = 0, k = 0; proc < size; proc++) { 4911 if (!len_s[proc]) continue; 4912 i = owners[proc]; 4913 PetscCallMPI(MPI_Isend(aj + ai[i], len_s[proc], MPIU_INT, proc, tagj, comm, sj_waits + k)); 4914 k++; 4915 } 4916 4917 /* receives and sends of j-structure are complete */ 4918 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, 
rj_waits, status)); 4919 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, sj_waits, status)); 4920 4921 /* send and recv i-structure */ 4922 PetscCall(PetscCommGetNewTag(comm, &tagi)); 4923 PetscCall(PetscPostIrecvInt(comm, tagi, merge->nrecv, merge->id_r, len_ri, &buf_ri, &ri_waits)); 4924 4925 PetscCall(PetscMalloc1(len + 1, &buf_s)); 4926 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4927 for (proc = 0, k = 0; proc < size; proc++) { 4928 if (!len_s[proc]) continue; 4929 /* form outgoing message for i-structure: 4930 buf_si[0]: nrows to be sent 4931 [1:nrows]: row index (global) 4932 [nrows+1:2*nrows+1]: i-structure index 4933 */ 4934 nrows = len_si[proc] / 2 - 1; 4935 buf_si_i = buf_si + nrows + 1; 4936 buf_si[0] = nrows; 4937 buf_si_i[0] = 0; 4938 nrows = 0; 4939 for (i = owners[proc]; i < owners[proc + 1]; i++) { 4940 anzi = ai[i + 1] - ai[i]; 4941 if (anzi) { 4942 buf_si_i[nrows + 1] = buf_si_i[nrows] + anzi; /* i-structure */ 4943 buf_si[nrows + 1] = i - owners[proc]; /* local row index */ 4944 nrows++; 4945 } 4946 } 4947 PetscCallMPI(MPI_Isend(buf_si, len_si[proc], MPIU_INT, proc, tagi, comm, si_waits + k)); 4948 k++; 4949 buf_si += len_si[proc]; 4950 } 4951 4952 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, ri_waits, status)); 4953 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, si_waits, status)); 4954 4955 PetscCall(PetscInfo(seqmat, "nsend: %d, nrecv: %d\n", merge->nsend, merge->nrecv)); 4956 for (i = 0; i < merge->nrecv; i++) PetscCall(PetscInfo(seqmat, "recv len_ri=%d, len_rj=%d from [%d]\n", len_ri[i], merge->len_r[i], merge->id_r[i])); 4957 4958 PetscCall(PetscFree(len_si)); 4959 PetscCall(PetscFree(len_ri)); 4960 PetscCall(PetscFree(rj_waits)); 4961 PetscCall(PetscFree2(si_waits, sj_waits)); 4962 PetscCall(PetscFree(ri_waits)); 4963 PetscCall(PetscFree(buf_s)); 4964 PetscCall(PetscFree(status)); 4965 4966 /* compute a local seq matrix in each processor */ 4967 /* allocate bi array and free space 
for accumulating nonzero column info */ 4968 PetscCall(PetscMalloc1(m + 1, &bi)); 4969 bi[0] = 0; 4970 4971 /* create and initialize a linked list */ 4972 nlnk = N + 1; 4973 PetscCall(PetscLLCreate(N, N, nlnk, lnk, lnkbt)); 4974 4975 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4976 len = ai[owners[rank + 1]] - ai[owners[rank]]; 4977 PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2, len) + 1, &free_space)); 4978 4979 current_space = free_space; 4980 4981 /* determine symbolic info for each local row */ 4982 PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai)); 4983 4984 for (k = 0; k < merge->nrecv; k++) { 4985 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4986 nrows = *buf_ri_k[k]; 4987 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4988 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4989 } 4990 4991 MatPreallocateBegin(comm, m, n, dnz, onz); 4992 len = 0; 4993 for (i = 0; i < m; i++) { 4994 bnzi = 0; 4995 /* add local non-zero cols of this proc's seqmat into lnk */ 4996 arow = owners[rank] + i; 4997 anzi = ai[arow + 1] - ai[arow]; 4998 aj = a->j + ai[arow]; 4999 PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt)); 5000 bnzi += nlnk; 5001 /* add received col data into lnk */ 5002 for (k = 0; k < merge->nrecv; k++) { /* k-th received message */ 5003 if (i == *nextrow[k]) { /* i-th row */ 5004 anzi = *(nextai[k] + 1) - *nextai[k]; 5005 aj = buf_rj[k] + *nextai[k]; 5006 PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt)); 5007 bnzi += nlnk; 5008 nextrow[k]++; 5009 nextai[k]++; 5010 } 5011 } 5012 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 5013 5014 /* if free space is not available, make more free space */ 5015 if (current_space->local_remaining < bnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi, current_space->total_array_size), ¤t_space)); 5016 /* copy data into free 
space, then initialize lnk */ 5017 PetscCall(PetscLLClean(N, N, bnzi, lnk, current_space->array, lnkbt)); 5018 PetscCall(MatPreallocateSet(i + owners[rank], bnzi, current_space->array, dnz, onz)); 5019 5020 current_space->array += bnzi; 5021 current_space->local_used += bnzi; 5022 current_space->local_remaining -= bnzi; 5023 5024 bi[i + 1] = bi[i] + bnzi; 5025 } 5026 5027 PetscCall(PetscFree3(buf_ri_k, nextrow, nextai)); 5028 5029 PetscCall(PetscMalloc1(bi[m] + 1, &bj)); 5030 PetscCall(PetscFreeSpaceContiguous(&free_space, bj)); 5031 PetscCall(PetscLLDestroy(lnk, lnkbt)); 5032 5033 /* create symbolic parallel matrix B_mpi */ 5034 PetscCall(MatGetBlockSizes(seqmat, &bs, &cbs)); 5035 PetscCall(MatCreate(comm, &B_mpi)); 5036 if (n == PETSC_DECIDE) { 5037 PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, N)); 5038 } else { 5039 PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 5040 } 5041 PetscCall(MatSetBlockSizes(B_mpi, bs, cbs)); 5042 PetscCall(MatSetType(B_mpi, MATMPIAIJ)); 5043 PetscCall(MatMPIAIJSetPreallocation(B_mpi, 0, dnz, 0, onz)); 5044 MatPreallocateEnd(dnz, onz); 5045 PetscCall(MatSetOption(B_mpi, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE)); 5046 5047 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 5048 B_mpi->assembled = PETSC_FALSE; 5049 merge->bi = bi; 5050 merge->bj = bj; 5051 merge->buf_ri = buf_ri; 5052 merge->buf_rj = buf_rj; 5053 merge->coi = NULL; 5054 merge->coj = NULL; 5055 merge->owners_co = NULL; 5056 5057 PetscCall(PetscCommDestroy(&comm)); 5058 5059 /* attach the supporting struct to B_mpi for reuse */ 5060 PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container)); 5061 PetscCall(PetscContainerSetPointer(container, merge)); 5062 PetscCall(PetscContainerSetUserDestroy(container, MatDestroy_MPIAIJ_SeqsToMPI)); 5063 PetscCall(PetscObjectCompose((PetscObject)B_mpi, "MatMergeSeqsToMPI", (PetscObject)container)); 5064 PetscCall(PetscContainerDestroy(&container)); 5065 
*mpimat = B_mpi; 5066 5067 PetscCall(PetscLogEventEnd(MAT_Seqstompisym, seqmat, 0, 0, 0)); 5068 PetscFunctionReturn(PETSC_SUCCESS); 5069 } 5070 5071 /*@C 5072 MatCreateMPIAIJSumSeqAIJ - Creates a `MATMPIAIJ` matrix by adding sequential 5073 matrices from each processor 5074 5075 Collective 5076 5077 Input Parameters: 5078 + comm - the communicators the parallel matrix will live on 5079 . seqmat - the input sequential matrices 5080 . m - number of local rows (or `PETSC_DECIDE`) 5081 . n - number of local columns (or `PETSC_DECIDE`) 5082 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5083 5084 Output Parameter: 5085 . mpimat - the parallel matrix generated 5086 5087 Level: advanced 5088 5089 Note: 5090 The dimensions of the sequential matrix in each processor MUST be the same. 5091 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 5092 destroyed when `mpimat` is destroyed. Call `PetscObjectQuery()` to access `seqmat`. 5093 5094 .seealso: [](ch_matrices), `Mat`, `MatCreateAIJ()` 5095 @*/ 5096 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, MatReuse scall, Mat *mpimat) 5097 { 5098 PetscMPIInt size; 5099 5100 PetscFunctionBegin; 5101 PetscCallMPI(MPI_Comm_size(comm, &size)); 5102 if (size == 1) { 5103 PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0)); 5104 if (scall == MAT_INITIAL_MATRIX) { 5105 PetscCall(MatDuplicate(seqmat, MAT_COPY_VALUES, mpimat)); 5106 } else { 5107 PetscCall(MatCopy(seqmat, *mpimat, SAME_NONZERO_PATTERN)); 5108 } 5109 PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0)); 5110 PetscFunctionReturn(PETSC_SUCCESS); 5111 } 5112 PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0)); 5113 if (scall == MAT_INITIAL_MATRIX) PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm, seqmat, m, n, mpimat)); 5114 PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat, *mpimat)); 5115 PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0)); 5116 
PetscFunctionReturn(PETSC_SUCCESS); 5117 } 5118 5119 /*@ 5120 MatAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATAIJ` matrix. 5121 5122 Not Collective 5123 5124 Input Parameter: 5125 . A - the matrix 5126 5127 Output Parameter: 5128 . A_loc - the local sequential matrix generated 5129 5130 Level: developer 5131 5132 Notes: 5133 The matrix is created by taking `A`'s local rows and putting them into a sequential matrix 5134 with `mlocal` rows and `n` columns. Where `mlocal` is obtained with `MatGetLocalSize()` and 5135 `n` is the global column count obtained with `MatGetSize()` 5136 5137 In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix. 5138 5139 For parallel matrices this creates an entirely new matrix. If the matrix is sequential it merely increases the reference count. 5140 5141 Destroy the matrix with `MatDestroy()` 5142 5143 .seealso: [](ch_matrices), `Mat`, `MatMPIAIJGetLocalMat()` 5144 @*/ 5145 PetscErrorCode MatAIJGetLocalMat(Mat A, Mat *A_loc) 5146 { 5147 PetscBool mpi; 5148 5149 PetscFunctionBegin; 5150 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &mpi)); 5151 if (mpi) { 5152 PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, A_loc)); 5153 } else { 5154 *A_loc = A; 5155 PetscCall(PetscObjectReference((PetscObject)*A_loc)); 5156 } 5157 PetscFunctionReturn(PETSC_SUCCESS); 5158 } 5159 5160 /*@ 5161 MatMPIAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix. 5162 5163 Not Collective 5164 5165 Input Parameters: 5166 + A - the matrix 5167 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5168 5169 Output Parameter: 5170 . 
A_loc - the local sequential matrix generated

  Level: developer

  Notes:
  The matrix is created by taking all `A`'s local rows and putting them into a sequential
  matrix with `mlocal` rows and `n` columns. `mlocal` is the row count obtained with
  `MatGetLocalSize()` and `n` is the global column count obtained with `MatGetSize()`.

  In other words, this combines the two parts of a parallel `MATMPIAIJ` matrix on each process into a single matrix.

  When `A` is sequential and `MAT_INITIAL_MATRIX` is requested, the matrix returned is the diagonal part of `A` (which contains the entire matrix),
  with its reference count increased by one. Hence changing values of `A_loc` changes `A`. If `MAT_REUSE_MATRIX` is requested on a sequential matrix
  then `MatCopy`(Adiag,*`A_loc`,`SAME_NONZERO_PATTERN`) is called to fill `A_loc`. Thus one can preallocate the appropriate sequential matrix `A_loc`
  and then call this routine with `MAT_REUSE_MATRIX`. In this case, one can modify the values of `A_loc` without affecting the original sequential matrix.
5185 5186 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()` 5187 @*/ 5188 PetscErrorCode MatMPIAIJGetLocalMat(Mat A, MatReuse scall, Mat *A_loc) 5189 { 5190 Mat_MPIAIJ *mpimat = (Mat_MPIAIJ *)A->data; 5191 Mat_SeqAIJ *mat, *a, *b; 5192 PetscInt *ai, *aj, *bi, *bj, *cmap = mpimat->garray; 5193 const PetscScalar *aa, *ba, *aav, *bav; 5194 PetscScalar *ca, *cam; 5195 PetscMPIInt size; 5196 PetscInt am = A->rmap->n, i, j, k, cstart = A->cmap->rstart; 5197 PetscInt *ci, *cj, col, ncols_d, ncols_o, jo; 5198 PetscBool match; 5199 5200 PetscFunctionBegin; 5201 PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &match)); 5202 PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input"); 5203 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 5204 if (size == 1) { 5205 if (scall == MAT_INITIAL_MATRIX) { 5206 PetscCall(PetscObjectReference((PetscObject)mpimat->A)); 5207 *A_loc = mpimat->A; 5208 } else if (scall == MAT_REUSE_MATRIX) { 5209 PetscCall(MatCopy(mpimat->A, *A_loc, SAME_NONZERO_PATTERN)); 5210 } 5211 PetscFunctionReturn(PETSC_SUCCESS); 5212 } 5213 5214 PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0)); 5215 a = (Mat_SeqAIJ *)mpimat->A->data; 5216 b = (Mat_SeqAIJ *)mpimat->B->data; 5217 ai = a->i; 5218 aj = a->j; 5219 bi = b->i; 5220 bj = b->j; 5221 PetscCall(MatSeqAIJGetArrayRead(mpimat->A, &aav)); 5222 PetscCall(MatSeqAIJGetArrayRead(mpimat->B, &bav)); 5223 aa = aav; 5224 ba = bav; 5225 if (scall == MAT_INITIAL_MATRIX) { 5226 PetscCall(PetscMalloc1(1 + am, &ci)); 5227 ci[0] = 0; 5228 for (i = 0; i < am; i++) ci[i + 1] = ci[i] + (ai[i + 1] - ai[i]) + (bi[i + 1] - bi[i]); 5229 PetscCall(PetscMalloc1(1 + ci[am], &cj)); 5230 PetscCall(PetscMalloc1(1 + ci[am], &ca)); 5231 k = 0; 5232 for (i = 0; i < am; i++) { 5233 ncols_o = bi[i + 1] - bi[i]; 5234 ncols_d = ai[i + 1] - ai[i]; 5235 /* off-diagonal 
portion of A */ 5236 for (jo = 0; jo < ncols_o; jo++) { 5237 col = cmap[*bj]; 5238 if (col >= cstart) break; 5239 cj[k] = col; 5240 bj++; 5241 ca[k++] = *ba++; 5242 } 5243 /* diagonal portion of A */ 5244 for (j = 0; j < ncols_d; j++) { 5245 cj[k] = cstart + *aj++; 5246 ca[k++] = *aa++; 5247 } 5248 /* off-diagonal portion of A */ 5249 for (j = jo; j < ncols_o; j++) { 5250 cj[k] = cmap[*bj++]; 5251 ca[k++] = *ba++; 5252 } 5253 } 5254 /* put together the new matrix */ 5255 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, A->cmap->N, ci, cj, ca, A_loc)); 5256 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5257 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5258 mat = (Mat_SeqAIJ *)(*A_loc)->data; 5259 mat->free_a = PETSC_TRUE; 5260 mat->free_ij = PETSC_TRUE; 5261 mat->nonew = 0; 5262 } else if (scall == MAT_REUSE_MATRIX) { 5263 mat = (Mat_SeqAIJ *)(*A_loc)->data; 5264 ci = mat->i; 5265 cj = mat->j; 5266 PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &cam)); 5267 for (i = 0; i < am; i++) { 5268 /* off-diagonal portion of A */ 5269 ncols_o = bi[i + 1] - bi[i]; 5270 for (jo = 0; jo < ncols_o; jo++) { 5271 col = cmap[*bj]; 5272 if (col >= cstart) break; 5273 *cam++ = *ba++; 5274 bj++; 5275 } 5276 /* diagonal portion of A */ 5277 ncols_d = ai[i + 1] - ai[i]; 5278 for (j = 0; j < ncols_d; j++) *cam++ = *aa++; 5279 /* off-diagonal portion of A */ 5280 for (j = jo; j < ncols_o; j++) { 5281 *cam++ = *ba++; 5282 bj++; 5283 } 5284 } 5285 PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &cam)); 5286 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall); 5287 PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A, &aav)); 5288 PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B, &bav)); 5289 PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0)); 5290 PetscFunctionReturn(PETSC_SUCCESS); 5291 } 5292 5293 /*@ 5294 MatMPIAIJGetLocalMatMerge - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by 
taking all its local rows and putting them into a sequential matrix with 5295 mlocal rows and n columns. Where n is the sum of the number of columns of the diagonal and off-diagonal part 5296 5297 Not Collective 5298 5299 Input Parameters: 5300 + A - the matrix 5301 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5302 5303 Output Parameters: 5304 + glob - sequential `IS` with global indices associated with the columns of the local sequential matrix generated (can be `NULL`) 5305 - A_loc - the local sequential matrix generated 5306 5307 Level: developer 5308 5309 Note: 5310 This is different from `MatMPIAIJGetLocalMat()` since the first columns in the returning matrix are those associated with the diagonal 5311 part, then those associated with the off-diagonal part (in its local ordering) 5312 5313 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()` 5314 @*/ 5315 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A, MatReuse scall, IS *glob, Mat *A_loc) 5316 { 5317 Mat Ao, Ad; 5318 const PetscInt *cmap; 5319 PetscMPIInt size; 5320 PetscErrorCode (*f)(Mat, MatReuse, IS *, Mat *); 5321 5322 PetscFunctionBegin; 5323 PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &cmap)); 5324 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 5325 if (size == 1) { 5326 if (scall == MAT_INITIAL_MATRIX) { 5327 PetscCall(PetscObjectReference((PetscObject)Ad)); 5328 *A_loc = Ad; 5329 } else if (scall == MAT_REUSE_MATRIX) { 5330 PetscCall(MatCopy(Ad, *A_loc, SAME_NONZERO_PATTERN)); 5331 } 5332 if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad), Ad->cmap->n, Ad->cmap->rstart, 1, glob)); 5333 PetscFunctionReturn(PETSC_SUCCESS); 5334 } 5335 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJGetLocalMatMerge_C", &f)); 5336 PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0)); 5337 if (f) { 5338 PetscCall((*f)(A, scall, glob, A_loc)); 5339 } else { 5340 Mat_SeqAIJ 
*a = (Mat_SeqAIJ *)Ad->data; 5341 Mat_SeqAIJ *b = (Mat_SeqAIJ *)Ao->data; 5342 Mat_SeqAIJ *c; 5343 PetscInt *ai = a->i, *aj = a->j; 5344 PetscInt *bi = b->i, *bj = b->j; 5345 PetscInt *ci, *cj; 5346 const PetscScalar *aa, *ba; 5347 PetscScalar *ca; 5348 PetscInt i, j, am, dn, on; 5349 5350 PetscCall(MatGetLocalSize(Ad, &am, &dn)); 5351 PetscCall(MatGetLocalSize(Ao, NULL, &on)); 5352 PetscCall(MatSeqAIJGetArrayRead(Ad, &aa)); 5353 PetscCall(MatSeqAIJGetArrayRead(Ao, &ba)); 5354 if (scall == MAT_INITIAL_MATRIX) { 5355 PetscInt k; 5356 PetscCall(PetscMalloc1(1 + am, &ci)); 5357 PetscCall(PetscMalloc1(ai[am] + bi[am], &cj)); 5358 PetscCall(PetscMalloc1(ai[am] + bi[am], &ca)); 5359 ci[0] = 0; 5360 for (i = 0, k = 0; i < am; i++) { 5361 const PetscInt ncols_o = bi[i + 1] - bi[i]; 5362 const PetscInt ncols_d = ai[i + 1] - ai[i]; 5363 ci[i + 1] = ci[i] + ncols_o + ncols_d; 5364 /* diagonal portion of A */ 5365 for (j = 0; j < ncols_d; j++, k++) { 5366 cj[k] = *aj++; 5367 ca[k] = *aa++; 5368 } 5369 /* off-diagonal portion of A */ 5370 for (j = 0; j < ncols_o; j++, k++) { 5371 cj[k] = dn + *bj++; 5372 ca[k] = *ba++; 5373 } 5374 } 5375 /* put together the new matrix */ 5376 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, dn + on, ci, cj, ca, A_loc)); 5377 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5378 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5379 c = (Mat_SeqAIJ *)(*A_loc)->data; 5380 c->free_a = PETSC_TRUE; 5381 c->free_ij = PETSC_TRUE; 5382 c->nonew = 0; 5383 PetscCall(MatSetType(*A_loc, ((PetscObject)Ad)->type_name)); 5384 } else if (scall == MAT_REUSE_MATRIX) { 5385 PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &ca)); 5386 for (i = 0; i < am; i++) { 5387 const PetscInt ncols_d = ai[i + 1] - ai[i]; 5388 const PetscInt ncols_o = bi[i + 1] - bi[i]; 5389 /* diagonal portion of A */ 5390 for (j = 0; j < ncols_d; j++) *ca++ = *aa++; 5391 /* off-diagonal portion of A */ 5392 for (j = 0; j < ncols_o; j++) *ca++ = *ba++; 5393 } 5394 PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &ca)); 5395 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall); 5396 PetscCall(MatSeqAIJRestoreArrayRead(Ad, &aa)); 5397 PetscCall(MatSeqAIJRestoreArrayRead(Ao, &aa)); 5398 if (glob) { 5399 PetscInt cst, *gidx; 5400 5401 PetscCall(MatGetOwnershipRangeColumn(A, &cst, NULL)); 5402 PetscCall(PetscMalloc1(dn + on, &gidx)); 5403 for (i = 0; i < dn; i++) gidx[i] = cst + i; 5404 for (i = 0; i < on; i++) gidx[i + dn] = cmap[i]; 5405 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad), dn + on, gidx, PETSC_OWN_POINTER, glob)); 5406 } 5407 } 5408 PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0)); 5409 PetscFunctionReturn(PETSC_SUCCESS); 5410 } 5411 5412 /*@C 5413 MatMPIAIJGetLocalMatCondensed - Creates a `MATSEQAIJ` matrix from an `MATMPIAIJ` matrix by taking all its local rows and NON-ZERO columns 5414 5415 Not Collective 5416 5417 Input Parameters: 5418 + A - the matrix 5419 . scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5420 . row - index set of rows to extract (or `NULL`) 5421 - col - index set of columns to extract (or `NULL`) 5422 5423 Output Parameter: 5424 . 
A_loc - the local sequential matrix generated 5425 5426 Level: developer 5427 5428 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()` 5429 @*/ 5430 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A, MatReuse scall, IS *row, IS *col, Mat *A_loc) 5431 { 5432 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5433 PetscInt i, start, end, ncols, nzA, nzB, *cmap, imark, *idx; 5434 IS isrowa, iscola; 5435 Mat *aloc; 5436 PetscBool match; 5437 5438 PetscFunctionBegin; 5439 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &match)); 5440 PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input"); 5441 PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed, A, 0, 0, 0)); 5442 if (!row) { 5443 start = A->rmap->rstart; 5444 end = A->rmap->rend; 5445 PetscCall(ISCreateStride(PETSC_COMM_SELF, end - start, start, 1, &isrowa)); 5446 } else { 5447 isrowa = *row; 5448 } 5449 if (!col) { 5450 start = A->cmap->rstart; 5451 cmap = a->garray; 5452 nzA = a->A->cmap->n; 5453 nzB = a->B->cmap->n; 5454 PetscCall(PetscMalloc1(nzA + nzB, &idx)); 5455 ncols = 0; 5456 for (i = 0; i < nzB; i++) { 5457 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5458 else break; 5459 } 5460 imark = i; 5461 for (i = 0; i < nzA; i++) idx[ncols++] = start + i; 5462 for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; 5463 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &iscola)); 5464 } else { 5465 iscola = *col; 5466 } 5467 if (scall != MAT_INITIAL_MATRIX) { 5468 PetscCall(PetscMalloc1(1, &aloc)); 5469 aloc[0] = *A_loc; 5470 } 5471 PetscCall(MatCreateSubMatrices(A, 1, &isrowa, &iscola, scall, &aloc)); 5472 if (!col) { /* attach global id of condensed columns */ 5473 PetscCall(PetscObjectCompose((PetscObject)aloc[0], "_petsc_GetLocalMatCondensed_iscol", (PetscObject)iscola)); 5474 } 5475 *A_loc = aloc[0]; 5476 PetscCall(PetscFree(aloc)); 5477 if (!row) PetscCall(ISDestroy(&isrowa)); 5478 if (!col) 
PetscCall(ISDestroy(&iscola)); 5479 PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed, A, 0, 0, 0)); 5480 PetscFunctionReturn(PETSC_SUCCESS); 5481 } 5482 5483 /* 5484 * Create a sequential AIJ matrix based on row indices. a whole column is extracted once a row is matched. 5485 * Row could be local or remote.The routine is designed to be scalable in memory so that nothing is based 5486 * on a global size. 5487 * */ 5488 static PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P, IS rows, Mat *P_oth) 5489 { 5490 Mat_MPIAIJ *p = (Mat_MPIAIJ *)P->data; 5491 Mat_SeqAIJ *pd = (Mat_SeqAIJ *)p->A->data, *po = (Mat_SeqAIJ *)p->B->data, *p_oth; 5492 PetscInt plocalsize, nrows, *ilocal, *oilocal, i, lidx, *nrcols, *nlcols, ncol; 5493 PetscMPIInt owner; 5494 PetscSFNode *iremote, *oiremote; 5495 const PetscInt *lrowindices; 5496 PetscSF sf, osf; 5497 PetscInt pcstart, *roffsets, *loffsets, *pnnz, j; 5498 PetscInt ontotalcols, dntotalcols, ntotalcols, nout; 5499 MPI_Comm comm; 5500 ISLocalToGlobalMapping mapping; 5501 const PetscScalar *pd_a, *po_a; 5502 5503 PetscFunctionBegin; 5504 PetscCall(PetscObjectGetComm((PetscObject)P, &comm)); 5505 /* plocalsize is the number of roots 5506 * nrows is the number of leaves 5507 * */ 5508 PetscCall(MatGetLocalSize(P, &plocalsize, NULL)); 5509 PetscCall(ISGetLocalSize(rows, &nrows)); 5510 PetscCall(PetscCalloc1(nrows, &iremote)); 5511 PetscCall(ISGetIndices(rows, &lrowindices)); 5512 for (i = 0; i < nrows; i++) { 5513 /* Find a remote index and an owner for a row 5514 * The row could be local or remote 5515 * */ 5516 owner = 0; 5517 lidx = 0; 5518 PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, &lidx)); 5519 iremote[i].index = lidx; 5520 iremote[i].rank = owner; 5521 } 5522 /* Create SF to communicate how many nonzero columns for each row */ 5523 PetscCall(PetscSFCreate(comm, &sf)); 5524 /* SF will figure out the number of nonzero columns for each row, and their 5525 * offsets 5526 * */ 5527 
PetscCall(PetscSFSetGraph(sf, plocalsize, nrows, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 5528 PetscCall(PetscSFSetFromOptions(sf)); 5529 PetscCall(PetscSFSetUp(sf)); 5530 5531 PetscCall(PetscCalloc1(2 * (plocalsize + 1), &roffsets)); 5532 PetscCall(PetscCalloc1(2 * plocalsize, &nrcols)); 5533 PetscCall(PetscCalloc1(nrows, &pnnz)); 5534 roffsets[0] = 0; 5535 roffsets[1] = 0; 5536 for (i = 0; i < plocalsize; i++) { 5537 /* diagonal */ 5538 nrcols[i * 2 + 0] = pd->i[i + 1] - pd->i[i]; 5539 /* off-diagonal */ 5540 nrcols[i * 2 + 1] = po->i[i + 1] - po->i[i]; 5541 /* compute offsets so that we relative location for each row */ 5542 roffsets[(i + 1) * 2 + 0] = roffsets[i * 2 + 0] + nrcols[i * 2 + 0]; 5543 roffsets[(i + 1) * 2 + 1] = roffsets[i * 2 + 1] + nrcols[i * 2 + 1]; 5544 } 5545 PetscCall(PetscCalloc1(2 * nrows, &nlcols)); 5546 PetscCall(PetscCalloc1(2 * nrows, &loffsets)); 5547 /* 'r' means root, and 'l' means leaf */ 5548 PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE)); 5549 PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE)); 5550 PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE)); 5551 PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE)); 5552 PetscCall(PetscSFDestroy(&sf)); 5553 PetscCall(PetscFree(roffsets)); 5554 PetscCall(PetscFree(nrcols)); 5555 dntotalcols = 0; 5556 ontotalcols = 0; 5557 ncol = 0; 5558 for (i = 0; i < nrows; i++) { 5559 pnnz[i] = nlcols[i * 2 + 0] + nlcols[i * 2 + 1]; 5560 ncol = PetscMax(pnnz[i], ncol); 5561 /* diagonal */ 5562 dntotalcols += nlcols[i * 2 + 0]; 5563 /* off-diagonal */ 5564 ontotalcols += nlcols[i * 2 + 1]; 5565 } 5566 /* We do not need to figure the right number of columns 5567 * since all the calculations will be done by going through the raw data 5568 * */ 5569 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, nrows, ncol, 0, pnnz, P_oth)); 5570 PetscCall(MatSetUp(*P_oth)); 5571 PetscCall(PetscFree(pnnz)); 5572 p_oth = 
(Mat_SeqAIJ *)(*P_oth)->data; 5573 /* diagonal */ 5574 PetscCall(PetscCalloc1(dntotalcols, &iremote)); 5575 /* off-diagonal */ 5576 PetscCall(PetscCalloc1(ontotalcols, &oiremote)); 5577 /* diagonal */ 5578 PetscCall(PetscCalloc1(dntotalcols, &ilocal)); 5579 /* off-diagonal */ 5580 PetscCall(PetscCalloc1(ontotalcols, &oilocal)); 5581 dntotalcols = 0; 5582 ontotalcols = 0; 5583 ntotalcols = 0; 5584 for (i = 0; i < nrows; i++) { 5585 owner = 0; 5586 PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, NULL)); 5587 /* Set iremote for diag matrix */ 5588 for (j = 0; j < nlcols[i * 2 + 0]; j++) { 5589 iremote[dntotalcols].index = loffsets[i * 2 + 0] + j; 5590 iremote[dntotalcols].rank = owner; 5591 /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */ 5592 ilocal[dntotalcols++] = ntotalcols++; 5593 } 5594 /* off-diagonal */ 5595 for (j = 0; j < nlcols[i * 2 + 1]; j++) { 5596 oiremote[ontotalcols].index = loffsets[i * 2 + 1] + j; 5597 oiremote[ontotalcols].rank = owner; 5598 oilocal[ontotalcols++] = ntotalcols++; 5599 } 5600 } 5601 PetscCall(ISRestoreIndices(rows, &lrowindices)); 5602 PetscCall(PetscFree(loffsets)); 5603 PetscCall(PetscFree(nlcols)); 5604 PetscCall(PetscSFCreate(comm, &sf)); 5605 /* P serves as roots and P_oth is leaves 5606 * Diag matrix 5607 * */ 5608 PetscCall(PetscSFSetGraph(sf, pd->i[plocalsize], dntotalcols, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 5609 PetscCall(PetscSFSetFromOptions(sf)); 5610 PetscCall(PetscSFSetUp(sf)); 5611 5612 PetscCall(PetscSFCreate(comm, &osf)); 5613 /* off-diagonal */ 5614 PetscCall(PetscSFSetGraph(osf, po->i[plocalsize], ontotalcols, oilocal, PETSC_OWN_POINTER, oiremote, PETSC_OWN_POINTER)); 5615 PetscCall(PetscSFSetFromOptions(osf)); 5616 PetscCall(PetscSFSetUp(osf)); 5617 PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a)); 5618 PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a)); 5619 /* operate on the matrix internal data to save memory */ 5620 
PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5621 PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5622 PetscCall(MatGetOwnershipRangeColumn(P, &pcstart, NULL)); 5623 /* Convert to global indices for diag matrix */ 5624 for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] += pcstart; 5625 PetscCall(PetscSFBcastBegin(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE)); 5626 /* We want P_oth store global indices */ 5627 PetscCall(ISLocalToGlobalMappingCreate(comm, 1, p->B->cmap->n, p->garray, PETSC_COPY_VALUES, &mapping)); 5628 /* Use memory scalable approach */ 5629 PetscCall(ISLocalToGlobalMappingSetType(mapping, ISLOCALTOGLOBALMAPPINGHASH)); 5630 PetscCall(ISLocalToGlobalMappingApply(mapping, po->i[plocalsize], po->j, po->j)); 5631 PetscCall(PetscSFBcastBegin(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE)); 5632 PetscCall(PetscSFBcastEnd(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE)); 5633 /* Convert back to local indices */ 5634 for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] -= pcstart; 5635 PetscCall(PetscSFBcastEnd(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE)); 5636 nout = 0; 5637 PetscCall(ISGlobalToLocalMappingApply(mapping, IS_GTOLM_DROP, po->i[plocalsize], po->j, &nout, po->j)); 5638 PetscCheck(nout == po->i[plocalsize], comm, PETSC_ERR_ARG_INCOMP, "n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ", po->i[plocalsize], nout); 5639 PetscCall(ISLocalToGlobalMappingDestroy(&mapping)); 5640 /* Exchange values */ 5641 PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5642 PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5643 PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a)); 5644 PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a)); 5645 /* Stop PETSc from shrinking memory */ 5646 for (i = 0; i < nrows; i++) p_oth->ilen[i] = p_oth->imax[i]; 5647 PetscCall(MatAssemblyBegin(*P_oth, MAT_FINAL_ASSEMBLY)); 5648 PetscCall(MatAssemblyEnd(*P_oth, 
MAT_FINAL_ASSEMBLY)); 5649 /* Attach PetscSF objects to P_oth so that we can reuse it later */ 5650 PetscCall(PetscObjectCompose((PetscObject)*P_oth, "diagsf", (PetscObject)sf)); 5651 PetscCall(PetscObjectCompose((PetscObject)*P_oth, "offdiagsf", (PetscObject)osf)); 5652 PetscCall(PetscSFDestroy(&sf)); 5653 PetscCall(PetscSFDestroy(&osf)); 5654 PetscFunctionReturn(PETSC_SUCCESS); 5655 } 5656 5657 /* 5658 * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5659 * This supports MPIAIJ and MAIJ 5660 * */ 5661 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A, Mat P, PetscInt dof, MatReuse reuse, Mat *P_oth) 5662 { 5663 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *p = (Mat_MPIAIJ *)P->data; 5664 Mat_SeqAIJ *p_oth; 5665 IS rows, map; 5666 PetscHMapI hamp; 5667 PetscInt i, htsize, *rowindices, off, *mapping, key, count; 5668 MPI_Comm comm; 5669 PetscSF sf, osf; 5670 PetscBool has; 5671 5672 PetscFunctionBegin; 5673 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 5674 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, P, 0, 0)); 5675 /* If it is the first time, create an index set of off-diag nonzero columns of A, 5676 * and then create a submatrix (that often is an overlapping matrix) 5677 * */ 5678 if (reuse == MAT_INITIAL_MATRIX) { 5679 /* Use a hash table to figure out unique keys */ 5680 PetscCall(PetscHMapICreateWithSize(a->B->cmap->n, &hamp)); 5681 PetscCall(PetscCalloc1(a->B->cmap->n, &mapping)); 5682 count = 0; 5683 /* Assume that a->g is sorted, otherwise the following does not make sense */ 5684 for (i = 0; i < a->B->cmap->n; i++) { 5685 key = a->garray[i] / dof; 5686 PetscCall(PetscHMapIHas(hamp, key, &has)); 5687 if (!has) { 5688 mapping[i] = count; 5689 PetscCall(PetscHMapISet(hamp, key, count++)); 5690 } else { 5691 /* Current 'i' has the same value the previous step */ 5692 mapping[i] = count - 1; 5693 } 5694 } 5695 PetscCall(ISCreateGeneral(comm, a->B->cmap->n, mapping, PETSC_OWN_POINTER, &map)); 5696 
PetscCall(PetscHMapIGetSize(hamp, &htsize)); 5697 PetscCheck(htsize == count, comm, PETSC_ERR_ARG_INCOMP, " Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT, htsize, count); 5698 PetscCall(PetscCalloc1(htsize, &rowindices)); 5699 off = 0; 5700 PetscCall(PetscHMapIGetKeys(hamp, &off, rowindices)); 5701 PetscCall(PetscHMapIDestroy(&hamp)); 5702 PetscCall(PetscSortInt(htsize, rowindices)); 5703 PetscCall(ISCreateGeneral(comm, htsize, rowindices, PETSC_OWN_POINTER, &rows)); 5704 /* In case, the matrix was already created but users want to recreate the matrix */ 5705 PetscCall(MatDestroy(P_oth)); 5706 PetscCall(MatCreateSeqSubMatrixWithRows_Private(P, rows, P_oth)); 5707 PetscCall(PetscObjectCompose((PetscObject)*P_oth, "aoffdiagtopothmapping", (PetscObject)map)); 5708 PetscCall(ISDestroy(&map)); 5709 PetscCall(ISDestroy(&rows)); 5710 } else if (reuse == MAT_REUSE_MATRIX) { 5711 /* If matrix was already created, we simply update values using SF objects 5712 * that as attached to the matrix earlier. 
5713 */ 5714 const PetscScalar *pd_a, *po_a; 5715 5716 PetscCall(PetscObjectQuery((PetscObject)*P_oth, "diagsf", (PetscObject *)&sf)); 5717 PetscCall(PetscObjectQuery((PetscObject)*P_oth, "offdiagsf", (PetscObject *)&osf)); 5718 PetscCheck(sf && osf, comm, PETSC_ERR_ARG_NULL, "Matrix is not initialized yet"); 5719 p_oth = (Mat_SeqAIJ *)(*P_oth)->data; 5720 /* Update values in place */ 5721 PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a)); 5722 PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a)); 5723 PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5724 PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5725 PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5726 PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5727 PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a)); 5728 PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a)); 5729 } else SETERRQ(comm, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown reuse type"); 5730 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, P, 0, 0)); 5731 PetscFunctionReturn(PETSC_SUCCESS); 5732 } 5733 5734 /*@C 5735 MatGetBrowsOfAcols - Returns `IS` that contain rows of `B` that equal to nonzero columns of local `A` 5736 5737 Collective 5738 5739 Input Parameters: 5740 + A - the first matrix in `MATMPIAIJ` format 5741 . B - the second matrix in `MATMPIAIJ` format 5742 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5743 5744 Output Parameters: 5745 + rowb - On input index sets of rows of B to extract (or `NULL`), modified on output 5746 . 
colb - On input index sets of columns of B to extract (or `NULL`), modified on output 5747 - B_seq - the sequential matrix generated 5748 5749 Level: developer 5750 5751 .seealso: `Mat`, `MATMPIAIJ`, `IS`, `MatReuse` 5752 @*/ 5753 PetscErrorCode MatGetBrowsOfAcols(Mat A, Mat B, MatReuse scall, IS *rowb, IS *colb, Mat *B_seq) 5754 { 5755 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5756 PetscInt *idx, i, start, ncols, nzA, nzB, *cmap, imark; 5757 IS isrowb, iscolb; 5758 Mat *bseq = NULL; 5759 5760 PetscFunctionBegin; 5761 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 5762 A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 5763 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols, A, B, 0, 0)); 5764 5765 if (scall == MAT_INITIAL_MATRIX) { 5766 start = A->cmap->rstart; 5767 cmap = a->garray; 5768 nzA = a->A->cmap->n; 5769 nzB = a->B->cmap->n; 5770 PetscCall(PetscMalloc1(nzA + nzB, &idx)); 5771 ncols = 0; 5772 for (i = 0; i < nzB; i++) { /* row < local row index */ 5773 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5774 else break; 5775 } 5776 imark = i; 5777 for (i = 0; i < nzA; i++) idx[ncols++] = start + i; /* local rows */ 5778 for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5779 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &isrowb)); 5780 PetscCall(ISCreateStride(PETSC_COMM_SELF, B->cmap->N, 0, 1, &iscolb)); 5781 } else { 5782 PetscCheck(rowb && colb, PETSC_COMM_SELF, PETSC_ERR_SUP, "IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5783 isrowb = *rowb; 5784 iscolb = *colb; 5785 PetscCall(PetscMalloc1(1, &bseq)); 5786 bseq[0] = *B_seq; 5787 } 5788 PetscCall(MatCreateSubMatrices(B, 1, &isrowb, &iscolb, scall, &bseq)); 5789 *B_seq = bseq[0]; 5790 PetscCall(PetscFree(bseq)); 5791 if (!rowb) { 5792 
PetscCall(ISDestroy(&isrowb)); 5793 } else { 5794 *rowb = isrowb; 5795 } 5796 if (!colb) { 5797 PetscCall(ISDestroy(&iscolb)); 5798 } else { 5799 *colb = iscolb; 5800 } 5801 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols, A, B, 0, 0)); 5802 PetscFunctionReturn(PETSC_SUCCESS); 5803 } 5804 5805 /* 5806 MatGetBrowsOfAoCols_MPIAIJ - Creates a `MATSEQAIJ` matrix by taking rows of B that equal to nonzero columns 5807 of the OFF-DIAGONAL portion of local A 5808 5809 Collective 5810 5811 Input Parameters: 5812 + A,B - the matrices in `MATMPIAIJ` format 5813 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5814 5815 Output Parameter: 5816 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5817 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5818 . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5819 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5820 5821 Developer Note: 5822 This directly accesses information inside the VecScatter associated with the matrix-vector product 5823 for this matrix. This is not desirable.. 
5824 5825 Level: developer 5826 5827 */ 5828 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A, Mat B, MatReuse scall, PetscInt **startsj_s, PetscInt **startsj_r, MatScalar **bufa_ptr, Mat *B_oth) 5829 { 5830 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5831 Mat_SeqAIJ *b_oth; 5832 VecScatter ctx; 5833 MPI_Comm comm; 5834 const PetscMPIInt *rprocs, *sprocs; 5835 const PetscInt *srow, *rstarts, *sstarts; 5836 PetscInt *rowlen, *bufj, *bufJ, ncols = 0, aBn = a->B->cmap->n, row, *b_othi, *b_othj, *rvalues = NULL, *svalues = NULL, *cols, sbs, rbs; 5837 PetscInt i, j, k = 0, l, ll, nrecvs, nsends, nrows, *rstartsj = NULL, *sstartsj, len; 5838 PetscScalar *b_otha, *bufa, *bufA, *vals = NULL; 5839 MPI_Request *reqs = NULL, *rwaits = NULL, *swaits = NULL; 5840 PetscMPIInt size, tag, rank, nreqs; 5841 5842 PetscFunctionBegin; 5843 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 5844 PetscCallMPI(MPI_Comm_size(comm, &size)); 5845 5846 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 5847 A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 5848 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, B, 0, 0)); 5849 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 5850 5851 if (size == 1) { 5852 startsj_s = NULL; 5853 bufa_ptr = NULL; 5854 *B_oth = NULL; 5855 PetscFunctionReturn(PETSC_SUCCESS); 5856 } 5857 5858 ctx = a->Mvctx; 5859 tag = ((PetscObject)ctx)->tag; 5860 5861 PetscCall(VecScatterGetRemote_Private(ctx, PETSC_TRUE /*send*/, &nsends, &sstarts, &srow, &sprocs, &sbs)); 5862 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5863 PetscCall(VecScatterGetRemoteOrdered_Private(ctx, PETSC_FALSE /*recv*/, &nrecvs, &rstarts, NULL /*indices not needed*/, &rprocs, &rbs)); 5864 
PetscCall(PetscMPIIntCast(nsends + nrecvs, &nreqs)); 5865 PetscCall(PetscMalloc1(nreqs, &reqs)); 5866 rwaits = reqs; 5867 swaits = PetscSafePointerPlusOffset(reqs, nrecvs); 5868 5869 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5870 if (scall == MAT_INITIAL_MATRIX) { 5871 /* i-array */ 5872 /* post receives */ 5873 if (nrecvs) PetscCall(PetscMalloc1(rbs * (rstarts[nrecvs] - rstarts[0]), &rvalues)); /* rstarts can be NULL when nrecvs=0 */ 5874 for (i = 0; i < nrecvs; i++) { 5875 rowlen = rvalues + rstarts[i] * rbs; 5876 nrows = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of indices to be received */ 5877 PetscCallMPI(MPI_Irecv(rowlen, nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i)); 5878 } 5879 5880 /* pack the outgoing message */ 5881 PetscCall(PetscMalloc2(nsends + 1, &sstartsj, nrecvs + 1, &rstartsj)); 5882 5883 sstartsj[0] = 0; 5884 rstartsj[0] = 0; 5885 len = 0; /* total length of j or a array to be sent */ 5886 if (nsends) { 5887 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5888 PetscCall(PetscMalloc1(sbs * (sstarts[nsends] - sstarts[0]), &svalues)); 5889 } 5890 for (i = 0; i < nsends; i++) { 5891 rowlen = svalues + (sstarts[i] - sstarts[0]) * sbs; 5892 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5893 for (j = 0; j < nrows; j++) { 5894 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5895 for (l = 0; l < sbs; l++) { 5896 PetscCall(MatGetRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); /* rowlength */ 5897 5898 rowlen[j * sbs + l] = ncols; 5899 5900 len += ncols; 5901 PetscCall(MatRestoreRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); 5902 } 5903 k++; 5904 } 5905 PetscCallMPI(MPI_Isend(rowlen, nrows * sbs, MPIU_INT, sprocs[i], tag, comm, swaits + i)); 5906 5907 sstartsj[i + 1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5908 } 5909 /* recvs and sends of i-array are completed */ 5910 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5911 
PetscCall(PetscFree(svalues)); 5912 5913 /* allocate buffers for sending j and a arrays */ 5914 PetscCall(PetscMalloc1(len + 1, &bufj)); 5915 PetscCall(PetscMalloc1(len + 1, &bufa)); 5916 5917 /* create i-array of B_oth */ 5918 PetscCall(PetscMalloc1(aBn + 2, &b_othi)); 5919 5920 b_othi[0] = 0; 5921 len = 0; /* total length of j or a array to be received */ 5922 k = 0; 5923 for (i = 0; i < nrecvs; i++) { 5924 rowlen = rvalues + (rstarts[i] - rstarts[0]) * rbs; 5925 nrows = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of rows to be received */ 5926 for (j = 0; j < nrows; j++) { 5927 b_othi[k + 1] = b_othi[k] + rowlen[j]; 5928 PetscCall(PetscIntSumError(rowlen[j], len, &len)); 5929 k++; 5930 } 5931 rstartsj[i + 1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5932 } 5933 PetscCall(PetscFree(rvalues)); 5934 5935 /* allocate space for j and a arrays of B_oth */ 5936 PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_othj)); 5937 PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_otha)); 5938 5939 /* j-array */ 5940 /* post receives of j-array */ 5941 for (i = 0; i < nrecvs; i++) { 5942 nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */ 5943 PetscCallMPI(MPI_Irecv(b_othj + rstartsj[i], nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i)); 5944 } 5945 5946 /* pack the outgoing message j-array */ 5947 if (nsends) k = sstarts[0]; 5948 for (i = 0; i < nsends; i++) { 5949 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5950 bufJ = bufj + sstartsj[i]; 5951 for (j = 0; j < nrows; j++) { 5952 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5953 for (ll = 0; ll < sbs; ll++) { 5954 PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL)); 5955 for (l = 0; l < ncols; l++) *bufJ++ = cols[l]; 5956 PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL)); 5957 } 5958 } 5959 PetscCallMPI(MPI_Isend(bufj + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_INT, sprocs[i], tag, comm, swaits + i)); 5960 } 5961 5962 /* recvs 
and sends of j-array are completed */ 5963 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5964 } else if (scall == MAT_REUSE_MATRIX) { 5965 sstartsj = *startsj_s; 5966 rstartsj = *startsj_r; 5967 bufa = *bufa_ptr; 5968 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 5969 PetscCall(MatSeqAIJGetArrayWrite(*B_oth, &b_otha)); 5970 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container"); 5971 5972 /* a-array */ 5973 /* post receives of a-array */ 5974 for (i = 0; i < nrecvs; i++) { 5975 nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */ 5976 PetscCallMPI(MPI_Irecv(b_otha + rstartsj[i], nrows, MPIU_SCALAR, rprocs[i], tag, comm, rwaits + i)); 5977 } 5978 5979 /* pack the outgoing message a-array */ 5980 if (nsends) k = sstarts[0]; 5981 for (i = 0; i < nsends; i++) { 5982 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5983 bufA = bufa + sstartsj[i]; 5984 for (j = 0; j < nrows; j++) { 5985 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5986 for (ll = 0; ll < sbs; ll++) { 5987 PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals)); 5988 for (l = 0; l < ncols; l++) *bufA++ = vals[l]; 5989 PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals)); 5990 } 5991 } 5992 PetscCallMPI(MPI_Isend(bufa + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_SCALAR, sprocs[i], tag, comm, swaits + i)); 5993 } 5994 /* recvs and sends of a-array are completed */ 5995 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5996 PetscCall(PetscFree(reqs)); 5997 5998 if (scall == MAT_INITIAL_MATRIX) { 5999 /* put together the new matrix */ 6000 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, aBn, B->cmap->N, b_othi, b_othj, b_otha, B_oth)); 6001 6002 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 6003 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 6004 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 6005 b_oth->free_a = PETSC_TRUE; 6006 b_oth->free_ij = PETSC_TRUE; 6007 b_oth->nonew = 0; 6008 6009 PetscCall(PetscFree(bufj)); 6010 if (!startsj_s || !bufa_ptr) { 6011 PetscCall(PetscFree2(sstartsj, rstartsj)); 6012 PetscCall(PetscFree(bufa_ptr)); 6013 } else { 6014 *startsj_s = sstartsj; 6015 *startsj_r = rstartsj; 6016 *bufa_ptr = bufa; 6017 } 6018 } else if (scall == MAT_REUSE_MATRIX) { 6019 PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth, &b_otha)); 6020 } 6021 6022 PetscCall(VecScatterRestoreRemote_Private(ctx, PETSC_TRUE, &nsends, &sstarts, &srow, &sprocs, &sbs)); 6023 PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx, PETSC_FALSE, &nrecvs, &rstarts, NULL, &rprocs, &rbs)); 6024 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, B, 0, 0)); 6025 PetscFunctionReturn(PETSC_SUCCESS); 6026 } 6027 6028 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat, MatType, MatReuse, Mat *); 6029 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat, MatType, MatReuse, Mat *); 6030 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat, MatType, MatReuse, Mat *); 6031 #if defined(PETSC_HAVE_MKL_SPARSE) 6032 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat, MatType, MatReuse, Mat *); 6033 #endif 6034 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat, MatType, MatReuse, Mat *); 6035 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *); 6036 #if defined(PETSC_HAVE_ELEMENTAL) 6037 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat, MatType, MatReuse, Mat *); 6038 #endif 6039 #if defined(PETSC_HAVE_SCALAPACK) 6040 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *); 6041 #endif 6042 #if defined(PETSC_HAVE_HYPRE) 6043 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *); 6044 #endif 6045 #if defined(PETSC_HAVE_CUDA) 6046 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat, MatType, 
MatReuse, Mat *); 6047 #endif 6048 #if defined(PETSC_HAVE_HIP) 6049 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJHIPSPARSE(Mat, MatType, MatReuse, Mat *); 6050 #endif 6051 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 6052 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat, MatType, MatReuse, Mat *); 6053 #endif 6054 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat, MatType, MatReuse, Mat *); 6055 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *); 6056 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat); 6057 6058 /* 6059 Computes (B'*A')' since computing B*A directly is untenable 6060 6061 n p p 6062 [ ] [ ] [ ] 6063 m [ A ] * n [ B ] = m [ C ] 6064 [ ] [ ] [ ] 6065 6066 */ 6067 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A, Mat B, Mat C) 6068 { 6069 Mat At, Bt, Ct; 6070 6071 PetscFunctionBegin; 6072 PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &At)); 6073 PetscCall(MatTranspose(B, MAT_INITIAL_MATRIX, &Bt)); 6074 PetscCall(MatMatMult(Bt, At, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &Ct)); 6075 PetscCall(MatDestroy(&At)); 6076 PetscCall(MatDestroy(&Bt)); 6077 PetscCall(MatTransposeSetPrecursor(Ct, C)); 6078 PetscCall(MatTranspose(Ct, MAT_REUSE_MATRIX, &C)); 6079 PetscCall(MatDestroy(&Ct)); 6080 PetscFunctionReturn(PETSC_SUCCESS); 6081 } 6082 6083 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A, Mat B, PetscReal fill, Mat C) 6084 { 6085 PetscBool cisdense; 6086 6087 PetscFunctionBegin; 6088 PetscCheck(A->cmap->n == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT, A->cmap->n, B->rmap->n); 6089 PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N)); 6090 PetscCall(MatSetBlockSizesFromMats(C, A, B)); 6091 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, MATMPIDENSEHIP, "")); 6092 if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 6093 
PetscCall(MatSetUp(C)); 6094 6095 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 6096 PetscFunctionReturn(PETSC_SUCCESS); 6097 } 6098 6099 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C) 6100 { 6101 Mat_Product *product = C->product; 6102 Mat A = product->A, B = product->B; 6103 6104 PetscFunctionBegin; 6105 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 6106 A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 6107 C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ; 6108 C->ops->productsymbolic = MatProductSymbolic_AB; 6109 PetscFunctionReturn(PETSC_SUCCESS); 6110 } 6111 6112 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C) 6113 { 6114 Mat_Product *product = C->product; 6115 6116 PetscFunctionBegin; 6117 if (product->type == MATPRODUCT_AB) PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C)); 6118 PetscFunctionReturn(PETSC_SUCCESS); 6119 } 6120 6121 /* 6122 Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix 6123 6124 Input Parameters: 6125 6126 j1,rowBegin1,rowEnd1,jmap1: describe the first set of nonzeros (Set1) 6127 j2,rowBegin2,rowEnd2,jmap2: describe the second set of nonzeros (Set2) 6128 6129 mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat 6130 6131 For Set1, j1[] contains column indices of the nonzeros. 6132 For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k 6133 respectively (note rowEnd1[k] is not necessarily equal to rwoBegin1[k+1]). Indices in this range of j1[] are sorted, 6134 but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1. 6135 6136 Similar for Set2. 
6137 6138 This routine merges the two sets of nonzeros row by row and removes repeats. 6139 6140 Output Parameters: (memory is allocated by the caller) 6141 6142 i[],j[]: the CSR of the merged matrix, which has m rows. 6143 imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix. 6144 imap2[]: similar to imap1[], but for Set2. 6145 Note we order nonzeros row-by-row and from left to right. 6146 */ 6147 static PetscErrorCode MatMergeEntries_Internal(Mat mat, const PetscInt j1[], const PetscInt j2[], const PetscCount rowBegin1[], const PetscCount rowEnd1[], const PetscCount rowBegin2[], const PetscCount rowEnd2[], const PetscCount jmap1[], const PetscCount jmap2[], PetscCount imap1[], PetscCount imap2[], PetscInt i[], PetscInt j[]) 6148 { 6149 PetscInt r, m; /* Row index of mat */ 6150 PetscCount t, t1, t2, b1, e1, b2, e2; 6151 6152 PetscFunctionBegin; 6153 PetscCall(MatGetLocalSize(mat, &m, NULL)); 6154 t1 = t2 = t = 0; /* Count unique nonzeros of in Set1, Set1 and the merged respectively */ 6155 i[0] = 0; 6156 for (r = 0; r < m; r++) { /* Do row by row merging */ 6157 b1 = rowBegin1[r]; 6158 e1 = rowEnd1[r]; 6159 b2 = rowBegin2[r]; 6160 e2 = rowEnd2[r]; 6161 while (b1 < e1 && b2 < e2) { 6162 if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */ 6163 j[t] = j1[b1]; 6164 imap1[t1] = t; 6165 imap2[t2] = t; 6166 b1 += jmap1[t1 + 1] - jmap1[t1]; /* Jump to next unique local nonzero */ 6167 b2 += jmap2[t2 + 1] - jmap2[t2]; /* Jump to next unique remote nonzero */ 6168 t1++; 6169 t2++; 6170 t++; 6171 } else if (j1[b1] < j2[b2]) { 6172 j[t] = j1[b1]; 6173 imap1[t1] = t; 6174 b1 += jmap1[t1 + 1] - jmap1[t1]; 6175 t1++; 6176 t++; 6177 } else { 6178 j[t] = j2[b2]; 6179 imap2[t2] = t; 6180 b2 += jmap2[t2 + 1] - jmap2[t2]; 6181 t2++; 6182 t++; 6183 } 6184 } 6185 /* Merge the remaining in either j1[] or j2[] */ 6186 while (b1 < e1) { 6187 j[t] = j1[b1]; 6188 imap1[t1] = t; 6189 b1 += jmap1[t1 + 1] - 
jmap1[t1]; 6190 t1++; 6191 t++; 6192 } 6193 while (b2 < e2) { 6194 j[t] = j2[b2]; 6195 imap2[t2] = t; 6196 b2 += jmap2[t2 + 1] - jmap2[t2]; 6197 t2++; 6198 t++; 6199 } 6200 i[r + 1] = t; 6201 } 6202 PetscFunctionReturn(PETSC_SUCCESS); 6203 } 6204 6205 /* 6206 Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block 6207 6208 Input Parameters: 6209 mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m. 6210 n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[] 6211 respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n. 6212 6213 i[] is already sorted, but within a row, j[] is not sorted and might have repeats. 6214 i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting. 6215 6216 Output Parameters: 6217 j[],perm[]: the routine needs to sort j[] within each row along with perm[]. 6218 rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller. 6219 They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block, 6220 and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block. 6221 6222 Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine. 6223 Atot: number of entries belonging to the diagonal block. 6224 Annz: number of unique nonzeros belonging to the diagonal block. 6225 Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count 6226 repeats (i.e., same 'i,j' pair). 6227 Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. 
More precisely, Ajmap[t+1] - Ajmap[t] 6228 is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0. 6229 6230 Atot: number of entries belonging to the diagonal block 6231 Annz: number of unique nonzeros belonging to the diagonal block. 6232 6233 Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block. 6234 6235 Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1(). 6236 */ 6237 static PetscErrorCode MatSplitEntries_Internal(Mat mat, PetscCount n, const PetscInt i[], PetscInt j[], PetscCount perm[], PetscCount rowBegin[], PetscCount rowMid[], PetscCount rowEnd[], PetscCount *Atot_, PetscCount **Aperm_, PetscCount *Annz_, PetscCount **Ajmap_, PetscCount *Btot_, PetscCount **Bperm_, PetscCount *Bnnz_, PetscCount **Bjmap_) 6238 { 6239 PetscInt cstart, cend, rstart, rend, row, col; 6240 PetscCount Atot = 0, Btot = 0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */ 6241 PetscCount Annz = 0, Bnnz = 0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */ 6242 PetscCount k, m, p, q, r, s, mid; 6243 PetscCount *Aperm, *Bperm, *Ajmap, *Bjmap; 6244 6245 PetscFunctionBegin; 6246 PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend)); 6247 PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend)); 6248 m = rend - rstart; 6249 6250 /* Skip negative rows */ 6251 for (k = 0; k < n; k++) 6252 if (i[k] >= 0) break; 6253 6254 /* Process [k,n): sort and partition each local row into diag and offdiag portions, 6255 fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz. 6256 */ 6257 while (k < n) { 6258 row = i[k]; 6259 /* Entries in [k,s) are in one row. 
Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */ 6260 for (s = k; s < n; s++) 6261 if (i[s] != row) break; 6262 6263 /* Shift diag columns to range of [-PETSC_MAX_INT, -1] */ 6264 for (p = k; p < s; p++) { 6265 if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_MAX_INT; 6266 else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index %" PetscInt_FMT " is out of range", j[p]); 6267 } 6268 PetscCall(PetscSortIntWithCountArray(s - k, j + k, perm + k)); 6269 PetscCall(PetscSortedIntUpperBound(j, k, s, -1, &mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */ 6270 rowBegin[row - rstart] = k; 6271 rowMid[row - rstart] = mid; 6272 rowEnd[row - rstart] = s; 6273 6274 /* Count nonzeros of this diag/offdiag row, which might have repeats */ 6275 Atot += mid - k; 6276 Btot += s - mid; 6277 6278 /* Count unique nonzeros of this diag row */ 6279 for (p = k; p < mid;) { 6280 col = j[p]; 6281 do { 6282 j[p] += PETSC_MAX_INT; /* Revert the modified diagonal indices */ 6283 p++; 6284 } while (p < mid && j[p] == col); 6285 Annz++; 6286 } 6287 6288 /* Count unique nonzeros of this offdiag row */ 6289 for (p = mid; p < s;) { 6290 col = j[p]; 6291 do { 6292 p++; 6293 } while (p < s && j[p] == col); 6294 Bnnz++; 6295 } 6296 k = s; 6297 } 6298 6299 /* Allocation according to Atot, Btot, Annz, Bnnz */ 6300 PetscCall(PetscMalloc1(Atot, &Aperm)); 6301 PetscCall(PetscMalloc1(Btot, &Bperm)); 6302 PetscCall(PetscMalloc1(Annz + 1, &Ajmap)); 6303 PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap)); 6304 6305 /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */ 6306 Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0; 6307 for (r = 0; r < m; r++) { 6308 k = rowBegin[r]; 6309 mid = rowMid[r]; 6310 s = rowEnd[r]; 6311 PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Aperm, Atot), PetscSafePointerPlusOffset(perm, k), mid - k)); 6312 
PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Bperm, Btot), PetscSafePointerPlusOffset(perm, mid), s - mid)); 6313 Atot += mid - k; 6314 Btot += s - mid; 6315 6316 /* Scan column indices in this row and find out how many repeats each unique nonzero has */ 6317 for (p = k; p < mid;) { 6318 col = j[p]; 6319 q = p; 6320 do { 6321 p++; 6322 } while (p < mid && j[p] == col); 6323 Ajmap[Annz + 1] = Ajmap[Annz] + (p - q); 6324 Annz++; 6325 } 6326 6327 for (p = mid; p < s;) { 6328 col = j[p]; 6329 q = p; 6330 do { 6331 p++; 6332 } while (p < s && j[p] == col); 6333 Bjmap[Bnnz + 1] = Bjmap[Bnnz] + (p - q); 6334 Bnnz++; 6335 } 6336 } 6337 /* Output */ 6338 *Aperm_ = Aperm; 6339 *Annz_ = Annz; 6340 *Atot_ = Atot; 6341 *Ajmap_ = Ajmap; 6342 *Bperm_ = Bperm; 6343 *Bnnz_ = Bnnz; 6344 *Btot_ = Btot; 6345 *Bjmap_ = Bjmap; 6346 PetscFunctionReturn(PETSC_SUCCESS); 6347 } 6348 6349 /* 6350 Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix 6351 6352 Input Parameters: 6353 nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[] 6354 nnz: number of unique nonzeros in the merged matrix 6355 imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix 6356 jmap[nnz1+1]: i-th nonzero in the set has jmap[i+1] - jmap[i] repeats in the set 6357 6358 Output Parameter: (memory is allocated by the caller) 6359 jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set 6360 6361 Example: 6362 nnz1 = 4 6363 nnz = 6 6364 imap = [1,3,4,5] 6365 jmap = [0,3,5,6,7] 6366 then, 6367 jmap_new = [0,0,3,3,5,6,7] 6368 */ 6369 static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1, PetscCount nnz, const PetscCount imap[], const PetscCount jmap[], PetscCount jmap_new[]) 6370 { 6371 PetscCount k, p; 6372 6373 PetscFunctionBegin; 6374 jmap_new[0] = 0; 6375 p = nnz; /* p loops over jmap_new[] backwards */ 6376 for (k = nnz1 - 1; k >= 0; k--) { /* k loops over imap[] */ 6377 for (; p > 
imap[k]; p--) jmap_new[p] = jmap[k + 1]; 6378 } 6379 for (; p >= 0; p--) jmap_new[p] = jmap[0]; 6380 PetscFunctionReturn(PETSC_SUCCESS); 6381 } 6382 6383 static PetscErrorCode MatCOOStructDestroy_MPIAIJ(void *data) 6384 { 6385 MatCOOStruct_MPIAIJ *coo = (MatCOOStruct_MPIAIJ *)data; 6386 6387 PetscFunctionBegin; 6388 PetscCall(PetscSFDestroy(&coo->sf)); 6389 PetscCall(PetscFree(coo->Aperm1)); 6390 PetscCall(PetscFree(coo->Bperm1)); 6391 PetscCall(PetscFree(coo->Ajmap1)); 6392 PetscCall(PetscFree(coo->Bjmap1)); 6393 PetscCall(PetscFree(coo->Aimap2)); 6394 PetscCall(PetscFree(coo->Bimap2)); 6395 PetscCall(PetscFree(coo->Aperm2)); 6396 PetscCall(PetscFree(coo->Bperm2)); 6397 PetscCall(PetscFree(coo->Ajmap2)); 6398 PetscCall(PetscFree(coo->Bjmap2)); 6399 PetscCall(PetscFree(coo->Cperm1)); 6400 PetscCall(PetscFree2(coo->sendbuf, coo->recvbuf)); 6401 PetscCall(PetscFree(coo)); 6402 PetscFunctionReturn(PETSC_SUCCESS); 6403 } 6404 6405 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, PetscInt coo_i[], PetscInt coo_j[]) 6406 { 6407 MPI_Comm comm; 6408 PetscMPIInt rank, size; 6409 PetscInt m, n, M, N, rstart, rend, cstart, cend; /* Sizes, indices of row/col, therefore with type PetscInt */ 6410 PetscCount k, p, q, rem; /* Loop variables over coo arrays */ 6411 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)mat->data; 6412 PetscContainer container; 6413 MatCOOStruct_MPIAIJ *coo; 6414 6415 PetscFunctionBegin; 6416 PetscCall(PetscFree(mpiaij->garray)); 6417 PetscCall(VecDestroy(&mpiaij->lvec)); 6418 #if defined(PETSC_USE_CTABLE) 6419 PetscCall(PetscHMapIDestroy(&mpiaij->colmap)); 6420 #else 6421 PetscCall(PetscFree(mpiaij->colmap)); 6422 #endif 6423 PetscCall(VecScatterDestroy(&mpiaij->Mvctx)); 6424 mat->assembled = PETSC_FALSE; 6425 mat->was_assembled = PETSC_FALSE; 6426 6427 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 6428 PetscCallMPI(MPI_Comm_size(comm, &size)); 6429 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 6430 PetscCall(PetscLayoutSetUp(mat->rmap)); 
6431 PetscCall(PetscLayoutSetUp(mat->cmap)); 6432 PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend)); 6433 PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend)); 6434 PetscCall(MatGetLocalSize(mat, &m, &n)); 6435 PetscCall(MatGetSize(mat, &M, &N)); 6436 6437 /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */ 6438 /* entries come first, then local rows, then remote rows. */ 6439 PetscCount n1 = coo_n, *perm1; 6440 PetscInt *i1 = coo_i, *j1 = coo_j; 6441 6442 PetscCall(PetscMalloc1(n1, &perm1)); 6443 for (k = 0; k < n1; k++) perm1[k] = k; 6444 6445 /* Manipulate indices so that entries with negative row or col indices will have smallest 6446 row indices, local entries will have greater but negative row indices, and remote entries 6447 will have positive row indices. 6448 */ 6449 for (k = 0; k < n1; k++) { 6450 if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_MIN_INT; /* e.g., -2^31, minimal to move them ahead */ 6451 else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_MAX_INT; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_MAX_INT, -1] */ 6452 else { 6453 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_USER_INPUT, "MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows"); 6454 if (mpiaij->donotstash) i1[k] = PETSC_MIN_INT; /* Ignore offproc entries as if they had negative indices */ 6455 } 6456 } 6457 6458 /* Sort by row; after that, [0,k) have ignored entries, [k,rem) have local rows and [rem,n1) have remote rows */ 6459 PetscCall(PetscSortIntWithIntCountArrayPair(n1, i1, j1, perm1)); 6460 6461 /* Advance k to the first entry we need to take care of */ 6462 for (k = 0; k < n1; k++) 6463 if (i1[k] > PETSC_MIN_INT) break; 6464 PetscInt i1start = k; 6465 6466 PetscCall(PetscSortedIntUpperBound(i1, k, n1, rend - 1 - PETSC_MAX_INT, &rem)); /* rem is upper bound of the last local row */ 6467 for (; k < rem; k++) i1[k] += PETSC_MAX_INT; /* Revert row indices of local rows*/ 6468 6469 /* Send remote rows 
to their owner */ 6470 /* Find which rows should be sent to which remote ranks*/ 6471 PetscInt nsend = 0; /* Number of MPI ranks to send data to */ 6472 PetscMPIInt *sendto; /* [nsend], storing remote ranks */ 6473 PetscInt *nentries; /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */ 6474 const PetscInt *ranges; 6475 PetscInt maxNsend = size >= 128 ? 128 : size; /* Assume max 128 neighbors; realloc when needed */ 6476 6477 PetscCall(PetscLayoutGetRanges(mat->rmap, &ranges)); 6478 PetscCall(PetscMalloc2(maxNsend, &sendto, maxNsend, &nentries)); 6479 for (k = rem; k < n1;) { 6480 PetscMPIInt owner; 6481 PetscInt firstRow, lastRow; 6482 6483 /* Locate a row range */ 6484 firstRow = i1[k]; /* first row of this owner */ 6485 PetscCall(PetscLayoutFindOwner(mat->rmap, firstRow, &owner)); 6486 lastRow = ranges[owner + 1] - 1; /* last row of this owner */ 6487 6488 /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */ 6489 PetscCall(PetscSortedIntUpperBound(i1, k, n1, lastRow, &p)); 6490 6491 /* All entries in [k,p) belong to this remote owner */ 6492 if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */ 6493 PetscMPIInt *sendto2; 6494 PetscInt *nentries2; 6495 PetscInt maxNsend2 = (maxNsend <= size / 2) ? 
maxNsend * 2 : size; 6496 6497 PetscCall(PetscMalloc2(maxNsend2, &sendto2, maxNsend2, &nentries2)); 6498 PetscCall(PetscArraycpy(sendto2, sendto, maxNsend)); 6499 PetscCall(PetscArraycpy(nentries2, nentries2, maxNsend + 1)); 6500 PetscCall(PetscFree2(sendto, nentries2)); 6501 sendto = sendto2; 6502 nentries = nentries2; 6503 maxNsend = maxNsend2; 6504 } 6505 sendto[nsend] = owner; 6506 nentries[nsend] = p - k; 6507 PetscCall(PetscCountCast(p - k, &nentries[nsend])); 6508 nsend++; 6509 k = p; 6510 } 6511 6512 /* Build 1st SF to know offsets on remote to send data */ 6513 PetscSF sf1; 6514 PetscInt nroots = 1, nroots2 = 0; 6515 PetscInt nleaves = nsend, nleaves2 = 0; 6516 PetscInt *offsets; 6517 PetscSFNode *iremote; 6518 6519 PetscCall(PetscSFCreate(comm, &sf1)); 6520 PetscCall(PetscMalloc1(nsend, &iremote)); 6521 PetscCall(PetscMalloc1(nsend, &offsets)); 6522 for (k = 0; k < nsend; k++) { 6523 iremote[k].rank = sendto[k]; 6524 iremote[k].index = 0; 6525 nleaves2 += nentries[k]; 6526 PetscCheck(nleaves2 >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF leaves is too large for PetscInt"); 6527 } 6528 PetscCall(PetscSFSetGraph(sf1, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 6529 PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1, MPIU_INT, PETSC_MEMTYPE_HOST, &nroots2 /*rootdata*/, PETSC_MEMTYPE_HOST, nentries /*leafdata*/, PETSC_MEMTYPE_HOST, offsets /*leafupdate*/, MPI_SUM)); 6530 PetscCall(PetscSFFetchAndOpEnd(sf1, MPIU_INT, &nroots2, nentries, offsets, MPI_SUM)); /* Would nroots2 overflow, we check offsets[] below */ 6531 PetscCall(PetscSFDestroy(&sf1)); 6532 PetscAssert(nleaves2 == n1 - rem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT, nleaves2, n1 - rem); 6533 6534 /* Build 2nd SF to send remote COOs to their owner */ 6535 PetscSF sf2; 6536 nroots = nroots2; 6537 nleaves = nleaves2; 6538 PetscCall(PetscSFCreate(comm, &sf2)); 6539 
PetscCall(PetscSFSetFromOptions(sf2)); 6540 PetscCall(PetscMalloc1(nleaves, &iremote)); 6541 p = 0; 6542 for (k = 0; k < nsend; k++) { 6543 PetscCheck(offsets[k] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF roots is too large for PetscInt"); 6544 for (q = 0; q < nentries[k]; q++, p++) { 6545 iremote[p].rank = sendto[k]; 6546 iremote[p].index = offsets[k] + q; 6547 } 6548 } 6549 PetscCall(PetscSFSetGraph(sf2, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 6550 6551 /* Send the remote COOs to their owner */ 6552 PetscInt n2 = nroots, *i2, *j2; /* Buffers for received COOs from other ranks, along with a permutation array */ 6553 PetscCount *perm2; /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */ 6554 PetscCall(PetscMalloc3(n2, &i2, n2, &j2, n2, &perm2)); 6555 PetscAssert(rem == 0 || i1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null"); 6556 PetscAssert(rem == 0 || j1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null"); 6557 PetscInt *i1prem = i1 ? i1 + rem : NULL; /* silence ubsan warnings about pointer arithmetic on null pointer */ 6558 PetscInt *j1prem = j1 ? 
j1 + rem : NULL; 6559 PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, i1prem, PETSC_MEMTYPE_HOST, i2, MPI_REPLACE)); 6560 PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, i1prem, i2, MPI_REPLACE)); 6561 PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, j1prem, PETSC_MEMTYPE_HOST, j2, MPI_REPLACE)); 6562 PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, j1prem, j2, MPI_REPLACE)); 6563 6564 PetscCall(PetscFree(offsets)); 6565 PetscCall(PetscFree2(sendto, nentries)); 6566 6567 /* Sort received COOs by row along with the permutation array */ 6568 for (k = 0; k < n2; k++) perm2[k] = k; 6569 PetscCall(PetscSortIntWithIntCountArrayPair(n2, i2, j2, perm2)); 6570 6571 /* sf2 only sends contiguous leafdata to contiguous rootdata. We record the permutation which will be used to fill leafdata */ 6572 PetscCount *Cperm1; 6573 PetscAssert(rem == 0 || perm1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null"); 6574 PetscCount *perm1prem = perm1 ? perm1 + rem : NULL; 6575 PetscCall(PetscMalloc1(nleaves, &Cperm1)); 6576 PetscCall(PetscArraycpy(Cperm1, perm1prem, nleaves)); 6577 6578 /* Support for HYPRE matrices, kind of a hack. 
6579 Swap min column with diagonal so that diagonal values will go first */ 6580 PetscBool hypre; 6581 const char *name; 6582 PetscCall(PetscObjectGetName((PetscObject)mat, &name)); 6583 PetscCall(PetscStrcmp("_internal_COO_mat_for_hypre", name, &hypre)); 6584 if (hypre) { 6585 PetscInt *minj; 6586 PetscBT hasdiag; 6587 6588 PetscCall(PetscBTCreate(m, &hasdiag)); 6589 PetscCall(PetscMalloc1(m, &minj)); 6590 for (k = 0; k < m; k++) minj[k] = PETSC_MAX_INT; 6591 for (k = i1start; k < rem; k++) { 6592 if (j1[k] < cstart || j1[k] >= cend) continue; 6593 const PetscInt rindex = i1[k] - rstart; 6594 if ((j1[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex)); 6595 minj[rindex] = PetscMin(minj[rindex], j1[k]); 6596 } 6597 for (k = 0; k < n2; k++) { 6598 if (j2[k] < cstart || j2[k] >= cend) continue; 6599 const PetscInt rindex = i2[k] - rstart; 6600 if ((j2[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex)); 6601 minj[rindex] = PetscMin(minj[rindex], j2[k]); 6602 } 6603 for (k = i1start; k < rem; k++) { 6604 const PetscInt rindex = i1[k] - rstart; 6605 if (j1[k] < cstart || j1[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue; 6606 if (j1[k] == minj[rindex]) j1[k] = i1[k] + (cstart - rstart); 6607 else if ((j1[k] - cstart) == rindex) j1[k] = minj[rindex]; 6608 } 6609 for (k = 0; k < n2; k++) { 6610 const PetscInt rindex = i2[k] - rstart; 6611 if (j2[k] < cstart || j2[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue; 6612 if (j2[k] == minj[rindex]) j2[k] = i2[k] + (cstart - rstart); 6613 else if ((j2[k] - cstart) == rindex) j2[k] = minj[rindex]; 6614 } 6615 PetscCall(PetscBTDestroy(&hasdiag)); 6616 PetscCall(PetscFree(minj)); 6617 } 6618 6619 /* Split local COOs and received COOs into diag/offdiag portions */ 6620 PetscCount *rowBegin1, *rowMid1, *rowEnd1; 6621 PetscCount *Ajmap1, *Aperm1, *Bjmap1, *Bperm1; 6622 PetscCount Annz1, Bnnz1, Atot1, Btot1; 6623 PetscCount *rowBegin2, *rowMid2, *rowEnd2; 6624 PetscCount *Ajmap2, *Aperm2, 
*Bjmap2, *Bperm2; 6625 PetscCount Annz2, Bnnz2, Atot2, Btot2; 6626 6627 PetscCall(PetscCalloc3(m, &rowBegin1, m, &rowMid1, m, &rowEnd1)); 6628 PetscCall(PetscCalloc3(m, &rowBegin2, m, &rowMid2, m, &rowEnd2)); 6629 PetscCall(MatSplitEntries_Internal(mat, rem, i1, j1, perm1, rowBegin1, rowMid1, rowEnd1, &Atot1, &Aperm1, &Annz1, &Ajmap1, &Btot1, &Bperm1, &Bnnz1, &Bjmap1)); 6630 PetscCall(MatSplitEntries_Internal(mat, n2, i2, j2, perm2, rowBegin2, rowMid2, rowEnd2, &Atot2, &Aperm2, &Annz2, &Ajmap2, &Btot2, &Bperm2, &Bnnz2, &Bjmap2)); 6631 6632 /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */ 6633 PetscInt *Ai, *Bi; 6634 PetscInt *Aj, *Bj; 6635 6636 PetscCall(PetscMalloc1(m + 1, &Ai)); 6637 PetscCall(PetscMalloc1(m + 1, &Bi)); 6638 PetscCall(PetscMalloc1(Annz1 + Annz2, &Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */ 6639 PetscCall(PetscMalloc1(Bnnz1 + Bnnz2, &Bj)); 6640 6641 PetscCount *Aimap1, *Bimap1, *Aimap2, *Bimap2; 6642 PetscCall(PetscMalloc1(Annz1, &Aimap1)); 6643 PetscCall(PetscMalloc1(Bnnz1, &Bimap1)); 6644 PetscCall(PetscMalloc1(Annz2, &Aimap2)); 6645 PetscCall(PetscMalloc1(Bnnz2, &Bimap2)); 6646 6647 PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowBegin1, rowMid1, rowBegin2, rowMid2, Ajmap1, Ajmap2, Aimap1, Aimap2, Ai, Aj)); 6648 PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowMid1, rowEnd1, rowMid2, rowEnd2, Bjmap1, Bjmap2, Bimap1, Bimap2, Bi, Bj)); 6649 6650 /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we */ 6651 /* expect nonzeros in A/B most likely have local contributing entries */ 6652 PetscInt Annz = Ai[m]; 6653 PetscInt Bnnz = Bi[m]; 6654 PetscCount *Ajmap1_new, *Bjmap1_new; 6655 6656 PetscCall(PetscMalloc1(Annz + 1, &Ajmap1_new)); 6657 PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap1_new)); 6658 6659 PetscCall(ExpandJmap_Internal(Annz1, Annz, Aimap1, Ajmap1, Ajmap1_new)); 6660 PetscCall(ExpandJmap_Internal(Bnnz1, Bnnz, Bimap1, Bjmap1, Bjmap1_new)); 6661 
6662 PetscCall(PetscFree(Aimap1)); 6663 PetscCall(PetscFree(Ajmap1)); 6664 PetscCall(PetscFree(Bimap1)); 6665 PetscCall(PetscFree(Bjmap1)); 6666 PetscCall(PetscFree3(rowBegin1, rowMid1, rowEnd1)); 6667 PetscCall(PetscFree3(rowBegin2, rowMid2, rowEnd2)); 6668 PetscCall(PetscFree(perm1)); 6669 PetscCall(PetscFree3(i2, j2, perm2)); 6670 6671 Ajmap1 = Ajmap1_new; 6672 Bjmap1 = Bjmap1_new; 6673 6674 /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */ 6675 if (Annz < Annz1 + Annz2) { 6676 PetscInt *Aj_new; 6677 PetscCall(PetscMalloc1(Annz, &Aj_new)); 6678 PetscCall(PetscArraycpy(Aj_new, Aj, Annz)); 6679 PetscCall(PetscFree(Aj)); 6680 Aj = Aj_new; 6681 } 6682 6683 if (Bnnz < Bnnz1 + Bnnz2) { 6684 PetscInt *Bj_new; 6685 PetscCall(PetscMalloc1(Bnnz, &Bj_new)); 6686 PetscCall(PetscArraycpy(Bj_new, Bj, Bnnz)); 6687 PetscCall(PetscFree(Bj)); 6688 Bj = Bj_new; 6689 } 6690 6691 /* Create new submatrices for on-process and off-process coupling */ 6692 PetscScalar *Aa, *Ba; 6693 MatType rtype; 6694 Mat_SeqAIJ *a, *b; 6695 PetscObjectState state; 6696 PetscCall(PetscCalloc1(Annz, &Aa)); /* Zero matrix on device */ 6697 PetscCall(PetscCalloc1(Bnnz, &Ba)); 6698 /* make Aj[] local, i.e, based off the start column of the diagonal portion */ 6699 if (cstart) { 6700 for (k = 0; k < Annz; k++) Aj[k] -= cstart; 6701 } 6702 6703 PetscCall(MatGetRootType_Private(mat, &rtype)); 6704 6705 MatSeqXAIJGetOptions_Private(mpiaij->A); 6706 PetscCall(MatDestroy(&mpiaij->A)); 6707 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, Ai, Aj, Aa, &mpiaij->A)); 6708 PetscCall(MatSetBlockSizesFromMats(mpiaij->A, mat, mat)); 6709 MatSeqXAIJRestoreOptions_Private(mpiaij->A); 6710 6711 MatSeqXAIJGetOptions_Private(mpiaij->B); 6712 PetscCall(MatDestroy(&mpiaij->B)); 6713 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, mat->cmap->N, Bi, Bj, Ba, &mpiaij->B)); 6714 PetscCall(MatSetBlockSizesFromMats(mpiaij->B, mat, mat)); 6715 
MatSeqXAIJRestoreOptions_Private(mpiaij->B); 6716 6717 PetscCall(MatSetUpMultiply_MPIAIJ(mat)); 6718 mat->was_assembled = PETSC_TRUE; // was_assembled in effect means the Mvctx is built; doing so avoids redundant MatSetUpMultiply_MPIAIJ 6719 state = mpiaij->A->nonzerostate + mpiaij->B->nonzerostate; 6720 PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat))); 6721 6722 a = (Mat_SeqAIJ *)mpiaij->A->data; 6723 b = (Mat_SeqAIJ *)mpiaij->B->data; 6724 a->singlemalloc = b->singlemalloc = PETSC_FALSE; /* Let newmat own Ai,Aj,Aa,Bi,Bj,Ba */ 6725 a->free_a = b->free_a = PETSC_TRUE; 6726 a->free_ij = b->free_ij = PETSC_TRUE; 6727 6728 /* conversion must happen AFTER multiply setup */ 6729 PetscCall(MatConvert(mpiaij->A, rtype, MAT_INPLACE_MATRIX, &mpiaij->A)); 6730 PetscCall(MatConvert(mpiaij->B, rtype, MAT_INPLACE_MATRIX, &mpiaij->B)); 6731 PetscCall(VecDestroy(&mpiaij->lvec)); 6732 PetscCall(MatCreateVecs(mpiaij->B, &mpiaij->lvec, NULL)); 6733 6734 // Put the COO struct in a container and then attach that to the matrix 6735 PetscCall(PetscMalloc1(1, &coo)); 6736 coo->n = coo_n; 6737 coo->sf = sf2; 6738 coo->sendlen = nleaves; 6739 coo->recvlen = nroots; 6740 coo->Annz = Annz; 6741 coo->Bnnz = Bnnz; 6742 coo->Annz2 = Annz2; 6743 coo->Bnnz2 = Bnnz2; 6744 coo->Atot1 = Atot1; 6745 coo->Atot2 = Atot2; 6746 coo->Btot1 = Btot1; 6747 coo->Btot2 = Btot2; 6748 coo->Ajmap1 = Ajmap1; 6749 coo->Aperm1 = Aperm1; 6750 coo->Bjmap1 = Bjmap1; 6751 coo->Bperm1 = Bperm1; 6752 coo->Aimap2 = Aimap2; 6753 coo->Ajmap2 = Ajmap2; 6754 coo->Aperm2 = Aperm2; 6755 coo->Bimap2 = Bimap2; 6756 coo->Bjmap2 = Bjmap2; 6757 coo->Bperm2 = Bperm2; 6758 coo->Cperm1 = Cperm1; 6759 // Allocate in preallocation. 
If not used, it has zero cost on host 6760 PetscCall(PetscMalloc2(coo->sendlen, &coo->sendbuf, coo->recvlen, &coo->recvbuf)); 6761 PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container)); 6762 PetscCall(PetscContainerSetPointer(container, coo)); 6763 PetscCall(PetscContainerSetUserDestroy(container, MatCOOStructDestroy_MPIAIJ)); 6764 PetscCall(PetscObjectCompose((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject)container)); 6765 PetscCall(PetscContainerDestroy(&container)); 6766 PetscFunctionReturn(PETSC_SUCCESS); 6767 } 6768 6769 static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat, const PetscScalar v[], InsertMode imode) 6770 { 6771 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)mat->data; 6772 Mat A = mpiaij->A, B = mpiaij->B; 6773 PetscScalar *Aa, *Ba; 6774 PetscScalar *sendbuf, *recvbuf; 6775 const PetscCount *Ajmap1, *Ajmap2, *Aimap2; 6776 const PetscCount *Bjmap1, *Bjmap2, *Bimap2; 6777 const PetscCount *Aperm1, *Aperm2, *Bperm1, *Bperm2; 6778 const PetscCount *Cperm1; 6779 PetscContainer container; 6780 MatCOOStruct_MPIAIJ *coo; 6781 6782 PetscFunctionBegin; 6783 PetscCall(PetscObjectQuery((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject *)&container)); 6784 PetscCheck(container, PetscObjectComm((PetscObject)mat), PETSC_ERR_PLIB, "Not found MatCOOStruct on this matrix"); 6785 PetscCall(PetscContainerGetPointer(container, (void **)&coo)); 6786 sendbuf = coo->sendbuf; 6787 recvbuf = coo->recvbuf; 6788 Ajmap1 = coo->Ajmap1; 6789 Ajmap2 = coo->Ajmap2; 6790 Aimap2 = coo->Aimap2; 6791 Bjmap1 = coo->Bjmap1; 6792 Bjmap2 = coo->Bjmap2; 6793 Bimap2 = coo->Bimap2; 6794 Aperm1 = coo->Aperm1; 6795 Aperm2 = coo->Aperm2; 6796 Bperm1 = coo->Bperm1; 6797 Bperm2 = coo->Bperm2; 6798 Cperm1 = coo->Cperm1; 6799 6800 PetscCall(MatSeqAIJGetArray(A, &Aa)); /* Might read and write matrix values */ 6801 PetscCall(MatSeqAIJGetArray(B, &Ba)); 6802 6803 /* Pack entries to be sent to remote */ 6804 for (PetscCount i = 0; i < coo->sendlen; i++) sendbuf[i] = v[Cperm1[i]]; 
6805 6806 /* Send remote entries to their owner and overlap the communication with local computation */ 6807 PetscCall(PetscSFReduceWithMemTypeBegin(coo->sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, sendbuf, PETSC_MEMTYPE_HOST, recvbuf, MPI_REPLACE)); 6808 /* Add local entries to A and B */ 6809 for (PetscCount i = 0; i < coo->Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */ 6810 PetscScalar sum = 0.0; /* Do partial summation first to improve numerical stability */ 6811 for (PetscCount k = Ajmap1[i]; k < Ajmap1[i + 1]; k++) sum += v[Aperm1[k]]; 6812 Aa[i] = (imode == INSERT_VALUES ? 0.0 : Aa[i]) + sum; 6813 } 6814 for (PetscCount i = 0; i < coo->Bnnz; i++) { 6815 PetscScalar sum = 0.0; 6816 for (PetscCount k = Bjmap1[i]; k < Bjmap1[i + 1]; k++) sum += v[Bperm1[k]]; 6817 Ba[i] = (imode == INSERT_VALUES ? 0.0 : Ba[i]) + sum; 6818 } 6819 PetscCall(PetscSFReduceEnd(coo->sf, MPIU_SCALAR, sendbuf, recvbuf, MPI_REPLACE)); 6820 6821 /* Add received remote entries to A and B */ 6822 for (PetscCount i = 0; i < coo->Annz2; i++) { 6823 for (PetscCount k = Ajmap2[i]; k < Ajmap2[i + 1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]]; 6824 } 6825 for (PetscCount i = 0; i < coo->Bnnz2; i++) { 6826 for (PetscCount k = Bjmap2[i]; k < Bjmap2[i + 1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]]; 6827 } 6828 PetscCall(MatSeqAIJRestoreArray(A, &Aa)); 6829 PetscCall(MatSeqAIJRestoreArray(B, &Ba)); 6830 PetscFunctionReturn(PETSC_SUCCESS); 6831 } 6832 6833 /*MC 6834 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 6835 6836 Options Database Keys: 6837 . 
-mat_type mpiaij - sets the matrix type to `MATMPIAIJ` during a call to `MatSetFromOptions()` 6838 6839 Level: beginner 6840 6841 Notes: 6842 `MatSetValues()` may be called for this matrix type with a `NULL` argument for the numerical values, 6843 in this case the values associated with the rows and columns one passes in are set to zero 6844 in the matrix 6845 6846 `MatSetOptions`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this no 6847 space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored 6848 6849 .seealso: [](ch_matrices), `Mat`, `MATSEQAIJ`, `MATAIJ`, `MatCreateAIJ()` 6850 M*/ 6851 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 6852 { 6853 Mat_MPIAIJ *b; 6854 PetscMPIInt size; 6855 6856 PetscFunctionBegin; 6857 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 6858 6859 PetscCall(PetscNew(&b)); 6860 B->data = (void *)b; 6861 B->ops[0] = MatOps_Values; 6862 B->assembled = PETSC_FALSE; 6863 B->insertmode = NOT_SET_VALUES; 6864 b->size = size; 6865 6866 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank)); 6867 6868 /* build cache for off array entries formed */ 6869 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash)); 6870 6871 b->donotstash = PETSC_FALSE; 6872 b->colmap = NULL; 6873 b->garray = NULL; 6874 b->roworiented = PETSC_TRUE; 6875 6876 /* stuff used for matrix vector multiply */ 6877 b->lvec = NULL; 6878 b->Mvctx = NULL; 6879 6880 /* stuff for MatGetRow() */ 6881 b->rowindices = NULL; 6882 b->rowvalues = NULL; 6883 b->getrowactive = PETSC_FALSE; 6884 6885 /* flexible pointer used in CUSPARSE classes */ 6886 b->spptr = NULL; 6887 6888 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetUseScalableIncreaseOverlap_C", MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ)); 6889 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIAIJ)); 6890 
PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIAIJ)); 6891 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_MPIAIJ)); 6892 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocation_C", MatMPIAIJSetPreallocation_MPIAIJ)); 6893 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetPreallocation_C", MatResetPreallocation_MPIAIJ)); 6894 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocationCSR_C", MatMPIAIJSetPreallocationCSR_MPIAIJ)); 6895 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIAIJ)); 6896 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijperm_C", MatConvert_MPIAIJ_MPIAIJPERM)); 6897 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijsell_C", MatConvert_MPIAIJ_MPIAIJSELL)); 6898 #if defined(PETSC_HAVE_CUDA) 6899 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcusparse_C", MatConvert_MPIAIJ_MPIAIJCUSPARSE)); 6900 #endif 6901 #if defined(PETSC_HAVE_HIP) 6902 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijhipsparse_C", MatConvert_MPIAIJ_MPIAIJHIPSPARSE)); 6903 #endif 6904 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 6905 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijkokkos_C", MatConvert_MPIAIJ_MPIAIJKokkos)); 6906 #endif 6907 #if defined(PETSC_HAVE_MKL_SPARSE) 6908 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijmkl_C", MatConvert_MPIAIJ_MPIAIJMKL)); 6909 #endif 6910 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcrl_C", MatConvert_MPIAIJ_MPIAIJCRL)); 6911 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpibaij_C", MatConvert_MPIAIJ_MPIBAIJ)); 6912 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisbaij_C", 
MatConvert_MPIAIJ_MPISBAIJ)); 6913 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense)); 6914 #if defined(PETSC_HAVE_ELEMENTAL) 6915 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_elemental_C", MatConvert_MPIAIJ_Elemental)); 6916 #endif 6917 #if defined(PETSC_HAVE_SCALAPACK) 6918 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_scalapack_C", MatConvert_AIJ_ScaLAPACK)); 6919 #endif 6920 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_is_C", MatConvert_XAIJ_IS)); 6921 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisell_C", MatConvert_MPIAIJ_MPISELL)); 6922 #if defined(PETSC_HAVE_HYPRE) 6923 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_hypre_C", MatConvert_AIJ_HYPRE)); 6924 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", MatProductSetFromOptions_Transpose_AIJ_AIJ)); 6925 #endif 6926 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_is_mpiaij_C", MatProductSetFromOptions_IS_XAIJ)); 6927 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpiaij_C", MatProductSetFromOptions_MPIAIJ)); 6928 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetPreallocationCOO_C", MatSetPreallocationCOO_MPIAIJ)); 6929 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetValuesCOO_C", MatSetValuesCOO_MPIAIJ)); 6930 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIAIJ)); 6931 PetscFunctionReturn(PETSC_SUCCESS); 6932 } 6933 6934 /*@C 6935 MatCreateMPIAIJWithSplitArrays - creates a `MATMPIAIJ` matrix using arrays that contain the "diagonal" 6936 and "off-diagonal" part of the matrix in CSR format. 6937 6938 Collective 6939 6940 Input Parameters: 6941 + comm - MPI communicator 6942 . m - number of local rows (Cannot be `PETSC_DECIDE`) 6943 . 
n - This value should be the same as the local size used in creating the 6944 x vector for the matrix-vector product $y = Ax$. (or `PETSC_DECIDE` to have 6945 calculated if `N` is given) For square matrices `n` is almost always `m`. 6946 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given) 6947 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given) 6948 . i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 6949 . j - column indices, which must be local, i.e., based off the start column of the diagonal portion 6950 . a - matrix values 6951 . oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix 6952 . oj - column indices, which must be global, representing global columns in the `MATMPIAIJ` matrix 6953 - oa - matrix values 6954 6955 Output Parameter: 6956 . mat - the matrix 6957 6958 Level: advanced 6959 6960 Notes: 6961 The `i`, `j`, and `a` arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 6962 must free the arrays once the matrix has been destroyed and not before. 6963 6964 The `i` and `j` indices are 0 based 6965 6966 See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix 6967 6968 This sets local rows and cannot be used to set off-processor values. 6969 6970 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 6971 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 6972 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 6973 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 6974 keep track of the underlying array. 
Use `MatSetOption`(A,`MAT_NO_OFF_PROC_ENTRIES`,`PETSC_TRUE`) to disable all 6975 communication if it is known that only local entries will be set. 6976 6977 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 6978 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()` 6979 @*/ 6980 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt i[], PetscInt j[], PetscScalar a[], PetscInt oi[], PetscInt oj[], PetscScalar oa[], Mat *mat) 6981 { 6982 Mat_MPIAIJ *maij; 6983 6984 PetscFunctionBegin; 6985 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 6986 PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 6987 PetscCheck(oi[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "oi (row indices) must start with 0"); 6988 PetscCall(MatCreate(comm, mat)); 6989 PetscCall(MatSetSizes(*mat, m, n, M, N)); 6990 PetscCall(MatSetType(*mat, MATMPIAIJ)); 6991 maij = (Mat_MPIAIJ *)(*mat)->data; 6992 6993 (*mat)->preallocated = PETSC_TRUE; 6994 6995 PetscCall(PetscLayoutSetUp((*mat)->rmap)); 6996 PetscCall(PetscLayoutSetUp((*mat)->cmap)); 6997 6998 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, i, j, a, &maij->A)); 6999 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, (*mat)->cmap->N, oi, oj, oa, &maij->B)); 7000 7001 PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 7002 PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY)); 7003 PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY)); 7004 PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE)); 7005 PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 7006 PetscFunctionReturn(PETSC_SUCCESS); 7007 } 7008 7009 typedef struct { 7010 Mat *mp; /* intermediate products */ 7011 PetscBool 
*mptmp; /* is the intermediate product temporary ? */ 7012 PetscInt cp; /* number of intermediate products */ 7013 7014 /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */ 7015 PetscInt *startsj_s, *startsj_r; 7016 PetscScalar *bufa; 7017 Mat P_oth; 7018 7019 /* may take advantage of merging product->B */ 7020 Mat Bloc; /* B-local by merging diag and off-diag */ 7021 7022 /* cusparse does not have support to split between symbolic and numeric phases. 7023 When api_user is true, we don't need to update the numerical values 7024 of the temporary storage */ 7025 PetscBool reusesym; 7026 7027 /* support for COO values insertion */ 7028 PetscScalar *coo_v, *coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */ 7029 PetscInt **own; /* own[i] points to address of on-process COO indices for Mat mp[i] */ 7030 PetscInt **off; /* off[i] points to address of off-process COO indices for Mat mp[i] */ 7031 PetscBool hasoffproc; /* if true, have off-process values insertion (i.e. 
AtB or PtAP) */ 7032 PetscSF sf; /* used for non-local values insertion and memory malloc */ 7033 PetscMemType mtype; 7034 7035 /* customization */ 7036 PetscBool abmerge; 7037 PetscBool P_oth_bind; 7038 } MatMatMPIAIJBACKEND; 7039 7040 static PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data) 7041 { 7042 MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND *)data; 7043 PetscInt i; 7044 7045 PetscFunctionBegin; 7046 PetscCall(PetscFree2(mmdata->startsj_s, mmdata->startsj_r)); 7047 PetscCall(PetscFree(mmdata->bufa)); 7048 PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_v)); 7049 PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_w)); 7050 PetscCall(MatDestroy(&mmdata->P_oth)); 7051 PetscCall(MatDestroy(&mmdata->Bloc)); 7052 PetscCall(PetscSFDestroy(&mmdata->sf)); 7053 for (i = 0; i < mmdata->cp; i++) PetscCall(MatDestroy(&mmdata->mp[i])); 7054 PetscCall(PetscFree2(mmdata->mp, mmdata->mptmp)); 7055 PetscCall(PetscFree(mmdata->own[0])); 7056 PetscCall(PetscFree(mmdata->own)); 7057 PetscCall(PetscFree(mmdata->off[0])); 7058 PetscCall(PetscFree(mmdata->off)); 7059 PetscCall(PetscFree(mmdata)); 7060 PetscFunctionReturn(PETSC_SUCCESS); 7061 } 7062 7063 /* Copy selected n entries with indices in idx[] of A to v[]. 
7064 If idx is NULL, copy the whole data array of A to v[] 7065 */ 7066 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[]) 7067 { 7068 PetscErrorCode (*f)(Mat, PetscInt, const PetscInt[], PetscScalar[]); 7069 7070 PetscFunctionBegin; 7071 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJCopySubArray_C", &f)); 7072 if (f) { 7073 PetscCall((*f)(A, n, idx, v)); 7074 } else { 7075 const PetscScalar *vv; 7076 7077 PetscCall(MatSeqAIJGetArrayRead(A, &vv)); 7078 if (n && idx) { 7079 PetscScalar *w = v; 7080 const PetscInt *oi = idx; 7081 PetscInt j; 7082 7083 for (j = 0; j < n; j++) *w++ = vv[*oi++]; 7084 } else { 7085 PetscCall(PetscArraycpy(v, vv, n)); 7086 } 7087 PetscCall(MatSeqAIJRestoreArrayRead(A, &vv)); 7088 } 7089 PetscFunctionReturn(PETSC_SUCCESS); 7090 } 7091 7092 static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C) 7093 { 7094 MatMatMPIAIJBACKEND *mmdata; 7095 PetscInt i, n_d, n_o; 7096 7097 PetscFunctionBegin; 7098 MatCheckProduct(C, 1); 7099 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 7100 mmdata = (MatMatMPIAIJBACKEND *)C->product->data; 7101 if (!mmdata->reusesym) { /* update temporary matrices */ 7102 if (mmdata->P_oth) PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A, C->product->B, MAT_REUSE_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7103 if (mmdata->Bloc) PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B, MAT_REUSE_MATRIX, NULL, &mmdata->Bloc)); 7104 } 7105 mmdata->reusesym = PETSC_FALSE; 7106 7107 for (i = 0; i < mmdata->cp; i++) { 7108 PetscCheck(mmdata->mp[i]->ops->productnumeric, PetscObjectComm((PetscObject)mmdata->mp[i]), PETSC_ERR_PLIB, "Missing numeric op for %s", MatProductTypes[mmdata->mp[i]->product->type]); 7109 PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i])); 7110 } 7111 for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) { 7112 PetscInt noff = mmdata->off[i + 
1] - mmdata->off[i]; 7113 7114 if (mmdata->mptmp[i]) continue; 7115 if (noff) { 7116 PetscInt nown = mmdata->own[i + 1] - mmdata->own[i]; 7117 7118 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], noff, mmdata->off[i], mmdata->coo_w + n_o)); 7119 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], nown, mmdata->own[i], mmdata->coo_v + n_d)); 7120 n_o += noff; 7121 n_d += nown; 7122 } else { 7123 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mmdata->mp[i]->data; 7124 7125 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], mm->nz, NULL, mmdata->coo_v + n_d)); 7126 n_d += mm->nz; 7127 } 7128 } 7129 if (mmdata->hasoffproc) { /* offprocess insertion */ 7130 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d)); 7131 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d)); 7132 } 7133 PetscCall(MatSetValuesCOO(C, mmdata->coo_v, INSERT_VALUES)); 7134 PetscFunctionReturn(PETSC_SUCCESS); 7135 } 7136 7137 /* Support for Pt * A, A * P, or Pt * A * P */ 7138 #define MAX_NUMBER_INTERMEDIATE 4 7139 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C) 7140 { 7141 Mat_Product *product = C->product; 7142 Mat A, P, mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */ 7143 Mat_MPIAIJ *a, *p; 7144 MatMatMPIAIJBACKEND *mmdata; 7145 ISLocalToGlobalMapping P_oth_l2g = NULL; 7146 IS glob = NULL; 7147 const char *prefix; 7148 char pprefix[256]; 7149 const PetscInt *globidx, *P_oth_idx; 7150 PetscInt i, j, cp, m, n, M, N, *coo_i, *coo_j; 7151 PetscCount ncoo, ncoo_d, ncoo_o, ncoo_oown; 7152 PetscInt cmapt[MAX_NUMBER_INTERMEDIATE], rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. 
*/ 7153 /* type-0: consecutive, start from 0; type-1: consecutive with */ 7154 /* a base offset; type-2: sparse with a local to global map table */ 7155 const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE], *rmapa[MAX_NUMBER_INTERMEDIATE]; /* col/row local to global map array (table) for type-2 map type */ 7156 7157 MatProductType ptype; 7158 PetscBool mptmp[MAX_NUMBER_INTERMEDIATE], hasoffproc = PETSC_FALSE, iscuda, iship, iskokk; 7159 PetscMPIInt size; 7160 7161 PetscFunctionBegin; 7162 MatCheckProduct(C, 1); 7163 PetscCheck(!product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 7164 ptype = product->type; 7165 if (product->A->symmetric == PETSC_BOOL3_TRUE && ptype == MATPRODUCT_AtB) { 7166 ptype = MATPRODUCT_AB; 7167 product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE; 7168 } 7169 switch (ptype) { 7170 case MATPRODUCT_AB: 7171 A = product->A; 7172 P = product->B; 7173 m = A->rmap->n; 7174 n = P->cmap->n; 7175 M = A->rmap->N; 7176 N = P->cmap->N; 7177 hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */ 7178 break; 7179 case MATPRODUCT_AtB: 7180 P = product->A; 7181 A = product->B; 7182 m = P->cmap->n; 7183 n = A->cmap->n; 7184 M = P->cmap->N; 7185 N = A->cmap->N; 7186 hasoffproc = PETSC_TRUE; 7187 break; 7188 case MATPRODUCT_PtAP: 7189 A = product->A; 7190 P = product->B; 7191 m = P->cmap->n; 7192 n = P->cmap->n; 7193 M = P->cmap->N; 7194 N = P->cmap->N; 7195 hasoffproc = PETSC_TRUE; 7196 break; 7197 default: 7198 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]); 7199 } 7200 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C), &size)); 7201 if (size == 1) hasoffproc = PETSC_FALSE; 7202 7203 /* defaults */ 7204 for (i = 0; i < MAX_NUMBER_INTERMEDIATE; i++) { 7205 mp[i] = NULL; 7206 mptmp[i] = PETSC_FALSE; 7207 rmapt[i] = -1; 7208 cmapt[i] = -1; 7209 rmapa[i] = NULL; 7210 cmapa[i] = NULL; 7211 } 7212 7213 /* customization */ 
7214 PetscCall(PetscNew(&mmdata)); 7215 mmdata->reusesym = product->api_user; 7216 if (ptype == MATPRODUCT_AB) { 7217 if (product->api_user) { 7218 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatMult", "Mat"); 7219 PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL)); 7220 PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7221 PetscOptionsEnd(); 7222 } else { 7223 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat"); 7224 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL)); 7225 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7226 PetscOptionsEnd(); 7227 } 7228 } else if (ptype == MATPRODUCT_PtAP) { 7229 if (product->api_user) { 7230 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatPtAP", "Mat"); 7231 PetscCall(PetscOptionsBool("-matptap_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7232 PetscOptionsEnd(); 7233 } else { 7234 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat"); 7235 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7236 PetscOptionsEnd(); 7237 } 7238 } 7239 a = (Mat_MPIAIJ *)A->data; 7240 p = (Mat_MPIAIJ *)P->data; 7241 PetscCall(MatSetSizes(C, m, n, M, N)); 7242 PetscCall(PetscLayoutSetUp(C->rmap)); 7243 PetscCall(PetscLayoutSetUp(C->cmap)); 7244 PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 7245 
PetscCall(MatGetOptionsPrefix(C, &prefix)); 7246 7247 cp = 0; 7248 switch (ptype) { 7249 case MATPRODUCT_AB: /* A * P */ 7250 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7251 7252 /* A_diag * P_local (merged or not) */ 7253 if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */ 7254 /* P is product->B */ 7255 PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7256 PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp])); 7257 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7258 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7259 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7260 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7261 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7262 mp[cp]->product->api_user = product->api_user; 7263 PetscCall(MatProductSetFromOptions(mp[cp])); 7264 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7265 PetscCall(ISGetIndices(glob, &globidx)); 7266 rmapt[cp] = 1; 7267 cmapt[cp] = 2; 7268 cmapa[cp] = globidx; 7269 mptmp[cp] = PETSC_FALSE; 7270 cp++; 7271 } else { /* A_diag * P_diag and A_diag * P_off */ 7272 PetscCall(MatProductCreate(a->A, p->A, NULL, &mp[cp])); 7273 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7274 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7275 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7276 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7277 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7278 mp[cp]->product->api_user = product->api_user; 7279 PetscCall(MatProductSetFromOptions(mp[cp])); 7280 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7281 rmapt[cp] = 1; 7282 cmapt[cp] = 1; 7283 mptmp[cp] = PETSC_FALSE; 7284 cp++; 7285 PetscCall(MatProductCreate(a->A, p->B, NULL, &mp[cp])); 7286 
PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7287 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7288 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7289 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7290 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7291 mp[cp]->product->api_user = product->api_user; 7292 PetscCall(MatProductSetFromOptions(mp[cp])); 7293 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7294 rmapt[cp] = 1; 7295 cmapt[cp] = 2; 7296 cmapa[cp] = p->garray; 7297 mptmp[cp] = PETSC_FALSE; 7298 cp++; 7299 } 7300 7301 /* A_off * P_other */ 7302 if (mmdata->P_oth) { 7303 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); /* make P_oth use local col ids */ 7304 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx)); 7305 PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)a->B)->type_name)); 7306 PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind)); 7307 PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp])); 7308 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7309 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7310 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7311 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7312 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7313 mp[cp]->product->api_user = product->api_user; 7314 PetscCall(MatProductSetFromOptions(mp[cp])); 7315 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7316 rmapt[cp] = 1; 7317 cmapt[cp] = 2; 7318 cmapa[cp] = P_oth_idx; 7319 mptmp[cp] = PETSC_FALSE; 7320 cp++; 7321 } 7322 break; 7323 7324 case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */ 7325 /* A is product->B */ 7326 PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7327 if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */ 7328 PetscCall(MatProductCreate(mmdata->Bloc, mmdata->Bloc, 
NULL, &mp[cp])); 7329 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7330 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7331 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7332 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7333 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7334 mp[cp]->product->api_user = product->api_user; 7335 PetscCall(MatProductSetFromOptions(mp[cp])); 7336 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7337 PetscCall(ISGetIndices(glob, &globidx)); 7338 rmapt[cp] = 2; 7339 rmapa[cp] = globidx; 7340 cmapt[cp] = 2; 7341 cmapa[cp] = globidx; 7342 mptmp[cp] = PETSC_FALSE; 7343 cp++; 7344 } else { 7345 PetscCall(MatProductCreate(p->A, mmdata->Bloc, NULL, &mp[cp])); 7346 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7347 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7348 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7349 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7350 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7351 mp[cp]->product->api_user = product->api_user; 7352 PetscCall(MatProductSetFromOptions(mp[cp])); 7353 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7354 PetscCall(ISGetIndices(glob, &globidx)); 7355 rmapt[cp] = 1; 7356 cmapt[cp] = 2; 7357 cmapa[cp] = globidx; 7358 mptmp[cp] = PETSC_FALSE; 7359 cp++; 7360 PetscCall(MatProductCreate(p->B, mmdata->Bloc, NULL, &mp[cp])); 7361 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7362 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7363 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7364 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7365 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7366 mp[cp]->product->api_user = product->api_user; 7367 PetscCall(MatProductSetFromOptions(mp[cp])); 7368 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7369 rmapt[cp] = 2; 7370 rmapa[cp] = p->garray; 7371 cmapt[cp] = 
2; 7372 cmapa[cp] = globidx; 7373 mptmp[cp] = PETSC_FALSE; 7374 cp++; 7375 } 7376 break; 7377 case MATPRODUCT_PtAP: 7378 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7379 /* P is product->B */ 7380 PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7381 PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp])); 7382 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_PtAP)); 7383 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7384 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7385 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7386 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7387 mp[cp]->product->api_user = product->api_user; 7388 PetscCall(MatProductSetFromOptions(mp[cp])); 7389 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7390 PetscCall(ISGetIndices(glob, &globidx)); 7391 rmapt[cp] = 2; 7392 rmapa[cp] = globidx; 7393 cmapt[cp] = 2; 7394 cmapa[cp] = globidx; 7395 mptmp[cp] = PETSC_FALSE; 7396 cp++; 7397 if (mmdata->P_oth) { 7398 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); 7399 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx)); 7400 PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)a->B)->type_name)); 7401 PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind)); 7402 PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp])); 7403 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7404 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7405 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7406 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7407 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7408 mp[cp]->product->api_user = product->api_user; 7409 PetscCall(MatProductSetFromOptions(mp[cp])); 7410 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7411 mptmp[cp] = PETSC_TRUE; 
7412 cp++; 7413 PetscCall(MatProductCreate(mmdata->Bloc, mp[1], NULL, &mp[cp])); 7414 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7415 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7416 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7417 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7418 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7419 mp[cp]->product->api_user = product->api_user; 7420 PetscCall(MatProductSetFromOptions(mp[cp])); 7421 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7422 rmapt[cp] = 2; 7423 rmapa[cp] = globidx; 7424 cmapt[cp] = 2; 7425 cmapa[cp] = P_oth_idx; 7426 mptmp[cp] = PETSC_FALSE; 7427 cp++; 7428 } 7429 break; 7430 default: 7431 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]); 7432 } 7433 /* sanity check */ 7434 if (size > 1) 7435 for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected offproc map type for product %" PetscInt_FMT, i); 7436 7437 PetscCall(PetscMalloc2(cp, &mmdata->mp, cp, &mmdata->mptmp)); 7438 for (i = 0; i < cp; i++) { 7439 mmdata->mp[i] = mp[i]; 7440 mmdata->mptmp[i] = mptmp[i]; 7441 } 7442 mmdata->cp = cp; 7443 C->product->data = mmdata; 7444 C->product->destroy = MatDestroy_MatMatMPIAIJBACKEND; 7445 C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND; 7446 7447 /* memory type */ 7448 mmdata->mtype = PETSC_MEMTYPE_HOST; 7449 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iscuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, "")); 7450 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iship, MATSEQAIJHIPSPARSE, MATMPIAIJHIPSPARSE, "")); 7451 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iskokk, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, "")); 7452 if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA; 7453 else if (iship) mmdata->mtype = PETSC_MEMTYPE_HIP; 7454 else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS; 7455 7456 /* prepare coo 
coordinates for values insertion */ 7457 7458 /* count total nonzeros of those intermediate seqaij Mats 7459 ncoo_d: # of nonzeros of matrices that do not have offproc entries 7460 ncoo_o: # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs 7461 ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally 7462 */ 7463 for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) { 7464 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7465 if (mptmp[cp]) continue; 7466 if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scatter to all processes (might include self) */ 7467 const PetscInt *rmap = rmapa[cp]; 7468 const PetscInt mr = mp[cp]->rmap->n; 7469 const PetscInt rs = C->rmap->rstart; 7470 const PetscInt re = C->rmap->rend; 7471 const PetscInt *ii = mm->i; 7472 for (i = 0; i < mr; i++) { 7473 const PetscInt gr = rmap[i]; 7474 const PetscInt nz = ii[i + 1] - ii[i]; 7475 if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */ 7476 else ncoo_oown += nz; /* this row is local */ 7477 } 7478 } else ncoo_d += mm->nz; 7479 } 7480 7481 /* 7482 ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc 7483 7484 ncoo = ncoo_d + ncoo_oown + ncoo2, which ncoo2 is number of nonzeros inserted to me by other procs. 7485 7486 off[0] points to a big index array, which is shared by off[1,2,...]. Similarly, for own[0]. 7487 7488 off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others 7489 own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally 7490 so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others. 7491 7492 coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc. 7493 Ex. 
coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaining part stores i of nonzeros I will receive. 7494 */ 7495 PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->off)); /* +1 to make a csr-like data structure */ 7496 PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->own)); 7497 7498 /* gather (i,j) of nonzeros inserted by remote procs */ 7499 if (hasoffproc) { 7500 PetscSF msf; 7501 PetscInt ncoo2, *coo_i2, *coo_j2; 7502 7503 PetscCall(PetscMalloc1(ncoo_o, &mmdata->off[0])); 7504 PetscCall(PetscMalloc1(ncoo_oown, &mmdata->own[0])); 7505 PetscCall(PetscMalloc2(ncoo_o, &coo_i, ncoo_o, &coo_j)); /* to collect (i,j) of entries to be sent to others */ 7506 7507 for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) { 7508 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7509 PetscInt *idxoff = mmdata->off[cp]; 7510 PetscInt *idxown = mmdata->own[cp]; 7511 if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */ 7512 const PetscInt *rmap = rmapa[cp]; 7513 const PetscInt *cmap = cmapa[cp]; 7514 const PetscInt *ii = mm->i; 7515 PetscInt *coi = coo_i + ncoo_o; 7516 PetscInt *coj = coo_j + ncoo_o; 7517 const PetscInt mr = mp[cp]->rmap->n; 7518 const PetscInt rs = C->rmap->rstart; 7519 const PetscInt re = C->rmap->rend; 7520 const PetscInt cs = C->cmap->rstart; 7521 for (i = 0; i < mr; i++) { 7522 const PetscInt *jj = mm->j + ii[i]; 7523 const PetscInt gr = rmap[i]; 7524 const PetscInt nz = ii[i + 1] - ii[i]; 7525 if (gr < rs || gr >= re) { /* this is an offproc row */ 7526 for (j = ii[i]; j < ii[i + 1]; j++) { 7527 *coi++ = gr; 7528 *idxoff++ = j; 7529 } 7530 if (!cmapt[cp]) { /* already global */ 7531 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7532 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7533 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7534 } else { /* offdiag */ 7535 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7536 } 7537 ncoo_o += nz; 7538 } else { /* this is a local row */ 7539 for (j = ii[i]; j < 
ii[i + 1]; j++) *idxown++ = j; 7540 } 7541 } 7542 } 7543 mmdata->off[cp + 1] = idxoff; 7544 mmdata->own[cp + 1] = idxown; 7545 } 7546 7547 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf)); 7548 PetscCall(PetscSFSetGraphLayout(mmdata->sf, C->rmap, ncoo_o /*nleaves*/, NULL /*ilocal*/, PETSC_OWN_POINTER, coo_i)); 7549 PetscCall(PetscSFGetMultiSF(mmdata->sf, &msf)); 7550 PetscCall(PetscSFGetGraph(msf, &ncoo2 /*nroots*/, NULL, NULL, NULL)); 7551 ncoo = ncoo_d + ncoo_oown + ncoo2; 7552 PetscCall(PetscMalloc2(ncoo, &coo_i2, ncoo, &coo_j2)); 7553 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */ 7554 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); 7555 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown)); 7556 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown)); 7557 PetscCall(PetscFree2(coo_i, coo_j)); 7558 /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */ 7559 PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo_o * sizeof(PetscScalar), (void **)&mmdata->coo_w)); 7560 coo_i = coo_i2; 7561 coo_j = coo_j2; 7562 } else { /* no offproc values insertion */ 7563 ncoo = ncoo_d; 7564 PetscCall(PetscMalloc2(ncoo, &coo_i, ncoo, &coo_j)); 7565 7566 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf)); 7567 PetscCall(PetscSFSetGraph(mmdata->sf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER)); 7568 PetscCall(PetscSFSetUp(mmdata->sf)); 7569 } 7570 mmdata->hasoffproc = hasoffproc; 7571 7572 /* gather (i,j) of nonzeros inserted locally */ 7573 for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) { 7574 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7575 PetscInt *coi = coo_i + ncoo_d; 7576 PetscInt *coj = coo_j + ncoo_d; 7577 const PetscInt *jj = mm->j; 7578 const PetscInt *ii = mm->i; 7579 const PetscInt *cmap = 
cmapa[cp]; 7580 const PetscInt *rmap = rmapa[cp]; 7581 const PetscInt mr = mp[cp]->rmap->n; 7582 const PetscInt rs = C->rmap->rstart; 7583 const PetscInt re = C->rmap->rend; 7584 const PetscInt cs = C->cmap->rstart; 7585 7586 if (mptmp[cp]) continue; 7587 if (rmapt[cp] == 1) { /* consecutive rows */ 7588 /* fill coo_i */ 7589 for (i = 0; i < mr; i++) { 7590 const PetscInt gr = i + rs; 7591 for (j = ii[i]; j < ii[i + 1]; j++) coi[j] = gr; 7592 } 7593 /* fill coo_j */ 7594 if (!cmapt[cp]) { /* type-0, already global */ 7595 PetscCall(PetscArraycpy(coj, jj, mm->nz)); 7596 } else if (cmapt[cp] == 1) { /* type-1, local to global for consecutive columns of C */ 7597 for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */ 7598 } else { /* type-2, local to global for sparse columns */ 7599 for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]]; 7600 } 7601 ncoo_d += mm->nz; 7602 } else if (rmapt[cp] == 2) { /* sparse rows */ 7603 for (i = 0; i < mr; i++) { 7604 const PetscInt *jj = mm->j + ii[i]; 7605 const PetscInt gr = rmap[i]; 7606 const PetscInt nz = ii[i + 1] - ii[i]; 7607 if (gr >= rs && gr < re) { /* local rows */ 7608 for (j = ii[i]; j < ii[i + 1]; j++) *coi++ = gr; 7609 if (!cmapt[cp]) { /* type-0, already global */ 7610 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7611 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7612 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7613 } else { /* type-2, local to global for sparse columns */ 7614 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7615 } 7616 ncoo_d += nz; 7617 } 7618 } 7619 } 7620 } 7621 if (glob) PetscCall(ISRestoreIndices(glob, &globidx)); 7622 PetscCall(ISDestroy(&glob)); 7623 if (P_oth_l2g) PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g, &P_oth_idx)); 7624 PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g)); 7625 /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */ 7626 PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, 
ncoo * sizeof(PetscScalar), (void **)&mmdata->coo_v)); 7627 7628 /* preallocate with COO data */ 7629 PetscCall(MatSetPreallocationCOO(C, ncoo, coo_i, coo_j)); 7630 PetscCall(PetscFree2(coo_i, coo_j)); 7631 PetscFunctionReturn(PETSC_SUCCESS); 7632 } 7633 7634 PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat) 7635 { 7636 Mat_Product *product = mat->product; 7637 #if defined(PETSC_HAVE_DEVICE) 7638 PetscBool match = PETSC_FALSE; 7639 PetscBool usecpu = PETSC_FALSE; 7640 #else 7641 PetscBool match = PETSC_TRUE; 7642 #endif 7643 7644 PetscFunctionBegin; 7645 MatCheckProduct(mat, 1); 7646 #if defined(PETSC_HAVE_DEVICE) 7647 if (!product->A->boundtocpu && !product->B->boundtocpu) PetscCall(PetscObjectTypeCompare((PetscObject)product->B, ((PetscObject)product->A)->type_name, &match)); 7648 if (match) { /* we can always fallback to the CPU if requested */ 7649 switch (product->type) { 7650 case MATPRODUCT_AB: 7651 if (product->api_user) { 7652 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatMatMult", "Mat"); 7653 PetscCall(PetscOptionsBool("-matmatmult_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL)); 7654 PetscOptionsEnd(); 7655 } else { 7656 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AB", "Mat"); 7657 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL)); 7658 PetscOptionsEnd(); 7659 } 7660 break; 7661 case MATPRODUCT_AtB: 7662 if (product->api_user) { 7663 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatTransposeMatMult", "Mat"); 7664 PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL)); 7665 PetscOptionsEnd(); 7666 } else { 7667 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AtB", "Mat"); 7668 
PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL)); 7669 PetscOptionsEnd(); 7670 } 7671 break; 7672 case MATPRODUCT_PtAP: 7673 if (product->api_user) { 7674 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatPtAP", "Mat"); 7675 PetscCall(PetscOptionsBool("-matptap_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL)); 7676 PetscOptionsEnd(); 7677 } else { 7678 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_PtAP", "Mat"); 7679 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL)); 7680 PetscOptionsEnd(); 7681 } 7682 break; 7683 default: 7684 break; 7685 } 7686 match = (PetscBool)!usecpu; 7687 } 7688 #endif 7689 if (match) { 7690 switch (product->type) { 7691 case MATPRODUCT_AB: 7692 case MATPRODUCT_AtB: 7693 case MATPRODUCT_PtAP: 7694 mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND; 7695 break; 7696 default: 7697 break; 7698 } 7699 } 7700 /* fallback to MPIAIJ ops */ 7701 if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat)); 7702 PetscFunctionReturn(PETSC_SUCCESS); 7703 } 7704 7705 /* 7706 Produces a set of block column indices of the matrix row, one for each block represented in the original row 7707 7708 n - the number of block indices in cc[] 7709 cc - the block indices (must be large enough to contain the indices) 7710 */ 7711 static inline PetscErrorCode MatCollapseRow(Mat Amat, PetscInt row, PetscInt bs, PetscInt *n, PetscInt *cc) 7712 { 7713 PetscInt cnt = -1, nidx, j; 7714 const PetscInt *idx; 7715 7716 PetscFunctionBegin; 7717 PetscCall(MatGetRow(Amat, row, &nidx, &idx, NULL)); 7718 if (nidx) { 7719 cnt = 0; 7720 cc[cnt] = idx[0] / bs; 7721 for (j = 1; j < nidx; j++) { 7722 if (cc[cnt] < idx[j] / bs) cc[++cnt] = idx[j] / bs; 7723 } 7724 } 7725 PetscCall(MatRestoreRow(Amat, row, &nidx, &idx, 
NULL)); 7726 *n = cnt + 1; 7727 PetscFunctionReturn(PETSC_SUCCESS); 7728 } 7729 7730 /* 7731 Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows 7732 7733 ncollapsed - the number of block indices 7734 collapsed - the block indices (must be large enough to contain the indices) 7735 */ 7736 static inline PetscErrorCode MatCollapseRows(Mat Amat, PetscInt start, PetscInt bs, PetscInt *w0, PetscInt *w1, PetscInt *w2, PetscInt *ncollapsed, PetscInt **collapsed) 7737 { 7738 PetscInt i, nprev, *cprev = w0, ncur = 0, *ccur = w1, *merged = w2, *cprevtmp; 7739 7740 PetscFunctionBegin; 7741 PetscCall(MatCollapseRow(Amat, start, bs, &nprev, cprev)); 7742 for (i = start + 1; i < start + bs; i++) { 7743 PetscCall(MatCollapseRow(Amat, i, bs, &ncur, ccur)); 7744 PetscCall(PetscMergeIntArray(nprev, cprev, ncur, ccur, &nprev, &merged)); 7745 cprevtmp = cprev; 7746 cprev = merged; 7747 merged = cprevtmp; 7748 } 7749 *ncollapsed = nprev; 7750 if (collapsed) *collapsed = cprev; 7751 PetscFunctionReturn(PETSC_SUCCESS); 7752 } 7753 7754 /* 7755 MatCreateGraph_Simple_AIJ - create simple scalar matrix (graph) from potentially blocked matrix 7756 7757 Input Parameter: 7758 . Amat - matrix 7759 - symmetrize - make the result symmetric 7760 + scale - scale with diagonal 7761 7762 Output Parameter: 7763 . 
a_Gmat - output scalar graph >= 0 7764 7765 */ 7766 PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, PetscReal filter, PetscInt index_size, PetscInt index[], Mat *a_Gmat) 7767 { 7768 PetscInt Istart, Iend, Ii, jj, kk, ncols, nloc, NN, MM, bs; 7769 MPI_Comm comm; 7770 Mat Gmat; 7771 PetscBool ismpiaij, isseqaij; 7772 Mat a, b, c; 7773 MatType jtype; 7774 7775 PetscFunctionBegin; 7776 PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm)); 7777 PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend)); 7778 PetscCall(MatGetSize(Amat, &MM, &NN)); 7779 PetscCall(MatGetBlockSize(Amat, &bs)); 7780 nloc = (Iend - Istart) / bs; 7781 7782 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATSEQAIJ, &isseqaij)); 7783 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATMPIAIJ, &ismpiaij)); 7784 PetscCheck(isseqaij || ismpiaij, comm, PETSC_ERR_USER, "Require (MPI)AIJ matrix type"); 7785 7786 /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */ 7787 /* A solution consists in providing a new API, MatAIJGetCollapsedAIJ, and each class can provide a fast 7788 implementation */ 7789 if (bs > 1) { 7790 PetscCall(MatGetType(Amat, &jtype)); 7791 PetscCall(MatCreate(comm, &Gmat)); 7792 PetscCall(MatSetType(Gmat, jtype)); 7793 PetscCall(MatSetSizes(Gmat, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE)); 7794 PetscCall(MatSetBlockSizes(Gmat, 1, 1)); 7795 if (isseqaij || ((Mat_MPIAIJ *)Amat->data)->garray) { 7796 PetscInt *d_nnz, *o_nnz; 7797 MatScalar *aa, val, *AA; 7798 PetscInt *aj, *ai, *AJ, nc, nmax = 0; 7799 if (isseqaij) { 7800 a = Amat; 7801 b = NULL; 7802 } else { 7803 Mat_MPIAIJ *d = (Mat_MPIAIJ *)Amat->data; 7804 a = d->A; 7805 b = d->B; 7806 } 7807 PetscCall(PetscInfo(Amat, "New bs>1 Graph. nloc=%" PetscInt_FMT "\n", nloc)); 7808 PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 
0 : nloc, &o_nnz)); 7809 for (c = a, kk = 0; c && kk < 2; c = b, kk++) { 7810 PetscInt *nnz = (c == a) ? d_nnz : o_nnz; 7811 const PetscInt *cols1, *cols2; 7812 for (PetscInt brow = 0, nc1, nc2, ok = 1; brow < nloc * bs; brow += bs) { // block rows 7813 PetscCall(MatGetRow(c, brow, &nc2, &cols2, NULL)); 7814 nnz[brow / bs] = nc2 / bs; 7815 if (nc2 % bs) ok = 0; 7816 if (nnz[brow / bs] > nmax) nmax = nnz[brow / bs]; 7817 for (PetscInt ii = 1; ii < bs; ii++) { // check for non-dense blocks 7818 PetscCall(MatGetRow(c, brow + ii, &nc1, &cols1, NULL)); 7819 if (nc1 != nc2) ok = 0; 7820 else { 7821 for (PetscInt jj = 0; jj < nc1 && ok == 1; jj++) { 7822 if (cols1[jj] != cols2[jj]) ok = 0; 7823 if (cols1[jj] % bs != jj % bs) ok = 0; 7824 } 7825 } 7826 PetscCall(MatRestoreRow(c, brow + ii, &nc1, &cols1, NULL)); 7827 } 7828 PetscCall(MatRestoreRow(c, brow, &nc2, &cols2, NULL)); 7829 if (!ok) { 7830 PetscCall(PetscFree2(d_nnz, o_nnz)); 7831 PetscCall(PetscInfo(Amat, "Found sparse blocks - revert to slow method\n")); 7832 goto old_bs; 7833 } 7834 } 7835 } 7836 PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz)); 7837 PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz)); 7838 PetscCall(PetscFree2(d_nnz, o_nnz)); 7839 PetscCall(PetscMalloc2(nmax, &AA, nmax, &AJ)); 7840 // diag 7841 for (PetscInt brow = 0, n, grow; brow < nloc * bs; brow += bs) { // block rows 7842 Mat_SeqAIJ *aseq = (Mat_SeqAIJ *)a->data; 7843 ai = aseq->i; 7844 n = ai[brow + 1] - ai[brow]; 7845 aj = aseq->j + ai[brow]; 7846 for (int k = 0; k < n; k += bs) { // block columns 7847 AJ[k / bs] = aj[k] / bs + Istart / bs; // diag starts at (Istart,Istart) 7848 val = 0; 7849 if (index_size == 0) { 7850 for (int ii = 0; ii < bs; ii++) { // rows in block 7851 aa = aseq->a + ai[brow + ii] + k; 7852 for (int jj = 0; jj < bs; jj++) { // columns in block 7853 val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm 7854 } 7855 } 7856 } else { // use (index,index) value if provided 7857 for (int iii = 0; iii < 
index_size; iii++) { // rows in block 7858 int ii = index[iii]; 7859 aa = aseq->a + ai[brow + ii] + k; 7860 for (int jjj = 0; jjj < index_size; jjj++) { // columns in block 7861 int jj = index[jjj]; 7862 val += PetscAbs(PetscRealPart(aa[jj])); 7863 } 7864 } 7865 } 7866 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%d) >= nmax (%d)", (int)(k / bs), (int)nmax); 7867 AA[k / bs] = val; 7868 } 7869 grow = Istart / bs + brow / bs; 7870 PetscCall(MatSetValues(Gmat, 1, &grow, n / bs, AJ, AA, INSERT_VALUES)); 7871 } 7872 // off-diag 7873 if (ismpiaij) { 7874 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Amat->data; 7875 const PetscScalar *vals; 7876 const PetscInt *cols, *garray = aij->garray; 7877 PetscCheck(garray, PETSC_COMM_SELF, PETSC_ERR_USER, "No garray ?"); 7878 for (PetscInt brow = 0, grow; brow < nloc * bs; brow += bs) { // block rows 7879 PetscCall(MatGetRow(b, brow, &ncols, &cols, NULL)); 7880 for (int k = 0, cidx = 0; k < ncols; k += bs, cidx++) { 7881 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs >= nmax"); 7882 AA[k / bs] = 0; 7883 AJ[cidx] = garray[cols[k]] / bs; 7884 } 7885 nc = ncols / bs; 7886 PetscCall(MatRestoreRow(b, brow, &ncols, &cols, NULL)); 7887 if (index_size == 0) { 7888 for (int ii = 0; ii < bs; ii++) { // rows in block 7889 PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals)); 7890 for (int k = 0; k < ncols; k += bs) { 7891 for (int jj = 0; jj < bs; jj++) { // cols in block 7892 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%d) >= nmax (%d)", (int)(k / bs), (int)nmax); 7893 AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj])); 7894 } 7895 } 7896 PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals)); 7897 } 7898 } else { // use (index,index) value if provided 7899 for (int iii = 0; iii < index_size; iii++) { // rows in block 7900 int ii = index[iii]; 7901 PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals)); 7902 for (int k = 0; k < ncols; k += bs) { 7903 for (int jjj = 0; jjj < index_size; jjj++) { // cols in 
block 7904 int jj = index[jjj]; 7905 AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj])); 7906 } 7907 } 7908 PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals)); 7909 } 7910 } 7911 grow = Istart / bs + brow / bs; 7912 PetscCall(MatSetValues(Gmat, 1, &grow, nc, AJ, AA, INSERT_VALUES)); 7913 } 7914 } 7915 PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY)); 7916 PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY)); 7917 PetscCall(PetscFree2(AA, AJ)); 7918 } else { 7919 const PetscScalar *vals; 7920 const PetscInt *idx; 7921 PetscInt *d_nnz, *o_nnz, *w0, *w1, *w2; 7922 old_bs: 7923 /* 7924 Determine the preallocation needed for the scalar matrix derived from the vector matrix. 7925 */ 7926 PetscCall(PetscInfo(Amat, "OLD bs>1 CreateGraph\n")); 7927 PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 0 : nloc, &o_nnz)); 7928 if (isseqaij) { 7929 PetscInt max_d_nnz; 7930 /* 7931 Determine exact preallocation count for (sequential) scalar matrix 7932 */ 7933 PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat, &max_d_nnz)); 7934 max_d_nnz = PetscMin(nloc, bs * max_d_nnz); 7935 PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2)); 7936 for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) PetscCall(MatCollapseRows(Amat, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL)); 7937 PetscCall(PetscFree3(w0, w1, w2)); 7938 } else if (ismpiaij) { 7939 Mat Daij, Oaij; 7940 const PetscInt *garray; 7941 PetscInt max_d_nnz; 7942 PetscCall(MatMPIAIJGetSeqAIJ(Amat, &Daij, &Oaij, &garray)); 7943 /* 7944 Determine exact preallocation count for diagonal block portion of scalar matrix 7945 */ 7946 PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij, &max_d_nnz)); 7947 max_d_nnz = PetscMin(nloc, bs * max_d_nnz); 7948 PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2)); 7949 for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) PetscCall(MatCollapseRows(Daij, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL)); 7950 PetscCall(PetscFree3(w0, w1, w2)); 7951 /* 7952 Over estimate (usually grossly 
over), preallocation count for off-diagonal portion of scalar matrix 7953 */ 7954 for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) { 7955 o_nnz[jj] = 0; 7956 for (kk = 0; kk < bs; kk++) { /* rows that get collapsed to a single row */ 7957 PetscCall(MatGetRow(Oaij, Ii + kk, &ncols, NULL, NULL)); 7958 o_nnz[jj] += ncols; 7959 PetscCall(MatRestoreRow(Oaij, Ii + kk, &ncols, NULL, NULL)); 7960 } 7961 if (o_nnz[jj] > (NN / bs - nloc)) o_nnz[jj] = NN / bs - nloc; 7962 } 7963 } else SETERRQ(comm, PETSC_ERR_USER, "Require AIJ matrix type"); 7964 /* get scalar copy (norms) of matrix */ 7965 PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz)); 7966 PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz)); 7967 PetscCall(PetscFree2(d_nnz, o_nnz)); 7968 for (Ii = Istart; Ii < Iend; Ii++) { 7969 PetscInt dest_row = Ii / bs; 7970 PetscCall(MatGetRow(Amat, Ii, &ncols, &idx, &vals)); 7971 for (jj = 0; jj < ncols; jj++) { 7972 PetscInt dest_col = idx[jj] / bs; 7973 PetscScalar sv = PetscAbs(PetscRealPart(vals[jj])); 7974 PetscCall(MatSetValues(Gmat, 1, &dest_row, 1, &dest_col, &sv, ADD_VALUES)); 7975 } 7976 PetscCall(MatRestoreRow(Amat, Ii, &ncols, &idx, &vals)); 7977 } 7978 PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY)); 7979 PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY)); 7980 } 7981 } else { 7982 if (symmetrize || filter >= 0 || scale) PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat)); 7983 else { 7984 Gmat = Amat; 7985 PetscCall(PetscObjectReference((PetscObject)Gmat)); 7986 } 7987 if (isseqaij) { 7988 a = Gmat; 7989 b = NULL; 7990 } else { 7991 Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data; 7992 a = d->A; 7993 b = d->B; 7994 } 7995 if (filter >= 0 || scale) { 7996 /* take absolute value of each entry */ 7997 for (c = a, kk = 0; c && kk < 2; c = b, kk++) { 7998 MatInfo info; 7999 PetscScalar *avals; 8000 PetscCall(MatGetInfo(c, MAT_LOCAL, &info)); 8001 PetscCall(MatSeqAIJGetArray(c, &avals)); 8002 for (int jj = 0; jj < info.nz_used; jj++) avals[jj] = 
PetscAbsScalar(avals[jj]); 8003 PetscCall(MatSeqAIJRestoreArray(c, &avals)); 8004 } 8005 } 8006 } 8007 if (symmetrize) { 8008 PetscBool isset, issym; 8009 PetscCall(MatIsSymmetricKnown(Amat, &isset, &issym)); 8010 if (!isset || !issym) { 8011 Mat matTrans; 8012 PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans)); 8013 PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric == PETSC_BOOL3_TRUE ? SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN)); 8014 PetscCall(MatDestroy(&matTrans)); 8015 } 8016 PetscCall(MatSetOption(Gmat, MAT_SYMMETRIC, PETSC_TRUE)); 8017 } else if (Amat != Gmat) PetscCall(MatPropagateSymmetryOptions(Amat, Gmat)); 8018 if (scale) { 8019 /* scale c for all diagonal values = 1 or -1 */ 8020 Vec diag; 8021 PetscCall(MatCreateVecs(Gmat, &diag, NULL)); 8022 PetscCall(MatGetDiagonal(Gmat, diag)); 8023 PetscCall(VecReciprocal(diag)); 8024 PetscCall(VecSqrtAbs(diag)); 8025 PetscCall(MatDiagonalScale(Gmat, diag, diag)); 8026 PetscCall(VecDestroy(&diag)); 8027 } 8028 PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view")); 8029 8030 if (filter >= 0) { 8031 PetscCall(MatFilter(Gmat, filter, PETSC_TRUE, PETSC_TRUE)); 8032 PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_filter_graph_view")); 8033 } 8034 *a_Gmat = Gmat; 8035 PetscFunctionReturn(PETSC_SUCCESS); 8036 } 8037 8038 /* 8039 Special version for direct calls from Fortran 8040 */ 8041 #include <petsc/private/fortranimpl.h> 8042 8043 /* Change these macros so can be used in void function */ 8044 /* Identical to PetscCallVoid, except it assigns to *_ierr */ 8045 #undef PetscCall 8046 #define PetscCall(...) \ 8047 do { \ 8048 PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \ 8049 if (PetscUnlikely(ierr_msv_mpiaij)) { \ 8050 *_ierr = PetscError(PETSC_COMM_SELF, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr_msv_mpiaij, PETSC_ERROR_REPEAT, " "); \ 8051 return; \ 8052 } \ 8053 } while (0) 8054 8055 #undef SETERRQ 8056 #define SETERRQ(comm, ierr, ...) 
\ 8057 do { \ 8058 *_ierr = PetscError(comm, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr, PETSC_ERROR_INITIAL, __VA_ARGS__); \ 8059 return; \ 8060 } while (0) 8061 8062 #if defined(PETSC_HAVE_FORTRAN_CAPS) 8063 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 8064 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 8065 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 8066 #else 8067 #endif 8068 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat, PetscInt *mm, const PetscInt im[], PetscInt *mn, const PetscInt in[], const PetscScalar v[], InsertMode *maddv, PetscErrorCode *_ierr) 8069 { 8070 Mat mat = *mmat; 8071 PetscInt m = *mm, n = *mn; 8072 InsertMode addv = *maddv; 8073 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 8074 PetscScalar value; 8075 8076 MatCheckPreallocated(mat, 1); 8077 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 8078 else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values"); 8079 { 8080 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend; 8081 PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col; 8082 PetscBool roworiented = aij->roworiented; 8083 8084 /* Some Variables required in the macro */ 8085 Mat A = aij->A; 8086 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 8087 PetscInt *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j; 8088 MatScalar *aa; 8089 PetscBool ignorezeroentries = ((a->ignorezeroentries && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 8090 Mat B = aij->B; 8091 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 8092 PetscInt *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n; 8093 MatScalar *ba; 8094 /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we 8095 * cannot use "#if defined" inside a macro. 
*/ 8096 PETSC_UNUSED PetscBool inserted = PETSC_FALSE; 8097 8098 PetscInt *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2; 8099 PetscInt nonew = a->nonew; 8100 MatScalar *ap1, *ap2; 8101 8102 PetscFunctionBegin; 8103 PetscCall(MatSeqAIJGetArray(A, &aa)); 8104 PetscCall(MatSeqAIJGetArray(B, &ba)); 8105 for (i = 0; i < m; i++) { 8106 if (im[i] < 0) continue; 8107 PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1); 8108 if (im[i] >= rstart && im[i] < rend) { 8109 row = im[i] - rstart; 8110 lastcol1 = -1; 8111 rp1 = aj + ai[row]; 8112 ap1 = aa + ai[row]; 8113 rmax1 = aimax[row]; 8114 nrow1 = ailen[row]; 8115 low1 = 0; 8116 high1 = nrow1; 8117 lastcol2 = -1; 8118 rp2 = bj + bi[row]; 8119 ap2 = ba + bi[row]; 8120 rmax2 = bimax[row]; 8121 nrow2 = bilen[row]; 8122 low2 = 0; 8123 high2 = nrow2; 8124 8125 for (j = 0; j < n; j++) { 8126 if (roworiented) value = v[i * n + j]; 8127 else value = v[i + j * m]; 8128 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 8129 if (in[j] >= cstart && in[j] < cend) { 8130 col = in[j] - cstart; 8131 MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]); 8132 } else if (in[j] < 0) continue; 8133 else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) { 8134 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1); 8135 } else { 8136 if (mat->was_assembled) { 8137 if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 8138 #if defined(PETSC_USE_CTABLE) 8139 PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); 8140 col--; 8141 #else 8142 col = aij->colmap[in[j]] - 1; 8143 #endif 8144 if (col < 0 && !((Mat_SeqAIJ *)aij->A->data)->nonew) { 8145 PetscCall(MatDisAssemble_MPIAIJ(mat)); 8146 col = in[j]; 8147 /* Reinitialize the 
variables required by MatSetValues_SeqAIJ_B_Private() */ 8148 B = aij->B; 8149 b = (Mat_SeqAIJ *)B->data; 8150 bimax = b->imax; 8151 bi = b->i; 8152 bilen = b->ilen; 8153 bj = b->j; 8154 rp2 = bj + bi[row]; 8155 ap2 = ba + bi[row]; 8156 rmax2 = bimax[row]; 8157 nrow2 = bilen[row]; 8158 low2 = 0; 8159 high2 = nrow2; 8160 bm = aij->B->rmap->n; 8161 ba = b->a; 8162 inserted = PETSC_FALSE; 8163 } 8164 } else col = in[j]; 8165 MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]); 8166 } 8167 } 8168 } else if (!aij->donotstash) { 8169 if (roworiented) { 8170 PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 8171 } else { 8172 PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 8173 } 8174 } 8175 } 8176 PetscCall(MatSeqAIJRestoreArray(A, &aa)); 8177 PetscCall(MatSeqAIJRestoreArray(B, &ba)); 8178 } 8179 PetscFunctionReturnVoid(); 8180 } 8181 8182 /* Undefining these here since they were redefined from their original definition above! No 8183 * other PETSc functions should be defined past this point, as it is impossible to recover the 8184 * original definitions */ 8185 #undef PetscCall 8186 #undef SETERRQ 8187