1 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 2 #include <petsc/private/vecimpl.h> 3 #include <petsc/private/sfimpl.h> 4 #include <petsc/private/isimpl.h> 5 #include <petscblaslapack.h> 6 #include <petscsf.h> 7 #include <petsc/private/hashmapi.h> 8 9 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 10 { 11 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 12 13 PetscFunctionBegin; 14 PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N)); 15 PetscCall(MatStashDestroy_Private(&mat->stash)); 16 PetscCall(VecDestroy(&aij->diag)); 17 PetscCall(MatDestroy(&aij->A)); 18 PetscCall(MatDestroy(&aij->B)); 19 #if defined(PETSC_USE_CTABLE) 20 PetscCall(PetscHMapIDestroy(&aij->colmap)); 21 #else 22 PetscCall(PetscFree(aij->colmap)); 23 #endif 24 PetscCall(PetscFree(aij->garray)); 25 PetscCall(VecDestroy(&aij->lvec)); 26 PetscCall(VecScatterDestroy(&aij->Mvctx)); 27 PetscCall(PetscFree2(aij->rowvalues, aij->rowindices)); 28 PetscCall(PetscFree(aij->ld)); 29 30 PetscCall(PetscFree(mat->data)); 31 32 /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */ 33 PetscCall(PetscObjectCompose((PetscObject)mat, "MatMergeSeqsToMPI", NULL)); 34 35 PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL)); 36 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL)); 37 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL)); 38 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatIsTranspose_C", NULL)); 39 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocation_C", NULL)); 40 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetPreallocation_C", NULL)); 41 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocationCSR_C", NULL)); 42 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL)); 43 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpibaij_C", NULL)); 44 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisbaij_C", NULL)); 45 #if defined(PETSC_HAVE_CUDA) 46 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcusparse_C", NULL)); 47 #endif 48 #if defined(PETSC_HAVE_HIP) 49 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijhipsparse_C", NULL)); 50 #endif 51 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 52 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijkokkos_C", NULL)); 53 #endif 54 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL)); 55 #if defined(PETSC_HAVE_ELEMENTAL) 56 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_elemental_C", NULL)); 57 #endif 58 #if defined(PETSC_HAVE_SCALAPACK) 59 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_scalapack_C", NULL)); 60 #endif 61 #if defined(PETSC_HAVE_HYPRE) 62 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_hypre_C", NULL)); 63 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", NULL)); 64 #endif 65 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL)); 66 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_is_mpiaij_C", NULL)); 67 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpiaij_C", NULL)); 68 
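  /* composing NULL with a name removes the corresponding method implementation previously attached to the matrix */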
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetUseScalableIncreaseOverlap_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijperm_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijsell_C", NULL));
#if defined(PETSC_HAVE_MKL_SPARSE)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijmkl_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcrl_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisell_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetPreallocationCOO_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetValuesCOO_C", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and MatAssemblyEnd_MPI_Hash() */
#define TYPE AIJ
#define TYPE_AIJ
#include "../src/mat/impls/aij/mpi/mpihashmat.h"
#undef TYPE
#undef TYPE_AIJ

static PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
{
  Mat B;

  PetscFunctionBegin;
  PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &B));
  PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject)B));
  PetscCall(MatGetRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
  PetscCall(MatDestroy(&B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
{
  Mat B;

  PetscFunctionBegin;
  PetscCall(PetscObjectQuery((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject *)&B));
  PetscCall(MatRestoreRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
  PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to `MATSEQAIJ` when constructed with a single process communicator,
   and `MATMPIAIJ` otherwise. As a result, for single process communicators,
   `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Key:
. -mat_type aij - sets the matrix type to `MATAIJ` during a call to `MatSetFromOptions()`

   Level: beginner

   Developer Note:
   Subclasses include `MATAIJCUSPARSE`, `MATAIJPERM`, `MATAIJSELL`, `MATAIJMKL`, `MATAIJCRL`, and `MATAIJKOKKOS`;
   the type also automatically switches over to using inodes when enough of them exist.

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateAIJ()`, `MatCreateSeqAIJ()`
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to `MATSEQAIJCRL` when constructed with a single process communicator,
   and `MATMPIAIJCRL` otherwise.
As a result, for single process communicators, 138 `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported 139 for communicators controlling multiple processes. It is recommended that you call both of 140 the above preallocation routines for simplicity. 141 142 Options Database Key: 143 . -mat_type aijcrl - sets the matrix type to `MATMPIAIJCRL` during a call to `MatSetFromOptions()` 144 145 Level: beginner 146 147 .seealso: [](ch_matrices), `Mat`, `MatCreateMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL` 148 M*/ 149 150 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A, PetscBool flg) 151 { 152 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 153 154 PetscFunctionBegin; 155 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) || defined(PETSC_HAVE_VIENNACL) 156 A->boundtocpu = flg; 157 #endif 158 if (a->A) PetscCall(MatBindToCPU(a->A, flg)); 159 if (a->B) PetscCall(MatBindToCPU(a->B, flg)); 160 161 /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products. 162 * This maybe seems a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors 163 * to differ from the parent matrix. */ 164 if (a->lvec) PetscCall(VecBindToCPU(a->lvec, flg)); 165 if (a->diag) PetscCall(VecBindToCPU(a->diag, flg)); 166 PetscFunctionReturn(PETSC_SUCCESS); 167 } 168 169 static PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs) 170 { 171 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data; 172 173 PetscFunctionBegin; 174 if (mat->A) { 175 PetscCall(MatSetBlockSizes(mat->A, rbs, cbs)); 176 PetscCall(MatSetBlockSizes(mat->B, rbs, 1)); 177 } 178 PetscFunctionReturn(PETSC_SUCCESS); 179 } 180 181 static PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M, IS *keptrows) 182 { 183 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data; 184 Mat_SeqAIJ *a = (Mat_SeqAIJ *)mat->A->data; 185 Mat_SeqAIJ *b = (Mat_SeqAIJ *)mat->B->data; 186 const PetscInt *ia, *ib; 187 const MatScalar *aa, *bb, *aav, *bav; 188 PetscInt na, nb, i, j, *rows, cnt = 0, n0rows; 189 PetscInt m = M->rmap->n, rstart = M->rmap->rstart; 190 191 PetscFunctionBegin; 192 *keptrows = NULL; 193 194 ia = a->i; 195 ib = b->i; 196 PetscCall(MatSeqAIJGetArrayRead(mat->A, &aav)); 197 PetscCall(MatSeqAIJGetArrayRead(mat->B, &bav)); 198 for (i = 0; i < m; i++) { 199 na = ia[i + 1] - ia[i]; 200 nb = ib[i + 1] - ib[i]; 201 if (!na && !nb) { 202 cnt++; 203 goto ok1; 204 } 205 aa = aav + ia[i]; 206 for (j = 0; j < na; j++) { 207 if (aa[j] != 0.0) goto ok1; 208 } 209 bb = PetscSafePointerPlusOffset(bav, ib[i]); 210 for (j = 0; j < nb; j++) { 211 if (bb[j] != 0.0) goto ok1; 212 } 213 cnt++; 214 ok1:; 215 } 216 PetscCall(MPIU_Allreduce(&cnt, &n0rows, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)M))); 217 if (!n0rows) { 218 PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav)); 219 PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav)); 220 PetscFunctionReturn(PETSC_SUCCESS); 221 } 222 PetscCall(PetscMalloc1(M->rmap->n - cnt, &rows)); 223 cnt = 0; 224 for (i = 0; i < m; i++) { 225 na = ia[i + 1] - ia[i]; 226 nb = ib[i + 1] - ib[i]; 227 if (!na && !nb) continue; 228 aa = aav + ia[i]; 229 for (j = 0; j < na; j++) { 230 if (aa[j] != 0.0) { 231 rows[cnt++] = rstart + i; 232 goto ok2; 233 } 234 } 235 bb = PetscSafePointerPlusOffset(bav, ib[i]); 236 for (j = 0; j < nb; j++) { 237 if (bb[j] != 0.0) { 238 rows[cnt++] = rstart + i; 239 goto ok2; 240 } 241 } 242 ok2:; 243 } 244 
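  /* rows[] now holds the global indices of the locally owned rows that contain at least one explicitly nonzero value; the IS below takes ownership of the array */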
PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), cnt, rows, PETSC_OWN_POINTER, keptrows)); 245 PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav)); 246 PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav)); 247 PetscFunctionReturn(PETSC_SUCCESS); 248 } 249 250 static PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y, Vec D, InsertMode is) 251 { 252 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Y->data; 253 PetscBool cong; 254 255 PetscFunctionBegin; 256 PetscCall(MatHasCongruentLayouts(Y, &cong)); 257 if (Y->assembled && cong) { 258 PetscCall(MatDiagonalSet(aij->A, D, is)); 259 } else { 260 PetscCall(MatDiagonalSet_Default(Y, D, is)); 261 } 262 PetscFunctionReturn(PETSC_SUCCESS); 263 } 264 265 static PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M, IS *zrows) 266 { 267 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)M->data; 268 PetscInt i, rstart, nrows, *rows; 269 270 PetscFunctionBegin; 271 *zrows = NULL; 272 PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A, &nrows, &rows)); 273 PetscCall(MatGetOwnershipRange(M, &rstart, NULL)); 274 for (i = 0; i < nrows; i++) rows[i] += rstart; 275 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), nrows, rows, PETSC_OWN_POINTER, zrows)); 276 PetscFunctionReturn(PETSC_SUCCESS); 277 } 278 279 static PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A, PetscInt type, PetscReal *reductions) 280 { 281 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)A->data; 282 PetscInt i, m, n, *garray = aij->garray; 283 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ *)aij->A->data; 284 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ *)aij->B->data; 285 PetscReal *work; 286 const PetscScalar *dummy; 287 288 PetscFunctionBegin; 289 PetscCall(MatGetSize(A, &m, &n)); 290 PetscCall(PetscCalloc1(n, &work)); 291 PetscCall(MatSeqAIJGetArrayRead(aij->A, &dummy)); 292 PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &dummy)); 293 PetscCall(MatSeqAIJGetArrayRead(aij->B, &dummy)); 294 PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &dummy)); 295 if (type == NORM_2) { 296 for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i] * a_aij->a[i]); 297 for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i] * b_aij->a[i]); 298 } else if (type == NORM_1) { 299 for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 300 for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 301 } else if (type == NORM_INFINITY) { 302 for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 303 for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]), work[garray[b_aij->j[i]]]); 304 } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) { 305 for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]); 306 for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]); 307 } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) { 308 for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]); 309 for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]); 310 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type"); 311 
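  /* work[] holds this process's partial per-column results over all global columns; combine them across ranks (MAX for the infinity norm, SUM for the other reductions) */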
  if (type == NORM_INFINITY) {
    PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
  } else {
    PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscCall(PetscFree(work));
  if (type == NORM_2) {
    for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < n; i++) reductions[i] /= m;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A, IS *is)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
  IS              sis, gis;
  const PetscInt *isis, *igis;
  PetscInt        n, *iis, nsis, ngis, rstart, i;

  PetscFunctionBegin;
  PetscCall(MatFindOffBlockDiagonalEntries(a->A, &sis));
  PetscCall(MatFindNonzeroRows(a->B, &gis));
  PetscCall(ISGetSize(gis, &ngis));
  PetscCall(ISGetSize(sis, &nsis));
  PetscCall(ISGetIndices(sis, &isis));
  PetscCall(ISGetIndices(gis, &igis));

  PetscCall(PetscMalloc1(ngis + nsis, &iis));
  PetscCall(PetscArraycpy(iis, igis, ngis));
  PetscCall(PetscArraycpy(iis + ngis, isis, nsis));
  n = ngis + nsis;
  PetscCall(PetscSortRemoveDupsInt(&n, iis));
  PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
  for (i = 0; i < n; i++) iis[i] += rstart;
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A), n, iis, PETSC_OWN_POINTER, is));

  PetscCall(ISRestoreIndices(sis, &isis));
  PetscCall(ISRestoreIndices(gis, &igis));
  PetscCall(ISDestroy(&sis));
  PetscCall(ISDestroy(&gis));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix. When PETSC_USE_CTABLE is used this is scalable,
  at a slightly higher hash table cost; without it, it is not scalable
  (each process holds an order-N integer array, but access is fast).
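
  For example (illustrative values): if garray = {3, 9, 17} lists the global columns that appear in the
  off-diagonal block, the colmap conceptually records 3 -> 1, 9 -> 2, 17 -> 3, i.e. global column -> local
  position + 1. A lookup that yields 0 (the default) means the column does not appear in the off-diagonal
  block; callers subtract 1 to recover the local index.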
362 */ 363 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 364 { 365 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 366 PetscInt n = aij->B->cmap->n, i; 367 368 PetscFunctionBegin; 369 PetscCheck(!n || aij->garray, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MPIAIJ Matrix was assembled but is missing garray"); 370 #if defined(PETSC_USE_CTABLE) 371 PetscCall(PetscHMapICreateWithSize(n, &aij->colmap)); 372 for (i = 0; i < n; i++) PetscCall(PetscHMapISet(aij->colmap, aij->garray[i] + 1, i + 1)); 373 #else 374 PetscCall(PetscCalloc1(mat->cmap->N + 1, &aij->colmap)); 375 for (i = 0; i < n; i++) aij->colmap[aij->garray[i]] = i + 1; 376 #endif 377 PetscFunctionReturn(PETSC_SUCCESS); 378 } 379 380 #define MatSetValues_SeqAIJ_A_Private(row, col, value, addv, orow, ocol) \ 381 do { \ 382 if (col <= lastcol1) low1 = 0; \ 383 else high1 = nrow1; \ 384 lastcol1 = col; \ 385 while (high1 - low1 > 5) { \ 386 t = (low1 + high1) / 2; \ 387 if (rp1[t] > col) high1 = t; \ 388 else low1 = t; \ 389 } \ 390 for (_i = low1; _i < high1; _i++) { \ 391 if (rp1[_i] > col) break; \ 392 if (rp1[_i] == col) { \ 393 if (addv == ADD_VALUES) { \ 394 ap1[_i] += value; \ 395 /* Not sure LogFlops will slow dow the code or not */ \ 396 (void)PetscLogFlops(1.0); \ 397 } else ap1[_i] = value; \ 398 goto a_noinsert; \ 399 } \ 400 } \ 401 if (value == 0.0 && ignorezeroentries && row != col) { \ 402 low1 = 0; \ 403 high1 = nrow1; \ 404 goto a_noinsert; \ 405 } \ 406 if (nonew == 1) { \ 407 low1 = 0; \ 408 high1 = nrow1; \ 409 goto a_noinsert; \ 410 } \ 411 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 412 MatSeqXAIJReallocateAIJ(A, am, 1, nrow1, row, col, rmax1, aa, ai, aj, rp1, ap1, aimax, nonew, MatScalar); \ 413 N = nrow1++ - 1; \ 414 a->nz++; \ 415 high1++; \ 416 /* shift up all the later entries in this row */ \ 417 PetscCall(PetscArraymove(rp1 + _i + 1, rp1 + _i, N - _i + 1)); \ 418 PetscCall(PetscArraymove(ap1 + _i + 1, ap1 + _i, N - _i + 1)); \ 419 rp1[_i] = col; \ 420 ap1[_i] = value; \ 421 A->nonzerostate++; \ 422 a_noinsert:; \ 423 ailen[row] = nrow1; \ 424 } while (0) 425 426 #define MatSetValues_SeqAIJ_B_Private(row, col, value, addv, orow, ocol) \ 427 do { \ 428 if (col <= lastcol2) low2 = 0; \ 429 else high2 = nrow2; \ 430 lastcol2 = col; \ 431 while (high2 - low2 > 5) { \ 432 t = (low2 + high2) / 2; \ 433 if (rp2[t] > col) high2 = t; \ 434 else low2 = t; \ 435 } \ 436 for (_i = low2; _i < high2; _i++) { \ 437 if (rp2[_i] > col) break; \ 438 if (rp2[_i] == col) { \ 439 if (addv == ADD_VALUES) { \ 440 ap2[_i] += value; \ 441 (void)PetscLogFlops(1.0); \ 442 } else ap2[_i] = value; \ 443 goto b_noinsert; \ 444 } \ 445 } \ 446 if (value == 0.0 && ignorezeroentries) { \ 447 low2 = 0; \ 448 high2 = nrow2; \ 449 goto b_noinsert; \ 450 } \ 451 if (nonew == 1) { \ 452 low2 = 0; \ 453 high2 = nrow2; \ 454 goto b_noinsert; \ 455 } \ 456 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 457 MatSeqXAIJReallocateAIJ(B, bm, 1, nrow2, row, col, rmax2, ba, bi, bj, rp2, ap2, bimax, nonew, MatScalar); \ 458 N = nrow2++ - 1; \ 459 b->nz++; \ 460 high2++; \ 461 /* shift up all the later entries in this row */ \ 462 PetscCall(PetscArraymove(rp2 + _i + 1, rp2 + _i, N - _i + 1)); \ 463 PetscCall(PetscArraymove(ap2 + _i + 1, ap2 + _i, N - _i + 1)); \ 464 rp2[_i] = col; \ 465 ap2[_i] = 
value; \ 466 B->nonzerostate++; \ 467 b_noinsert:; \ 468 bilen[row] = nrow2; \ 469 } while (0) 470 471 static PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A, PetscInt row, const PetscScalar v[]) 472 { 473 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 474 Mat_SeqAIJ *a = (Mat_SeqAIJ *)mat->A->data, *b = (Mat_SeqAIJ *)mat->B->data; 475 PetscInt l, *garray = mat->garray, diag; 476 PetscScalar *aa, *ba; 477 478 PetscFunctionBegin; 479 /* code only works for square matrices A */ 480 481 /* find size of row to the left of the diagonal part */ 482 PetscCall(MatGetOwnershipRange(A, &diag, NULL)); 483 row = row - diag; 484 for (l = 0; l < b->i[row + 1] - b->i[row]; l++) { 485 if (garray[b->j[b->i[row] + l]] > diag) break; 486 } 487 if (l) { 488 PetscCall(MatSeqAIJGetArray(mat->B, &ba)); 489 PetscCall(PetscArraycpy(ba + b->i[row], v, l)); 490 PetscCall(MatSeqAIJRestoreArray(mat->B, &ba)); 491 } 492 493 /* diagonal part */ 494 if (a->i[row + 1] - a->i[row]) { 495 PetscCall(MatSeqAIJGetArray(mat->A, &aa)); 496 PetscCall(PetscArraycpy(aa + a->i[row], v + l, (a->i[row + 1] - a->i[row]))); 497 PetscCall(MatSeqAIJRestoreArray(mat->A, &aa)); 498 } 499 500 /* right of diagonal part */ 501 if (b->i[row + 1] - b->i[row] - l) { 502 PetscCall(MatSeqAIJGetArray(mat->B, &ba)); 503 PetscCall(PetscArraycpy(ba + b->i[row] + l, v + l + a->i[row + 1] - a->i[row], b->i[row + 1] - b->i[row] - l)); 504 PetscCall(MatSeqAIJRestoreArray(mat->B, &ba)); 505 } 506 PetscFunctionReturn(PETSC_SUCCESS); 507 } 508 509 PetscErrorCode MatSetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) 510 { 511 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 512 PetscScalar value = 0.0; 513 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend; 514 PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col; 515 PetscBool roworiented = aij->roworiented; 516 517 /* Some Variables required in the macro */ 518 Mat A = aij->A; 519 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 520 PetscInt *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j; 521 PetscBool ignorezeroentries = a->ignorezeroentries; 522 Mat B = aij->B; 523 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 524 PetscInt *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n; 525 MatScalar *aa, *ba; 526 PetscInt *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2; 527 PetscInt nonew; 528 MatScalar *ap1, *ap2; 529 530 PetscFunctionBegin; 531 PetscCall(MatSeqAIJGetArray(A, &aa)); 532 PetscCall(MatSeqAIJGetArray(B, &ba)); 533 for (i = 0; i < m; i++) { 534 if (im[i] < 0) continue; 535 PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1); 536 if (im[i] >= rstart && im[i] < rend) { 537 row = im[i] - rstart; 538 lastcol1 = -1; 539 rp1 = PetscSafePointerPlusOffset(aj, ai[row]); 540 ap1 = PetscSafePointerPlusOffset(aa, ai[row]); 541 rmax1 = aimax[row]; 542 nrow1 = ailen[row]; 543 low1 = 0; 544 high1 = nrow1; 545 lastcol2 = -1; 546 rp2 = PetscSafePointerPlusOffset(bj, bi[row]); 547 ap2 = PetscSafePointerPlusOffset(ba, bi[row]); 548 rmax2 = bimax[row]; 549 nrow2 = bilen[row]; 550 low2 = 0; 551 high2 = nrow2; 552 553 for (j = 0; j < n; j++) { 554 if (v) value = roworiented ? 
v[i * n + j] : v[i + j * m]; 555 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 556 if (in[j] >= cstart && in[j] < cend) { 557 col = in[j] - cstart; 558 nonew = a->nonew; 559 MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]); 560 } else if (in[j] < 0) { 561 continue; 562 } else { 563 PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1); 564 if (mat->was_assembled) { 565 if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 566 #if defined(PETSC_USE_CTABLE) 567 PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); /* map global col ids to local ones */ 568 col--; 569 #else 570 col = aij->colmap[in[j]] - 1; 571 #endif 572 if (col < 0 && !((Mat_SeqAIJ *)aij->B->data)->nonew) { /* col < 0 means in[j] is a new col for B */ 573 PetscCall(MatDisAssemble_MPIAIJ(mat)); /* Change aij->B from reduced/local format to expanded/global format */ 574 col = in[j]; 575 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 576 B = aij->B; 577 b = (Mat_SeqAIJ *)B->data; 578 bimax = b->imax; 579 bi = b->i; 580 bilen = b->ilen; 581 bj = b->j; 582 ba = b->a; 583 rp2 = bj + bi[row]; 584 ap2 = ba + bi[row]; 585 rmax2 = bimax[row]; 586 nrow2 = bilen[row]; 587 low2 = 0; 588 high2 = nrow2; 589 bm = aij->B->rmap->n; 590 ba = b->a; 591 } else if (col < 0 && !(ignorezeroentries && value == 0.0)) { 592 if (1 == ((Mat_SeqAIJ *)aij->B->data)->nonew) { 593 PetscCall(PetscInfo(mat, "Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n", (double)PetscRealPart(value), im[i], in[j])); 594 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]); 595 } 596 } else col = in[j]; 597 nonew = b->nonew; 598 MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]); 599 } 600 } 601 } else { 602 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]); 603 if (!aij->donotstash) { 604 mat->assembled = PETSC_FALSE; 605 if (roworiented) { 606 PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i * n), (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 607 } else { 608 PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i), m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 609 } 610 } 611 } 612 } 613 PetscCall(MatSeqAIJRestoreArray(A, &aa)); /* aa, bb might have been free'd due to reallocation above. But we don't access them here */ 614 PetscCall(MatSeqAIJRestoreArray(B, &ba)); 615 PetscFunctionReturn(PETSC_SUCCESS); 616 } 617 618 /* 619 This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix. 620 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 621 No off-processor parts off the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE. 
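  The routine also assumes that the diagonal (A) and off-diagonal (B) blocks were preallocated with exactly
  the row lengths implied by mat_i, since the column indices are written consecutively into a->j and b->j.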
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  Mat         A   = aij->A; /* diagonal part of the matrix */
  Mat         B   = aij->B; /* off-diagonal part of the matrix */
  Mat_SeqAIJ *a   = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJ *b   = (Mat_SeqAIJ *)B->data;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, col;
  PetscInt   *ailen = a->ilen, *aj = a->j;
  PetscInt   *bilen = b->ilen, *bj = b->j;
  PetscInt    am = aij->A->rmap->n, j;
  PetscInt    diag_so_far = 0, dnz;
  PetscInt    offd_so_far = 0, onz;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j = 0; j < am; j++) {
    dnz = onz = 0;
    /* Iterate over all non-zero columns of the current row */
    for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[diag_so_far++] = mat_j[col] - cstart;
        dnz++;
      } else { /* off-diagonal entries */
        bj[offd_so_far++] = mat_j[col];
        onz++;
      }
    }
    ailen[j] = dnz;
    bilen[j] = onz;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
  The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
  No off-processor parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ().
  Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
  would not be true and the more complex MatSetValues_MPIAIJ() has to be used.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[], const PetscScalar mat_a[])
{
  Mat_MPIAIJ *aij  = (Mat_MPIAIJ *)mat->data;
  Mat         A    = aij->A; /* diagonal part of the matrix */
  Mat         B    = aij->B; /* off-diagonal part of the matrix */
  Mat_SeqAIJ *aijd = (Mat_SeqAIJ *)aij->A->data, *aijo = (Mat_SeqAIJ *)aij->B->data;
  Mat_SeqAIJ *a    = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJ *b    = (Mat_SeqAIJ *)B->data;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend;
  PetscInt   *ailen = a->ilen, *aj = a->j;
  PetscInt   *bilen = b->ilen, *bj = b->j;
  PetscInt    am = aij->A->rmap->n, j;
  PetscInt   *full_diag_i = aijd->i, *full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point.
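     In other words, the row starts in aijd->i and aijo->i reflect the full preallocation, including entries
     that will only be filled in later during assembly.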
*/ 678 PetscInt col, dnz_row, onz_row, rowstart_diag, rowstart_offd; 679 PetscScalar *aa = a->a, *ba = b->a; 680 681 PetscFunctionBegin; 682 /* Iterate over all rows of the matrix */ 683 for (j = 0; j < am; j++) { 684 dnz_row = onz_row = 0; 685 rowstart_offd = full_offd_i[j]; 686 rowstart_diag = full_diag_i[j]; 687 /* Iterate over all non-zero columns of the current row */ 688 for (col = mat_i[j]; col < mat_i[j + 1]; col++) { 689 /* If column is in the diagonal */ 690 if (mat_j[col] >= cstart && mat_j[col] < cend) { 691 aj[rowstart_diag + dnz_row] = mat_j[col] - cstart; 692 aa[rowstart_diag + dnz_row] = mat_a[col]; 693 dnz_row++; 694 } else { /* off-diagonal entries */ 695 bj[rowstart_offd + onz_row] = mat_j[col]; 696 ba[rowstart_offd + onz_row] = mat_a[col]; 697 onz_row++; 698 } 699 } 700 ailen[j] = dnz_row; 701 bilen[j] = onz_row; 702 } 703 PetscFunctionReturn(PETSC_SUCCESS); 704 } 705 706 static PetscErrorCode MatGetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[]) 707 { 708 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 709 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend; 710 PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col; 711 712 PetscFunctionBegin; 713 for (i = 0; i < m; i++) { 714 if (idxm[i] < 0) continue; /* negative row */ 715 PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1); 716 PetscCheck(idxm[i] >= rstart && idxm[i] < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported, row requested %" PetscInt_FMT " range [%" PetscInt_FMT " %" PetscInt_FMT ")", idxm[i], rstart, rend); 717 row = idxm[i] - rstart; 718 for (j = 0; j < n; j++) { 719 if (idxn[j] < 0) continue; /* negative column */ 720 PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1); 721 if (idxn[j] >= cstart && idxn[j] < cend) { 722 col = idxn[j] - cstart; 723 PetscCall(MatGetValues(aij->A, 1, &row, 1, &col, v + i * n + j)); 724 } else { 725 if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 726 #if defined(PETSC_USE_CTABLE) 727 PetscCall(PetscHMapIGetWithDefault(aij->colmap, idxn[j] + 1, 0, &col)); 728 col--; 729 #else 730 col = aij->colmap[idxn[j]] - 1; 731 #endif 732 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v + i * n + j) = 0.0; 733 else PetscCall(MatGetValues(aij->B, 1, &row, 1, &col, v + i * n + j)); 734 } 735 } 736 } 737 PetscFunctionReturn(PETSC_SUCCESS); 738 } 739 740 static PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat, MatAssemblyType mode) 741 { 742 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 743 PetscInt nstash, reallocs; 744 745 PetscFunctionBegin; 746 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS); 747 748 PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range)); 749 PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs)); 750 PetscCall(PetscInfo(aij->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs)); 751 PetscFunctionReturn(PETSC_SUCCESS); 752 } 753 754 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat, MatAssemblyType mode) 755 { 756 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 757 PetscMPIInt n; 758 PetscInt i, j, rstart, ncols, flg; 759 PetscInt *row, *col; 760 PetscBool other_disassembled; 761 PetscScalar *val; 762 763 /* do not use 'b 
= (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 764 765 PetscFunctionBegin; 766 if (!aij->donotstash && !mat->nooffprocentries) { 767 while (1) { 768 PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg)); 769 if (!flg) break; 770 771 for (i = 0; i < n;) { 772 /* Now identify the consecutive vals belonging to the same row */ 773 for (j = i, rstart = row[j]; j < n; j++) { 774 if (row[j] != rstart) break; 775 } 776 if (j < n) ncols = j - i; 777 else ncols = n - i; 778 /* Now assemble all these values with a single function call */ 779 PetscCall(MatSetValues_MPIAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode)); 780 i = j; 781 } 782 } 783 PetscCall(MatStashScatterEnd_Private(&mat->stash)); 784 } 785 #if defined(PETSC_HAVE_DEVICE) 786 if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU; 787 /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */ 788 if (mat->boundtocpu) { 789 PetscCall(MatBindToCPU(aij->A, PETSC_TRUE)); 790 PetscCall(MatBindToCPU(aij->B, PETSC_TRUE)); 791 } 792 #endif 793 PetscCall(MatAssemblyBegin(aij->A, mode)); 794 PetscCall(MatAssemblyEnd(aij->A, mode)); 795 796 /* determine if any processor has disassembled, if so we must 797 also disassemble ourself, in order that we may reassemble. */ 798 /* 799 if nonzero structure of submatrix B cannot change then we know that 800 no processor disassembled thus we can skip this stuff 801 */ 802 if (!((Mat_SeqAIJ *)aij->B->data)->nonew) { 803 PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 804 if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globally it does not */ 805 PetscCall(MatDisAssemble_MPIAIJ(mat)); 806 } 807 } 808 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIAIJ(mat)); 809 PetscCall(MatSetOption(aij->B, MAT_USE_INODES, PETSC_FALSE)); 810 #if defined(PETSC_HAVE_DEVICE) 811 if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU; 812 #endif 813 PetscCall(MatAssemblyBegin(aij->B, mode)); 814 PetscCall(MatAssemblyEnd(aij->B, mode)); 815 816 PetscCall(PetscFree2(aij->rowvalues, aij->rowindices)); 817 818 aij->rowvalues = NULL; 819 820 PetscCall(VecDestroy(&aij->diag)); 821 822 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 823 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ *)aij->A->data)->nonew) { 824 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 825 PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat))); 826 } 827 #if defined(PETSC_HAVE_DEVICE) 828 mat->offloadmask = PETSC_OFFLOAD_BOTH; 829 #endif 830 PetscFunctionReturn(PETSC_SUCCESS); 831 } 832 833 static PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 834 { 835 Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data; 836 837 PetscFunctionBegin; 838 PetscCall(MatZeroEntries(l->A)); 839 PetscCall(MatZeroEntries(l->B)); 840 PetscFunctionReturn(PETSC_SUCCESS); 841 } 842 843 static PetscErrorCode MatZeroRows_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 844 { 845 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 846 PetscInt *lrows; 847 PetscInt r, len; 848 PetscBool cong; 849 850 
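  /* Zero the locally owned portion of the listed global rows; when both x and b are provided, b is set to diag*x at those rows so that x remains a solution there */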
PetscFunctionBegin; 851 /* get locally owned rows */ 852 PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows)); 853 PetscCall(MatHasCongruentLayouts(A, &cong)); 854 /* fix right hand side if needed */ 855 if (x && b) { 856 const PetscScalar *xx; 857 PetscScalar *bb; 858 859 PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout"); 860 PetscCall(VecGetArrayRead(x, &xx)); 861 PetscCall(VecGetArray(b, &bb)); 862 for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]]; 863 PetscCall(VecRestoreArrayRead(x, &xx)); 864 PetscCall(VecRestoreArray(b, &bb)); 865 } 866 867 if (diag != 0.0 && cong) { 868 PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL)); 869 PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 870 } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */ 871 Mat_SeqAIJ *aijA = (Mat_SeqAIJ *)mat->A->data; 872 Mat_SeqAIJ *aijB = (Mat_SeqAIJ *)mat->B->data; 873 PetscInt nnwA, nnwB; 874 PetscBool nnzA, nnzB; 875 876 nnwA = aijA->nonew; 877 nnwB = aijB->nonew; 878 nnzA = aijA->keepnonzeropattern; 879 nnzB = aijB->keepnonzeropattern; 880 if (!nnzA) { 881 PetscCall(PetscInfo(mat->A, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n")); 882 aijA->nonew = 0; 883 } 884 if (!nnzB) { 885 PetscCall(PetscInfo(mat->B, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n")); 886 aijB->nonew = 0; 887 } 888 /* Must zero here before the next loop */ 889 PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL)); 890 PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 891 for (r = 0; r < len; ++r) { 892 const PetscInt row = lrows[r] + A->rmap->rstart; 893 if (row >= A->cmap->N) continue; 894 PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES)); 895 } 896 aijA->nonew = nnwA; 897 aijB->nonew = nnwB; 898 } else { 899 PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL)); 900 PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 901 } 902 PetscCall(PetscFree(lrows)); 903 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 904 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 905 906 /* only change matrix nonzero state if pattern was allowed to be changed */ 907 if (!((Mat_SeqAIJ *)mat->A->data)->keepnonzeropattern || !((Mat_SeqAIJ *)mat->A->data)->nonew) { 908 PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate; 909 PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A))); 910 } 911 PetscFunctionReturn(PETSC_SUCCESS); 912 } 913 914 static PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 915 { 916 Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data; 917 PetscMPIInt n = A->rmap->n; 918 PetscInt i, j, r, m, len = 0; 919 PetscInt *lrows, *owners = A->rmap->range; 920 PetscMPIInt p = 0; 921 PetscSFNode *rrows; 922 PetscSF sf; 923 const PetscScalar *xx; 924 PetscScalar *bb, *mask, *aij_a; 925 Vec xmask, lmask; 926 Mat_SeqAIJ *aij = (Mat_SeqAIJ *)l->B->data; 927 const PetscInt *aj, *ii, *ridx; 928 PetscScalar *aa; 929 930 PetscFunctionBegin; 931 /* Create SF where leaves are input rows and roots are owned rows */ 932 PetscCall(PetscMalloc1(n, &lrows)); 933 for (r = 0; r < n; ++r) lrows[r] = -1; 934 PetscCall(PetscMalloc1(N, &rrows)); 935 for (r = 0; r < N; ++r) { 936 const PetscInt idx = 
rows[r]; 937 PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N); 938 if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */ 939 PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p)); 940 } 941 rrows[r].rank = p; 942 rrows[r].index = rows[r] - owners[p]; 943 } 944 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 945 PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER)); 946 /* Collect flags for rows to be zeroed */ 947 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR)); 948 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR)); 949 PetscCall(PetscSFDestroy(&sf)); 950 /* Compress and put in row numbers */ 951 for (r = 0; r < n; ++r) 952 if (lrows[r] >= 0) lrows[len++] = r; 953 /* zero diagonal part of matrix */ 954 PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b)); 955 /* handle off-diagonal part of matrix */ 956 PetscCall(MatCreateVecs(A, &xmask, NULL)); 957 PetscCall(VecDuplicate(l->lvec, &lmask)); 958 PetscCall(VecGetArray(xmask, &bb)); 959 for (i = 0; i < len; i++) bb[lrows[i]] = 1; 960 PetscCall(VecRestoreArray(xmask, &bb)); 961 PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD)); 962 PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD)); 963 PetscCall(VecDestroy(&xmask)); 964 if (x && b) { /* this code is buggy when the row and column layout don't match */ 965 PetscBool cong; 966 967 PetscCall(MatHasCongruentLayouts(A, &cong)); 968 PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout"); 969 PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD)); 970 PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD)); 971 PetscCall(VecGetArrayRead(l->lvec, &xx)); 972 PetscCall(VecGetArray(b, &bb)); 973 } 974 PetscCall(VecGetArray(lmask, &mask)); 975 /* remove zeroed rows of off-diagonal matrix */ 976 PetscCall(MatSeqAIJGetArray(l->B, &aij_a)); 977 ii = aij->i; 978 for (i = 0; i < len; i++) PetscCall(PetscArrayzero(PetscSafePointerPlusOffset(aij_a, ii[lrows[i]]), ii[lrows[i] + 1] - ii[lrows[i]])); 979 /* loop over all elements of off process part of matrix zeroing removed columns*/ 980 if (aij->compressedrow.use) { 981 m = aij->compressedrow.nrows; 982 ii = aij->compressedrow.i; 983 ridx = aij->compressedrow.rindex; 984 for (i = 0; i < m; i++) { 985 n = ii[i + 1] - ii[i]; 986 aj = aij->j + ii[i]; 987 aa = aij_a + ii[i]; 988 989 for (j = 0; j < n; j++) { 990 if (PetscAbsScalar(mask[*aj])) { 991 if (b) bb[*ridx] -= *aa * xx[*aj]; 992 *aa = 0.0; 993 } 994 aa++; 995 aj++; 996 } 997 ridx++; 998 } 999 } else { /* do not use compressed row format */ 1000 m = l->B->rmap->n; 1001 for (i = 0; i < m; i++) { 1002 n = ii[i + 1] - ii[i]; 1003 aj = aij->j + ii[i]; 1004 aa = aij_a + ii[i]; 1005 for (j = 0; j < n; j++) { 1006 if (PetscAbsScalar(mask[*aj])) { 1007 if (b) bb[i] -= *aa * xx[*aj]; 1008 *aa = 0.0; 1009 } 1010 aa++; 1011 aj++; 1012 } 1013 } 1014 } 1015 if (x && b) { 1016 PetscCall(VecRestoreArray(b, &bb)); 1017 PetscCall(VecRestoreArrayRead(l->lvec, &xx)); 1018 } 1019 PetscCall(MatSeqAIJRestoreArray(l->B, &aij_a)); 1020 PetscCall(VecRestoreArray(lmask, &mask)); 1021 PetscCall(VecDestroy(&lmask)); 1022 PetscCall(PetscFree(lrows)); 1023 1024 /* only change matrix nonzero state if pattern was allowed to 
be changed */ 1025 if (!((Mat_SeqAIJ *)l->A->data)->nonew) { 1026 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1027 PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A))); 1028 } 1029 PetscFunctionReturn(PETSC_SUCCESS); 1030 } 1031 1032 static PetscErrorCode MatMult_MPIAIJ(Mat A, Vec xx, Vec yy) 1033 { 1034 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1035 PetscInt nt; 1036 VecScatter Mvctx = a->Mvctx; 1037 1038 PetscFunctionBegin; 1039 PetscCall(VecGetLocalSize(xx, &nt)); 1040 PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")", A->cmap->n, nt); 1041 PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1042 PetscUseTypeMethod(a->A, mult, xx, yy); 1043 PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1044 PetscUseTypeMethod(a->B, multadd, a->lvec, yy, yy); 1045 PetscFunctionReturn(PETSC_SUCCESS); 1046 } 1047 1048 static PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A, Vec bb, Vec xx) 1049 { 1050 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1051 1052 PetscFunctionBegin; 1053 PetscCall(MatMultDiagonalBlock(a->A, bb, xx)); 1054 PetscFunctionReturn(PETSC_SUCCESS); 1055 } 1056 1057 static PetscErrorCode MatMultAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz) 1058 { 1059 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1060 VecScatter Mvctx = a->Mvctx; 1061 1062 PetscFunctionBegin; 1063 PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1064 PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz)); 1065 PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1066 PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz)); 1067 PetscFunctionReturn(PETSC_SUCCESS); 1068 } 1069 1070 static PetscErrorCode MatMultTranspose_MPIAIJ(Mat A, Vec xx, Vec yy) 1071 { 1072 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1073 1074 PetscFunctionBegin; 1075 /* do nondiagonal part */ 1076 PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec)); 1077 /* do local part */ 1078 PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy)); 1079 /* add partial results together */ 1080 PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE)); 1081 PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE)); 1082 PetscFunctionReturn(PETSC_SUCCESS); 1083 } 1084 1085 static PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat, Mat Bmat, PetscReal tol, PetscBool *f) 1086 { 1087 MPI_Comm comm; 1088 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)Amat->data, *Bij = (Mat_MPIAIJ *)Bmat->data; 1089 Mat Adia = Aij->A, Bdia = Bij->A, Aoff, Boff, *Aoffs, *Boffs; 1090 IS Me, Notme; 1091 PetscInt M, N, first, last, *notme, i; 1092 PetscBool lf; 1093 PetscMPIInt size; 1094 1095 PetscFunctionBegin; 1096 /* Easy test: symmetric diagonal block */ 1097 PetscCall(MatIsTranspose(Adia, Bdia, tol, &lf)); 1098 PetscCall(MPIU_Allreduce(&lf, f, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)Amat))); 1099 if (!*f) PetscFunctionReturn(PETSC_SUCCESS); 1100 PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm)); 1101 PetscCallMPI(MPI_Comm_size(comm, &size)); 1102 if (size == 1) PetscFunctionReturn(PETSC_SUCCESS); 1103 1104 /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. 
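     Each process builds Notme (the global row/column indices it does not own) and Me (its owned range), extracts
     A(Me, Notme) and B(Notme, Me) with MatCreateSubMatrices(), and checks that the two blocks are transposes
     of each other.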
*/
  PetscCall(MatGetSize(Amat, &M, &N));
  PetscCall(MatGetOwnershipRange(Amat, &first, &last));
  PetscCall(PetscMalloc1(N - last + first, &notme));
  for (i = 0; i < first; i++) notme[i] = i;
  for (i = last; i < M; i++) notme[i - last + first] = i;
  PetscCall(ISCreateGeneral(MPI_COMM_SELF, N - last + first, notme, PETSC_COPY_VALUES, &Notme));
  PetscCall(ISCreateStride(MPI_COMM_SELF, last - first, first, 1, &Me));
  PetscCall(MatCreateSubMatrices(Amat, 1, &Me, &Notme, MAT_INITIAL_MATRIX, &Aoffs));
  Aoff = Aoffs[0];
  PetscCall(MatCreateSubMatrices(Bmat, 1, &Notme, &Me, MAT_INITIAL_MATRIX, &Boffs));
  Boff = Boffs[0];
  PetscCall(MatIsTranspose(Aoff, Boff, tol, f));
  PetscCall(MatDestroyMatrices(1, &Aoffs));
  PetscCall(MatDestroyMatrices(1, &Boffs));
  PetscCall(ISDestroy(&Me));
  PetscCall(ISDestroy(&Notme));
  PetscCall(PetscFree(notme));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A, PetscReal tol, PetscBool *f)
{
  PetscFunctionBegin;
  PetscCall(MatIsTranspose_MPIAIJ(A, A, tol, f));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  /* do nondiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
  /* do local part */
  PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
  /* add partial results together */
  PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
  PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
static PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A, Vec v)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCheck(A->rmap->N == A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
  PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "row partition must equal col partition");
  PetscCall(MatGetDiagonal(a->A, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatScale_MPIAIJ(Mat A, PetscScalar aa)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatScale(a->A, aa));
  PetscCall(MatScale(a->B, aa));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatView_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)mat->data;
  Mat_SeqAIJ        *A   = (Mat_SeqAIJ *)aij->A->data;
  Mat_SeqAIJ        *B   = (Mat_SeqAIJ *)aij->B->data;
  const PetscInt    *garray = aij->garray;
  const PetscScalar *aa, *ba;
  PetscInt           header[4], M, N, m, rs, cs, cnt, i, ja, jb;
  PetscInt64         nz, hnz;
  PetscInt          *rowlens;
  PetscInt          *colidxs;
  PetscScalar       *matvals;
  PetscMPIInt        rank;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  M  = mat->rmap->N;
  N  = mat->cmap->N;
  m  = mat->rmap->n;
  rs = mat->rmap->rstart;
  cs = mat->cmap->rstart;
  nz = A->nz + B->nz;

  /* write matrix header */
  header[0] = MAT_FILE_CLASSID;
  header[1] = M;
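  /* header layout: [MAT_FILE_CLASSID, global rows, global cols, global nonzero count (capped at PETSC_MAX_INT)] */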
header[2] = N; 1200 PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_INT64, MPI_SUM, 0, PetscObjectComm((PetscObject)mat))); 1201 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 1202 if (rank == 0) { 1203 if (hnz > PETSC_MAX_INT) header[3] = PETSC_MAX_INT; 1204 else header[3] = (PetscInt)hnz; 1205 } 1206 PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT)); 1207 1208 /* fill in and store row lengths */ 1209 PetscCall(PetscMalloc1(m, &rowlens)); 1210 for (i = 0; i < m; i++) rowlens[i] = A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i]; 1211 PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT)); 1212 PetscCall(PetscFree(rowlens)); 1213 1214 /* fill in and store column indices */ 1215 PetscCall(PetscMalloc1(nz, &colidxs)); 1216 for (cnt = 0, i = 0; i < m; i++) { 1217 for (jb = B->i[i]; jb < B->i[i + 1]; jb++) { 1218 if (garray[B->j[jb]] > cs) break; 1219 colidxs[cnt++] = garray[B->j[jb]]; 1220 } 1221 for (ja = A->i[i]; ja < A->i[i + 1]; ja++) colidxs[cnt++] = A->j[ja] + cs; 1222 for (; jb < B->i[i + 1]; jb++) colidxs[cnt++] = garray[B->j[jb]]; 1223 } 1224 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz); 1225 PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 1226 PetscCall(PetscFree(colidxs)); 1227 1228 /* fill in and store nonzero values */ 1229 PetscCall(MatSeqAIJGetArrayRead(aij->A, &aa)); 1230 PetscCall(MatSeqAIJGetArrayRead(aij->B, &ba)); 1231 PetscCall(PetscMalloc1(nz, &matvals)); 1232 for (cnt = 0, i = 0; i < m; i++) { 1233 for (jb = B->i[i]; jb < B->i[i + 1]; jb++) { 1234 if (garray[B->j[jb]] > cs) break; 1235 matvals[cnt++] = ba[jb]; 1236 } 1237 for (ja = A->i[i]; ja < A->i[i + 1]; ja++) matvals[cnt++] = aa[ja]; 1238 for (; jb < B->i[i + 1]; jb++) matvals[cnt++] = ba[jb]; 1239 } 1240 PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &aa)); 1241 PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &ba)); 1242 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz); 1243 PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 1244 PetscCall(PetscFree(matvals)); 1245 1246 /* write block size option to the viewer's .info file */ 1247 PetscCall(MatView_Binary_BlockSizes(mat, viewer)); 1248 PetscFunctionReturn(PETSC_SUCCESS); 1249 } 1250 1251 #include <petscdraw.h> 1252 static PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer) 1253 { 1254 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1255 PetscMPIInt rank = aij->rank, size = aij->size; 1256 PetscBool isdraw, iascii, isbinary; 1257 PetscViewer sviewer; 1258 PetscViewerFormat format; 1259 1260 PetscFunctionBegin; 1261 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1262 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 1263 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1264 if (iascii) { 1265 PetscCall(PetscViewerGetFormat(viewer, &format)); 1266 if (format == PETSC_VIEWER_LOAD_BALANCE) { 1267 PetscInt i, nmax = 0, nmin = PETSC_MAX_INT, navg = 0, *nz, nzlocal = ((Mat_SeqAIJ *)aij->A->data)->nz + ((Mat_SeqAIJ *)aij->B->data)->nz; 1268 PetscCall(PetscMalloc1(size, &nz)); 1269 PetscCallMPI(MPI_Allgather(&nzlocal, 1, MPIU_INT, nz, 1, MPIU_INT, PetscObjectComm((PetscObject)mat))); 1270 for (i = 0; i < (PetscInt)size; i++) 
{ 1271 nmax = PetscMax(nmax, nz[i]); 1272 nmin = PetscMin(nmin, nz[i]); 1273 navg += nz[i]; 1274 } 1275 PetscCall(PetscFree(nz)); 1276 navg = navg / size; 1277 PetscCall(PetscViewerASCIIPrintf(viewer, "Load Balance - Nonzeros: Min %" PetscInt_FMT " avg %" PetscInt_FMT " max %" PetscInt_FMT "\n", nmin, navg, nmax)); 1278 PetscFunctionReturn(PETSC_SUCCESS); 1279 } 1280 PetscCall(PetscViewerGetFormat(viewer, &format)); 1281 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1282 MatInfo info; 1283 PetscInt *inodes = NULL; 1284 1285 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 1286 PetscCall(MatGetInfo(mat, MAT_LOCAL, &info)); 1287 PetscCall(MatInodeGetInodeSizes(aij->A, NULL, &inodes, NULL)); 1288 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 1289 if (!inodes) { 1290 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, 1291 (double)info.memory)); 1292 } else { 1293 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, 1294 (double)info.memory)); 1295 } 1296 PetscCall(MatGetInfo(aij->A, MAT_LOCAL, &info)); 1297 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used)); 1298 PetscCall(MatGetInfo(aij->B, MAT_LOCAL, &info)); 1299 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used)); 1300 PetscCall(PetscViewerFlush(viewer)); 1301 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 1302 PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n")); 1303 PetscCall(VecScatterView(aij->Mvctx, viewer)); 1304 PetscFunctionReturn(PETSC_SUCCESS); 1305 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1306 PetscInt inodecount, inodelimit, *inodes; 1307 PetscCall(MatInodeGetInodeSizes(aij->A, &inodecount, &inodes, &inodelimit)); 1308 if (inodes) { 1309 PetscCall(PetscViewerASCIIPrintf(viewer, "using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n", inodecount, inodelimit)); 1310 } else { 1311 PetscCall(PetscViewerASCIIPrintf(viewer, "not using I-node (on process 0) routines\n")); 1312 } 1313 PetscFunctionReturn(PETSC_SUCCESS); 1314 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1315 PetscFunctionReturn(PETSC_SUCCESS); 1316 } 1317 } else if (isbinary) { 1318 if (size == 1) { 1319 PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name)); 1320 PetscCall(MatView(aij->A, viewer)); 1321 } else { 1322 PetscCall(MatView_MPIAIJ_Binary(mat, viewer)); 1323 } 1324 PetscFunctionReturn(PETSC_SUCCESS); 1325 } else if (iascii && size == 1) { 1326 PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name)); 1327 PetscCall(MatView(aij->A, viewer)); 1328 PetscFunctionReturn(PETSC_SUCCESS); 1329 } else if (isdraw) { 1330 PetscDraw draw; 1331 PetscBool isnull; 1332 PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); 1333 PetscCall(PetscDrawIsNull(draw, &isnull)); 1334 if (isnull) PetscFunctionReturn(PETSC_SUCCESS); 1335 } 1336 1337 { /* assemble the entire matrix onto first processor */ 1338 Mat A = NULL, Av; 1339 IS isrow, iscol; 
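    /* rank 0 requests every row and column while the other ranks request none, so the gathered submatrix ends up entirely on rank 0 */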
1340 1341 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->rmap->N : 0, 0, 1, &isrow)); 1342 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->cmap->N : 0, 0, 1, &iscol)); 1343 PetscCall(MatCreateSubMatrix(mat, isrow, iscol, MAT_INITIAL_MATRIX, &A)); 1344 PetscCall(MatMPIAIJGetSeqAIJ(A, &Av, NULL, NULL)); 1345 /* The commented code uses MatCreateSubMatrices instead */ 1346 /* 1347 Mat *AA, A = NULL, Av; 1348 IS isrow,iscol; 1349 1350 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow)); 1351 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol)); 1352 PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA)); 1353 if (rank == 0) { 1354 PetscCall(PetscObjectReference((PetscObject)AA[0])); 1355 A = AA[0]; 1356 Av = AA[0]; 1357 } 1358 PetscCall(MatDestroySubMatrices(1,&AA)); 1359 */ 1360 PetscCall(ISDestroy(&iscol)); 1361 PetscCall(ISDestroy(&isrow)); 1362 /* 1363 Everyone has to call to draw the matrix since the graphics waits are 1364 synchronized across all processors that share the PetscDraw object 1365 */ 1366 PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 1367 if (rank == 0) { 1368 if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)Av, ((PetscObject)mat)->name)); 1369 PetscCall(MatView_SeqAIJ(Av, sviewer)); 1370 } 1371 PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 1372 PetscCall(MatDestroy(&A)); 1373 } 1374 PetscFunctionReturn(PETSC_SUCCESS); 1375 } 1376 1377 PetscErrorCode MatView_MPIAIJ(Mat mat, PetscViewer viewer) 1378 { 1379 PetscBool iascii, isdraw, issocket, isbinary; 1380 1381 PetscFunctionBegin; 1382 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 1383 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1384 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1385 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket)); 1386 if (iascii || isdraw || isbinary || issocket) PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat, viewer)); 1387 PetscFunctionReturn(PETSC_SUCCESS); 1388 } 1389 1390 static PetscErrorCode MatSOR_MPIAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) 1391 { 1392 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data; 1393 Vec bb1 = NULL; 1394 PetscBool hasop; 1395 1396 PetscFunctionBegin; 1397 if (flag == SOR_APPLY_UPPER) { 1398 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1399 PetscFunctionReturn(PETSC_SUCCESS); 1400 } 1401 1402 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) PetscCall(VecDuplicate(bb, &bb1)); 1403 1404 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1405 if (flag & SOR_ZERO_INITIAL_GUESS) { 1406 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1407 its--; 1408 } 1409 1410 while (its--) { 1411 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1412 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1413 1414 /* update rhs: bb1 = bb - B*x */ 1415 PetscCall(VecScale(mat->lvec, -1.0)); 1416 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 1417 1418 /* local sweep */ 1419 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, 
fshift, lits, 1, xx)); 1420 } 1421 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1422 if (flag & SOR_ZERO_INITIAL_GUESS) { 1423 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1424 its--; 1425 } 1426 while (its--) { 1427 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1428 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1429 1430 /* update rhs: bb1 = bb - B*x */ 1431 PetscCall(VecScale(mat->lvec, -1.0)); 1432 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 1433 1434 /* local sweep */ 1435 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx)); 1436 } 1437 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1438 if (flag & SOR_ZERO_INITIAL_GUESS) { 1439 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1440 its--; 1441 } 1442 while (its--) { 1443 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1444 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1445 1446 /* update rhs: bb1 = bb - B*x */ 1447 PetscCall(VecScale(mat->lvec, -1.0)); 1448 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 1449 1450 /* local sweep */ 1451 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx)); 1452 } 1453 } else if (flag & SOR_EISENSTAT) { 1454 Vec xx1; 1455 1456 PetscCall(VecDuplicate(bb, &xx1)); 1457 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx)); 1458 1459 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1460 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1461 if (!mat->diag) { 1462 PetscCall(MatCreateVecs(matin, &mat->diag, NULL)); 1463 PetscCall(MatGetDiagonal(matin, mat->diag)); 1464 } 1465 PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop)); 1466 if (hasop) { 1467 PetscCall(MatMultDiagonalBlock(matin, xx, bb1)); 1468 } else { 1469 PetscCall(VecPointwiseMult(bb1, mat->diag, xx)); 1470 } 1471 PetscCall(VecAYPX(bb1, (omega - 2.0) / omega, bb)); 1472 1473 PetscCall(MatMultAdd(mat->B, mat->lvec, bb1, bb1)); 1474 1475 /* local sweep */ 1476 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1)); 1477 PetscCall(VecAXPY(xx, 1.0, xx1)); 1478 PetscCall(VecDestroy(&xx1)); 1479 } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel SOR not supported"); 1480 1481 PetscCall(VecDestroy(&bb1)); 1482 1483 matin->factorerrortype = mat->A->factorerrortype; 1484 PetscFunctionReturn(PETSC_SUCCESS); 1485 } 1486 1487 static PetscErrorCode MatPermute_MPIAIJ(Mat A, IS rowp, IS colp, Mat *B) 1488 { 1489 Mat aA, aB, Aperm; 1490 const PetscInt *rwant, *cwant, *gcols, *ai, *bi, *aj, *bj; 1491 PetscScalar *aa, *ba; 1492 PetscInt i, j, m, n, ng, anz, bnz, *dnnz, *onnz, *tdnnz, *tonnz, *rdest, *cdest, *work, *gcdest; 1493 PetscSF rowsf, sf; 1494 IS parcolp = NULL; 1495 PetscBool done; 1496 1497 PetscFunctionBegin; 1498 PetscCall(MatGetLocalSize(A, &m, &n)); 1499 PetscCall(ISGetIndices(rowp, &rwant)); 1500 PetscCall(ISGetIndices(colp, &cwant)); 1501 PetscCall(PetscMalloc3(PetscMax(m, n), &work, m, &rdest, n, &cdest)); 1502 1503 /* Invert row permutation to find out where my rows should go */ 1504 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), 
&rowsf)); 1505 PetscCall(PetscSFSetGraphLayout(rowsf, A->rmap, A->rmap->n, NULL, PETSC_OWN_POINTER, rwant)); 1506 PetscCall(PetscSFSetFromOptions(rowsf)); 1507 for (i = 0; i < m; i++) work[i] = A->rmap->rstart + i; 1508 PetscCall(PetscSFReduceBegin(rowsf, MPIU_INT, work, rdest, MPI_REPLACE)); 1509 PetscCall(PetscSFReduceEnd(rowsf, MPIU_INT, work, rdest, MPI_REPLACE)); 1510 1511 /* Invert column permutation to find out where my columns should go */ 1512 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1513 PetscCall(PetscSFSetGraphLayout(sf, A->cmap, A->cmap->n, NULL, PETSC_OWN_POINTER, cwant)); 1514 PetscCall(PetscSFSetFromOptions(sf)); 1515 for (i = 0; i < n; i++) work[i] = A->cmap->rstart + i; 1516 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, work, cdest, MPI_REPLACE)); 1517 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, work, cdest, MPI_REPLACE)); 1518 PetscCall(PetscSFDestroy(&sf)); 1519 1520 PetscCall(ISRestoreIndices(rowp, &rwant)); 1521 PetscCall(ISRestoreIndices(colp, &cwant)); 1522 PetscCall(MatMPIAIJGetSeqAIJ(A, &aA, &aB, &gcols)); 1523 1524 /* Find out where my gcols should go */ 1525 PetscCall(MatGetSize(aB, NULL, &ng)); 1526 PetscCall(PetscMalloc1(ng, &gcdest)); 1527 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1528 PetscCall(PetscSFSetGraphLayout(sf, A->cmap, ng, NULL, PETSC_OWN_POINTER, gcols)); 1529 PetscCall(PetscSFSetFromOptions(sf)); 1530 PetscCall(PetscSFBcastBegin(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE)); 1531 PetscCall(PetscSFBcastEnd(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE)); 1532 PetscCall(PetscSFDestroy(&sf)); 1533 1534 PetscCall(PetscCalloc4(m, &dnnz, m, &onnz, m, &tdnnz, m, &tonnz)); 1535 PetscCall(MatGetRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done)); 1536 PetscCall(MatGetRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done)); 1537 for (i = 0; i < m; i++) { 1538 PetscInt row = rdest[i]; 1539 PetscMPIInt rowner; 1540 PetscCall(PetscLayoutFindOwner(A->rmap, row, &rowner)); 1541 for (j = ai[i]; j < ai[i + 1]; j++) { 1542 PetscInt col = cdest[aj[j]]; 1543 PetscMPIInt cowner; 1544 PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); /* Could build an index for the columns to eliminate this search */ 1545 if (rowner == cowner) dnnz[i]++; 1546 else onnz[i]++; 1547 } 1548 for (j = bi[i]; j < bi[i + 1]; j++) { 1549 PetscInt col = gcdest[bj[j]]; 1550 PetscMPIInt cowner; 1551 PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); 1552 if (rowner == cowner) dnnz[i]++; 1553 else onnz[i]++; 1554 } 1555 } 1556 PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE)); 1557 PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE)); 1558 PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE)); 1559 PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE)); 1560 PetscCall(PetscSFDestroy(&rowsf)); 1561 1562 PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A), A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N, 0, tdnnz, 0, tonnz, &Aperm)); 1563 PetscCall(MatSeqAIJGetArray(aA, &aa)); 1564 PetscCall(MatSeqAIJGetArray(aB, &ba)); 1565 for (i = 0; i < m; i++) { 1566 PetscInt *acols = dnnz, *bcols = onnz; /* Repurpose now-unneeded arrays */ 1567 PetscInt j0, rowlen; 1568 rowlen = ai[i + 1] - ai[i]; 1569 for (j0 = j = 0; j < rowlen; j0 = j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1570 for (; j < PetscMin(rowlen, j0 + m); j++) acols[j - j0] = cdest[aj[ai[i] + j]]; 1571 PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, acols, aa + ai[i] + j0, 
INSERT_VALUES)); 1572 } 1573 rowlen = bi[i + 1] - bi[i]; 1574 for (j0 = j = 0; j < rowlen; j0 = j) { 1575 for (; j < PetscMin(rowlen, j0 + m); j++) bcols[j - j0] = gcdest[bj[bi[i] + j]]; 1576 PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, bcols, ba + bi[i] + j0, INSERT_VALUES)); 1577 } 1578 } 1579 PetscCall(MatAssemblyBegin(Aperm, MAT_FINAL_ASSEMBLY)); 1580 PetscCall(MatAssemblyEnd(Aperm, MAT_FINAL_ASSEMBLY)); 1581 PetscCall(MatRestoreRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done)); 1582 PetscCall(MatRestoreRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done)); 1583 PetscCall(MatSeqAIJRestoreArray(aA, &aa)); 1584 PetscCall(MatSeqAIJRestoreArray(aB, &ba)); 1585 PetscCall(PetscFree4(dnnz, onnz, tdnnz, tonnz)); 1586 PetscCall(PetscFree3(work, rdest, cdest)); 1587 PetscCall(PetscFree(gcdest)); 1588 if (parcolp) PetscCall(ISDestroy(&colp)); 1589 *B = Aperm; 1590 PetscFunctionReturn(PETSC_SUCCESS); 1591 } 1592 1593 static PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[]) 1594 { 1595 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1596 1597 PetscFunctionBegin; 1598 PetscCall(MatGetSize(aij->B, NULL, nghosts)); 1599 if (ghosts) *ghosts = aij->garray; 1600 PetscFunctionReturn(PETSC_SUCCESS); 1601 } 1602 1603 static PetscErrorCode MatGetInfo_MPIAIJ(Mat matin, MatInfoType flag, MatInfo *info) 1604 { 1605 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data; 1606 Mat A = mat->A, B = mat->B; 1607 PetscLogDouble isend[5], irecv[5]; 1608 1609 PetscFunctionBegin; 1610 info->block_size = 1.0; 1611 PetscCall(MatGetInfo(A, MAT_LOCAL, info)); 1612 1613 isend[0] = info->nz_used; 1614 isend[1] = info->nz_allocated; 1615 isend[2] = info->nz_unneeded; 1616 isend[3] = info->memory; 1617 isend[4] = info->mallocs; 1618 1619 PetscCall(MatGetInfo(B, MAT_LOCAL, info)); 1620 1621 isend[0] += info->nz_used; 1622 isend[1] += info->nz_allocated; 1623 isend[2] += info->nz_unneeded; 1624 isend[3] += info->memory; 1625 isend[4] += info->mallocs; 1626 if (flag == MAT_LOCAL) { 1627 info->nz_used = isend[0]; 1628 info->nz_allocated = isend[1]; 1629 info->nz_unneeded = isend[2]; 1630 info->memory = isend[3]; 1631 info->mallocs = isend[4]; 1632 } else if (flag == MAT_GLOBAL_MAX) { 1633 PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin))); 1634 1635 info->nz_used = irecv[0]; 1636 info->nz_allocated = irecv[1]; 1637 info->nz_unneeded = irecv[2]; 1638 info->memory = irecv[3]; 1639 info->mallocs = irecv[4]; 1640 } else if (flag == MAT_GLOBAL_SUM) { 1641 PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin))); 1642 1643 info->nz_used = irecv[0]; 1644 info->nz_allocated = irecv[1]; 1645 info->nz_unneeded = irecv[2]; 1646 info->memory = irecv[3]; 1647 info->mallocs = irecv[4]; 1648 } 1649 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1650 info->fill_ratio_needed = 0; 1651 info->factor_mallocs = 0; 1652 PetscFunctionReturn(PETSC_SUCCESS); 1653 } 1654 1655 PetscErrorCode MatSetOption_MPIAIJ(Mat A, MatOption op, PetscBool flg) 1656 { 1657 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1658 1659 PetscFunctionBegin; 1660 switch (op) { 1661 case MAT_NEW_NONZERO_LOCATIONS: 1662 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1663 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1664 case MAT_KEEP_NONZERO_PATTERN: 1665 case MAT_NEW_NONZERO_LOCATION_ERR: 1666 case MAT_USE_INODES: 1667 case MAT_IGNORE_ZERO_ENTRIES: 1668 case MAT_FORM_EXPLICIT_TRANSPOSE: 1669 MatCheckPreallocated(A, 1); 1670 
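    /* These options have a purely per-block meaning, so they are forwarded verbatim to both
       the diagonal (a->A) and the off-diagonal (a->B) sequential blocks below. */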
PetscCall(MatSetOption(a->A, op, flg)); 1671 PetscCall(MatSetOption(a->B, op, flg)); 1672 break; 1673 case MAT_ROW_ORIENTED: 1674 MatCheckPreallocated(A, 1); 1675 a->roworiented = flg; 1676 1677 PetscCall(MatSetOption(a->A, op, flg)); 1678 PetscCall(MatSetOption(a->B, op, flg)); 1679 break; 1680 case MAT_FORCE_DIAGONAL_ENTRIES: 1681 case MAT_SORTED_FULL: 1682 PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); 1683 break; 1684 case MAT_IGNORE_OFF_PROC_ENTRIES: 1685 a->donotstash = flg; 1686 break; 1687 /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */ 1688 case MAT_SPD: 1689 case MAT_SYMMETRIC: 1690 case MAT_STRUCTURALLY_SYMMETRIC: 1691 case MAT_HERMITIAN: 1692 case MAT_SYMMETRY_ETERNAL: 1693 case MAT_STRUCTURAL_SYMMETRY_ETERNAL: 1694 case MAT_SPD_ETERNAL: 1695 /* if the diagonal matrix is square it inherits some of the properties above */ 1696 break; 1697 case MAT_SUBMAT_SINGLEIS: 1698 A->submat_singleis = flg; 1699 break; 1700 case MAT_STRUCTURE_ONLY: 1701 /* The option is handled directly by MatSetOption() */ 1702 break; 1703 default: 1704 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op); 1705 } 1706 PetscFunctionReturn(PETSC_SUCCESS); 1707 } 1708 1709 PetscErrorCode MatGetRow_MPIAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1710 { 1711 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data; 1712 PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p; 1713 PetscInt i, *cworkA, *cworkB, **pcA, **pcB, cstart = matin->cmap->rstart; 1714 PetscInt nztot, nzA, nzB, lrow, rstart = matin->rmap->rstart, rend = matin->rmap->rend; 1715 PetscInt *cmap, *idx_p; 1716 1717 PetscFunctionBegin; 1718 PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active"); 1719 mat->getrowactive = PETSC_TRUE; 1720 1721 if (!mat->rowvalues && (idx || v)) { 1722 /* 1723 allocate enough space to hold information from the longest row. 
1724 */ 1725 Mat_SeqAIJ *Aa = (Mat_SeqAIJ *)mat->A->data, *Ba = (Mat_SeqAIJ *)mat->B->data; 1726 PetscInt max = 1, tmp; 1727 for (i = 0; i < matin->rmap->n; i++) { 1728 tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i]; 1729 if (max < tmp) max = tmp; 1730 } 1731 PetscCall(PetscMalloc2(max, &mat->rowvalues, max, &mat->rowindices)); 1732 } 1733 1734 PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows"); 1735 lrow = row - rstart; 1736 1737 pvA = &vworkA; 1738 pcA = &cworkA; 1739 pvB = &vworkB; 1740 pcB = &cworkB; 1741 if (!v) { 1742 pvA = NULL; 1743 pvB = NULL; 1744 } 1745 if (!idx) { 1746 pcA = NULL; 1747 if (!v) pcB = NULL; 1748 } 1749 PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA)); 1750 PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB)); 1751 nztot = nzA + nzB; 1752 1753 cmap = mat->garray; 1754 if (v || idx) { 1755 if (nztot) { 1756 /* Sort by increasing column numbers, assuming A and B already sorted */ 1757 PetscInt imark = -1; 1758 if (v) { 1759 *v = v_p = mat->rowvalues; 1760 for (i = 0; i < nzB; i++) { 1761 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1762 else break; 1763 } 1764 imark = i; 1765 for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i]; 1766 for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i]; 1767 } 1768 if (idx) { 1769 *idx = idx_p = mat->rowindices; 1770 if (imark > -1) { 1771 for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i]]; 1772 } else { 1773 for (i = 0; i < nzB; i++) { 1774 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1775 else break; 1776 } 1777 imark = i; 1778 } 1779 for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart + cworkA[i]; 1780 for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i]]; 1781 } 1782 } else { 1783 if (idx) *idx = NULL; 1784 if (v) *v = NULL; 1785 } 1786 } 1787 *nz = nztot; 1788 PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA)); 1789 PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB)); 1790 PetscFunctionReturn(PETSC_SUCCESS); 1791 } 1792 1793 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1794 { 1795 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1796 1797 PetscFunctionBegin; 1798 PetscCheck(aij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first"); 1799 aij->getrowactive = PETSC_FALSE; 1800 PetscFunctionReturn(PETSC_SUCCESS); 1801 } 1802 1803 static PetscErrorCode MatNorm_MPIAIJ(Mat mat, NormType type, PetscReal *norm) 1804 { 1805 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1806 Mat_SeqAIJ *amat = (Mat_SeqAIJ *)aij->A->data, *bmat = (Mat_SeqAIJ *)aij->B->data; 1807 PetscInt i, j, cstart = mat->cmap->rstart; 1808 PetscReal sum = 0.0; 1809 const MatScalar *v, *amata, *bmata; 1810 1811 PetscFunctionBegin; 1812 if (aij->size == 1) { 1813 PetscCall(MatNorm(aij->A, type, norm)); 1814 } else { 1815 PetscCall(MatSeqAIJGetArrayRead(aij->A, &amata)); 1816 PetscCall(MatSeqAIJGetArrayRead(aij->B, &bmata)); 1817 if (type == NORM_FROBENIUS) { 1818 v = amata; 1819 for (i = 0; i < amat->nz; i++) { 1820 sum += PetscRealPart(PetscConj(*v) * (*v)); 1821 v++; 1822 } 1823 v = bmata; 1824 for (i = 0; i < bmat->nz; i++) { 1825 sum += PetscRealPart(PetscConj(*v) * (*v)); 1826 v++; 1827 } 1828 PetscCall(MPIU_Allreduce(&sum, norm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 1829 *norm = PetscSqrtReal(*norm); 1830 PetscCall(PetscLogFlops(2.0 * amat->nz + 2.0 * bmat->nz)); 1831 } else if (type == NORM_1) { /* max 
column norm */ 1832 PetscReal *tmp, *tmp2; 1833 PetscInt *jj, *garray = aij->garray; 1834 PetscCall(PetscCalloc1(mat->cmap->N + 1, &tmp)); 1835 PetscCall(PetscMalloc1(mat->cmap->N + 1, &tmp2)); 1836 *norm = 0.0; 1837 v = amata; 1838 jj = amat->j; 1839 for (j = 0; j < amat->nz; j++) { 1840 tmp[cstart + *jj++] += PetscAbsScalar(*v); 1841 v++; 1842 } 1843 v = bmata; 1844 jj = bmat->j; 1845 for (j = 0; j < bmat->nz; j++) { 1846 tmp[garray[*jj++]] += PetscAbsScalar(*v); 1847 v++; 1848 } 1849 PetscCall(MPIU_Allreduce(tmp, tmp2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 1850 for (j = 0; j < mat->cmap->N; j++) { 1851 if (tmp2[j] > *norm) *norm = tmp2[j]; 1852 } 1853 PetscCall(PetscFree(tmp)); 1854 PetscCall(PetscFree(tmp2)); 1855 PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0))); 1856 } else if (type == NORM_INFINITY) { /* max row norm */ 1857 PetscReal ntemp = 0.0; 1858 for (j = 0; j < aij->A->rmap->n; j++) { 1859 v = PetscSafePointerPlusOffset(amata, amat->i[j]); 1860 sum = 0.0; 1861 for (i = 0; i < amat->i[j + 1] - amat->i[j]; i++) { 1862 sum += PetscAbsScalar(*v); 1863 v++; 1864 } 1865 v = PetscSafePointerPlusOffset(bmata, bmat->i[j]); 1866 for (i = 0; i < bmat->i[j + 1] - bmat->i[j]; i++) { 1867 sum += PetscAbsScalar(*v); 1868 v++; 1869 } 1870 if (sum > ntemp) ntemp = sum; 1871 } 1872 PetscCall(MPIU_Allreduce(&ntemp, norm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat))); 1873 PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0))); 1874 } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for two norm"); 1875 PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &amata)); 1876 PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &bmata)); 1877 } 1878 PetscFunctionReturn(PETSC_SUCCESS); 1879 } 1880 1881 static PetscErrorCode MatTranspose_MPIAIJ(Mat A, MatReuse reuse, Mat *matout) 1882 { 1883 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *b; 1884 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *)a->A->data, *Bloc = (Mat_SeqAIJ *)a->B->data, *sub_B_diag; 1885 PetscInt M = A->rmap->N, N = A->cmap->N, ma, na, mb, nb, row, *cols, *cols_tmp, *B_diag_ilen, i, ncol, A_diag_ncol; 1886 const PetscInt *ai, *aj, *bi, *bj, *B_diag_i; 1887 Mat B, A_diag, *B_diag; 1888 const MatScalar *pbv, *bv; 1889 1890 PetscFunctionBegin; 1891 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout)); 1892 ma = A->rmap->n; 1893 na = A->cmap->n; 1894 mb = a->B->rmap->n; 1895 nb = a->B->cmap->n; 1896 ai = Aloc->i; 1897 aj = Aloc->j; 1898 bi = Bloc->i; 1899 bj = Bloc->j; 1900 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1901 PetscInt *d_nnz, *g_nnz, *o_nnz; 1902 PetscSFNode *oloc; 1903 PETSC_UNUSED PetscSF sf; 1904 1905 PetscCall(PetscMalloc4(na, &d_nnz, na, &o_nnz, nb, &g_nnz, nb, &oloc)); 1906 /* compute d_nnz for preallocation */ 1907 PetscCall(PetscArrayzero(d_nnz, na)); 1908 for (i = 0; i < ai[ma]; i++) d_nnz[aj[i]]++; 1909 /* compute local off-diagonal contributions */ 1910 PetscCall(PetscArrayzero(g_nnz, nb)); 1911 for (i = 0; i < bi[ma]; i++) g_nnz[bj[i]]++; 1912 /* map those to global */ 1913 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1914 PetscCall(PetscSFSetGraphLayout(sf, A->cmap, nb, NULL, PETSC_USE_POINTER, a->garray)); 1915 PetscCall(PetscSFSetFromOptions(sf)); 1916 PetscCall(PetscArrayzero(o_nnz, na)); 1917 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM)); 1918 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM)); 1919 PetscCall(PetscSFDestroy(&sf)); 1920 1921 
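    /* Preallocation summary: d_nnz[] now counts, per local column of A, the entries of the
       diagonal block (these become local rows of the transpose), while o_nnz[] holds the
       off-diagonal counts reduced onto their owning ranks through the star forest above,
       so B = A^T can be created below with matching MatMPIAIJSetPreallocation(). */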
PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 1922 PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M)); 1923 PetscCall(MatSetBlockSizes(B, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs))); 1924 PetscCall(MatSetType(B, ((PetscObject)A)->type_name)); 1925 PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz)); 1926 PetscCall(PetscFree4(d_nnz, o_nnz, g_nnz, oloc)); 1927 } else { 1928 B = *matout; 1929 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 1930 } 1931 1932 b = (Mat_MPIAIJ *)B->data; 1933 A_diag = a->A; 1934 B_diag = &b->A; 1935 sub_B_diag = (Mat_SeqAIJ *)(*B_diag)->data; 1936 A_diag_ncol = A_diag->cmap->N; 1937 B_diag_ilen = sub_B_diag->ilen; 1938 B_diag_i = sub_B_diag->i; 1939 1940 /* Set ilen for diagonal of B */ 1941 for (i = 0; i < A_diag_ncol; i++) B_diag_ilen[i] = B_diag_i[i + 1] - B_diag_i[i]; 1942 1943 /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done 1944 very quickly (=without using MatSetValues), because all writes are local. */ 1945 PetscCall(MatTransposeSetPrecursor(A_diag, *B_diag)); 1946 PetscCall(MatTranspose(A_diag, MAT_REUSE_MATRIX, B_diag)); 1947 1948 /* copy over the B part */ 1949 PetscCall(PetscMalloc1(bi[mb], &cols)); 1950 PetscCall(MatSeqAIJGetArrayRead(a->B, &bv)); 1951 pbv = bv; 1952 row = A->rmap->rstart; 1953 for (i = 0; i < bi[mb]; i++) cols[i] = a->garray[bj[i]]; 1954 cols_tmp = cols; 1955 for (i = 0; i < mb; i++) { 1956 ncol = bi[i + 1] - bi[i]; 1957 PetscCall(MatSetValues(B, ncol, cols_tmp, 1, &row, pbv, INSERT_VALUES)); 1958 row++; 1959 if (pbv) pbv += ncol; 1960 if (cols_tmp) cols_tmp += ncol; 1961 } 1962 PetscCall(PetscFree(cols)); 1963 PetscCall(MatSeqAIJRestoreArrayRead(a->B, &bv)); 1964 1965 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 1966 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 1967 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1968 *matout = B; 1969 } else { 1970 PetscCall(MatHeaderMerge(A, &B)); 1971 } 1972 PetscFunctionReturn(PETSC_SUCCESS); 1973 } 1974 1975 static PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat, Vec ll, Vec rr) 1976 { 1977 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1978 Mat a = aij->A, b = aij->B; 1979 PetscInt s1, s2, s3; 1980 1981 PetscFunctionBegin; 1982 PetscCall(MatGetLocalSize(mat, &s2, &s3)); 1983 if (rr) { 1984 PetscCall(VecGetLocalSize(rr, &s1)); 1985 PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size"); 1986 /* Overlap communication with computation. 
*/ 1987 PetscCall(VecScatterBegin(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1988 } 1989 if (ll) { 1990 PetscCall(VecGetLocalSize(ll, &s1)); 1991 PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size"); 1992 PetscUseTypeMethod(b, diagonalscale, ll, NULL); 1993 } 1994 /* scale the diagonal block */ 1995 PetscUseTypeMethod(a, diagonalscale, ll, rr); 1996 1997 if (rr) { 1998 /* Do a scatter end and then right scale the off-diagonal block */ 1999 PetscCall(VecScatterEnd(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2000 PetscUseTypeMethod(b, diagonalscale, NULL, aij->lvec); 2001 } 2002 PetscFunctionReturn(PETSC_SUCCESS); 2003 } 2004 2005 static PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2006 { 2007 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2008 2009 PetscFunctionBegin; 2010 PetscCall(MatSetUnfactored(a->A)); 2011 PetscFunctionReturn(PETSC_SUCCESS); 2012 } 2013 2014 static PetscErrorCode MatEqual_MPIAIJ(Mat A, Mat B, PetscBool *flag) 2015 { 2016 Mat_MPIAIJ *matB = (Mat_MPIAIJ *)B->data, *matA = (Mat_MPIAIJ *)A->data; 2017 Mat a, b, c, d; 2018 PetscBool flg; 2019 2020 PetscFunctionBegin; 2021 a = matA->A; 2022 b = matA->B; 2023 c = matB->A; 2024 d = matB->B; 2025 2026 PetscCall(MatEqual(a, c, &flg)); 2027 if (flg) PetscCall(MatEqual(b, d, &flg)); 2028 PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A))); 2029 PetscFunctionReturn(PETSC_SUCCESS); 2030 } 2031 2032 static PetscErrorCode MatCopy_MPIAIJ(Mat A, Mat B, MatStructure str) 2033 { 2034 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2035 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2036 2037 PetscFunctionBegin; 2038 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 2039 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2040 /* because of the column compression in the off-processor part of the matrix a->B, 2041 the number of columns in a->B and b->B may be different, hence we cannot call 2042 the MatCopy() directly on the two parts. If need be, we can provide a more 2043 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2044 then copying the submatrices */ 2045 PetscCall(MatCopy_Basic(A, B, str)); 2046 } else { 2047 PetscCall(MatCopy(a->A, b->A, str)); 2048 PetscCall(MatCopy(a->B, b->B, str)); 2049 } 2050 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 2051 PetscFunctionReturn(PETSC_SUCCESS); 2052 } 2053 2054 /* 2055 Computes the number of nonzeros per row needed for preallocation when X and Y 2056 have different nonzero structure. 
2057 */ 2058 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m, const PetscInt *xi, const PetscInt *xj, const PetscInt *xltog, const PetscInt *yi, const PetscInt *yj, const PetscInt *yltog, PetscInt *nnz) 2059 { 2060 PetscInt i, j, k, nzx, nzy; 2061 2062 PetscFunctionBegin; 2063 /* Set the number of nonzeros in the new matrix */ 2064 for (i = 0; i < m; i++) { 2065 const PetscInt *xjj = PetscSafePointerPlusOffset(xj, xi[i]), *yjj = PetscSafePointerPlusOffset(yj, yi[i]); 2066 nzx = xi[i + 1] - xi[i]; 2067 nzy = yi[i + 1] - yi[i]; 2068 nnz[i] = 0; 2069 for (j = 0, k = 0; j < nzx; j++) { /* Point in X */ 2070 for (; k < nzy && yltog[yjj[k]] < xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2071 if (k < nzy && yltog[yjj[k]] == xltog[xjj[j]]) k++; /* Skip duplicate */ 2072 nnz[i]++; 2073 } 2074 for (; k < nzy; k++) nnz[i]++; 2075 } 2076 PetscFunctionReturn(PETSC_SUCCESS); 2077 } 2078 2079 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2080 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz) 2081 { 2082 PetscInt m = Y->rmap->N; 2083 Mat_SeqAIJ *x = (Mat_SeqAIJ *)X->data; 2084 Mat_SeqAIJ *y = (Mat_SeqAIJ *)Y->data; 2085 2086 PetscFunctionBegin; 2087 PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz)); 2088 PetscFunctionReturn(PETSC_SUCCESS); 2089 } 2090 2091 static PetscErrorCode MatAXPY_MPIAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str) 2092 { 2093 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data, *yy = (Mat_MPIAIJ *)Y->data; 2094 2095 PetscFunctionBegin; 2096 if (str == SAME_NONZERO_PATTERN) { 2097 PetscCall(MatAXPY(yy->A, a, xx->A, str)); 2098 PetscCall(MatAXPY(yy->B, a, xx->B, str)); 2099 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2100 PetscCall(MatAXPY_Basic(Y, a, X, str)); 2101 } else { 2102 Mat B; 2103 PetscInt *nnz_d, *nnz_o; 2104 2105 PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d)); 2106 PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o)); 2107 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B)); 2108 PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name)); 2109 PetscCall(MatSetLayouts(B, Y->rmap, Y->cmap)); 2110 PetscCall(MatSetType(B, ((PetscObject)Y)->type_name)); 2111 PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A, xx->A, nnz_d)); 2112 PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o)); 2113 PetscCall(MatMPIAIJSetPreallocation(B, 0, nnz_d, 0, nnz_o)); 2114 PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str)); 2115 PetscCall(MatHeaderMerge(Y, &B)); 2116 PetscCall(PetscFree(nnz_d)); 2117 PetscCall(PetscFree(nnz_o)); 2118 } 2119 PetscFunctionReturn(PETSC_SUCCESS); 2120 } 2121 2122 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat); 2123 2124 static PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2125 { 2126 PetscFunctionBegin; 2127 if (PetscDefined(USE_COMPLEX)) { 2128 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2129 2130 PetscCall(MatConjugate_SeqAIJ(aij->A)); 2131 PetscCall(MatConjugate_SeqAIJ(aij->B)); 2132 } 2133 PetscFunctionReturn(PETSC_SUCCESS); 2134 } 2135 2136 static PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2137 { 2138 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2139 2140 PetscFunctionBegin; 2141 PetscCall(MatRealPart(a->A)); 2142 PetscCall(MatRealPart(a->B)); 2143 PetscFunctionReturn(PETSC_SUCCESS); 2144 } 2145 2146 static PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2147 { 2148 Mat_MPIAIJ *a = (Mat_MPIAIJ 
*)A->data; 2149 2150 PetscFunctionBegin; 2151 PetscCall(MatImaginaryPart(a->A)); 2152 PetscCall(MatImaginaryPart(a->B)); 2153 PetscFunctionReturn(PETSC_SUCCESS); 2154 } 2155 2156 static PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2157 { 2158 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2159 PetscInt i, *idxb = NULL, m = A->rmap->n; 2160 PetscScalar *va, *vv; 2161 Vec vB, vA; 2162 const PetscScalar *vb; 2163 2164 PetscFunctionBegin; 2165 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vA)); 2166 PetscCall(MatGetRowMaxAbs(a->A, vA, idx)); 2167 2168 PetscCall(VecGetArrayWrite(vA, &va)); 2169 if (idx) { 2170 for (i = 0; i < m; i++) { 2171 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2172 } 2173 } 2174 2175 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vB)); 2176 PetscCall(PetscMalloc1(m, &idxb)); 2177 PetscCall(MatGetRowMaxAbs(a->B, vB, idxb)); 2178 2179 PetscCall(VecGetArrayWrite(v, &vv)); 2180 PetscCall(VecGetArrayRead(vB, &vb)); 2181 for (i = 0; i < m; i++) { 2182 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2183 vv[i] = vb[i]; 2184 if (idx) idx[i] = a->garray[idxb[i]]; 2185 } else { 2186 vv[i] = va[i]; 2187 if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) idx[i] = a->garray[idxb[i]]; 2188 } 2189 } 2190 PetscCall(VecRestoreArrayWrite(vA, &vv)); 2191 PetscCall(VecRestoreArrayWrite(vA, &va)); 2192 PetscCall(VecRestoreArrayRead(vB, &vb)); 2193 PetscCall(PetscFree(idxb)); 2194 PetscCall(VecDestroy(&vA)); 2195 PetscCall(VecDestroy(&vB)); 2196 PetscFunctionReturn(PETSC_SUCCESS); 2197 } 2198 2199 static PetscErrorCode MatGetRowSumAbs_MPIAIJ(Mat A, Vec v) 2200 { 2201 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2202 PetscInt m = A->rmap->n; 2203 Vec vB, vA; 2204 2205 PetscFunctionBegin; 2206 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vA)); 2207 PetscCall(MatGetRowSumAbs(a->A, vA)); 2208 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vB)); 2209 PetscCall(MatGetRowSumAbs(a->B, vB)); 2210 PetscCall(VecAXPY(vA, 1.0, vB)); 2211 PetscCall(VecDestroy(&vB)); 2212 PetscCall(VecCopy(vA, v)); 2213 PetscCall(VecDestroy(&vA)); 2214 PetscFunctionReturn(PETSC_SUCCESS); 2215 } 2216 2217 static PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2218 { 2219 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 2220 PetscInt m = A->rmap->n, n = A->cmap->n; 2221 PetscInt cstart = A->cmap->rstart, cend = A->cmap->rend; 2222 PetscInt *cmap = mat->garray; 2223 PetscInt *diagIdx, *offdiagIdx; 2224 Vec diagV, offdiagV; 2225 PetscScalar *a, *diagA, *offdiagA; 2226 const PetscScalar *ba, *bav; 2227 PetscInt r, j, col, ncols, *bi, *bj; 2228 Mat B = mat->B; 2229 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 2230 2231 PetscFunctionBegin; 2232 /* When a process holds entire A and other processes have no entry */ 2233 if (A->cmap->N == n) { 2234 PetscCall(VecGetArrayWrite(v, &diagA)); 2235 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV)); 2236 PetscCall(MatGetRowMinAbs(mat->A, diagV, idx)); 2237 PetscCall(VecDestroy(&diagV)); 2238 PetscCall(VecRestoreArrayWrite(v, &diagA)); 2239 PetscFunctionReturn(PETSC_SUCCESS); 2240 } else if (n == 0) { 2241 if (m) { 2242 PetscCall(VecGetArrayWrite(v, &a)); 2243 for (r = 0; r < m; r++) { 2244 a[r] = 0.0; 2245 if (idx) idx[r] = -1; 2246 } 2247 PetscCall(VecRestoreArrayWrite(v, &a)); 2248 } 2249 PetscFunctionReturn(PETSC_SUCCESS); 2250 } 2251 2252 PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx)); 2253 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2254 
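  /* Strategy: the per-row minimum in absolute value is computed separately for the diagonal
     block (into diagV) and for the column-compressed off-diagonal block (into offdiagV),
     and the two results are merged row by row further below. The off-diagonal pass must
     also detect implicit zeros removed by column compression, which is what the
     "first hole in the cmap" search handles. */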
PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2255 PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx)); 2256 2257 /* Get offdiagIdx[] for implicit 0.0 */ 2258 PetscCall(MatSeqAIJGetArrayRead(B, &bav)); 2259 ba = bav; 2260 bi = b->i; 2261 bj = b->j; 2262 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2263 for (r = 0; r < m; r++) { 2264 ncols = bi[r + 1] - bi[r]; 2265 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2266 offdiagA[r] = *ba; 2267 offdiagIdx[r] = cmap[0]; 2268 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2269 offdiagA[r] = 0.0; 2270 2271 /* Find first hole in the cmap */ 2272 for (j = 0; j < ncols; j++) { 2273 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2274 if (col > j && j < cstart) { 2275 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2276 break; 2277 } else if (col > j + n && j >= cstart) { 2278 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2279 break; 2280 } 2281 } 2282 if (j == ncols && ncols < A->cmap->N - n) { 2283 /* a hole is outside compressed Bcols */ 2284 if (ncols == 0) { 2285 if (cstart) { 2286 offdiagIdx[r] = 0; 2287 } else offdiagIdx[r] = cend; 2288 } else { /* ncols > 0 */ 2289 offdiagIdx[r] = cmap[ncols - 1] + 1; 2290 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2291 } 2292 } 2293 } 2294 2295 for (j = 0; j < ncols; j++) { 2296 if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) { 2297 offdiagA[r] = *ba; 2298 offdiagIdx[r] = cmap[*bj]; 2299 } 2300 ba++; 2301 bj++; 2302 } 2303 } 2304 2305 PetscCall(VecGetArrayWrite(v, &a)); 2306 PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA)); 2307 for (r = 0; r < m; ++r) { 2308 if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) { 2309 a[r] = diagA[r]; 2310 if (idx) idx[r] = cstart + diagIdx[r]; 2311 } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) { 2312 a[r] = diagA[r]; 2313 if (idx) { 2314 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2315 idx[r] = cstart + diagIdx[r]; 2316 } else idx[r] = offdiagIdx[r]; 2317 } 2318 } else { 2319 a[r] = offdiagA[r]; 2320 if (idx) idx[r] = offdiagIdx[r]; 2321 } 2322 } 2323 PetscCall(MatSeqAIJRestoreArrayRead(B, &bav)); 2324 PetscCall(VecRestoreArrayWrite(v, &a)); 2325 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA)); 2326 PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 2327 PetscCall(VecDestroy(&diagV)); 2328 PetscCall(VecDestroy(&offdiagV)); 2329 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2330 PetscFunctionReturn(PETSC_SUCCESS); 2331 } 2332 2333 static PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2334 { 2335 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 2336 PetscInt m = A->rmap->n, n = A->cmap->n; 2337 PetscInt cstart = A->cmap->rstart, cend = A->cmap->rend; 2338 PetscInt *cmap = mat->garray; 2339 PetscInt *diagIdx, *offdiagIdx; 2340 Vec diagV, offdiagV; 2341 PetscScalar *a, *diagA, *offdiagA; 2342 const PetscScalar *ba, *bav; 2343 PetscInt r, j, col, ncols, *bi, *bj; 2344 Mat B = mat->B; 2345 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 2346 2347 PetscFunctionBegin; 2348 /* When a process holds entire A and other processes have no entry */ 2349 if (A->cmap->N == n) { 2350 PetscCall(VecGetArrayWrite(v, &diagA)); 2351 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV)); 2352 PetscCall(MatGetRowMin(mat->A, diagV, idx)); 2353 PetscCall(VecDestroy(&diagV)); 2354 PetscCall(VecRestoreArrayWrite(v, &diagA)); 2355 PetscFunctionReturn(PETSC_SUCCESS); 2356 } else if (n == 0) { 2357 if (m) 
{ 2358 PetscCall(VecGetArrayWrite(v, &a)); 2359 for (r = 0; r < m; r++) { 2360 a[r] = PETSC_MAX_REAL; 2361 if (idx) idx[r] = -1; 2362 } 2363 PetscCall(VecRestoreArrayWrite(v, &a)); 2364 } 2365 PetscFunctionReturn(PETSC_SUCCESS); 2366 } 2367 2368 PetscCall(PetscCalloc2(m, &diagIdx, m, &offdiagIdx)); 2369 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2370 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2371 PetscCall(MatGetRowMin(mat->A, diagV, diagIdx)); 2372 2373 /* Get offdiagIdx[] for implicit 0.0 */ 2374 PetscCall(MatSeqAIJGetArrayRead(B, &bav)); 2375 ba = bav; 2376 bi = b->i; 2377 bj = b->j; 2378 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2379 for (r = 0; r < m; r++) { 2380 ncols = bi[r + 1] - bi[r]; 2381 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2382 offdiagA[r] = *ba; 2383 offdiagIdx[r] = cmap[0]; 2384 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2385 offdiagA[r] = 0.0; 2386 2387 /* Find first hole in the cmap */ 2388 for (j = 0; j < ncols; j++) { 2389 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2390 if (col > j && j < cstart) { 2391 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2392 break; 2393 } else if (col > j + n && j >= cstart) { 2394 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2395 break; 2396 } 2397 } 2398 if (j == ncols && ncols < A->cmap->N - n) { 2399 /* a hole is outside compressed Bcols */ 2400 if (ncols == 0) { 2401 if (cstart) { 2402 offdiagIdx[r] = 0; 2403 } else offdiagIdx[r] = cend; 2404 } else { /* ncols > 0 */ 2405 offdiagIdx[r] = cmap[ncols - 1] + 1; 2406 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2407 } 2408 } 2409 } 2410 2411 for (j = 0; j < ncols; j++) { 2412 if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) { 2413 offdiagA[r] = *ba; 2414 offdiagIdx[r] = cmap[*bj]; 2415 } 2416 ba++; 2417 bj++; 2418 } 2419 } 2420 2421 PetscCall(VecGetArrayWrite(v, &a)); 2422 PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA)); 2423 for (r = 0; r < m; ++r) { 2424 if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) { 2425 a[r] = diagA[r]; 2426 if (idx) idx[r] = cstart + diagIdx[r]; 2427 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2428 a[r] = diagA[r]; 2429 if (idx) { 2430 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2431 idx[r] = cstart + diagIdx[r]; 2432 } else idx[r] = offdiagIdx[r]; 2433 } 2434 } else { 2435 a[r] = offdiagA[r]; 2436 if (idx) idx[r] = offdiagIdx[r]; 2437 } 2438 } 2439 PetscCall(MatSeqAIJRestoreArrayRead(B, &bav)); 2440 PetscCall(VecRestoreArrayWrite(v, &a)); 2441 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA)); 2442 PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 2443 PetscCall(VecDestroy(&diagV)); 2444 PetscCall(VecDestroy(&offdiagV)); 2445 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2446 PetscFunctionReturn(PETSC_SUCCESS); 2447 } 2448 2449 static PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2450 { 2451 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 2452 PetscInt m = A->rmap->n, n = A->cmap->n; 2453 PetscInt cstart = A->cmap->rstart, cend = A->cmap->rend; 2454 PetscInt *cmap = mat->garray; 2455 PetscInt *diagIdx, *offdiagIdx; 2456 Vec diagV, offdiagV; 2457 PetscScalar *a, *diagA, *offdiagA; 2458 const PetscScalar *ba, *bav; 2459 PetscInt r, j, col, ncols, *bi, *bj; 2460 Mat B = mat->B; 2461 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 2462 2463 PetscFunctionBegin; 2464 /* When a process holds entire A and other processes have no entry */ 2465 if 
(A->cmap->N == n) { 2466 PetscCall(VecGetArrayWrite(v, &diagA)); 2467 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV)); 2468 PetscCall(MatGetRowMax(mat->A, diagV, idx)); 2469 PetscCall(VecDestroy(&diagV)); 2470 PetscCall(VecRestoreArrayWrite(v, &diagA)); 2471 PetscFunctionReturn(PETSC_SUCCESS); 2472 } else if (n == 0) { 2473 if (m) { 2474 PetscCall(VecGetArrayWrite(v, &a)); 2475 for (r = 0; r < m; r++) { 2476 a[r] = PETSC_MIN_REAL; 2477 if (idx) idx[r] = -1; 2478 } 2479 PetscCall(VecRestoreArrayWrite(v, &a)); 2480 } 2481 PetscFunctionReturn(PETSC_SUCCESS); 2482 } 2483 2484 PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx)); 2485 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2486 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2487 PetscCall(MatGetRowMax(mat->A, diagV, diagIdx)); 2488 2489 /* Get offdiagIdx[] for implicit 0.0 */ 2490 PetscCall(MatSeqAIJGetArrayRead(B, &bav)); 2491 ba = bav; 2492 bi = b->i; 2493 bj = b->j; 2494 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2495 for (r = 0; r < m; r++) { 2496 ncols = bi[r + 1] - bi[r]; 2497 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2498 offdiagA[r] = *ba; 2499 offdiagIdx[r] = cmap[0]; 2500 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2501 offdiagA[r] = 0.0; 2502 2503 /* Find first hole in the cmap */ 2504 for (j = 0; j < ncols; j++) { 2505 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2506 if (col > j && j < cstart) { 2507 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2508 break; 2509 } else if (col > j + n && j >= cstart) { 2510 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2511 break; 2512 } 2513 } 2514 if (j == ncols && ncols < A->cmap->N - n) { 2515 /* a hole is outside compressed Bcols */ 2516 if (ncols == 0) { 2517 if (cstart) { 2518 offdiagIdx[r] = 0; 2519 } else offdiagIdx[r] = cend; 2520 } else { /* ncols > 0 */ 2521 offdiagIdx[r] = cmap[ncols - 1] + 1; 2522 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2523 } 2524 } 2525 } 2526 2527 for (j = 0; j < ncols; j++) { 2528 if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) { 2529 offdiagA[r] = *ba; 2530 offdiagIdx[r] = cmap[*bj]; 2531 } 2532 ba++; 2533 bj++; 2534 } 2535 } 2536 2537 PetscCall(VecGetArrayWrite(v, &a)); 2538 PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA)); 2539 for (r = 0; r < m; ++r) { 2540 if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) { 2541 a[r] = diagA[r]; 2542 if (idx) idx[r] = cstart + diagIdx[r]; 2543 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2544 a[r] = diagA[r]; 2545 if (idx) { 2546 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2547 idx[r] = cstart + diagIdx[r]; 2548 } else idx[r] = offdiagIdx[r]; 2549 } 2550 } else { 2551 a[r] = offdiagA[r]; 2552 if (idx) idx[r] = offdiagIdx[r]; 2553 } 2554 } 2555 PetscCall(MatSeqAIJRestoreArrayRead(B, &bav)); 2556 PetscCall(VecRestoreArrayWrite(v, &a)); 2557 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA)); 2558 PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 2559 PetscCall(VecDestroy(&diagV)); 2560 PetscCall(VecDestroy(&offdiagV)); 2561 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2562 PetscFunctionReturn(PETSC_SUCCESS); 2563 } 2564 2565 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat, Mat *newmat) 2566 { 2567 Mat *dummy; 2568 2569 PetscFunctionBegin; 2570 PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat, MAT_DO_NOT_GET_VALUES, MAT_INITIAL_MATRIX, &dummy)); 2571 *newmat = *dummy; 2572 
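  /* MatCreateSubMatrix_MPIAIJ_All() returns a freshly allocated array holding the gathered
     sequential matrix; that matrix is handed back through *newmat, so only the array
     wrapper is freed here. */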
PetscCall(PetscFree(dummy)); 2573 PetscFunctionReturn(PETSC_SUCCESS); 2574 } 2575 2576 static PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A, const PetscScalar **values) 2577 { 2578 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2579 2580 PetscFunctionBegin; 2581 PetscCall(MatInvertBlockDiagonal(a->A, values)); 2582 A->factorerrortype = a->A->factorerrortype; 2583 PetscFunctionReturn(PETSC_SUCCESS); 2584 } 2585 2586 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x, PetscRandom rctx) 2587 { 2588 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)x->data; 2589 2590 PetscFunctionBegin; 2591 PetscCheck(x->assembled || x->preallocated, PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed"); 2592 PetscCall(MatSetRandom(aij->A, rctx)); 2593 if (x->assembled) { 2594 PetscCall(MatSetRandom(aij->B, rctx)); 2595 } else { 2596 PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B, x->cmap->rstart, x->cmap->rend, rctx)); 2597 } 2598 PetscCall(MatAssemblyBegin(x, MAT_FINAL_ASSEMBLY)); 2599 PetscCall(MatAssemblyEnd(x, MAT_FINAL_ASSEMBLY)); 2600 PetscFunctionReturn(PETSC_SUCCESS); 2601 } 2602 2603 static PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A, PetscBool sc) 2604 { 2605 PetscFunctionBegin; 2606 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2607 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2608 PetscFunctionReturn(PETSC_SUCCESS); 2609 } 2610 2611 /*@ 2612 MatMPIAIJGetNumberNonzeros - gets the number of nonzeros in the matrix on this MPI rank 2613 2614 Not Collective 2615 2616 Input Parameter: 2617 . A - the matrix 2618 2619 Output Parameter: 2620 . nz - the number of nonzeros 2621 2622 Level: advanced 2623 2624 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ` 2625 @*/ 2626 PetscErrorCode MatMPIAIJGetNumberNonzeros(Mat A, PetscCount *nz) 2627 { 2628 Mat_MPIAIJ *maij = (Mat_MPIAIJ *)A->data; 2629 Mat_SeqAIJ *aaij = (Mat_SeqAIJ *)maij->A->data, *baij = (Mat_SeqAIJ *)maij->B->data; 2630 PetscBool isaij; 2631 2632 PetscFunctionBegin; 2633 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIAIJ, &isaij)); 2634 PetscCheck(isaij, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)A)->type_name); 2635 *nz = aaij->i[A->rmap->n] + baij->i[A->rmap->n]; 2636 PetscFunctionReturn(PETSC_SUCCESS); 2637 } 2638 2639 /*@ 2640 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2641 2642 Collective 2643 2644 Input Parameters: 2645 + A - the matrix 2646 - sc - `PETSC_TRUE` indicates use the scalable algorithm (default is not to use the scalable algorithm) 2647 2648 Level: advanced 2649 2650 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ` 2651 @*/ 2652 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A, PetscBool sc) 2653 { 2654 PetscFunctionBegin; 2655 PetscTryMethod(A, "MatMPIAIJSetUseScalableIncreaseOverlap_C", (Mat, PetscBool), (A, sc)); 2656 PetscFunctionReturn(PETSC_SUCCESS); 2657 } 2658 2659 PetscErrorCode MatSetFromOptions_MPIAIJ(Mat A, PetscOptionItems *PetscOptionsObject) 2660 { 2661 PetscBool sc = PETSC_FALSE, flg; 2662 2663 PetscFunctionBegin; 2664 PetscOptionsHeadBegin(PetscOptionsObject, "MPIAIJ options"); 2665 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2666 PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable", "Use a scalable algorithm to compute the overlap", "MatIncreaseOverlap", sc, &sc, &flg)); 2667 if (flg) 
PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, sc)); 2668 PetscOptionsHeadEnd(); 2669 PetscFunctionReturn(PETSC_SUCCESS); 2670 } 2671 2672 static PetscErrorCode MatShift_MPIAIJ(Mat Y, PetscScalar a) 2673 { 2674 Mat_MPIAIJ *maij = (Mat_MPIAIJ *)Y->data; 2675 Mat_SeqAIJ *aij = (Mat_SeqAIJ *)maij->A->data; 2676 2677 PetscFunctionBegin; 2678 if (!Y->preallocated) { 2679 PetscCall(MatMPIAIJSetPreallocation(Y, 1, NULL, 0, NULL)); 2680 } else if (!aij->nz) { /* It does not matter if diagonals of Y only partially lie in maij->A. We just need an estimated preallocation. */ 2681 PetscInt nonew = aij->nonew; 2682 PetscCall(MatSeqAIJSetPreallocation(maij->A, 1, NULL)); 2683 aij->nonew = nonew; 2684 } 2685 PetscCall(MatShift_Basic(Y, a)); 2686 PetscFunctionReturn(PETSC_SUCCESS); 2687 } 2688 2689 static PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A, PetscBool *missing, PetscInt *d) 2690 { 2691 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2692 2693 PetscFunctionBegin; 2694 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices"); 2695 PetscCall(MatMissingDiagonal(a->A, missing, d)); 2696 if (d) { 2697 PetscInt rstart; 2698 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 2699 *d += rstart; 2700 } 2701 PetscFunctionReturn(PETSC_SUCCESS); 2702 } 2703 2704 static PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A, PetscInt nblocks, const PetscInt *bsizes, PetscScalar *diag) 2705 { 2706 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2707 2708 PetscFunctionBegin; 2709 PetscCall(MatInvertVariableBlockDiagonal(a->A, nblocks, bsizes, diag)); 2710 PetscFunctionReturn(PETSC_SUCCESS); 2711 } 2712 2713 static PetscErrorCode MatEliminateZeros_MPIAIJ(Mat A, PetscBool keep) 2714 { 2715 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2716 2717 PetscFunctionBegin; 2718 PetscCall(MatEliminateZeros_SeqAIJ(a->A, keep)); // possibly keep zero diagonal coefficients 2719 PetscCall(MatEliminateZeros_SeqAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients 2720 PetscFunctionReturn(PETSC_SUCCESS); 2721 } 2722 2723 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2724 MatGetRow_MPIAIJ, 2725 MatRestoreRow_MPIAIJ, 2726 MatMult_MPIAIJ, 2727 /* 4*/ MatMultAdd_MPIAIJ, 2728 MatMultTranspose_MPIAIJ, 2729 MatMultTransposeAdd_MPIAIJ, 2730 NULL, 2731 NULL, 2732 NULL, 2733 /*10*/ NULL, 2734 NULL, 2735 NULL, 2736 MatSOR_MPIAIJ, 2737 MatTranspose_MPIAIJ, 2738 /*15*/ MatGetInfo_MPIAIJ, 2739 MatEqual_MPIAIJ, 2740 MatGetDiagonal_MPIAIJ, 2741 MatDiagonalScale_MPIAIJ, 2742 MatNorm_MPIAIJ, 2743 /*20*/ MatAssemblyBegin_MPIAIJ, 2744 MatAssemblyEnd_MPIAIJ, 2745 MatSetOption_MPIAIJ, 2746 MatZeroEntries_MPIAIJ, 2747 /*24*/ MatZeroRows_MPIAIJ, 2748 NULL, 2749 NULL, 2750 NULL, 2751 NULL, 2752 /*29*/ MatSetUp_MPI_Hash, 2753 NULL, 2754 NULL, 2755 MatGetDiagonalBlock_MPIAIJ, 2756 NULL, 2757 /*34*/ MatDuplicate_MPIAIJ, 2758 NULL, 2759 NULL, 2760 NULL, 2761 NULL, 2762 /*39*/ MatAXPY_MPIAIJ, 2763 MatCreateSubMatrices_MPIAIJ, 2764 MatIncreaseOverlap_MPIAIJ, 2765 MatGetValues_MPIAIJ, 2766 MatCopy_MPIAIJ, 2767 /*44*/ MatGetRowMax_MPIAIJ, 2768 MatScale_MPIAIJ, 2769 MatShift_MPIAIJ, 2770 MatDiagonalSet_MPIAIJ, 2771 MatZeroRowsColumns_MPIAIJ, 2772 /*49*/ MatSetRandom_MPIAIJ, 2773 MatGetRowIJ_MPIAIJ, 2774 MatRestoreRowIJ_MPIAIJ, 2775 NULL, 2776 NULL, 2777 /*54*/ MatFDColoringCreate_MPIXAIJ, 2778 NULL, 2779 MatSetUnfactored_MPIAIJ, 2780 MatPermute_MPIAIJ, 2781 NULL, 2782 /*59*/ MatCreateSubMatrix_MPIAIJ, 2783 MatDestroy_MPIAIJ, 2784 MatView_MPIAIJ, 2785 NULL, 2786 NULL, 2787 /*64*/ NULL, 2788 
MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2789 NULL, 2790 NULL, 2791 NULL, 2792 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2793 MatGetRowMinAbs_MPIAIJ, 2794 NULL, 2795 NULL, 2796 NULL, 2797 NULL, 2798 /*75*/ MatFDColoringApply_AIJ, 2799 MatSetFromOptions_MPIAIJ, 2800 NULL, 2801 NULL, 2802 MatFindZeroDiagonals_MPIAIJ, 2803 /*80*/ NULL, 2804 NULL, 2805 NULL, 2806 /*83*/ MatLoad_MPIAIJ, 2807 MatIsSymmetric_MPIAIJ, 2808 NULL, 2809 NULL, 2810 NULL, 2811 NULL, 2812 /*89*/ NULL, 2813 NULL, 2814 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2815 NULL, 2816 NULL, 2817 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2818 NULL, 2819 NULL, 2820 NULL, 2821 MatBindToCPU_MPIAIJ, 2822 /*99*/ MatProductSetFromOptions_MPIAIJ, 2823 NULL, 2824 NULL, 2825 MatConjugate_MPIAIJ, 2826 NULL, 2827 /*104*/ MatSetValuesRow_MPIAIJ, 2828 MatRealPart_MPIAIJ, 2829 MatImaginaryPart_MPIAIJ, 2830 NULL, 2831 NULL, 2832 /*109*/ NULL, 2833 NULL, 2834 MatGetRowMin_MPIAIJ, 2835 NULL, 2836 MatMissingDiagonal_MPIAIJ, 2837 /*114*/ MatGetSeqNonzeroStructure_MPIAIJ, 2838 NULL, 2839 MatGetGhosts_MPIAIJ, 2840 NULL, 2841 NULL, 2842 /*119*/ MatMultDiagonalBlock_MPIAIJ, 2843 NULL, 2844 NULL, 2845 NULL, 2846 MatGetMultiProcBlock_MPIAIJ, 2847 /*124*/ MatFindNonzeroRows_MPIAIJ, 2848 MatGetColumnReductions_MPIAIJ, 2849 MatInvertBlockDiagonal_MPIAIJ, 2850 MatInvertVariableBlockDiagonal_MPIAIJ, 2851 MatCreateSubMatricesMPI_MPIAIJ, 2852 /*129*/ NULL, 2853 NULL, 2854 NULL, 2855 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2856 NULL, 2857 /*134*/ NULL, 2858 NULL, 2859 NULL, 2860 NULL, 2861 NULL, 2862 /*139*/ MatSetBlockSizes_MPIAIJ, 2863 NULL, 2864 NULL, 2865 MatFDColoringSetUp_MPIXAIJ, 2866 MatFindOffBlockDiagonalEntries_MPIAIJ, 2867 MatCreateMPIMatConcatenateSeqMat_MPIAIJ, 2868 /*145*/ NULL, 2869 NULL, 2870 NULL, 2871 MatCreateGraph_Simple_AIJ, 2872 NULL, 2873 /*150*/ NULL, 2874 MatEliminateZeros_MPIAIJ, 2875 MatGetRowSumAbs_MPIAIJ}; 2876 2877 static PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2878 { 2879 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2880 2881 PetscFunctionBegin; 2882 PetscCall(MatStoreValues(aij->A)); 2883 PetscCall(MatStoreValues(aij->B)); 2884 PetscFunctionReturn(PETSC_SUCCESS); 2885 } 2886 2887 static PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2888 { 2889 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2890 2891 PetscFunctionBegin; 2892 PetscCall(MatRetrieveValues(aij->A)); 2893 PetscCall(MatRetrieveValues(aij->B)); 2894 PetscFunctionReturn(PETSC_SUCCESS); 2895 } 2896 2897 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[]) 2898 { 2899 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2900 PetscMPIInt size; 2901 2902 PetscFunctionBegin; 2903 if (B->hash_active) { 2904 B->ops[0] = b->cops; 2905 B->hash_active = PETSC_FALSE; 2906 } 2907 PetscCall(PetscLayoutSetUp(B->rmap)); 2908 PetscCall(PetscLayoutSetUp(B->cmap)); 2909 2910 #if defined(PETSC_USE_CTABLE) 2911 PetscCall(PetscHMapIDestroy(&b->colmap)); 2912 #else 2913 PetscCall(PetscFree(b->colmap)); 2914 #endif 2915 PetscCall(PetscFree(b->garray)); 2916 PetscCall(VecDestroy(&b->lvec)); 2917 PetscCall(VecScatterDestroy(&b->Mvctx)); 2918 2919 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 2920 2921 MatSeqXAIJGetOptions_Private(b->B); 2922 PetscCall(MatDestroy(&b->B)); 2923 PetscCall(MatCreate(PETSC_COMM_SELF, &b->B)); 2924 PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? 
B->cmap->N : 0)); 2925 PetscCall(MatSetBlockSizesFromMats(b->B, B, B)); 2926 PetscCall(MatSetType(b->B, MATSEQAIJ)); 2927 MatSeqXAIJRestoreOptions_Private(b->B); 2928 2929 MatSeqXAIJGetOptions_Private(b->A); 2930 PetscCall(MatDestroy(&b->A)); 2931 PetscCall(MatCreate(PETSC_COMM_SELF, &b->A)); 2932 PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n)); 2933 PetscCall(MatSetBlockSizesFromMats(b->A, B, B)); 2934 PetscCall(MatSetType(b->A, MATSEQAIJ)); 2935 MatSeqXAIJRestoreOptions_Private(b->A); 2936 2937 PetscCall(MatSeqAIJSetPreallocation(b->A, d_nz, d_nnz)); 2938 PetscCall(MatSeqAIJSetPreallocation(b->B, o_nz, o_nnz)); 2939 B->preallocated = PETSC_TRUE; 2940 B->was_assembled = PETSC_FALSE; 2941 B->assembled = PETSC_FALSE; 2942 PetscFunctionReturn(PETSC_SUCCESS); 2943 } 2944 2945 static PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) 2946 { 2947 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2948 2949 PetscFunctionBegin; 2950 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 2951 PetscCall(PetscLayoutSetUp(B->rmap)); 2952 PetscCall(PetscLayoutSetUp(B->cmap)); 2953 2954 #if defined(PETSC_USE_CTABLE) 2955 PetscCall(PetscHMapIDestroy(&b->colmap)); 2956 #else 2957 PetscCall(PetscFree(b->colmap)); 2958 #endif 2959 PetscCall(PetscFree(b->garray)); 2960 PetscCall(VecDestroy(&b->lvec)); 2961 PetscCall(VecScatterDestroy(&b->Mvctx)); 2962 2963 PetscCall(MatResetPreallocation(b->A)); 2964 PetscCall(MatResetPreallocation(b->B)); 2965 B->preallocated = PETSC_TRUE; 2966 B->was_assembled = PETSC_FALSE; 2967 B->assembled = PETSC_FALSE; 2968 PetscFunctionReturn(PETSC_SUCCESS); 2969 } 2970 2971 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat) 2972 { 2973 Mat mat; 2974 Mat_MPIAIJ *a, *oldmat = (Mat_MPIAIJ *)matin->data; 2975 2976 PetscFunctionBegin; 2977 *newmat = NULL; 2978 PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat)); 2979 PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N)); 2980 PetscCall(MatSetBlockSizesFromMats(mat, matin, matin)); 2981 PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name)); 2982 a = (Mat_MPIAIJ *)mat->data; 2983 2984 mat->factortype = matin->factortype; 2985 mat->assembled = matin->assembled; 2986 mat->insertmode = NOT_SET_VALUES; 2987 2988 a->size = oldmat->size; 2989 a->rank = oldmat->rank; 2990 a->donotstash = oldmat->donotstash; 2991 a->roworiented = oldmat->roworiented; 2992 a->rowindices = NULL; 2993 a->rowvalues = NULL; 2994 a->getrowactive = PETSC_FALSE; 2995 2996 PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap)); 2997 PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap)); 2998 if (matin->hash_active) { 2999 PetscCall(MatSetUp(mat)); 3000 } else { 3001 mat->preallocated = matin->preallocated; 3002 if (oldmat->colmap) { 3003 #if defined(PETSC_USE_CTABLE) 3004 PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap)); 3005 #else 3006 PetscCall(PetscMalloc1(mat->cmap->N, &a->colmap)); 3007 PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, mat->cmap->N)); 3008 #endif 3009 } else a->colmap = NULL; 3010 if (oldmat->garray) { 3011 PetscInt len; 3012 len = oldmat->B->cmap->n; 3013 PetscCall(PetscMalloc1(len + 1, &a->garray)); 3014 if (len) PetscCall(PetscArraycpy(a->garray, oldmat->garray, len)); 3015 } else a->garray = NULL; 3016 3017 /* It may happen MatDuplicate is called with a non-assembled matrix 3018 In fact, MatDuplicate only requires the matrix to be preallocated 3019 This may happen inside a DMCreateMatrix_Shell */ 3020 if (oldmat->lvec) 
PetscCall(VecDuplicate(oldmat->lvec, &a->lvec)); 3021 if (oldmat->Mvctx) PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx)); 3022 PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A)); 3023 PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B)); 3024 } 3025 PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist)); 3026 *newmat = mat; 3027 PetscFunctionReturn(PETSC_SUCCESS); 3028 } 3029 3030 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3031 { 3032 PetscBool isbinary, ishdf5; 3033 3034 PetscFunctionBegin; 3035 PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1); 3036 PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2); 3037 /* force binary viewer to load .info file if it has not yet done so */ 3038 PetscCall(PetscViewerSetUp(viewer)); 3039 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 3040 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5)); 3041 if (isbinary) { 3042 PetscCall(MatLoad_MPIAIJ_Binary(newMat, viewer)); 3043 } else if (ishdf5) { 3044 #if defined(PETSC_HAVE_HDF5) 3045 PetscCall(MatLoad_AIJ_HDF5(newMat, viewer)); 3046 #else 3047 SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5"); 3048 #endif 3049 } else { 3050 SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name); 3051 } 3052 PetscFunctionReturn(PETSC_SUCCESS); 3053 } 3054 3055 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer) 3056 { 3057 PetscInt header[4], M, N, m, nz, rows, cols, sum, i; 3058 PetscInt *rowidxs, *colidxs; 3059 PetscScalar *matvals; 3060 3061 PetscFunctionBegin; 3062 PetscCall(PetscViewerSetUp(viewer)); 3063 3064 /* read in matrix header */ 3065 PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT)); 3066 PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file"); 3067 M = header[1]; 3068 N = header[2]; 3069 nz = header[3]; 3070 PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M); 3071 PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N); 3072 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIAIJ"); 3073 3074 /* set block sizes from the viewer's .info file */ 3075 PetscCall(MatLoad_Binary_BlockSizes(mat, viewer)); 3076 /* set global sizes if not set already */ 3077 if (mat->rmap->N < 0) mat->rmap->N = M; 3078 if (mat->cmap->N < 0) mat->cmap->N = N; 3079 PetscCall(PetscLayoutSetUp(mat->rmap)); 3080 PetscCall(PetscLayoutSetUp(mat->cmap)); 3081 3082 /* check if the matrix sizes are correct */ 3083 PetscCall(MatGetSize(mat, &rows, &cols)); 3084 PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols); 3085 3086 /* read in row lengths and build row indices */ 3087 PetscCall(MatGetLocalSize(mat, &m, NULL)); 3088 PetscCall(PetscMalloc1(m + 1, &rowidxs)); 3089 PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, 
PETSC_INT)); 3090 rowidxs[0] = 0; 3091 for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i]; 3092 if (nz != PETSC_MAX_INT) { 3093 PetscCall(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer))); 3094 PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum); 3095 } 3096 3097 /* read in column indices and matrix values */ 3098 PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals)); 3099 PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 3100 PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 3101 /* store matrix indices and values */ 3102 PetscCall(MatMPIAIJSetPreallocationCSR(mat, rowidxs, colidxs, matvals)); 3103 PetscCall(PetscFree(rowidxs)); 3104 PetscCall(PetscFree2(colidxs, matvals)); 3105 PetscFunctionReturn(PETSC_SUCCESS); 3106 } 3107 3108 /* Not scalable because of ISAllGather() unless getting all columns. */ 3109 static PetscErrorCode ISGetSeqIS_Private(Mat mat, IS iscol, IS *isseq) 3110 { 3111 IS iscol_local; 3112 PetscBool isstride; 3113 PetscMPIInt lisstride = 0, gisstride; 3114 3115 PetscFunctionBegin; 3116 /* check if we are grabbing all columns*/ 3117 PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &isstride)); 3118 3119 if (isstride) { 3120 PetscInt start, len, mstart, mlen; 3121 PetscCall(ISStrideGetInfo(iscol, &start, NULL)); 3122 PetscCall(ISGetLocalSize(iscol, &len)); 3123 PetscCall(MatGetOwnershipRangeColumn(mat, &mstart, &mlen)); 3124 if (mstart == start && mlen - mstart == len) lisstride = 1; 3125 } 3126 3127 PetscCall(MPIU_Allreduce(&lisstride, &gisstride, 1, MPI_INT, MPI_MIN, PetscObjectComm((PetscObject)mat))); 3128 if (gisstride) { 3129 PetscInt N; 3130 PetscCall(MatGetSize(mat, NULL, &N)); 3131 PetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &iscol_local)); 3132 PetscCall(ISSetIdentity(iscol_local)); 3133 PetscCall(PetscInfo(mat, "Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n")); 3134 } else { 3135 PetscInt cbs; 3136 PetscCall(ISGetBlockSize(iscol, &cbs)); 3137 PetscCall(ISAllGather(iscol, &iscol_local)); 3138 PetscCall(ISSetBlockSize(iscol_local, cbs)); 3139 } 3140 3141 *isseq = iscol_local; 3142 PetscFunctionReturn(PETSC_SUCCESS); 3143 } 3144 3145 /* 3146 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local 3147 (see MatCreateSubMatrix_MPIAIJ_nonscalable) 3148 3149 Input Parameters: 3150 + mat - matrix 3151 . isrow - parallel row index set; its local indices are a subset of local columns of `mat`, 3152 i.e., mat->rstart <= isrow[i] < mat->rend 3153 - iscol - parallel column index set; its local indices are a subset of local columns of `mat`, 3154 i.e., mat->cstart <= iscol[i] < mat->cend 3155 3156 Output Parameters: 3157 + isrow_d - sequential row index set for retrieving mat->A 3158 . iscol_d - sequential column index set for retrieving mat->A 3159 . 
iscol_o - sequential column index set for retrieving mat->B 3160 - garray - column map; garray[i] indicates global location of iscol_o[i] in `iscol` 3161 */ 3162 static PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat, IS isrow, IS iscol, IS *isrow_d, IS *iscol_d, IS *iscol_o, const PetscInt *garray[]) 3163 { 3164 Vec x, cmap; 3165 const PetscInt *is_idx; 3166 PetscScalar *xarray, *cmaparray; 3167 PetscInt ncols, isstart, *idx, m, rstart, *cmap1, count; 3168 Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data; 3169 Mat B = a->B; 3170 Vec lvec = a->lvec, lcmap; 3171 PetscInt i, cstart, cend, Bn = B->cmap->N; 3172 MPI_Comm comm; 3173 VecScatter Mvctx = a->Mvctx; 3174 3175 PetscFunctionBegin; 3176 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3177 PetscCall(ISGetLocalSize(iscol, &ncols)); 3178 3179 /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */ 3180 PetscCall(MatCreateVecs(mat, &x, NULL)); 3181 PetscCall(VecSet(x, -1.0)); 3182 PetscCall(VecDuplicate(x, &cmap)); 3183 PetscCall(VecSet(cmap, -1.0)); 3184 3185 /* Get start indices */ 3186 PetscCallMPI(MPI_Scan(&ncols, &isstart, 1, MPIU_INT, MPI_SUM, comm)); 3187 isstart -= ncols; 3188 PetscCall(MatGetOwnershipRangeColumn(mat, &cstart, &cend)); 3189 3190 PetscCall(ISGetIndices(iscol, &is_idx)); 3191 PetscCall(VecGetArray(x, &xarray)); 3192 PetscCall(VecGetArray(cmap, &cmaparray)); 3193 PetscCall(PetscMalloc1(ncols, &idx)); 3194 for (i = 0; i < ncols; i++) { 3195 xarray[is_idx[i] - cstart] = (PetscScalar)is_idx[i]; 3196 cmaparray[is_idx[i] - cstart] = i + isstart; /* global index of iscol[i] */ 3197 idx[i] = is_idx[i] - cstart; /* local index of iscol[i] */ 3198 } 3199 PetscCall(VecRestoreArray(x, &xarray)); 3200 PetscCall(VecRestoreArray(cmap, &cmaparray)); 3201 PetscCall(ISRestoreIndices(iscol, &is_idx)); 3202 3203 /* Get iscol_d */ 3204 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, iscol_d)); 3205 PetscCall(ISGetBlockSize(iscol, &i)); 3206 PetscCall(ISSetBlockSize(*iscol_d, i)); 3207 3208 /* Get isrow_d */ 3209 PetscCall(ISGetLocalSize(isrow, &m)); 3210 rstart = mat->rmap->rstart; 3211 PetscCall(PetscMalloc1(m, &idx)); 3212 PetscCall(ISGetIndices(isrow, &is_idx)); 3213 for (i = 0; i < m; i++) idx[i] = is_idx[i] - rstart; 3214 PetscCall(ISRestoreIndices(isrow, &is_idx)); 3215 3216 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idx, PETSC_OWN_POINTER, isrow_d)); 3217 PetscCall(ISGetBlockSize(isrow, &i)); 3218 PetscCall(ISSetBlockSize(*isrow_d, i)); 3219 3220 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3221 PetscCall(VecScatterBegin(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD)); 3222 PetscCall(VecScatterEnd(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD)); 3223 3224 PetscCall(VecDuplicate(lvec, &lcmap)); 3225 3226 PetscCall(VecScatterBegin(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD)); 3227 PetscCall(VecScatterEnd(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD)); 3228 3229 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3230 /* off-process column indices */ 3231 count = 0; 3232 PetscCall(PetscMalloc1(Bn, &idx)); 3233 PetscCall(PetscMalloc1(Bn, &cmap1)); 3234 3235 PetscCall(VecGetArray(lvec, &xarray)); 3236 PetscCall(VecGetArray(lcmap, &cmaparray)); 3237 for (i = 0; i < Bn; i++) { 3238 if (PetscRealPart(xarray[i]) > -1.0) { 3239 idx[count] = i; /* local column index in off-diagonal part B */ 3240 cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3241 
count++; 3242 } 3243 } 3244 PetscCall(VecRestoreArray(lvec, &xarray)); 3245 PetscCall(VecRestoreArray(lcmap, &cmaparray)); 3246 3247 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_COPY_VALUES, iscol_o)); 3248 /* cannot ensure iscol_o has same blocksize as iscol! */ 3249 3250 PetscCall(PetscFree(idx)); 3251 *garray = cmap1; 3252 3253 PetscCall(VecDestroy(&x)); 3254 PetscCall(VecDestroy(&cmap)); 3255 PetscCall(VecDestroy(&lcmap)); 3256 PetscFunctionReturn(PETSC_SUCCESS); 3257 } 3258 3259 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3260 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *submat) 3261 { 3262 Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data, *asub; 3263 Mat M = NULL; 3264 MPI_Comm comm; 3265 IS iscol_d, isrow_d, iscol_o; 3266 Mat Asub = NULL, Bsub = NULL; 3267 PetscInt n; 3268 3269 PetscFunctionBegin; 3270 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3271 3272 if (call == MAT_REUSE_MATRIX) { 3273 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3274 PetscCall(PetscObjectQuery((PetscObject)*submat, "isrow_d", (PetscObject *)&isrow_d)); 3275 PetscCheck(isrow_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "isrow_d passed in was not used before, cannot reuse"); 3276 3277 PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_d", (PetscObject *)&iscol_d)); 3278 PetscCheck(iscol_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_d passed in was not used before, cannot reuse"); 3279 3280 PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_o", (PetscObject *)&iscol_o)); 3281 PetscCheck(iscol_o, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_o passed in was not used before, cannot reuse"); 3282 3283 /* Update diagonal and off-diagonal portions of submat */ 3284 asub = (Mat_MPIAIJ *)(*submat)->data; 3285 PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->A)); 3286 PetscCall(ISGetLocalSize(iscol_o, &n)); 3287 if (n) PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->B)); 3288 PetscCall(MatAssemblyBegin(*submat, MAT_FINAL_ASSEMBLY)); 3289 PetscCall(MatAssemblyEnd(*submat, MAT_FINAL_ASSEMBLY)); 3290 3291 } else { /* call == MAT_INITIAL_MATRIX) */ 3292 const PetscInt *garray; 3293 PetscInt BsubN; 3294 3295 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) 
*/ 3296 PetscCall(ISGetSeqIS_SameColDist_Private(mat, isrow, iscol, &isrow_d, &iscol_d, &iscol_o, &garray)); 3297 3298 /* Create local submatrices Asub and Bsub */ 3299 PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Asub)); 3300 PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Bsub)); 3301 3302 /* Create submatrix M */ 3303 PetscCall(MatCreateMPIAIJWithSeqAIJ(comm, Asub, Bsub, garray, &M)); 3304 3305 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3306 asub = (Mat_MPIAIJ *)M->data; 3307 3308 PetscCall(ISGetLocalSize(iscol_o, &BsubN)); 3309 n = asub->B->cmap->N; 3310 if (BsubN > n) { 3311 /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */ 3312 const PetscInt *idx; 3313 PetscInt i, j, *idx_new, *subgarray = asub->garray; 3314 PetscCall(PetscInfo(M, "submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n", n, BsubN)); 3315 3316 PetscCall(PetscMalloc1(n, &idx_new)); 3317 j = 0; 3318 PetscCall(ISGetIndices(iscol_o, &idx)); 3319 for (i = 0; i < n; i++) { 3320 if (j >= BsubN) break; 3321 while (subgarray[i] > garray[j]) j++; 3322 3323 if (subgarray[i] == garray[j]) { 3324 idx_new[i] = idx[j++]; 3325 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT, i, subgarray[i], j, garray[j]); 3326 } 3327 PetscCall(ISRestoreIndices(iscol_o, &idx)); 3328 3329 PetscCall(ISDestroy(&iscol_o)); 3330 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx_new, PETSC_OWN_POINTER, &iscol_o)); 3331 3332 } else if (BsubN < n) { 3333 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")", BsubN, asub->B->cmap->N); 3334 } 3335 3336 PetscCall(PetscFree(garray)); 3337 *submat = M; 3338 3339 /* Save isrow_d, iscol_d and iscol_o used in processor for next request */ 3340 PetscCall(PetscObjectCompose((PetscObject)M, "isrow_d", (PetscObject)isrow_d)); 3341 PetscCall(ISDestroy(&isrow_d)); 3342 3343 PetscCall(PetscObjectCompose((PetscObject)M, "iscol_d", (PetscObject)iscol_d)); 3344 PetscCall(ISDestroy(&iscol_d)); 3345 3346 PetscCall(PetscObjectCompose((PetscObject)M, "iscol_o", (PetscObject)iscol_o)); 3347 PetscCall(ISDestroy(&iscol_o)); 3348 } 3349 PetscFunctionReturn(PETSC_SUCCESS); 3350 } 3351 3352 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat) 3353 { 3354 IS iscol_local = NULL, isrow_d; 3355 PetscInt csize; 3356 PetscInt n, i, j, start, end; 3357 PetscBool sameRowDist = PETSC_FALSE, sameDist[2], tsameDist[2]; 3358 MPI_Comm comm; 3359 3360 PetscFunctionBegin; 3361 /* If isrow has same processor distribution as mat, 3362 call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */ 3363 if (call == MAT_REUSE_MATRIX) { 3364 PetscCall(PetscObjectQuery((PetscObject)*newmat, "isrow_d", (PetscObject *)&isrow_d)); 3365 if (isrow_d) { 3366 sameRowDist = PETSC_TRUE; 3367 tsameDist[1] = PETSC_TRUE; /* sameColDist */ 3368 } else { 3369 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_local)); 3370 if (iscol_local) { 3371 sameRowDist = PETSC_TRUE; 3372 tsameDist[1] = PETSC_FALSE; /* !sameColDist */ 3373 } 3374 } 3375 } else { 3376 /* Check if isrow has same processor distribution as mat */ 3377 sameDist[0] = PETSC_FALSE; 3378 
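/* A distribution "matches" when every local index of the index set lies in this process's ownership range
   (an empty local piece matches trivially); the per-process answers for isrow and iscol are combined below
   with a logical-AND reduction so that all ranks take the same branch. */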
PetscCall(ISGetLocalSize(isrow, &n)); 3379 if (!n) { 3380 sameDist[0] = PETSC_TRUE; 3381 } else { 3382 PetscCall(ISGetMinMax(isrow, &i, &j)); 3383 PetscCall(MatGetOwnershipRange(mat, &start, &end)); 3384 if (i >= start && j < end) sameDist[0] = PETSC_TRUE; 3385 } 3386 3387 /* Check if iscol has same processor distribution as mat */ 3388 sameDist[1] = PETSC_FALSE; 3389 PetscCall(ISGetLocalSize(iscol, &n)); 3390 if (!n) { 3391 sameDist[1] = PETSC_TRUE; 3392 } else { 3393 PetscCall(ISGetMinMax(iscol, &i, &j)); 3394 PetscCall(MatGetOwnershipRangeColumn(mat, &start, &end)); 3395 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3396 } 3397 3398 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3399 PetscCall(MPIU_Allreduce(&sameDist, &tsameDist, 2, MPIU_BOOL, MPI_LAND, comm)); 3400 sameRowDist = tsameDist[0]; 3401 } 3402 3403 if (sameRowDist) { 3404 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3405 /* isrow and iscol have same processor distribution as mat */ 3406 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat, isrow, iscol, call, newmat)); 3407 PetscFunctionReturn(PETSC_SUCCESS); 3408 } else { /* sameRowDist */ 3409 /* isrow has same processor distribution as mat */ 3410 if (call == MAT_INITIAL_MATRIX) { 3411 PetscBool sorted; 3412 PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local)); 3413 PetscCall(ISGetLocalSize(iscol_local, &n)); /* local size of iscol_local = global columns of newmat */ 3414 PetscCall(ISGetSize(iscol, &i)); 3415 PetscCheck(n == i, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT, n, i); 3416 3417 PetscCall(ISSorted(iscol_local, &sorted)); 3418 if (sorted) { 3419 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3420 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, iscol_local, MAT_INITIAL_MATRIX, newmat)); 3421 PetscFunctionReturn(PETSC_SUCCESS); 3422 } 3423 } else { /* call == MAT_REUSE_MATRIX */ 3424 IS iscol_sub; 3425 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub)); 3426 if (iscol_sub) { 3427 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, NULL, call, newmat)); 3428 PetscFunctionReturn(PETSC_SUCCESS); 3429 } 3430 } 3431 } 3432 } 3433 3434 /* General case: iscol -> iscol_local which has global size of iscol */ 3435 if (call == MAT_REUSE_MATRIX) { 3436 PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local)); 3437 PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 3438 } else { 3439 if (!iscol_local) PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local)); 3440 } 3441 3442 PetscCall(ISGetLocalSize(iscol, &csize)); 3443 PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat, isrow, iscol_local, csize, call, newmat)); 3444 3445 if (call == MAT_INITIAL_MATRIX) { 3446 PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local)); 3447 PetscCall(ISDestroy(&iscol_local)); 3448 } 3449 PetscFunctionReturn(PETSC_SUCCESS); 3450 } 3451 3452 /*@C 3453 MatCreateMPIAIJWithSeqAIJ - creates a `MATMPIAIJ` matrix using `MATSEQAIJ` matrices that contain the "diagonal" 3454 and "off-diagonal" part of the matrix in CSR format. 3455 3456 Collective 3457 3458 Input Parameters: 3459 + comm - MPI communicator 3460 . A - "diagonal" portion of matrix 3461 . 
B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3462 - garray - global index of `B` columns 3463 3464 Output Parameter: 3465 . mat - the matrix, with input `A` as its local diagonal matrix 3466 3467 Level: advanced 3468 3469 Notes: 3470 See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3471 3472 `A` becomes part of output mat, `B` is destroyed by this routine. The user cannot use `A` and `B` anymore. 3473 3474 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateMPIAIJWithSplitArrays()` 3475 @*/ 3476 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm, Mat A, Mat B, const PetscInt garray[], Mat *mat) 3477 { 3478 Mat_MPIAIJ *maij; 3479 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data, *bnew; 3480 PetscInt *oi = b->i, *oj = b->j, i, nz, col; 3481 const PetscScalar *oa; 3482 Mat Bnew; 3483 PetscInt m, n, N; 3484 MatType mpi_mat_type; 3485 3486 PetscFunctionBegin; 3487 PetscCall(MatCreate(comm, mat)); 3488 PetscCall(MatGetSize(A, &m, &n)); 3489 PetscCheck(m == B->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Am %" PetscInt_FMT " != Bm %" PetscInt_FMT, m, B->rmap->N); 3490 PetscCheck(PetscAbs(A->rmap->bs) == PetscAbs(B->rmap->bs), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT, A->rmap->bs, B->rmap->bs); 3491 /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */ 3492 /* PetscCheck(A->cmap->bs == B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */ 3493 3494 /* Get global columns of mat */ 3495 PetscCall(MPIU_Allreduce(&n, &N, 1, MPIU_INT, MPI_SUM, comm)); 3496 3497 PetscCall(MatSetSizes(*mat, m, n, PETSC_DECIDE, N)); 3498 /* Determine the type of MPI matrix that should be created from the type of matrix A, which holds the "diagonal" portion. 
*/ 3499 PetscCall(MatGetMPIMatType_Private(A, &mpi_mat_type)); 3500 PetscCall(MatSetType(*mat, mpi_mat_type)); 3501 3502 if (A->rmap->bs > 1 || A->cmap->bs > 1) PetscCall(MatSetBlockSizes(*mat, A->rmap->bs, A->cmap->bs)); 3503 maij = (Mat_MPIAIJ *)(*mat)->data; 3504 3505 (*mat)->preallocated = PETSC_TRUE; 3506 3507 PetscCall(PetscLayoutSetUp((*mat)->rmap)); 3508 PetscCall(PetscLayoutSetUp((*mat)->cmap)); 3509 3510 /* Set A as diagonal portion of *mat */ 3511 maij->A = A; 3512 3513 nz = oi[m]; 3514 for (i = 0; i < nz; i++) { 3515 col = oj[i]; 3516 oj[i] = garray[col]; 3517 } 3518 3519 /* Set Bnew as off-diagonal portion of *mat */ 3520 PetscCall(MatSeqAIJGetArrayRead(B, &oa)); 3521 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, N, oi, oj, (PetscScalar *)oa, &Bnew)); 3522 PetscCall(MatSeqAIJRestoreArrayRead(B, &oa)); 3523 bnew = (Mat_SeqAIJ *)Bnew->data; 3524 bnew->maxnz = b->maxnz; /* allocated nonzeros of B */ 3525 maij->B = Bnew; 3526 3527 PetscCheck(B->rmap->N == Bnew->rmap->N, PETSC_COMM_SELF, PETSC_ERR_PLIB, "BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT, B->rmap->N, Bnew->rmap->N); 3528 3529 b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */ 3530 b->free_a = PETSC_FALSE; 3531 b->free_ij = PETSC_FALSE; 3532 PetscCall(MatDestroy(&B)); 3533 3534 bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */ 3535 bnew->free_a = PETSC_TRUE; 3536 bnew->free_ij = PETSC_TRUE; 3537 3538 /* condense columns of maij->B */ 3539 PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 3540 PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY)); 3541 PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY)); 3542 PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE)); 3543 PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 3544 PetscFunctionReturn(PETSC_SUCCESS); 3545 } 3546 3547 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat, PetscInt, const IS[], const IS[], MatReuse, PetscBool, Mat *); 3548 3549 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat, IS isrow, IS iscol, IS iscol_local, MatReuse call, Mat *newmat) 3550 { 3551 PetscInt i, m, n, rstart, row, rend, nz, j, bs, cbs; 3552 PetscInt *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal; 3553 Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data; 3554 Mat M, Msub, B = a->B; 3555 MatScalar *aa; 3556 Mat_SeqAIJ *aij; 3557 PetscInt *garray = a->garray, *colsub, Ncols; 3558 PetscInt count, Bn = B->cmap->N, cstart = mat->cmap->rstart, cend = mat->cmap->rend; 3559 IS iscol_sub, iscmap; 3560 const PetscInt *is_idx, *cmap; 3561 PetscBool allcolumns = PETSC_FALSE; 3562 MPI_Comm comm; 3563 3564 PetscFunctionBegin; 3565 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3566 if (call == MAT_REUSE_MATRIX) { 3567 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub)); 3568 PetscCheck(iscol_sub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "SubIScol passed in was not used before, cannot reuse"); 3569 PetscCall(ISGetLocalSize(iscol_sub, &count)); 3570 3571 PetscCall(PetscObjectQuery((PetscObject)*newmat, "Subcmap", (PetscObject *)&iscmap)); 3572 PetscCheck(iscmap, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Subcmap passed in was not used before, cannot reuse"); 3573 3574 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Msub)); 3575 PetscCheck(Msub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 3576 3577 
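/* All cached objects from the initial call have been recovered; refill the cached sequential submatrix Msub
   in place from the current entries of mat using the saved column index set. */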
PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_REUSE_MATRIX, PETSC_FALSE, &Msub)); 3578 3579 } else { /* call == MAT_INITIAL_MATRIX) */ 3580 PetscBool flg; 3581 3582 PetscCall(ISGetLocalSize(iscol, &n)); 3583 PetscCall(ISGetSize(iscol, &Ncols)); 3584 3585 /* (1) iscol -> nonscalable iscol_local */ 3586 /* Check for special case: each processor gets entire matrix columns */ 3587 PetscCall(ISIdentity(iscol_local, &flg)); 3588 if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3589 PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 3590 if (allcolumns) { 3591 iscol_sub = iscol_local; 3592 PetscCall(PetscObjectReference((PetscObject)iscol_local)); 3593 PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &iscmap)); 3594 3595 } else { 3596 /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */ 3597 PetscInt *idx, *cmap1, k; 3598 PetscCall(PetscMalloc1(Ncols, &idx)); 3599 PetscCall(PetscMalloc1(Ncols, &cmap1)); 3600 PetscCall(ISGetIndices(iscol_local, &is_idx)); 3601 count = 0; 3602 k = 0; 3603 for (i = 0; i < Ncols; i++) { 3604 j = is_idx[i]; 3605 if (j >= cstart && j < cend) { 3606 /* diagonal part of mat */ 3607 idx[count] = j; 3608 cmap1[count++] = i; /* column index in submat */ 3609 } else if (Bn) { 3610 /* off-diagonal part of mat */ 3611 if (j == garray[k]) { 3612 idx[count] = j; 3613 cmap1[count++] = i; /* column index in submat */ 3614 } else if (j > garray[k]) { 3615 while (j > garray[k] && k < Bn - 1) k++; 3616 if (j == garray[k]) { 3617 idx[count] = j; 3618 cmap1[count++] = i; /* column index in submat */ 3619 } 3620 } 3621 } 3622 } 3623 PetscCall(ISRestoreIndices(iscol_local, &is_idx)); 3624 3625 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_OWN_POINTER, &iscol_sub)); 3626 PetscCall(ISGetBlockSize(iscol, &cbs)); 3627 PetscCall(ISSetBlockSize(iscol_sub, cbs)); 3628 3629 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local), count, cmap1, PETSC_OWN_POINTER, &iscmap)); 3630 } 3631 3632 /* (3) Create sequential Msub */ 3633 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_INITIAL_MATRIX, allcolumns, &Msub)); 3634 } 3635 3636 PetscCall(ISGetLocalSize(iscol_sub, &count)); 3637 aij = (Mat_SeqAIJ *)(Msub)->data; 3638 ii = aij->i; 3639 PetscCall(ISGetIndices(iscmap, &cmap)); 3640 3641 /* 3642 m - number of local rows 3643 Ncols - number of columns (same on all processors) 3644 rstart - first row in new global matrix generated 3645 */ 3646 PetscCall(MatGetSize(Msub, &m, NULL)); 3647 3648 if (call == MAT_INITIAL_MATRIX) { 3649 /* (4) Create parallel newmat */ 3650 PetscMPIInt rank, size; 3651 PetscInt csize; 3652 3653 PetscCallMPI(MPI_Comm_size(comm, &size)); 3654 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 3655 3656 /* 3657 Determine the number of non-zeros in the diagonal and off-diagonal 3658 portions of the matrix in order to do correct preallocation 3659 */ 3660 3661 /* first get start and end of "diagonal" columns */ 3662 PetscCall(ISGetLocalSize(iscol, &csize)); 3663 if (csize == PETSC_DECIDE) { 3664 PetscCall(ISGetSize(isrow, &mglobal)); 3665 if (mglobal == Ncols) { /* square matrix */ 3666 nlocal = m; 3667 } else { 3668 nlocal = Ncols / size + ((Ncols % size) > rank); 3669 } 3670 } else { 3671 nlocal = csize; 3672 } 3673 PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm)); 3674 rstart = rend - nlocal; 3675 PetscCheck(rank != size - 1 
|| rend == Ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, Ncols); 3676 3677 /* next, compute all the lengths */ 3678 jj = aij->j; 3679 PetscCall(PetscMalloc1(2 * m + 1, &dlens)); 3680 olens = dlens + m; 3681 for (i = 0; i < m; i++) { 3682 jend = ii[i + 1] - ii[i]; 3683 olen = 0; 3684 dlen = 0; 3685 for (j = 0; j < jend; j++) { 3686 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 3687 else dlen++; 3688 jj++; 3689 } 3690 olens[i] = olen; 3691 dlens[i] = dlen; 3692 } 3693 3694 PetscCall(ISGetBlockSize(isrow, &bs)); 3695 PetscCall(ISGetBlockSize(iscol, &cbs)); 3696 3697 PetscCall(MatCreate(comm, &M)); 3698 PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, Ncols)); 3699 PetscCall(MatSetBlockSizes(M, bs, cbs)); 3700 PetscCall(MatSetType(M, ((PetscObject)mat)->type_name)); 3701 PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens)); 3702 PetscCall(PetscFree(dlens)); 3703 3704 } else { /* call == MAT_REUSE_MATRIX */ 3705 M = *newmat; 3706 PetscCall(MatGetLocalSize(M, &i, NULL)); 3707 PetscCheck(i == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request"); 3708 PetscCall(MatZeroEntries(M)); 3709 /* 3710 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3711 rather than the slower MatSetValues(). 3712 */ 3713 M->was_assembled = PETSC_TRUE; 3714 M->assembled = PETSC_FALSE; 3715 } 3716 3717 /* (5) Set values of Msub to *newmat */ 3718 PetscCall(PetscMalloc1(count, &colsub)); 3719 PetscCall(MatGetOwnershipRange(M, &rstart, NULL)); 3720 3721 jj = aij->j; 3722 PetscCall(MatSeqAIJGetArrayRead(Msub, (const PetscScalar **)&aa)); 3723 for (i = 0; i < m; i++) { 3724 row = rstart + i; 3725 nz = ii[i + 1] - ii[i]; 3726 for (j = 0; j < nz; j++) colsub[j] = cmap[jj[j]]; 3727 PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, colsub, aa, INSERT_VALUES)); 3728 jj += nz; 3729 aa += nz; 3730 } 3731 PetscCall(MatSeqAIJRestoreArrayRead(Msub, (const PetscScalar **)&aa)); 3732 PetscCall(ISRestoreIndices(iscmap, &cmap)); 3733 3734 PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY)); 3735 PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY)); 3736 3737 PetscCall(PetscFree(colsub)); 3738 3739 /* save Msub, iscol_sub and iscmap used in processor for next request */ 3740 if (call == MAT_INITIAL_MATRIX) { 3741 *newmat = M; 3742 PetscCall(PetscObjectCompose((PetscObject)*newmat, "SubMatrix", (PetscObject)Msub)); 3743 PetscCall(MatDestroy(&Msub)); 3744 3745 PetscCall(PetscObjectCompose((PetscObject)*newmat, "SubIScol", (PetscObject)iscol_sub)); 3746 PetscCall(ISDestroy(&iscol_sub)); 3747 3748 PetscCall(PetscObjectCompose((PetscObject)*newmat, "Subcmap", (PetscObject)iscmap)); 3749 PetscCall(ISDestroy(&iscmap)); 3750 3751 if (iscol_local) { 3752 PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local)); 3753 PetscCall(ISDestroy(&iscol_local)); 3754 } 3755 } 3756 PetscFunctionReturn(PETSC_SUCCESS); 3757 } 3758 3759 /* 3760 Not great since it makes two copies of the submatrix, first an SeqAIJ 3761 in local and then by concatenating the local matrices the end result. 3762 Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3763 3764 This requires a sequential iscol with all indices. 
3765 */ 3766 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat) 3767 { 3768 PetscMPIInt rank, size; 3769 PetscInt i, m, n, rstart, row, rend, nz, *cwork, j, bs, cbs; 3770 PetscInt *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal; 3771 Mat M, Mreuse; 3772 MatScalar *aa, *vwork; 3773 MPI_Comm comm; 3774 Mat_SeqAIJ *aij; 3775 PetscBool colflag, allcolumns = PETSC_FALSE; 3776 3777 PetscFunctionBegin; 3778 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3779 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 3780 PetscCallMPI(MPI_Comm_size(comm, &size)); 3781 3782 /* Check for special case: each processor gets entire matrix columns */ 3783 PetscCall(ISIdentity(iscol, &colflag)); 3784 PetscCall(ISGetLocalSize(iscol, &n)); 3785 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3786 PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 3787 3788 if (call == MAT_REUSE_MATRIX) { 3789 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse)); 3790 PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 3791 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_REUSE_MATRIX, allcolumns, &Mreuse)); 3792 } else { 3793 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_INITIAL_MATRIX, allcolumns, &Mreuse)); 3794 } 3795 3796 /* 3797 m - number of local rows 3798 n - number of columns (same on all processors) 3799 rstart - first row in new global matrix generated 3800 */ 3801 PetscCall(MatGetSize(Mreuse, &m, &n)); 3802 PetscCall(MatGetBlockSizes(Mreuse, &bs, &cbs)); 3803 if (call == MAT_INITIAL_MATRIX) { 3804 aij = (Mat_SeqAIJ *)(Mreuse)->data; 3805 ii = aij->i; 3806 jj = aij->j; 3807 3808 /* 3809 Determine the number of non-zeros in the diagonal and off-diagonal 3810 portions of the matrix in order to do correct preallocation 3811 */ 3812 3813 /* first get start and end of "diagonal" columns */ 3814 if (csize == PETSC_DECIDE) { 3815 PetscCall(ISGetSize(isrow, &mglobal)); 3816 if (mglobal == n) { /* square matrix */ 3817 nlocal = m; 3818 } else { 3819 nlocal = n / size + ((n % size) > rank); 3820 } 3821 } else { 3822 nlocal = csize; 3823 } 3824 PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm)); 3825 rstart = rend - nlocal; 3826 PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n); 3827 3828 /* next, compute all the lengths */ 3829 PetscCall(PetscMalloc1(2 * m + 1, &dlens)); 3830 olens = dlens + m; 3831 for (i = 0; i < m; i++) { 3832 jend = ii[i + 1] - ii[i]; 3833 olen = 0; 3834 dlen = 0; 3835 for (j = 0; j < jend; j++) { 3836 if (*jj < rstart || *jj >= rend) olen++; 3837 else dlen++; 3838 jj++; 3839 } 3840 olens[i] = olen; 3841 dlens[i] = dlen; 3842 } 3843 PetscCall(MatCreate(comm, &M)); 3844 PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, n)); 3845 PetscCall(MatSetBlockSizes(M, bs, cbs)); 3846 PetscCall(MatSetType(M, ((PetscObject)mat)->type_name)); 3847 PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens)); 3848 PetscCall(PetscFree(dlens)); 3849 } else { 3850 PetscInt ml, nl; 3851 3852 M = *newmat; 3853 PetscCall(MatGetLocalSize(M, &ml, &nl)); 3854 PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as 
request"); 3855 PetscCall(MatZeroEntries(M)); 3856 /* 3857 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3858 rather than the slower MatSetValues(). 3859 */ 3860 M->was_assembled = PETSC_TRUE; 3861 M->assembled = PETSC_FALSE; 3862 } 3863 PetscCall(MatGetOwnershipRange(M, &rstart, &rend)); 3864 aij = (Mat_SeqAIJ *)(Mreuse)->data; 3865 ii = aij->i; 3866 jj = aij->j; 3867 3868 /* trigger copy to CPU if needed */ 3869 PetscCall(MatSeqAIJGetArrayRead(Mreuse, (const PetscScalar **)&aa)); 3870 for (i = 0; i < m; i++) { 3871 row = rstart + i; 3872 nz = ii[i + 1] - ii[i]; 3873 cwork = jj; 3874 jj = PetscSafePointerPlusOffset(jj, nz); 3875 vwork = aa; 3876 aa = PetscSafePointerPlusOffset(aa, nz); 3877 PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES)); 3878 } 3879 PetscCall(MatSeqAIJRestoreArrayRead(Mreuse, (const PetscScalar **)&aa)); 3880 3881 PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY)); 3882 PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY)); 3883 *newmat = M; 3884 3885 /* save submatrix used in processor for next request */ 3886 if (call == MAT_INITIAL_MATRIX) { 3887 PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse)); 3888 PetscCall(MatDestroy(&Mreuse)); 3889 } 3890 PetscFunctionReturn(PETSC_SUCCESS); 3891 } 3892 3893 static PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[]) 3894 { 3895 PetscInt m, cstart, cend, j, nnz, i, d, *ld; 3896 PetscInt *d_nnz, *o_nnz, nnz_max = 0, rstart, ii; 3897 const PetscInt *JJ; 3898 PetscBool nooffprocentries; 3899 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)B->data; 3900 3901 PetscFunctionBegin; 3902 PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Ii[0] must be 0 it is %" PetscInt_FMT, Ii[0]); 3903 3904 PetscCall(PetscLayoutSetUp(B->rmap)); 3905 PetscCall(PetscLayoutSetUp(B->cmap)); 3906 m = B->rmap->n; 3907 cstart = B->cmap->rstart; 3908 cend = B->cmap->rend; 3909 rstart = B->rmap->rstart; 3910 3911 PetscCall(PetscCalloc2(m, &d_nnz, m, &o_nnz)); 3912 3913 if (PetscDefined(USE_DEBUG)) { 3914 for (i = 0; i < m; i++) { 3915 nnz = Ii[i + 1] - Ii[i]; 3916 JJ = PetscSafePointerPlusOffset(J, Ii[i]); 3917 PetscCheck(nnz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns", i, nnz); 3918 PetscCheck(!nnz || !(JJ[0] < 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT, i, JJ[0]); 3919 PetscCheck(!nnz || !(JJ[nnz - 1] >= B->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")", i, JJ[nnz - 1], B->cmap->N); 3920 } 3921 } 3922 3923 for (i = 0; i < m; i++) { 3924 nnz = Ii[i + 1] - Ii[i]; 3925 JJ = PetscSafePointerPlusOffset(J, Ii[i]); 3926 nnz_max = PetscMax(nnz_max, nnz); 3927 d = 0; 3928 for (j = 0; j < nnz; j++) { 3929 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3930 } 3931 d_nnz[i] = d; 3932 o_nnz[i] = nnz - d; 3933 } 3934 PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz)); 3935 PetscCall(PetscFree2(d_nnz, o_nnz)); 3936 3937 for (i = 0; i < m; i++) { 3938 ii = i + rstart; 3939 PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, Ii[i + 1] - Ii[i], PetscSafePointerPlusOffset(J, Ii[i]), PetscSafePointerPlusOffset(v, Ii[i]), INSERT_VALUES)); 3940 } 3941 nooffprocentries = B->nooffprocentries; 3942 B->nooffprocentries = PETSC_TRUE; 3943 
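/* Every value above was inserted into locally owned rows, so the assembly below may skip the off-process
   stash communication; the caller's nooffprocentries setting is restored immediately afterwards. */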
PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 3944 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 3945 B->nooffprocentries = nooffprocentries; 3946 3947 /* count number of entries below block diagonal */ 3948 PetscCall(PetscFree(Aij->ld)); 3949 PetscCall(PetscCalloc1(m, &ld)); 3950 Aij->ld = ld; 3951 for (i = 0; i < m; i++) { 3952 nnz = Ii[i + 1] - Ii[i]; 3953 j = 0; 3954 while (j < nnz && J[j] < cstart) j++; 3955 ld[i] = j; 3956 if (J) J += nnz; 3957 } 3958 3959 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 3960 PetscFunctionReturn(PETSC_SUCCESS); 3961 } 3962 3963 /*@ 3964 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in `MATAIJ` format 3965 (the default parallel PETSc format). 3966 3967 Collective 3968 3969 Input Parameters: 3970 + B - the matrix 3971 . i - the indices into `j` for the start of each local row (indices start with zero) 3972 . j - the column indices for each local row (indices start with zero) 3973 - v - optional values in the matrix 3974 3975 Level: developer 3976 3977 Notes: 3978 The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc; 3979 thus you CANNOT change the matrix entries by changing the values of `v` after you have 3980 called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays. 3981 3982 The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array. 3983 3984 A convenience routine for this functionality is `MatCreateMPIAIJWithArrays()`. 3985 3986 You can update the matrix with new numerical values using `MatUpdateMPIAIJWithArrays()` after this call if the column indices in `j` are sorted. 3987 3988 If you do **not** use `MatUpdateMPIAIJWithArrays()`, the column indices in `j` do not need to be sorted. If you will use 3989 `MatUpdateMPIAIJWithArrays()`, the column indices **must** be sorted. 3990 3991 The format which is used for the sparse matrix input, is equivalent to a 3992 row-major ordering.. i.e for the following matrix, the input data expected is 3993 as shown 3994 .vb 3995 1 0 0 3996 2 0 3 P0 3997 ------- 3998 4 5 6 P1 3999 4000 Process0 [P0] rows_owned=[0,1] 4001 i = {0,1,3} [size = nrow+1 = 2+1] 4002 j = {0,0,2} [size = 3] 4003 v = {1,2,3} [size = 3] 4004 4005 Process1 [P1] rows_owned=[2] 4006 i = {0,3} [size = nrow+1 = 1+1] 4007 j = {0,1,2} [size = 3] 4008 v = {4,5,6} [size = 3] 4009 .ve 4010 4011 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`, 4012 `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()`, `MatCreateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4013 @*/ 4014 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B, const PetscInt i[], const PetscInt j[], const PetscScalar v[]) 4015 { 4016 PetscFunctionBegin; 4017 PetscTryMethod(B, "MatMPIAIJSetPreallocationCSR_C", (Mat, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, i, j, v)); 4018 PetscFunctionReturn(PETSC_SUCCESS); 4019 } 4020 4021 /*@C 4022 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in `MATMPIAIJ` format 4023 (the default parallel PETSc format). For good matrix assembly performance 4024 the user should preallocate the matrix storage by setting the parameters 4025 `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). 4026 4027 Collective 4028 4029 Input Parameters: 4030 + B - the matrix 4031 . 
d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4032 (same value is used for all local rows) 4033 . d_nnz - array containing the number of nonzeros in the various rows of the 4034 DIAGONAL portion of the local submatrix (possibly different for each row) 4035 or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `d_nz` is used to specify the nonzero structure. 4036 The size of this array is equal to the number of local rows, i.e. 'm'. 4037 For matrices that will be factored, you must leave room for (and set) 4038 the diagonal entry even if it is zero. 4039 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4040 submatrix (same value is used for all local rows). 4041 - o_nnz - array containing the number of nonzeros in the various rows of the 4042 OFF-DIAGONAL portion of the local submatrix (possibly different for 4043 each row) or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `o_nz` is used to specify the nonzero 4044 structure. The size of this array is equal to the number 4045 of local rows, i.e. 'm'. 4046 4047 Example Usage: 4048 Consider the following 8x8 matrix with 34 non-zero values, that is 4049 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 4050 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4051 as follows 4052 4053 .vb 4054 1 2 0 | 0 3 0 | 0 4 4055 Proc0 0 5 6 | 7 0 0 | 8 0 4056 9 0 10 | 11 0 0 | 12 0 4057 ------------------------------------- 4058 13 0 14 | 15 16 17 | 0 0 4059 Proc1 0 18 0 | 19 20 21 | 0 0 4060 0 0 0 | 22 23 0 | 24 0 4061 ------------------------------------- 4062 Proc2 25 26 27 | 0 0 28 | 29 0 4063 30 0 0 | 31 32 33 | 0 34 4064 .ve 4065 4066 This can be represented as a collection of submatrices as 4067 .vb 4068 A B C 4069 D E F 4070 G H I 4071 .ve 4072 4073 Where the submatrices A,B,C are owned by proc0, D,E,F are 4074 owned by proc1, G,H,I are owned by proc2. 4075 4076 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4077 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4078 The 'M','N' parameters are 8,8, and have the same values on all procs. 4079 4080 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4081 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4082 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4083 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4084 part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ` 4085 matrix, and [DF] as another `MATSEQAIJ` matrix. 4086 4087 When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are 4088 allocated for every row of the local diagonal submatrix, and `o_nz` 4089 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 4090 One way to choose `d_nz` and `o_nz` is to use the maximum number of nonzeros per local 4091 row for each of the local DIAGONAL and OFF-DIAGONAL submatrices. 4092 In this case, the values of `d_nz`, `o_nz` are 4093 .vb 4094 proc0 d_nz = 2, o_nz = 2 4095 proc1 d_nz = 3, o_nz = 2 4096 proc2 d_nz = 1, o_nz = 4 4097 .ve 4098 We are allocating `m`*(`d_nz`+`o_nz`) storage locations for every proc. This 4099 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4100 for proc2, i.e. we are using 12+15+10=37 storage locations to store 4101 34 values. 4102 4103 When `d_nnz`, `o_nnz` parameters are specified, the storage is specified 4104 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
4105 In the above case the values for `d_nnz`, `o_nnz` are 4106 .vb 4107 proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2] 4108 proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1] 4109 proc2 d_nnz = [1,1] and o_nnz = [4,4] 4110 .ve 4111 Here the space allocated is sum of all the above values i.e 34, and 4112 hence pre-allocation is perfect. 4113 4114 Level: intermediate 4115 4116 Notes: 4117 If the *_nnz parameter is given then the *_nz parameter is ignored 4118 4119 The `MATAIJ` format, also called compressed row storage (CSR), is compatible with standard Fortran 4120 storage. The stored row and column indices begin with zero. 4121 See [Sparse Matrices](sec_matsparse) for details. 4122 4123 The parallel matrix is partitioned such that the first m0 rows belong to 4124 process 0, the next m1 rows belong to process 1, the next m2 rows belong 4125 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 4126 4127 The DIAGONAL portion of the local submatrix of a processor can be defined 4128 as the submatrix which is obtained by extraction the part corresponding to 4129 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 4130 first row that belongs to the processor, r2 is the last row belonging to 4131 the this processor, and c1-c2 is range of indices of the local part of a 4132 vector suitable for applying the matrix to. This is an mxn matrix. In the 4133 common case of a square matrix, the row and column ranges are the same and 4134 the DIAGONAL part is also square. The remaining portion of the local 4135 submatrix (mxN) constitute the OFF-DIAGONAL portion. 4136 4137 If `o_nnz` and `d_nnz` are specified, then `o_nz` and `d_nz` are ignored. 4138 4139 You can call `MatGetInfo()` to get information on how effective the preallocation was; 4140 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 4141 You can also run with the option `-info` and look for messages with the string 4142 malloc in them to see if additional memory allocation was needed. 4143 4144 .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MATMPIAIJ`, `MATAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`, 4145 `MatGetInfo()`, `PetscSplitOwnership()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4146 @*/ 4147 PetscErrorCode MatMPIAIJSetPreallocation(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[]) 4148 { 4149 PetscFunctionBegin; 4150 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 4151 PetscValidType(B, 1); 4152 PetscTryMethod(B, "MatMPIAIJSetPreallocation_C", (Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, d_nz, d_nnz, o_nz, o_nnz)); 4153 PetscFunctionReturn(PETSC_SUCCESS); 4154 } 4155 4156 /*@ 4157 MatCreateMPIAIJWithArrays - creates a `MATMPIAIJ` matrix using arrays that contain in standard 4158 CSR format for the local rows. 4159 4160 Collective 4161 4162 Input Parameters: 4163 + comm - MPI communicator 4164 . m - number of local rows (Cannot be `PETSC_DECIDE`) 4165 . n - This value should be the same as the local size used in creating the 4166 x vector for the matrix-vector product $ y = Ax$. (or `PETSC_DECIDE` to have 4167 calculated if `N` is given) For square matrices n is almost always `m`. 4168 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given) 4169 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given) 4170 . 
i - row indices (of length m+1); that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 4171 . j - global column indices 4172 - a - optional matrix values 4173 4174 Output Parameter: 4175 . mat - the matrix 4176 4177 Level: intermediate 4178 4179 Notes: 4180 The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc; 4181 thus you CANNOT change the matrix entries by changing the values of a[] after you have 4182 called this routine. Use `MatCreateMPIAIJWithSplitArray()` to avoid needing to copy the arrays. 4183 4184 The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array. 4185 4186 Once you have created the matrix you can update it with new numerical values using `MatUpdateMPIAIJWithArray()` 4187 4188 If you do **not** use `MatUpdateMPIAIJWithArray()`, the column indices in `j` do not need to be sorted. If you will use 4189 `MatUpdateMPIAIJWithArrays()`, the column indices **must** be sorted. 4190 4191 The format which is used for the sparse matrix input, is equivalent to a 4192 row-major ordering.. i.e for the following matrix, the input data expected is 4193 as shown 4194 .vb 4195 1 0 0 4196 2 0 3 P0 4197 ------- 4198 4 5 6 P1 4199 4200 Process0 [P0] rows_owned=[0,1] 4201 i = {0,1,3} [size = nrow+1 = 2+1] 4202 j = {0,0,2} [size = 3] 4203 v = {1,2,3} [size = 3] 4204 4205 Process1 [P1] rows_owned=[2] 4206 i = {0,3} [size = nrow+1 = 1+1] 4207 j = {0,1,2} [size = 3] 4208 v = {4,5,6} [size = 3] 4209 .ve 4210 4211 .seealso: [](ch_matrices), `Mat`, `MATMPIAIK`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4212 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4213 @*/ 4214 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat) 4215 { 4216 PetscFunctionBegin; 4217 PetscCheck(!i || !i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 4218 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4219 PetscCall(MatCreate(comm, mat)); 4220 PetscCall(MatSetSizes(*mat, m, n, M, N)); 4221 /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */ 4222 PetscCall(MatSetType(*mat, MATMPIAIJ)); 4223 PetscCall(MatMPIAIJSetPreallocationCSR(*mat, i, j, a)); 4224 PetscFunctionReturn(PETSC_SUCCESS); 4225 } 4226 4227 /*@ 4228 MatUpdateMPIAIJWithArrays - updates a `MATMPIAIJ` matrix using arrays that contain in standard 4229 CSR format for the local rows. Only the numerical values are updated the other arrays must be identical to what was passed 4230 from `MatCreateMPIAIJWithArrays()` 4231 4232 Deprecated: Use `MatUpdateMPIAIJWithArray()` 4233 4234 Collective 4235 4236 Input Parameters: 4237 + mat - the matrix 4238 . m - number of local rows (Cannot be `PETSC_DECIDE`) 4239 . n - This value should be the same as the local size used in creating the 4240 x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have 4241 calculated if N is given) For square matrices n is almost always m. 4242 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 4243 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 4244 . 
Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix 4245 . J - column indices 4246 - v - matrix values 4247 4248 Level: deprecated 4249 4250 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4251 `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4252 @*/ 4253 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[]) 4254 { 4255 PetscInt nnz, i; 4256 PetscBool nooffprocentries; 4257 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)mat->data; 4258 Mat_SeqAIJ *Ad = (Mat_SeqAIJ *)Aij->A->data; 4259 PetscScalar *ad, *ao; 4260 PetscInt ldi, Iii, md; 4261 const PetscInt *Adi = Ad->i; 4262 PetscInt *ld = Aij->ld; 4263 4264 PetscFunctionBegin; 4265 PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 4266 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4267 PetscCheck(m == mat->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()"); 4268 PetscCheck(n == mat->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()"); 4269 4270 PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad)); 4271 PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao)); 4272 4273 for (i = 0; i < m; i++) { 4274 if (PetscDefined(USE_DEBUG)) { 4275 for (PetscInt j = Ii[i] + 1; j < Ii[i + 1]; ++j) { 4276 PetscCheck(J[j] >= J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is not sorted", j - Ii[i], J[j], i); 4277 PetscCheck(J[j] != J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is identical to previous entry", j - Ii[i], J[j], i); 4278 } 4279 } 4280 nnz = Ii[i + 1] - Ii[i]; 4281 Iii = Ii[i]; 4282 ldi = ld[i]; 4283 md = Adi[i + 1] - Adi[i]; 4284 PetscCall(PetscArraycpy(ao, v + Iii, ldi)); 4285 PetscCall(PetscArraycpy(ad, v + Iii + ldi, md)); 4286 PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md)); 4287 ad += md; 4288 ao += nnz - md; 4289 } 4290 nooffprocentries = mat->nooffprocentries; 4291 mat->nooffprocentries = PETSC_TRUE; 4292 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad)); 4293 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao)); 4294 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A)); 4295 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B)); 4296 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 4297 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 4298 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 4299 mat->nooffprocentries = nooffprocentries; 4300 PetscFunctionReturn(PETSC_SUCCESS); 4301 } 4302 4303 /*@ 4304 MatUpdateMPIAIJWithArray - updates an `MATMPIAIJ` matrix using an array that contains the nonzero values 4305 4306 Collective 4307 4308 Input Parameters: 4309 + mat - the matrix 4310 - v - matrix values, stored by row 4311 4312 Level: intermediate 4313 4314 Notes: 4315 The matrix must have been obtained with `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` 4316 4317 The 
column indices in the call to `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` must have been sorted for this call to work correctly 4318 4319 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4320 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4321 @*/ 4322 PetscErrorCode MatUpdateMPIAIJWithArray(Mat mat, const PetscScalar v[]) 4323 { 4324 PetscInt nnz, i, m; 4325 PetscBool nooffprocentries; 4326 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)mat->data; 4327 Mat_SeqAIJ *Ad = (Mat_SeqAIJ *)Aij->A->data; 4328 Mat_SeqAIJ *Ao = (Mat_SeqAIJ *)Aij->B->data; 4329 PetscScalar *ad, *ao; 4330 const PetscInt *Adi = Ad->i, *Adj = Ao->i; 4331 PetscInt ldi, Iii, md; 4332 PetscInt *ld = Aij->ld; 4333 4334 PetscFunctionBegin; 4335 m = mat->rmap->n; 4336 4337 PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad)); 4338 PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao)); 4339 Iii = 0; 4340 for (i = 0; i < m; i++) { 4341 nnz = Adi[i + 1] - Adi[i] + Adj[i + 1] - Adj[i]; 4342 ldi = ld[i]; 4343 md = Adi[i + 1] - Adi[i]; 4344 PetscCall(PetscArraycpy(ad, v + Iii + ldi, md)); 4345 ad += md; 4346 if (ao) { 4347 PetscCall(PetscArraycpy(ao, v + Iii, ldi)); 4348 PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md)); 4349 ao += nnz - md; 4350 } 4351 Iii += nnz; 4352 } 4353 nooffprocentries = mat->nooffprocentries; 4354 mat->nooffprocentries = PETSC_TRUE; 4355 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad)); 4356 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao)); 4357 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A)); 4358 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B)); 4359 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 4360 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 4361 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 4362 mat->nooffprocentries = nooffprocentries; 4363 PetscFunctionReturn(PETSC_SUCCESS); 4364 } 4365 4366 /*@C 4367 MatCreateAIJ - Creates a sparse parallel matrix in `MATAIJ` format 4368 (the default parallel PETSc format). For good matrix assembly performance 4369 the user should preallocate the matrix storage by setting the parameters 4370 `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). 4371 4372 Collective 4373 4374 Input Parameters: 4375 + comm - MPI communicator 4376 . m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given) 4377 This value should be the same as the local size used in creating the 4378 y vector for the matrix-vector product y = Ax. 4379 . n - This value should be the same as the local size used in creating the 4380 x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have 4381 calculated if N is given) For square matrices n is almost always m. 4382 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 4383 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 4384 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4385 (same value is used for all local rows) 4386 . d_nnz - array containing the number of nonzeros in the various rows of the 4387 DIAGONAL portion of the local submatrix (possibly different for each row) 4388 or `NULL`, if `d_nz` is used to specify the nonzero structure. 4389 The size of this array is equal to the number of local rows, i.e 'm'. 4390 . 
o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4391 submatrix (same value is used for all local rows). 4392 - o_nnz - array containing the number of nonzeros in the various rows of the 4393 OFF-DIAGONAL portion of the local submatrix (possibly different for 4394 each row) or `NULL`, if `o_nz` is used to specify the nonzero 4395 structure. The size of this array is equal to the number 4396 of local rows, i.e., 'm'. 4397 4398 Output Parameter: 4399 . A - the matrix 4400 4401 Options Database Keys: 4402 + -mat_no_inode - Do not use inodes 4403 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4404 - -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in `MatMult()` of sparse parallel matrices. 4405 See viewer types in manual of `MatView()`. Of them, ascii_matlab, draw or binary cause the vecscatter to be viewed as a matrix. 4406 Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one `MatMult()` call. 4407 4408 Level: intermediate 4409 4410 Notes: 4411 It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, 4412 MatXXXXSetPreallocation() paradigm instead of this routine directly. 4413 [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`] 4414 4415 If the *_nnz parameter is given then the *_nz parameter is ignored. 4416 4417 The `m`,`n`,`M`,`N` parameters specify the size of the matrix, and its partitioning across 4418 processors, while the `d_nz`,`d_nnz`,`o_nz`,`o_nnz` parameters specify the approximate 4419 storage requirements for this matrix. 4420 4421 If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one 4422 processor, then it must be used on all processors that share the object for 4423 that argument. 4424 4425 The user MUST specify either the local or global matrix dimensions 4426 (possibly both). 4427 4428 The parallel matrix is partitioned across processors such that the 4429 first m0 rows belong to process 0, the next m1 rows belong to 4430 process 1, the next m2 rows belong to process 2, etc., where 4431 m0,m1,m2,.. are the input parameter 'm', i.e., each processor stores 4432 values corresponding to an [m x N] submatrix. 4433 4434 The columns are logically partitioned with the n0 columns belonging 4435 to the 0th partition, the next n1 columns belonging to the next 4436 partition, etc., where n0,n1,n2... are the input parameter 'n'. 4437 4438 The DIAGONAL portion of the local submatrix on any given processor 4439 is the submatrix corresponding to the rows and columns m,n 4440 corresponding to the given processor, i.e., the diagonal matrix on 4441 process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1], 4442 etc. The remaining portion of the local submatrix [m x (N-n)] 4443 constitutes the OFF-DIAGONAL portion. The example below better 4444 illustrates this concept. 4445 4446 For a square global matrix we define each processor's diagonal portion 4447 to be its local rows and the corresponding columns (a square submatrix); 4448 each processor's off-diagonal portion encompasses the remainder of the 4449 local matrix (a rectangular submatrix). 4450 4451 If `o_nnz`, `d_nnz` are specified, then `o_nz` and `d_nz` are ignored. 4452 4453 When calling this routine with a single process communicator, a matrix of 4454 type `MATSEQAIJ` is returned. 
If a matrix of type `MATMPIAIJ` is desired for this 4455 type of communicator, use the construction mechanism 4456 .vb 4457 MatCreate(..., &A); 4458 MatSetType(A, MATMPIAIJ); 4459 MatSetSizes(A, m, n, M, N); 4460 MatMPIAIJSetPreallocation(A, ...); 4461 .ve 4462 4463 By default, this format uses inodes (identical nodes) when possible. 4464 We search for consecutive rows with the same nonzero structure, thereby 4465 reusing matrix information to achieve increased efficiency. 4466 4467 Example Usage: 4468 Consider the following 8x8 matrix with 34 nonzero values that is 4469 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 4470 proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown 4471 as follows 4472 4473 .vb 4474 1 2 0 | 0 3 0 | 0 4 4475 Proc0 0 5 6 | 7 0 0 | 8 0 4476 9 0 10 | 11 0 0 | 12 0 4477 ------------------------------------- 4478 13 0 14 | 15 16 17 | 0 0 4479 Proc1 0 18 0 | 19 20 21 | 0 0 4480 0 0 0 | 22 23 0 | 24 0 4481 ------------------------------------- 4482 Proc2 25 26 27 | 0 0 28 | 29 0 4483 30 0 0 | 31 32 33 | 0 34 4484 .ve 4485 4486 This can be represented as a collection of submatrices as 4487 4488 .vb 4489 A B C 4490 D E F 4491 G H I 4492 .ve 4493 4494 Here the submatrices A,B,C are owned by proc0, D,E,F are 4495 owned by proc1, and G,H,I are owned by proc2. 4496 4497 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4498 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4499 The 'M','N' parameters are 8,8, and have the same values on all procs. 4500 4501 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4502 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4503 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4504 Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL 4505 part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ` 4506 matrix, and [DF] as another `MATSEQAIJ` matrix. 4507 4508 When the `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are 4509 allocated for every row of the local diagonal submatrix, and `o_nz` 4510 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 4511 One way to choose `d_nz` and `o_nz` is to use the maximum number of nonzeros per 4512 local row in the local DIAGONAL and OFF-DIAGONAL submatrices, respectively. 4513 In this case, the values of `d_nz`,`o_nz` are 4514 .vb 4515 proc0 d_nz = 2, o_nz = 2 4516 proc1 d_nz = 3, o_nz = 2 4517 proc2 d_nz = 1, o_nz = 4 4518 .ve 4519 We are allocating m*(`d_nz`+`o_nz`) storage locations for every proc. This 4520 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4521 for proc2, i.e., we are using 12+15+10=37 storage locations to store 4522 34 values. 4523 4524 When the `d_nnz`, `o_nnz` parameters are specified, the storage is specified 4525 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4526 In the above case the values for `d_nnz`,`o_nnz` are 4527 .vb 4528 proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2] 4529 proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1] 4530 proc2 d_nnz = [1,1] and o_nnz = [4,4] 4531 .ve 4532 Here the space allocated is the sum of all the above values, i.e., 34, and 4533 hence pre-allocation is perfect. 
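   Putting the example together: assuming the matrix lives on `PETSC_COMM_WORLD` with the three ranks above, the call made by proc0
   could look as follows (a sketch only; proc1 and proc2 pass their own local sizes and nnz arrays, and error checking with
   `PetscCall()` is omitted)
.vb
      Mat      A;
      PetscInt d_nnz[3] = {2, 2, 2}, o_nnz[3] = {2, 2, 2};

      MatCreateAIJ(PETSC_COMM_WORLD, 3, 3, 8, 8, 0, d_nnz, 0, o_nnz, &A);
.ve
   Because `d_nnz` and `o_nnz` are given, the `d_nz` and `o_nz` arguments (here 0) are ignored.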
4534 4535 .seealso: [](ch_matrices), `Mat`, [Sparse Matrix Creation](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4536 `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()` 4537 @*/ 4538 PetscErrorCode MatCreateAIJ(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A) 4539 { 4540 PetscMPIInt size; 4541 4542 PetscFunctionBegin; 4543 PetscCall(MatCreate(comm, A)); 4544 PetscCall(MatSetSizes(*A, m, n, M, N)); 4545 PetscCallMPI(MPI_Comm_size(comm, &size)); 4546 if (size > 1) { 4547 PetscCall(MatSetType(*A, MATMPIAIJ)); 4548 PetscCall(MatMPIAIJSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz)); 4549 } else { 4550 PetscCall(MatSetType(*A, MATSEQAIJ)); 4551 PetscCall(MatSeqAIJSetPreallocation(*A, d_nz, d_nnz)); 4552 } 4553 PetscFunctionReturn(PETSC_SUCCESS); 4554 } 4555 4556 /*MC 4557 MatMPIAIJGetSeqAIJF90 - Returns the local pieces of this distributed matrix 4558 4559 Synopsis: 4560 MatMPIAIJGetSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)}, integer ierr) 4561 4562 Not Collective 4563 4564 Input Parameter: 4565 . A - the `MATMPIAIJ` matrix 4566 4567 Output Parameters: 4568 + Ad - the diagonal portion of the matrix 4569 . Ao - the off-diagonal portion of the matrix 4570 . colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix 4571 - ierr - error code 4572 4573 Level: advanced 4574 4575 Note: 4576 Use `MatMPIAIJRestoreSeqAIJF90()` when you no longer need access to the matrices and `colmap` 4577 4578 .seealso: [](ch_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJRestoreSeqAIJF90()` 4579 M*/ 4580 4581 /*MC 4582 MatMPIAIJRestoreSeqAIJF90 - call after `MatMPIAIJGetSeqAIJF90()` when you no longer need access to the matrices and `colmap` 4583 4584 Synopsis: 4585 MatMPIAIJRestoreSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)}, integer ierr) 4586 4587 Not Collective 4588 4589 Input Parameters: 4590 + A - the `MATMPIAIJ` matrix 4591 . Ad - the diagonal portion of the matrix 4592 . Ao - the off-diagonal portion of the matrix 4593 . colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix 4594 - ierr - error code 4595 4596 Level: advanced 4597 4598 .seealso: [](ch_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJGetSeqAIJF90()` 4599 M*/ 4600 4601 /*@C 4602 MatMPIAIJGetSeqAIJ - Returns the local pieces of this distributed matrix 4603 4604 Not Collective 4605 4606 Input Parameter: 4607 . A - The `MATMPIAIJ` matrix 4608 4609 Output Parameters: 4610 + Ad - The local diagonal block as a `MATSEQAIJ` matrix 4611 . Ao - The local off-diagonal block as a `MATSEQAIJ` matrix 4612 - colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix 4613 4614 Level: intermediate 4615 4616 Note: 4617 The rows in `Ad` and `Ao` are in [0, Nr), where Nr is the number of local rows on this process. The columns 4618 in `Ad` are in [0, Nc) where Nc is the number of local columns. The columns in `Ao` are in [0, Nco), where Nco is 4619 the number of nonzero columns in the local off-diagonal piece of the matrix `A`. The array `colmap` maps these 4620 local column numbers to global column numbers in the original matrix. 
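   For example, the global column corresponding to a local column of `Ao` can be looked up as follows (a minimal sketch; `Ad`, `Ao`,
   and `colmap` are the matrix's internal objects, returned without copying, and should not be destroyed by the caller)
.vb
      Mat             Ad, Ao;
      const PetscInt *colmap;

      MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &colmap);
      /* colmap[j] is the global column of local column j of Ao */
.ve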
4621 4622 Fortran Notes: 4623 `MatMPIAIJGetSeqAIJ()` Fortran binding is deprecated (since PETSc 3.19), use `MatMPIAIJGetSeqAIJF90()` 4624 4625 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJF90()`, `MatMPIAIJRestoreSeqAIJF90()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATSEQAIJ` 4626 @*/ 4627 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[]) 4628 { 4629 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 4630 PetscBool flg; 4631 4632 PetscFunctionBegin; 4633 PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &flg)); 4634 PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIAIJ matrix as input"); 4635 if (Ad) *Ad = a->A; 4636 if (Ao) *Ao = a->B; 4637 if (colmap) *colmap = a->garray; 4638 PetscFunctionReturn(PETSC_SUCCESS); 4639 } 4640 4641 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 4642 { 4643 PetscInt m, N, i, rstart, nnz, Ii; 4644 PetscInt *indx; 4645 PetscScalar *values; 4646 MatType rootType; 4647 4648 PetscFunctionBegin; 4649 PetscCall(MatGetSize(inmat, &m, &N)); 4650 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4651 PetscInt *dnz, *onz, sum, bs, cbs; 4652 4653 if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N)); 4654 /* Check sum(n) = N */ 4655 PetscCall(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm)); 4656 PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N); 4657 4658 PetscCallMPI(MPI_Scan(&m, &rstart, 1, MPIU_INT, MPI_SUM, comm)); 4659 rstart -= m; 4660 4661 MatPreallocateBegin(comm, m, n, dnz, onz); 4662 for (i = 0; i < m; i++) { 4663 PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, NULL)); 4664 PetscCall(MatPreallocateSet(i + rstart, nnz, indx, dnz, onz)); 4665 PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, NULL)); 4666 } 4667 4668 PetscCall(MatCreate(comm, outmat)); 4669 PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 4670 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 4671 PetscCall(MatSetBlockSizes(*outmat, bs, cbs)); 4672 PetscCall(MatGetRootType_Private(inmat, &rootType)); 4673 PetscCall(MatSetType(*outmat, rootType)); 4674 PetscCall(MatSeqAIJSetPreallocation(*outmat, 0, dnz)); 4675 PetscCall(MatMPIAIJSetPreallocation(*outmat, 0, dnz, 0, onz)); 4676 MatPreallocateEnd(dnz, onz); 4677 PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 4678 } 4679 4680 /* numeric phase */ 4681 PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL)); 4682 for (i = 0; i < m; i++) { 4683 PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, &values)); 4684 Ii = i + rstart; 4685 PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES)); 4686 PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, &values)); 4687 } 4688 PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY)); 4689 PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY)); 4690 PetscFunctionReturn(PETSC_SUCCESS); 4691 } 4692 4693 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data) 4694 { 4695 Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data; 4696 4697 PetscFunctionBegin; 4698 if (!merge) PetscFunctionReturn(PETSC_SUCCESS); 4699 PetscCall(PetscFree(merge->id_r)); 4700 PetscCall(PetscFree(merge->len_s)); 4701 PetscCall(PetscFree(merge->len_r)); 4702 PetscCall(PetscFree(merge->bi)); 4703 PetscCall(PetscFree(merge->bj)); 
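/* buf_ri[] and buf_rj[] come from PetscPostIrecvInt(), which places all received data in one contiguous block; free that block via the first entry, then the pointer arrays themselves */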
4704 PetscCall(PetscFree(merge->buf_ri[0])); 4705 PetscCall(PetscFree(merge->buf_ri)); 4706 PetscCall(PetscFree(merge->buf_rj[0])); 4707 PetscCall(PetscFree(merge->buf_rj)); 4708 PetscCall(PetscFree(merge->coi)); 4709 PetscCall(PetscFree(merge->coj)); 4710 PetscCall(PetscFree(merge->owners_co)); 4711 PetscCall(PetscLayoutDestroy(&merge->rowmap)); 4712 PetscCall(PetscFree(merge)); 4713 PetscFunctionReturn(PETSC_SUCCESS); 4714 } 4715 4716 #include <../src/mat/utils/freespace.h> 4717 #include <petscbt.h> 4718 4719 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat, Mat mpimat) 4720 { 4721 MPI_Comm comm; 4722 Mat_SeqAIJ *a = (Mat_SeqAIJ *)seqmat->data; 4723 PetscMPIInt size, rank, taga, *len_s; 4724 PetscInt N = mpimat->cmap->N, i, j, *owners, *ai = a->i, *aj; 4725 PetscInt proc, m; 4726 PetscInt **buf_ri, **buf_rj; 4727 PetscInt k, anzi, *bj_i, *bi, *bj, arow, bnzi, nextaj; 4728 PetscInt nrows, **buf_ri_k, **nextrow, **nextai; 4729 MPI_Request *s_waits, *r_waits; 4730 MPI_Status *status; 4731 const MatScalar *aa, *a_a; 4732 MatScalar **abuf_r, *ba_i; 4733 Mat_Merge_SeqsToMPI *merge; 4734 PetscContainer container; 4735 4736 PetscFunctionBegin; 4737 PetscCall(PetscObjectGetComm((PetscObject)mpimat, &comm)); 4738 PetscCall(PetscLogEventBegin(MAT_Seqstompinum, seqmat, 0, 0, 0)); 4739 4740 PetscCallMPI(MPI_Comm_size(comm, &size)); 4741 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 4742 4743 PetscCall(PetscObjectQuery((PetscObject)mpimat, "MatMergeSeqsToMPI", (PetscObject *)&container)); 4744 PetscCheck(container, PetscObjectComm((PetscObject)mpimat), PETSC_ERR_PLIB, "Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic"); 4745 PetscCall(PetscContainerGetPointer(container, (void **)&merge)); 4746 PetscCall(MatSeqAIJGetArrayRead(seqmat, &a_a)); 4747 aa = a_a; 4748 4749 bi = merge->bi; 4750 bj = merge->bj; 4751 buf_ri = merge->buf_ri; 4752 buf_rj = merge->buf_rj; 4753 4754 PetscCall(PetscMalloc1(size, &status)); 4755 owners = merge->rowmap->range; 4756 len_s = merge->len_s; 4757 4758 /* send and recv matrix values */ 4759 PetscCall(PetscObjectGetNewTag((PetscObject)mpimat, &taga)); 4760 PetscCall(PetscPostIrecvScalar(comm, taga, merge->nrecv, merge->id_r, merge->len_r, &abuf_r, &r_waits)); 4761 4762 PetscCall(PetscMalloc1(merge->nsend + 1, &s_waits)); 4763 for (proc = 0, k = 0; proc < size; proc++) { 4764 if (!len_s[proc]) continue; 4765 i = owners[proc]; 4766 PetscCallMPI(MPI_Isend(aa + ai[i], len_s[proc], MPIU_MATSCALAR, proc, taga, comm, s_waits + k)); 4767 k++; 4768 } 4769 4770 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, r_waits, status)); 4771 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, s_waits, status)); 4772 PetscCall(PetscFree(status)); 4773 4774 PetscCall(PetscFree(s_waits)); 4775 PetscCall(PetscFree(r_waits)); 4776 4777 /* insert mat values of mpimat */ 4778 PetscCall(PetscMalloc1(N, &ba_i)); 4779 PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai)); 4780 4781 for (k = 0; k < merge->nrecv; k++) { 4782 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4783 nrows = *buf_ri_k[k]; 4784 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4785 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4786 } 4787 4788 /* set values of ba */ 4789 m = merge->rowmap->n; 4790 for (i = 0; i < m; i++) { 4791 arow = owners[rank] + i; 4792 bj_i = bj + bi[i]; /* col indices of the i-th row of mpimat */ 4793 bnzi = bi[i + 1] - bi[i]; 4794 
PetscCall(PetscArrayzero(ba_i, bnzi)); 4795 4796 /* add local non-zero vals of this proc's seqmat into ba */ 4797 anzi = ai[arow + 1] - ai[arow]; 4798 aj = a->j + ai[arow]; 4799 aa = a_a + ai[arow]; 4800 nextaj = 0; 4801 for (j = 0; nextaj < anzi; j++) { 4802 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4803 ba_i[j] += aa[nextaj++]; 4804 } 4805 } 4806 4807 /* add received vals into ba */ 4808 for (k = 0; k < merge->nrecv; k++) { /* k-th received message */ 4809 /* i-th row */ 4810 if (i == *nextrow[k]) { 4811 anzi = *(nextai[k] + 1) - *nextai[k]; 4812 aj = buf_rj[k] + *nextai[k]; 4813 aa = abuf_r[k] + *nextai[k]; 4814 nextaj = 0; 4815 for (j = 0; nextaj < anzi; j++) { 4816 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4817 ba_i[j] += aa[nextaj++]; 4818 } 4819 } 4820 nextrow[k]++; 4821 nextai[k]++; 4822 } 4823 } 4824 PetscCall(MatSetValues(mpimat, 1, &arow, bnzi, bj_i, ba_i, INSERT_VALUES)); 4825 } 4826 PetscCall(MatSeqAIJRestoreArrayRead(seqmat, &a_a)); 4827 PetscCall(MatAssemblyBegin(mpimat, MAT_FINAL_ASSEMBLY)); 4828 PetscCall(MatAssemblyEnd(mpimat, MAT_FINAL_ASSEMBLY)); 4829 4830 PetscCall(PetscFree(abuf_r[0])); 4831 PetscCall(PetscFree(abuf_r)); 4832 PetscCall(PetscFree(ba_i)); 4833 PetscCall(PetscFree3(buf_ri_k, nextrow, nextai)); 4834 PetscCall(PetscLogEventEnd(MAT_Seqstompinum, seqmat, 0, 0, 0)); 4835 PetscFunctionReturn(PETSC_SUCCESS); 4836 } 4837 4838 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, Mat *mpimat) 4839 { 4840 Mat B_mpi; 4841 Mat_SeqAIJ *a = (Mat_SeqAIJ *)seqmat->data; 4842 PetscMPIInt size, rank, tagi, tagj, *len_s, *len_si, *len_ri; 4843 PetscInt **buf_rj, **buf_ri, **buf_ri_k; 4844 PetscInt M = seqmat->rmap->n, N = seqmat->cmap->n, i, *owners, *ai = a->i, *aj = a->j; 4845 PetscInt len, proc, *dnz, *onz, bs, cbs; 4846 PetscInt k, anzi, *bi, *bj, *lnk, nlnk, arow, bnzi; 4847 PetscInt nrows, *buf_s, *buf_si, *buf_si_i, **nextrow, **nextai; 4848 MPI_Request *si_waits, *sj_waits, *ri_waits, *rj_waits; 4849 MPI_Status *status; 4850 PetscFreeSpaceList free_space = NULL, current_space = NULL; 4851 PetscBT lnkbt; 4852 Mat_Merge_SeqsToMPI *merge; 4853 PetscContainer container; 4854 4855 PetscFunctionBegin; 4856 PetscCall(PetscLogEventBegin(MAT_Seqstompisym, seqmat, 0, 0, 0)); 4857 4858 /* make sure it is a PETSc comm */ 4859 PetscCall(PetscCommDuplicate(comm, &comm, NULL)); 4860 PetscCallMPI(MPI_Comm_size(comm, &size)); 4861 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 4862 4863 PetscCall(PetscNew(&merge)); 4864 PetscCall(PetscMalloc1(size, &status)); 4865 4866 /* determine row ownership */ 4867 PetscCall(PetscLayoutCreate(comm, &merge->rowmap)); 4868 PetscCall(PetscLayoutSetLocalSize(merge->rowmap, m)); 4869 PetscCall(PetscLayoutSetSize(merge->rowmap, M)); 4870 PetscCall(PetscLayoutSetBlockSize(merge->rowmap, 1)); 4871 PetscCall(PetscLayoutSetUp(merge->rowmap)); 4872 PetscCall(PetscMalloc1(size, &len_si)); 4873 PetscCall(PetscMalloc1(size, &merge->len_s)); 4874 4875 m = merge->rowmap->n; 4876 owners = merge->rowmap->range; 4877 4878 /* determine the number of messages to send, their lengths */ 4879 len_s = merge->len_s; 4880 4881 len = 0; /* length of buf_si[] */ 4882 merge->nsend = 0; 4883 for (proc = 0; proc < size; proc++) { 4884 len_si[proc] = 0; 4885 if (proc == rank) { 4886 len_s[proc] = 0; 4887 } else { 4888 len_si[proc] = owners[proc + 1] - owners[proc] + 1; 4889 len_s[proc] = ai[owners[proc + 1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4890 } 4891 if (len_s[proc]) { 4892 merge->nsend++; 
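/* count the rows with nonzeros that are destined for [proc]; the i-structure message stores nrows, the nonempty row indices, and nrows+1 running offsets, i.e. 2*(nrows+1) integers */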
4893 nrows = 0; 4894 for (i = owners[proc]; i < owners[proc + 1]; i++) { 4895 if (ai[i + 1] > ai[i]) nrows++; 4896 } 4897 len_si[proc] = 2 * (nrows + 1); 4898 len += len_si[proc]; 4899 } 4900 } 4901 4902 /* determine the number and length of messages to receive for ij-structure */ 4903 PetscCall(PetscGatherNumberOfMessages(comm, NULL, len_s, &merge->nrecv)); 4904 PetscCall(PetscGatherMessageLengths2(comm, merge->nsend, merge->nrecv, len_s, len_si, &merge->id_r, &merge->len_r, &len_ri)); 4905 4906 /* post the Irecv of j-structure */ 4907 PetscCall(PetscCommGetNewTag(comm, &tagj)); 4908 PetscCall(PetscPostIrecvInt(comm, tagj, merge->nrecv, merge->id_r, merge->len_r, &buf_rj, &rj_waits)); 4909 4910 /* post the Isend of j-structure */ 4911 PetscCall(PetscMalloc2(merge->nsend, &si_waits, merge->nsend, &sj_waits)); 4912 4913 for (proc = 0, k = 0; proc < size; proc++) { 4914 if (!len_s[proc]) continue; 4915 i = owners[proc]; 4916 PetscCallMPI(MPI_Isend(aj + ai[i], len_s[proc], MPIU_INT, proc, tagj, comm, sj_waits + k)); 4917 k++; 4918 } 4919 4920 /* receives and sends of j-structure are complete */ 4921 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, rj_waits, status)); 4922 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, sj_waits, status)); 4923 4924 /* send and recv i-structure */ 4925 PetscCall(PetscCommGetNewTag(comm, &tagi)); 4926 PetscCall(PetscPostIrecvInt(comm, tagi, merge->nrecv, merge->id_r, len_ri, &buf_ri, &ri_waits)); 4927 4928 PetscCall(PetscMalloc1(len + 1, &buf_s)); 4929 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4930 for (proc = 0, k = 0; proc < size; proc++) { 4931 if (!len_s[proc]) continue; 4932 /* form outgoing message for i-structure: 4933 buf_si[0]: nrows to be sent 4934 [1:nrows]: row index (global) 4935 [nrows+1:2*nrows+1]: i-structure index 4936 */ 4937 nrows = len_si[proc] / 2 - 1; 4938 buf_si_i = buf_si + nrows + 1; 4939 buf_si[0] = nrows; 4940 buf_si_i[0] = 0; 4941 nrows = 0; 4942 for (i = owners[proc]; i < owners[proc + 1]; i++) { 4943 anzi = ai[i + 1] - ai[i]; 4944 if (anzi) { 4945 buf_si_i[nrows + 1] = buf_si_i[nrows] + anzi; /* i-structure */ 4946 buf_si[nrows + 1] = i - owners[proc]; /* local row index */ 4947 nrows++; 4948 } 4949 } 4950 PetscCallMPI(MPI_Isend(buf_si, len_si[proc], MPIU_INT, proc, tagi, comm, si_waits + k)); 4951 k++; 4952 buf_si += len_si[proc]; 4953 } 4954 4955 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, ri_waits, status)); 4956 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, si_waits, status)); 4957 4958 PetscCall(PetscInfo(seqmat, "nsend: %d, nrecv: %d\n", merge->nsend, merge->nrecv)); 4959 for (i = 0; i < merge->nrecv; i++) PetscCall(PetscInfo(seqmat, "recv len_ri=%d, len_rj=%d from [%d]\n", len_ri[i], merge->len_r[i], merge->id_r[i])); 4960 4961 PetscCall(PetscFree(len_si)); 4962 PetscCall(PetscFree(len_ri)); 4963 PetscCall(PetscFree(rj_waits)); 4964 PetscCall(PetscFree2(si_waits, sj_waits)); 4965 PetscCall(PetscFree(ri_waits)); 4966 PetscCall(PetscFree(buf_s)); 4967 PetscCall(PetscFree(status)); 4968 4969 /* compute a local seq matrix in each processor */ 4970 /* allocate bi array and free space for accumulating nonzero column info */ 4971 PetscCall(PetscMalloc1(m + 1, &bi)); 4972 bi[0] = 0; 4973 4974 /* create and initialize a linked list */ 4975 nlnk = N + 1; 4976 PetscCall(PetscLLCreate(N, N, nlnk, lnk, lnkbt)); 4977 4978 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4979 len = ai[owners[rank + 1]] - ai[owners[rank]]; 4980 
PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2, len) + 1, &free_space)); 4981 4982 current_space = free_space; 4983 4984 /* determine symbolic info for each local row */ 4985 PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai)); 4986 4987 for (k = 0; k < merge->nrecv; k++) { 4988 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4989 nrows = *buf_ri_k[k]; 4990 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4991 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4992 } 4993 4994 MatPreallocateBegin(comm, m, n, dnz, onz); 4995 len = 0; 4996 for (i = 0; i < m; i++) { 4997 bnzi = 0; 4998 /* add local non-zero cols of this proc's seqmat into lnk */ 4999 arow = owners[rank] + i; 5000 anzi = ai[arow + 1] - ai[arow]; 5001 aj = a->j + ai[arow]; 5002 PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt)); 5003 bnzi += nlnk; 5004 /* add received col data into lnk */ 5005 for (k = 0; k < merge->nrecv; k++) { /* k-th received message */ 5006 if (i == *nextrow[k]) { /* i-th row */ 5007 anzi = *(nextai[k] + 1) - *nextai[k]; 5008 aj = buf_rj[k] + *nextai[k]; 5009 PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt)); 5010 bnzi += nlnk; 5011 nextrow[k]++; 5012 nextai[k]++; 5013 } 5014 } 5015 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 5016 5017 /* if free space is not available, make more free space */ 5018 if (current_space->local_remaining < bnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi, current_space->total_array_size), &current_space)); 5019 /* copy data into free space, then initialize lnk */ 5020 PetscCall(PetscLLClean(N, N, bnzi, lnk, current_space->array, lnkbt)); 5021 PetscCall(MatPreallocateSet(i + owners[rank], bnzi, current_space->array, dnz, onz)); 5022 5023 current_space->array += bnzi; 5024 current_space->local_used += bnzi; 5025 current_space->local_remaining -= bnzi; 5026 5027 bi[i + 1] = bi[i] + bnzi; 5028 } 5029 5030 PetscCall(PetscFree3(buf_ri_k, nextrow, nextai)); 5031 5032 PetscCall(PetscMalloc1(bi[m] + 1, &bj)); 5033 PetscCall(PetscFreeSpaceContiguous(&free_space, bj)); 5034 PetscCall(PetscLLDestroy(lnk, lnkbt)); 5035 5036 /* create symbolic parallel matrix B_mpi */ 5037 PetscCall(MatGetBlockSizes(seqmat, &bs, &cbs)); 5038 PetscCall(MatCreate(comm, &B_mpi)); 5039 if (n == PETSC_DECIDE) { 5040 PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, N)); 5041 } else { 5042 PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 5043 } 5044 PetscCall(MatSetBlockSizes(B_mpi, bs, cbs)); 5045 PetscCall(MatSetType(B_mpi, MATMPIAIJ)); 5046 PetscCall(MatMPIAIJSetPreallocation(B_mpi, 0, dnz, 0, onz)); 5047 MatPreallocateEnd(dnz, onz); 5048 PetscCall(MatSetOption(B_mpi, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE)); 5049 5050 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 5051 B_mpi->assembled = PETSC_FALSE; 5052 merge->bi = bi; 5053 merge->bj = bj; 5054 merge->buf_ri = buf_ri; 5055 merge->buf_rj = buf_rj; 5056 merge->coi = NULL; 5057 merge->coj = NULL; 5058 merge->owners_co = NULL; 5059 5060 PetscCall(PetscCommDestroy(&comm)); 5061 5062 /* attach the supporting struct to B_mpi for reuse */ 5063 PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container)); 5064 PetscCall(PetscContainerSetPointer(container, merge)); 5065 PetscCall(PetscContainerSetUserDestroy(container, MatDestroy_MPIAIJ_SeqsToMPI)); 5066 PetscCall(PetscObjectCompose((PetscObject)B_mpi, "MatMergeSeqsToMPI", 
(PetscObject)container)); 5067 PetscCall(PetscContainerDestroy(&container)); 5068 *mpimat = B_mpi; 5069 5070 PetscCall(PetscLogEventEnd(MAT_Seqstompisym, seqmat, 0, 0, 0)); 5071 PetscFunctionReturn(PETSC_SUCCESS); 5072 } 5073 5074 /*@C 5075 MatCreateMPIAIJSumSeqAIJ - Creates a `MATMPIAIJ` matrix by adding sequential 5076 matrices from each processor 5077 5078 Collective 5079 5080 Input Parameters: 5081 + comm - the communicators the parallel matrix will live on 5082 . seqmat - the input sequential matrices 5083 . m - number of local rows (or `PETSC_DECIDE`) 5084 . n - number of local columns (or `PETSC_DECIDE`) 5085 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5086 5087 Output Parameter: 5088 . mpimat - the parallel matrix generated 5089 5090 Level: advanced 5091 5092 Note: 5093 The dimensions of the sequential matrix in each processor MUST be the same. 5094 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 5095 destroyed when mpimat is destroyed. Call `PetscObjectQuery()` to access seqmat. 5096 5097 .seealso: [](ch_matrices), `Mat`, `MatCreateAIJ()` 5098 @*/ 5099 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, MatReuse scall, Mat *mpimat) 5100 { 5101 PetscMPIInt size; 5102 5103 PetscFunctionBegin; 5104 PetscCallMPI(MPI_Comm_size(comm, &size)); 5105 if (size == 1) { 5106 PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0)); 5107 if (scall == MAT_INITIAL_MATRIX) { 5108 PetscCall(MatDuplicate(seqmat, MAT_COPY_VALUES, mpimat)); 5109 } else { 5110 PetscCall(MatCopy(seqmat, *mpimat, SAME_NONZERO_PATTERN)); 5111 } 5112 PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0)); 5113 PetscFunctionReturn(PETSC_SUCCESS); 5114 } 5115 PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0)); 5116 if (scall == MAT_INITIAL_MATRIX) PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm, seqmat, m, n, mpimat)); 5117 PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat, *mpimat)); 5118 PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0)); 5119 PetscFunctionReturn(PETSC_SUCCESS); 5120 } 5121 5122 /*@ 5123 MatAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATAIJ` matrix. 5124 5125 Not Collective 5126 5127 Input Parameter: 5128 . A - the matrix 5129 5130 Output Parameter: 5131 . A_loc - the local sequential matrix generated 5132 5133 Level: developer 5134 5135 Notes: 5136 The matrix is created by taking `A`'s local rows and putting them into a sequential matrix 5137 with `mlocal` rows and `n` columns. Where `mlocal` is obtained with `MatGetLocalSize()` and 5138 `n` is the global column count obtained with `MatGetSize()` 5139 5140 In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix. 5141 5142 For parallel matrices this creates an entirely new matrix. If the matrix is sequential it merely increases the reference count. 5143 5144 Destroy the matrix with `MatDestroy()` 5145 5146 .seealso: [](ch_matrices), `Mat`, `MatMPIAIJGetLocalMat()` 5147 @*/ 5148 PetscErrorCode MatAIJGetLocalMat(Mat A, Mat *A_loc) 5149 { 5150 PetscBool mpi; 5151 5152 PetscFunctionBegin; 5153 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &mpi)); 5154 if (mpi) { 5155 PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, A_loc)); 5156 } else { 5157 *A_loc = A; 5158 PetscCall(PetscObjectReference((PetscObject)*A_loc)); 5159 } 5160 PetscFunctionReturn(PETSC_SUCCESS); 5161 } 5162 5163 /*@ 5164 MatMPIAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix. 
5165 5166 Not Collective 5167 5168 Input Parameters: 5169 + A - the matrix 5170 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5171 5172 Output Parameter: 5173 . A_loc - the local sequential matrix generated 5174 5175 Level: developer 5176 5177 Notes: 5178 The matrix is created by taking all `A`'s local rows and putting them into a sequential 5179 matrix with `mlocal` rows and `n` columns.`mlocal` is the row count obtained with 5180 `MatGetLocalSize()` and `n` is the global column count obtained with `MatGetSize()`. 5181 5182 In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix. 5183 5184 When `A` is sequential and `MAT_INITIAL_MATRIX` is requested, the matrix returned is the diagonal part of `A` (which contains the entire matrix), 5185 with its reference count increased by one. Hence changing values of `A_loc` changes `A`. If `MAT_REUSE_MATRIX` is requested on a sequential matrix 5186 then `MatCopy`(Adiag,*`A_loc`,`SAME_NONZERO_PATTERN`) is called to fill `A_loc`. Thus one can preallocate the appropriate sequential matrix `A_loc` 5187 and then call this routine with `MAT_REUSE_MATRIX`. In this case, one can modify the values of `A_loc` without affecting the original sequential matrix. 5188 5189 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()` 5190 @*/ 5191 PetscErrorCode MatMPIAIJGetLocalMat(Mat A, MatReuse scall, Mat *A_loc) 5192 { 5193 Mat_MPIAIJ *mpimat = (Mat_MPIAIJ *)A->data; 5194 Mat_SeqAIJ *mat, *a, *b; 5195 PetscInt *ai, *aj, *bi, *bj, *cmap = mpimat->garray; 5196 const PetscScalar *aa, *ba, *aav, *bav; 5197 PetscScalar *ca, *cam; 5198 PetscMPIInt size; 5199 PetscInt am = A->rmap->n, i, j, k, cstart = A->cmap->rstart; 5200 PetscInt *ci, *cj, col, ncols_d, ncols_o, jo; 5201 PetscBool match; 5202 5203 PetscFunctionBegin; 5204 PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &match)); 5205 PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input"); 5206 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 5207 if (size == 1) { 5208 if (scall == MAT_INITIAL_MATRIX) { 5209 PetscCall(PetscObjectReference((PetscObject)mpimat->A)); 5210 *A_loc = mpimat->A; 5211 } else if (scall == MAT_REUSE_MATRIX) { 5212 PetscCall(MatCopy(mpimat->A, *A_loc, SAME_NONZERO_PATTERN)); 5213 } 5214 PetscFunctionReturn(PETSC_SUCCESS); 5215 } 5216 5217 PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0)); 5218 a = (Mat_SeqAIJ *)mpimat->A->data; 5219 b = (Mat_SeqAIJ *)mpimat->B->data; 5220 ai = a->i; 5221 aj = a->j; 5222 bi = b->i; 5223 bj = b->j; 5224 PetscCall(MatSeqAIJGetArrayRead(mpimat->A, &aav)); 5225 PetscCall(MatSeqAIJGetArrayRead(mpimat->B, &bav)); 5226 aa = aav; 5227 ba = bav; 5228 if (scall == MAT_INITIAL_MATRIX) { 5229 PetscCall(PetscMalloc1(1 + am, &ci)); 5230 ci[0] = 0; 5231 for (i = 0; i < am; i++) ci[i + 1] = ci[i] + (ai[i + 1] - ai[i]) + (bi[i + 1] - bi[i]); 5232 PetscCall(PetscMalloc1(1 + ci[am], &cj)); 5233 PetscCall(PetscMalloc1(1 + ci[am], &ca)); 5234 k = 0; 5235 for (i = 0; i < am; i++) { 5236 ncols_o = bi[i + 1] - bi[i]; 5237 ncols_d = ai[i + 1] - ai[i]; 5238 /* off-diagonal portion of A */ 5239 for (jo = 0; jo < ncols_o; jo++) { 5240 col = cmap[*bj]; 5241 if (col >= cstart) break; 5242 cj[k] = col; 5243 bj++; 5244 ca[k++] = *ba++; 5245 } 5246 /* diagonal portion of A */ 5247 for (j = 0; j < ncols_d; j++) { 5248 cj[k] = cstart + *aj++; 5249 ca[k++] = 
*aa++; 5250 } 5251 /* off-diagonal portion of A */ 5252 for (j = jo; j < ncols_o; j++) { 5253 cj[k] = cmap[*bj++]; 5254 ca[k++] = *ba++; 5255 } 5256 } 5257 /* put together the new matrix */ 5258 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, A->cmap->N, ci, cj, ca, A_loc)); 5259 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5260 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5261 mat = (Mat_SeqAIJ *)(*A_loc)->data; 5262 mat->free_a = PETSC_TRUE; 5263 mat->free_ij = PETSC_TRUE; 5264 mat->nonew = 0; 5265 } else if (scall == MAT_REUSE_MATRIX) { 5266 mat = (Mat_SeqAIJ *)(*A_loc)->data; 5267 ci = mat->i; 5268 cj = mat->j; 5269 PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &cam)); 5270 for (i = 0; i < am; i++) { 5271 /* off-diagonal portion of A */ 5272 ncols_o = bi[i + 1] - bi[i]; 5273 for (jo = 0; jo < ncols_o; jo++) { 5274 col = cmap[*bj]; 5275 if (col >= cstart) break; 5276 *cam++ = *ba++; 5277 bj++; 5278 } 5279 /* diagonal portion of A */ 5280 ncols_d = ai[i + 1] - ai[i]; 5281 for (j = 0; j < ncols_d; j++) *cam++ = *aa++; 5282 /* off-diagonal portion of A */ 5283 for (j = jo; j < ncols_o; j++) { 5284 *cam++ = *ba++; 5285 bj++; 5286 } 5287 } 5288 PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &cam)); 5289 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall); 5290 PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A, &aav)); 5291 PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B, &bav)); 5292 PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0)); 5293 PetscFunctionReturn(PETSC_SUCCESS); 5294 } 5295 5296 /*@ 5297 MatMPIAIJGetLocalMatMerge - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into a sequential matrix with 5298 mlocal rows and n columns. 
Where n is the sum of the number of columns of the diagonal and off-diagonal part 5299 5300 Not Collective 5301 5302 Input Parameters: 5303 + A - the matrix 5304 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5305 5306 Output Parameters: 5307 + glob - sequential `IS` with global indices associated with the columns of the local sequential matrix generated (can be `NULL`) 5308 - A_loc - the local sequential matrix generated 5309 5310 Level: developer 5311 5312 Note: 5313 This is different from `MatMPIAIJGetLocalMat()` since the first columns in the returning matrix are those associated with the diagonal 5314 part, then those associated with the off-diagonal part (in its local ordering) 5315 5316 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()` 5317 @*/ 5318 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A, MatReuse scall, IS *glob, Mat *A_loc) 5319 { 5320 Mat Ao, Ad; 5321 const PetscInt *cmap; 5322 PetscMPIInt size; 5323 PetscErrorCode (*f)(Mat, MatReuse, IS *, Mat *); 5324 5325 PetscFunctionBegin; 5326 PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &cmap)); 5327 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 5328 if (size == 1) { 5329 if (scall == MAT_INITIAL_MATRIX) { 5330 PetscCall(PetscObjectReference((PetscObject)Ad)); 5331 *A_loc = Ad; 5332 } else if (scall == MAT_REUSE_MATRIX) { 5333 PetscCall(MatCopy(Ad, *A_loc, SAME_NONZERO_PATTERN)); 5334 } 5335 if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad), Ad->cmap->n, Ad->cmap->rstart, 1, glob)); 5336 PetscFunctionReturn(PETSC_SUCCESS); 5337 } 5338 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJGetLocalMatMerge_C", &f)); 5339 PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0)); 5340 if (f) { 5341 PetscCall((*f)(A, scall, glob, A_loc)); 5342 } else { 5343 Mat_SeqAIJ *a = (Mat_SeqAIJ *)Ad->data; 5344 Mat_SeqAIJ *b = (Mat_SeqAIJ *)Ao->data; 5345 Mat_SeqAIJ *c; 5346 PetscInt *ai = a->i, *aj = a->j; 5347 PetscInt *bi = b->i, *bj = b->j; 5348 PetscInt *ci, *cj; 5349 const PetscScalar *aa, *ba; 5350 PetscScalar *ca; 5351 PetscInt i, j, am, dn, on; 5352 5353 PetscCall(MatGetLocalSize(Ad, &am, &dn)); 5354 PetscCall(MatGetLocalSize(Ao, NULL, &on)); 5355 PetscCall(MatSeqAIJGetArrayRead(Ad, &aa)); 5356 PetscCall(MatSeqAIJGetArrayRead(Ao, &ba)); 5357 if (scall == MAT_INITIAL_MATRIX) { 5358 PetscInt k; 5359 PetscCall(PetscMalloc1(1 + am, &ci)); 5360 PetscCall(PetscMalloc1(ai[am] + bi[am], &cj)); 5361 PetscCall(PetscMalloc1(ai[am] + bi[am], &ca)); 5362 ci[0] = 0; 5363 for (i = 0, k = 0; i < am; i++) { 5364 const PetscInt ncols_o = bi[i + 1] - bi[i]; 5365 const PetscInt ncols_d = ai[i + 1] - ai[i]; 5366 ci[i + 1] = ci[i] + ncols_o + ncols_d; 5367 /* diagonal portion of A */ 5368 for (j = 0; j < ncols_d; j++, k++) { 5369 cj[k] = *aj++; 5370 ca[k] = *aa++; 5371 } 5372 /* off-diagonal portion of A */ 5373 for (j = 0; j < ncols_o; j++, k++) { 5374 cj[k] = dn + *bj++; 5375 ca[k] = *ba++; 5376 } 5377 } 5378 /* put together the new matrix */ 5379 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, dn + on, ci, cj, ca, A_loc)); 5380 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5381 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5382 c = (Mat_SeqAIJ *)(*A_loc)->data; 5383 c->free_a = PETSC_TRUE; 5384 c->free_ij = PETSC_TRUE; 5385 c->nonew = 0; 5386 PetscCall(MatSetType(*A_loc, ((PetscObject)Ad)->type_name)); 5387 } else if (scall == MAT_REUSE_MATRIX) { 5388 PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &ca)); 5389 for (i = 0; i < am; i++) { 5390 const PetscInt ncols_d = ai[i + 1] - ai[i]; 5391 const PetscInt ncols_o = bi[i + 1] - bi[i]; 5392 /* diagonal portion of A */ 5393 for (j = 0; j < ncols_d; j++) *ca++ = *aa++; 5394 /* off-diagonal portion of A */ 5395 for (j = 0; j < ncols_o; j++) *ca++ = *ba++; 5396 } 5397 PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &ca)); 5398 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall); 5399 PetscCall(MatSeqAIJRestoreArrayRead(Ad, &aa)); 5400 PetscCall(MatSeqAIJRestoreArrayRead(Ao, &aa)); 5401 if (glob) { 5402 PetscInt cst, *gidx; 5403 5404 PetscCall(MatGetOwnershipRangeColumn(A, &cst, NULL)); 5405 PetscCall(PetscMalloc1(dn + on, &gidx)); 5406 for (i = 0; i < dn; i++) gidx[i] = cst + i; 5407 for (i = 0; i < on; i++) gidx[i + dn] = cmap[i]; 5408 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad), dn + on, gidx, PETSC_OWN_POINTER, glob)); 5409 } 5410 } 5411 PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0)); 5412 PetscFunctionReturn(PETSC_SUCCESS); 5413 } 5414 5415 /*@C 5416 MatMPIAIJGetLocalMatCondensed - Creates a `MATSEQAIJ` matrix from an `MATMPIAIJ` matrix by taking all its local rows and NON-ZERO columns 5417 5418 Not Collective 5419 5420 Input Parameters: 5421 + A - the matrix 5422 . scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5423 . row - index set of rows to extract (or `NULL`) 5424 - col - index set of columns to extract (or `NULL`) 5425 5426 Output Parameter: 5427 . A_loc - the local sequential matrix generated 5428 5429 Level: developer 5430 5431 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()` 5432 @*/ 5433 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A, MatReuse scall, IS *row, IS *col, Mat *A_loc) 5434 { 5435 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5436 PetscInt i, start, end, ncols, nzA, nzB, *cmap, imark, *idx; 5437 IS isrowa, iscola; 5438 Mat *aloc; 5439 PetscBool match; 5440 5441 PetscFunctionBegin; 5442 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &match)); 5443 PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input"); 5444 PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed, A, 0, 0, 0)); 5445 if (!row) { 5446 start = A->rmap->rstart; 5447 end = A->rmap->rend; 5448 PetscCall(ISCreateStride(PETSC_COMM_SELF, end - start, start, 1, &isrowa)); 5449 } else { 5450 isrowa = *row; 5451 } 5452 if (!col) { 5453 start = A->cmap->rstart; 5454 cmap = a->garray; 5455 nzA = a->A->cmap->n; 5456 nzB = a->B->cmap->n; 5457 PetscCall(PetscMalloc1(nzA + nzB, &idx)); 5458 ncols = 0; 5459 for (i = 0; i < nzB; i++) { 5460 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5461 else break; 5462 } 5463 imark = i; 5464 for (i = 0; i < nzA; i++) idx[ncols++] = start + i; 5465 for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; 5466 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &iscola)); 5467 } else { 5468 iscola = *col; 5469 } 5470 if (scall != MAT_INITIAL_MATRIX) { 5471 PetscCall(PetscMalloc1(1, &aloc)); 5472 aloc[0] = *A_loc; 5473 } 5474 PetscCall(MatCreateSubMatrices(A, 1, &isrowa, &iscola, scall, &aloc)); 5475 if (!col) { /* attach global id of condensed columns */ 5476 
PetscCall(PetscObjectCompose((PetscObject)aloc[0], "_petsc_GetLocalMatCondensed_iscol", (PetscObject)iscola)); 5477 } 5478 *A_loc = aloc[0]; 5479 PetscCall(PetscFree(aloc)); 5480 if (!row) PetscCall(ISDestroy(&isrowa)); 5481 if (!col) PetscCall(ISDestroy(&iscola)); 5482 PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed, A, 0, 0, 0)); 5483 PetscFunctionReturn(PETSC_SUCCESS); 5484 } 5485 5486 /* 5487 * Create a sequential AIJ matrix based on row indices. a whole column is extracted once a row is matched. 5488 * Row could be local or remote.The routine is designed to be scalable in memory so that nothing is based 5489 * on a global size. 5490 * */ 5491 static PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P, IS rows, Mat *P_oth) 5492 { 5493 Mat_MPIAIJ *p = (Mat_MPIAIJ *)P->data; 5494 Mat_SeqAIJ *pd = (Mat_SeqAIJ *)p->A->data, *po = (Mat_SeqAIJ *)p->B->data, *p_oth; 5495 PetscInt plocalsize, nrows, *ilocal, *oilocal, i, lidx, *nrcols, *nlcols, ncol; 5496 PetscMPIInt owner; 5497 PetscSFNode *iremote, *oiremote; 5498 const PetscInt *lrowindices; 5499 PetscSF sf, osf; 5500 PetscInt pcstart, *roffsets, *loffsets, *pnnz, j; 5501 PetscInt ontotalcols, dntotalcols, ntotalcols, nout; 5502 MPI_Comm comm; 5503 ISLocalToGlobalMapping mapping; 5504 const PetscScalar *pd_a, *po_a; 5505 5506 PetscFunctionBegin; 5507 PetscCall(PetscObjectGetComm((PetscObject)P, &comm)); 5508 /* plocalsize is the number of roots 5509 * nrows is the number of leaves 5510 * */ 5511 PetscCall(MatGetLocalSize(P, &plocalsize, NULL)); 5512 PetscCall(ISGetLocalSize(rows, &nrows)); 5513 PetscCall(PetscCalloc1(nrows, &iremote)); 5514 PetscCall(ISGetIndices(rows, &lrowindices)); 5515 for (i = 0; i < nrows; i++) { 5516 /* Find a remote index and an owner for a row 5517 * The row could be local or remote 5518 * */ 5519 owner = 0; 5520 lidx = 0; 5521 PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, &lidx)); 5522 iremote[i].index = lidx; 5523 iremote[i].rank = owner; 5524 } 5525 /* Create SF to communicate how many nonzero columns for each row */ 5526 PetscCall(PetscSFCreate(comm, &sf)); 5527 /* SF will figure out the number of nonzero columns for each row, and their 5528 * offsets 5529 * */ 5530 PetscCall(PetscSFSetGraph(sf, plocalsize, nrows, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 5531 PetscCall(PetscSFSetFromOptions(sf)); 5532 PetscCall(PetscSFSetUp(sf)); 5533 5534 PetscCall(PetscCalloc1(2 * (plocalsize + 1), &roffsets)); 5535 PetscCall(PetscCalloc1(2 * plocalsize, &nrcols)); 5536 PetscCall(PetscCalloc1(nrows, &pnnz)); 5537 roffsets[0] = 0; 5538 roffsets[1] = 0; 5539 for (i = 0; i < plocalsize; i++) { 5540 /* diagonal */ 5541 nrcols[i * 2 + 0] = pd->i[i + 1] - pd->i[i]; 5542 /* off-diagonal */ 5543 nrcols[i * 2 + 1] = po->i[i + 1] - po->i[i]; 5544 /* compute offsets so that we relative location for each row */ 5545 roffsets[(i + 1) * 2 + 0] = roffsets[i * 2 + 0] + nrcols[i * 2 + 0]; 5546 roffsets[(i + 1) * 2 + 1] = roffsets[i * 2 + 1] + nrcols[i * 2 + 1]; 5547 } 5548 PetscCall(PetscCalloc1(2 * nrows, &nlcols)); 5549 PetscCall(PetscCalloc1(2 * nrows, &loffsets)); 5550 /* 'r' means root, and 'l' means leaf */ 5551 PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE)); 5552 PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE)); 5553 PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE)); 5554 PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE)); 5555 PetscCall(PetscSFDestroy(&sf)); 5556 
PetscCall(PetscFree(roffsets)); 5557 PetscCall(PetscFree(nrcols)); 5558 dntotalcols = 0; 5559 ontotalcols = 0; 5560 ncol = 0; 5561 for (i = 0; i < nrows; i++) { 5562 pnnz[i] = nlcols[i * 2 + 0] + nlcols[i * 2 + 1]; 5563 ncol = PetscMax(pnnz[i], ncol); 5564 /* diagonal */ 5565 dntotalcols += nlcols[i * 2 + 0]; 5566 /* off-diagonal */ 5567 ontotalcols += nlcols[i * 2 + 1]; 5568 } 5569 /* We do not need to figure the right number of columns 5570 * since all the calculations will be done by going through the raw data 5571 * */ 5572 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, nrows, ncol, 0, pnnz, P_oth)); 5573 PetscCall(MatSetUp(*P_oth)); 5574 PetscCall(PetscFree(pnnz)); 5575 p_oth = (Mat_SeqAIJ *)(*P_oth)->data; 5576 /* diagonal */ 5577 PetscCall(PetscCalloc1(dntotalcols, &iremote)); 5578 /* off-diagonal */ 5579 PetscCall(PetscCalloc1(ontotalcols, &oiremote)); 5580 /* diagonal */ 5581 PetscCall(PetscCalloc1(dntotalcols, &ilocal)); 5582 /* off-diagonal */ 5583 PetscCall(PetscCalloc1(ontotalcols, &oilocal)); 5584 dntotalcols = 0; 5585 ontotalcols = 0; 5586 ntotalcols = 0; 5587 for (i = 0; i < nrows; i++) { 5588 owner = 0; 5589 PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, NULL)); 5590 /* Set iremote for diag matrix */ 5591 for (j = 0; j < nlcols[i * 2 + 0]; j++) { 5592 iremote[dntotalcols].index = loffsets[i * 2 + 0] + j; 5593 iremote[dntotalcols].rank = owner; 5594 /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */ 5595 ilocal[dntotalcols++] = ntotalcols++; 5596 } 5597 /* off-diagonal */ 5598 for (j = 0; j < nlcols[i * 2 + 1]; j++) { 5599 oiremote[ontotalcols].index = loffsets[i * 2 + 1] + j; 5600 oiremote[ontotalcols].rank = owner; 5601 oilocal[ontotalcols++] = ntotalcols++; 5602 } 5603 } 5604 PetscCall(ISRestoreIndices(rows, &lrowindices)); 5605 PetscCall(PetscFree(loffsets)); 5606 PetscCall(PetscFree(nlcols)); 5607 PetscCall(PetscSFCreate(comm, &sf)); 5608 /* P serves as roots and P_oth is leaves 5609 * Diag matrix 5610 * */ 5611 PetscCall(PetscSFSetGraph(sf, pd->i[plocalsize], dntotalcols, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 5612 PetscCall(PetscSFSetFromOptions(sf)); 5613 PetscCall(PetscSFSetUp(sf)); 5614 5615 PetscCall(PetscSFCreate(comm, &osf)); 5616 /* off-diagonal */ 5617 PetscCall(PetscSFSetGraph(osf, po->i[plocalsize], ontotalcols, oilocal, PETSC_OWN_POINTER, oiremote, PETSC_OWN_POINTER)); 5618 PetscCall(PetscSFSetFromOptions(osf)); 5619 PetscCall(PetscSFSetUp(osf)); 5620 PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a)); 5621 PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a)); 5622 /* operate on the matrix internal data to save memory */ 5623 PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5624 PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5625 PetscCall(MatGetOwnershipRangeColumn(P, &pcstart, NULL)); 5626 /* Convert to global indices for diag matrix */ 5627 for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] += pcstart; 5628 PetscCall(PetscSFBcastBegin(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE)); 5629 /* We want P_oth store global indices */ 5630 PetscCall(ISLocalToGlobalMappingCreate(comm, 1, p->B->cmap->n, p->garray, PETSC_COPY_VALUES, &mapping)); 5631 /* Use memory scalable approach */ 5632 PetscCall(ISLocalToGlobalMappingSetType(mapping, ISLOCALTOGLOBALMAPPINGHASH)); 5633 PetscCall(ISLocalToGlobalMappingApply(mapping, po->i[plocalsize], po->j, po->j)); 5634 PetscCall(PetscSFBcastBegin(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE)); 5635 
PetscCall(PetscSFBcastEnd(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE)); 5636 /* Convert back to local indices */ 5637 for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] -= pcstart; 5638 PetscCall(PetscSFBcastEnd(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE)); 5639 nout = 0; 5640 PetscCall(ISGlobalToLocalMappingApply(mapping, IS_GTOLM_DROP, po->i[plocalsize], po->j, &nout, po->j)); 5641 PetscCheck(nout == po->i[plocalsize], comm, PETSC_ERR_ARG_INCOMP, "n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ", po->i[plocalsize], nout); 5642 PetscCall(ISLocalToGlobalMappingDestroy(&mapping)); 5643 /* Exchange values */ 5644 PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5645 PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5646 PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a)); 5647 PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a)); 5648 /* Stop PETSc from shrinking memory */ 5649 for (i = 0; i < nrows; i++) p_oth->ilen[i] = p_oth->imax[i]; 5650 PetscCall(MatAssemblyBegin(*P_oth, MAT_FINAL_ASSEMBLY)); 5651 PetscCall(MatAssemblyEnd(*P_oth, MAT_FINAL_ASSEMBLY)); 5652 /* Attach PetscSF objects to P_oth so that we can reuse it later */ 5653 PetscCall(PetscObjectCompose((PetscObject)*P_oth, "diagsf", (PetscObject)sf)); 5654 PetscCall(PetscObjectCompose((PetscObject)*P_oth, "offdiagsf", (PetscObject)osf)); 5655 PetscCall(PetscSFDestroy(&sf)); 5656 PetscCall(PetscSFDestroy(&osf)); 5657 PetscFunctionReturn(PETSC_SUCCESS); 5658 } 5659 5660 /* 5661 * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5662 * This supports MPIAIJ and MAIJ 5663 * */ 5664 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A, Mat P, PetscInt dof, MatReuse reuse, Mat *P_oth) 5665 { 5666 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *p = (Mat_MPIAIJ *)P->data; 5667 Mat_SeqAIJ *p_oth; 5668 IS rows, map; 5669 PetscHMapI hamp; 5670 PetscInt i, htsize, *rowindices, off, *mapping, key, count; 5671 MPI_Comm comm; 5672 PetscSF sf, osf; 5673 PetscBool has; 5674 5675 PetscFunctionBegin; 5676 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 5677 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, P, 0, 0)); 5678 /* If it is the first time, create an index set of off-diag nonzero columns of A, 5679 * and then create a submatrix (that often is an overlapping matrix) 5680 * */ 5681 if (reuse == MAT_INITIAL_MATRIX) { 5682 /* Use a hash table to figure out unique keys */ 5683 PetscCall(PetscHMapICreateWithSize(a->B->cmap->n, &hamp)); 5684 PetscCall(PetscCalloc1(a->B->cmap->n, &mapping)); 5685 count = 0; 5686 /* Assume that a->g is sorted, otherwise the following does not make sense */ 5687 for (i = 0; i < a->B->cmap->n; i++) { 5688 key = a->garray[i] / dof; 5689 PetscCall(PetscHMapIHas(hamp, key, &has)); 5690 if (!has) { 5691 mapping[i] = count; 5692 PetscCall(PetscHMapISet(hamp, key, count++)); 5693 } else { 5694 /* Current 'i' has the same value the previous step */ 5695 mapping[i] = count - 1; 5696 } 5697 } 5698 PetscCall(ISCreateGeneral(comm, a->B->cmap->n, mapping, PETSC_OWN_POINTER, &map)); 5699 PetscCall(PetscHMapIGetSize(hamp, &htsize)); 5700 PetscCheck(htsize == count, comm, PETSC_ERR_ARG_INCOMP, " Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT, htsize, count); 5701 PetscCall(PetscCalloc1(htsize, &rowindices)); 5702 off = 0; 5703 PetscCall(PetscHMapIGetKeys(hamp, &off, rowindices)); 5704 PetscCall(PetscHMapIDestroy(&hamp)); 5705 PetscCall(PetscSortInt(htsize, rowindices)); 5706 PetscCall(ISCreateGeneral(comm, htsize, 
rowindices, PETSC_OWN_POINTER, &rows)); 5707 /* In case, the matrix was already created but users want to recreate the matrix */ 5708 PetscCall(MatDestroy(P_oth)); 5709 PetscCall(MatCreateSeqSubMatrixWithRows_Private(P, rows, P_oth)); 5710 PetscCall(PetscObjectCompose((PetscObject)*P_oth, "aoffdiagtopothmapping", (PetscObject)map)); 5711 PetscCall(ISDestroy(&map)); 5712 PetscCall(ISDestroy(&rows)); 5713 } else if (reuse == MAT_REUSE_MATRIX) { 5714 /* If matrix was already created, we simply update values using SF objects 5715 * that as attached to the matrix earlier. 5716 */ 5717 const PetscScalar *pd_a, *po_a; 5718 5719 PetscCall(PetscObjectQuery((PetscObject)*P_oth, "diagsf", (PetscObject *)&sf)); 5720 PetscCall(PetscObjectQuery((PetscObject)*P_oth, "offdiagsf", (PetscObject *)&osf)); 5721 PetscCheck(sf && osf, comm, PETSC_ERR_ARG_NULL, "Matrix is not initialized yet"); 5722 p_oth = (Mat_SeqAIJ *)(*P_oth)->data; 5723 /* Update values in place */ 5724 PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a)); 5725 PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a)); 5726 PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5727 PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5728 PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5729 PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5730 PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a)); 5731 PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a)); 5732 } else SETERRQ(comm, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown reuse type"); 5733 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, P, 0, 0)); 5734 PetscFunctionReturn(PETSC_SUCCESS); 5735 } 5736 5737 /*@C 5738 MatGetBrowsOfAcols - Returns `IS` that contain rows of `B` that equal to nonzero columns of local `A` 5739 5740 Collective 5741 5742 Input Parameters: 5743 + A - the first matrix in `MATMPIAIJ` format 5744 . B - the second matrix in `MATMPIAIJ` format 5745 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5746 5747 Output Parameters: 5748 + rowb - On input index sets of rows of B to extract (or `NULL`), modified on output 5749 . 
colb - On input index sets of columns of B to extract (or `NULL`), modified on output 5750 - B_seq - the sequential matrix generated 5751 5752 Level: developer 5753 5754 .seealso: `Mat`, `MATMPIAIJ`, `IS`, `MatReuse` 5755 @*/ 5756 PetscErrorCode MatGetBrowsOfAcols(Mat A, Mat B, MatReuse scall, IS *rowb, IS *colb, Mat *B_seq) 5757 { 5758 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5759 PetscInt *idx, i, start, ncols, nzA, nzB, *cmap, imark; 5760 IS isrowb, iscolb; 5761 Mat *bseq = NULL; 5762 5763 PetscFunctionBegin; 5764 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 5765 A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 5766 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols, A, B, 0, 0)); 5767 5768 if (scall == MAT_INITIAL_MATRIX) { 5769 start = A->cmap->rstart; 5770 cmap = a->garray; 5771 nzA = a->A->cmap->n; 5772 nzB = a->B->cmap->n; 5773 PetscCall(PetscMalloc1(nzA + nzB, &idx)); 5774 ncols = 0; 5775 for (i = 0; i < nzB; i++) { /* row < local row index */ 5776 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5777 else break; 5778 } 5779 imark = i; 5780 for (i = 0; i < nzA; i++) idx[ncols++] = start + i; /* local rows */ 5781 for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5782 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &isrowb)); 5783 PetscCall(ISCreateStride(PETSC_COMM_SELF, B->cmap->N, 0, 1, &iscolb)); 5784 } else { 5785 PetscCheck(rowb && colb, PETSC_COMM_SELF, PETSC_ERR_SUP, "IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5786 isrowb = *rowb; 5787 iscolb = *colb; 5788 PetscCall(PetscMalloc1(1, &bseq)); 5789 bseq[0] = *B_seq; 5790 } 5791 PetscCall(MatCreateSubMatrices(B, 1, &isrowb, &iscolb, scall, &bseq)); 5792 *B_seq = bseq[0]; 5793 PetscCall(PetscFree(bseq)); 5794 if (!rowb) { 5795 PetscCall(ISDestroy(&isrowb)); 5796 } else { 5797 *rowb = isrowb; 5798 } 5799 if (!colb) { 5800 PetscCall(ISDestroy(&iscolb)); 5801 } else { 5802 *colb = iscolb; 5803 } 5804 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols, A, B, 0, 0)); 5805 PetscFunctionReturn(PETSC_SUCCESS); 5806 } 5807 5808 /* 5809 MatGetBrowsOfAoCols_MPIAIJ - Creates a `MATSEQAIJ` matrix by taking rows of B that equal to nonzero columns 5810 of the OFF-DIAGONAL portion of local A 5811 5812 Collective 5813 5814 Input Parameters: 5815 + A,B - the matrices in `MATMPIAIJ` format 5816 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5817 5818 Output Parameter: 5819 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5820 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5821 . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5822 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5823 5824 Developer Note: 5825 This directly accesses information inside the VecScatter associated with the matrix-vector product 5826 for this matrix. This is not desirable.. 
5827 5828 Level: developer 5829 5830 */ 5831 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A, Mat B, MatReuse scall, PetscInt **startsj_s, PetscInt **startsj_r, MatScalar **bufa_ptr, Mat *B_oth) 5832 { 5833 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5834 Mat_SeqAIJ *b_oth; 5835 VecScatter ctx; 5836 MPI_Comm comm; 5837 const PetscMPIInt *rprocs, *sprocs; 5838 const PetscInt *srow, *rstarts, *sstarts; 5839 PetscInt *rowlen, *bufj, *bufJ, ncols = 0, aBn = a->B->cmap->n, row, *b_othi, *b_othj, *rvalues = NULL, *svalues = NULL, *cols, sbs, rbs; 5840 PetscInt i, j, k = 0, l, ll, nrecvs, nsends, nrows, *rstartsj = NULL, *sstartsj, len; 5841 PetscScalar *b_otha, *bufa, *bufA, *vals = NULL; 5842 MPI_Request *reqs = NULL, *rwaits = NULL, *swaits = NULL; 5843 PetscMPIInt size, tag, rank, nreqs; 5844 5845 PetscFunctionBegin; 5846 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 5847 PetscCallMPI(MPI_Comm_size(comm, &size)); 5848 5849 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 5850 A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 5851 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, B, 0, 0)); 5852 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 5853 5854 if (size == 1) { 5855 startsj_s = NULL; 5856 bufa_ptr = NULL; 5857 *B_oth = NULL; 5858 PetscFunctionReturn(PETSC_SUCCESS); 5859 } 5860 5861 ctx = a->Mvctx; 5862 tag = ((PetscObject)ctx)->tag; 5863 5864 PetscCall(VecScatterGetRemote_Private(ctx, PETSC_TRUE /*send*/, &nsends, &sstarts, &srow, &sprocs, &sbs)); 5865 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5866 PetscCall(VecScatterGetRemoteOrdered_Private(ctx, PETSC_FALSE /*recv*/, &nrecvs, &rstarts, NULL /*indices not needed*/, &rprocs, &rbs)); 5867 PetscCall(PetscMPIIntCast(nsends + nrecvs, &nreqs)); 5868 PetscCall(PetscMalloc1(nreqs, &reqs)); 5869 rwaits = reqs; 5870 swaits = PetscSafePointerPlusOffset(reqs, nrecvs); 5871 5872 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5873 if (scall == MAT_INITIAL_MATRIX) { 5874 /* i-array */ 5875 /* post receives */ 5876 if (nrecvs) PetscCall(PetscMalloc1(rbs * (rstarts[nrecvs] - rstarts[0]), &rvalues)); /* rstarts can be NULL when nrecvs=0 */ 5877 for (i = 0; i < nrecvs; i++) { 5878 rowlen = rvalues + rstarts[i] * rbs; 5879 nrows = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of indices to be received */ 5880 PetscCallMPI(MPI_Irecv(rowlen, nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i)); 5881 } 5882 5883 /* pack the outgoing message */ 5884 PetscCall(PetscMalloc2(nsends + 1, &sstartsj, nrecvs + 1, &rstartsj)); 5885 5886 sstartsj[0] = 0; 5887 rstartsj[0] = 0; 5888 len = 0; /* total length of j or a array to be sent */ 5889 if (nsends) { 5890 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5891 PetscCall(PetscMalloc1(sbs * (sstarts[nsends] - sstarts[0]), &svalues)); 5892 } 5893 for (i = 0; i < nsends; i++) { 5894 rowlen = svalues + (sstarts[i] - sstarts[0]) * sbs; 5895 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5896 for (j = 0; j < nrows; j++) { 5897 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5898 for (l = 0; l < sbs; l++) { 5899 PetscCall(MatGetRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); /* rowlength */ 5900 5901 rowlen[j * sbs + l] = ncols; 5902 5903 len += ncols; 5904 
PetscCall(MatRestoreRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); 5905 } 5906 k++; 5907 } 5908 PetscCallMPI(MPI_Isend(rowlen, nrows * sbs, MPIU_INT, sprocs[i], tag, comm, swaits + i)); 5909 5910 sstartsj[i + 1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5911 } 5912 /* recvs and sends of i-array are completed */ 5913 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5914 PetscCall(PetscFree(svalues)); 5915 5916 /* allocate buffers for sending j and a arrays */ 5917 PetscCall(PetscMalloc1(len + 1, &bufj)); 5918 PetscCall(PetscMalloc1(len + 1, &bufa)); 5919 5920 /* create i-array of B_oth */ 5921 PetscCall(PetscMalloc1(aBn + 2, &b_othi)); 5922 5923 b_othi[0] = 0; 5924 len = 0; /* total length of j or a array to be received */ 5925 k = 0; 5926 for (i = 0; i < nrecvs; i++) { 5927 rowlen = rvalues + (rstarts[i] - rstarts[0]) * rbs; 5928 nrows = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of rows to be received */ 5929 for (j = 0; j < nrows; j++) { 5930 b_othi[k + 1] = b_othi[k] + rowlen[j]; 5931 PetscCall(PetscIntSumError(rowlen[j], len, &len)); 5932 k++; 5933 } 5934 rstartsj[i + 1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5935 } 5936 PetscCall(PetscFree(rvalues)); 5937 5938 /* allocate space for j and a arrays of B_oth */ 5939 PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_othj)); 5940 PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_otha)); 5941 5942 /* j-array */ 5943 /* post receives of j-array */ 5944 for (i = 0; i < nrecvs; i++) { 5945 nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */ 5946 PetscCallMPI(MPI_Irecv(b_othj + rstartsj[i], nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i)); 5947 } 5948 5949 /* pack the outgoing message j-array */ 5950 if (nsends) k = sstarts[0]; 5951 for (i = 0; i < nsends; i++) { 5952 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5953 bufJ = bufj + sstartsj[i]; 5954 for (j = 0; j < nrows; j++) { 5955 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5956 for (ll = 0; ll < sbs; ll++) { 5957 PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL)); 5958 for (l = 0; l < ncols; l++) *bufJ++ = cols[l]; 5959 PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL)); 5960 } 5961 } 5962 PetscCallMPI(MPI_Isend(bufj + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_INT, sprocs[i], tag, comm, swaits + i)); 5963 } 5964 5965 /* recvs and sends of j-array are completed */ 5966 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5967 } else if (scall == MAT_REUSE_MATRIX) { 5968 sstartsj = *startsj_s; 5969 rstartsj = *startsj_r; 5970 bufa = *bufa_ptr; 5971 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 5972 PetscCall(MatSeqAIJGetArrayWrite(*B_oth, &b_otha)); 5973 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container"); 5974 5975 /* a-array */ 5976 /* post receives of a-array */ 5977 for (i = 0; i < nrecvs; i++) { 5978 nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */ 5979 PetscCallMPI(MPI_Irecv(b_otha + rstartsj[i], nrows, MPIU_SCALAR, rprocs[i], tag, comm, rwaits + i)); 5980 } 5981 5982 /* pack the outgoing message a-array */ 5983 if (nsends) k = sstarts[0]; 5984 for (i = 0; i < nsends; i++) { 5985 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5986 bufA = bufa + sstartsj[i]; 5987 for (j = 0; j < nrows; j++) { 5988 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5989 for (ll = 0; ll < sbs; ll++) { 5990 PetscCall(MatGetRow_MPIAIJ(B, row + 
ll, &ncols, NULL, &vals)); 5991 for (l = 0; l < ncols; l++) *bufA++ = vals[l]; 5992 PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals)); 5993 } 5994 } 5995 PetscCallMPI(MPI_Isend(bufa + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_SCALAR, sprocs[i], tag, comm, swaits + i)); 5996 } 5997 /* recvs and sends of a-array are completed */ 5998 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5999 PetscCall(PetscFree(reqs)); 6000 6001 if (scall == MAT_INITIAL_MATRIX) { 6002 /* put together the new matrix */ 6003 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, aBn, B->cmap->N, b_othi, b_othj, b_otha, B_oth)); 6004 6005 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 6006 /* Since these are PETSc arrays, change flags to free them as necessary. */ 6007 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 6008 b_oth->free_a = PETSC_TRUE; 6009 b_oth->free_ij = PETSC_TRUE; 6010 b_oth->nonew = 0; 6011 6012 PetscCall(PetscFree(bufj)); 6013 if (!startsj_s || !bufa_ptr) { 6014 PetscCall(PetscFree2(sstartsj, rstartsj)); 6015 PetscCall(PetscFree(bufa_ptr)); 6016 } else { 6017 *startsj_s = sstartsj; 6018 *startsj_r = rstartsj; 6019 *bufa_ptr = bufa; 6020 } 6021 } else if (scall == MAT_REUSE_MATRIX) { 6022 PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth, &b_otha)); 6023 } 6024 6025 PetscCall(VecScatterRestoreRemote_Private(ctx, PETSC_TRUE, &nsends, &sstarts, &srow, &sprocs, &sbs)); 6026 PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx, PETSC_FALSE, &nrecvs, &rstarts, NULL, &rprocs, &rbs)); 6027 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, B, 0, 0)); 6028 PetscFunctionReturn(PETSC_SUCCESS); 6029 } 6030 6031 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat, MatType, MatReuse, Mat *); 6032 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat, MatType, MatReuse, Mat *); 6033 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat, MatType, MatReuse, Mat *); 6034 #if defined(PETSC_HAVE_MKL_SPARSE) 6035 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat, MatType, MatReuse, Mat *); 6036 #endif 6037 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat, MatType, MatReuse, Mat *); 6038 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *); 6039 #if defined(PETSC_HAVE_ELEMENTAL) 6040 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat, MatType, MatReuse, Mat *); 6041 #endif 6042 #if defined(PETSC_HAVE_SCALAPACK) 6043 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *); 6044 #endif 6045 #if defined(PETSC_HAVE_HYPRE) 6046 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *); 6047 #endif 6048 #if defined(PETSC_HAVE_CUDA) 6049 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat, MatType, MatReuse, Mat *); 6050 #endif 6051 #if defined(PETSC_HAVE_HIP) 6052 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJHIPSPARSE(Mat, MatType, MatReuse, Mat *); 6053 #endif 6054 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 6055 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat, MatType, MatReuse, Mat *); 6056 #endif 6057 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat, MatType, MatReuse, Mat *); 6058 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *); 6059 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat); 6060 6061 /* 6062 Computes (B'*A')' since computing B*A directly is untenable 6063 6064 n p p 6065 [ ] [ ] [ ] 6066 m [ A ] * n [ B ] = m [ C ] 6067 [ ] [ ] 
[ ] 6068 6069 */ 6070 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A, Mat B, Mat C) 6071 { 6072 Mat At, Bt, Ct; 6073 6074 PetscFunctionBegin; 6075 PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &At)); 6076 PetscCall(MatTranspose(B, MAT_INITIAL_MATRIX, &Bt)); 6077 PetscCall(MatMatMult(Bt, At, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &Ct)); 6078 PetscCall(MatDestroy(&At)); 6079 PetscCall(MatDestroy(&Bt)); 6080 PetscCall(MatTransposeSetPrecursor(Ct, C)); 6081 PetscCall(MatTranspose(Ct, MAT_REUSE_MATRIX, &C)); 6082 PetscCall(MatDestroy(&Ct)); 6083 PetscFunctionReturn(PETSC_SUCCESS); 6084 } 6085 6086 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A, Mat B, PetscReal fill, Mat C) 6087 { 6088 PetscBool cisdense; 6089 6090 PetscFunctionBegin; 6091 PetscCheck(A->cmap->n == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT, A->cmap->n, B->rmap->n); 6092 PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N)); 6093 PetscCall(MatSetBlockSizesFromMats(C, A, B)); 6094 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, MATMPIDENSEHIP, "")); 6095 if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 6096 PetscCall(MatSetUp(C)); 6097 6098 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 6099 PetscFunctionReturn(PETSC_SUCCESS); 6100 } 6101 6102 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C) 6103 { 6104 Mat_Product *product = C->product; 6105 Mat A = product->A, B = product->B; 6106 6107 PetscFunctionBegin; 6108 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 6109 A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 6110 C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ; 6111 C->ops->productsymbolic = MatProductSymbolic_AB; 6112 PetscFunctionReturn(PETSC_SUCCESS); 6113 } 6114 6115 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C) 6116 { 6117 Mat_Product *product = C->product; 6118 6119 PetscFunctionBegin; 6120 if (product->type == MATPRODUCT_AB) PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C)); 6121 PetscFunctionReturn(PETSC_SUCCESS); 6122 } 6123 6124 /* 6125 Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix 6126 6127 Input Parameters: 6128 6129 j1,rowBegin1,rowEnd1,jmap1: describe the first set of nonzeros (Set1) 6130 j2,rowBegin2,rowEnd2,jmap2: describe the second set of nonzeros (Set2) 6131 6132 mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat 6133 6134 For Set1, j1[] contains column indices of the nonzeros. 6135 For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k 6136 respectively (note rowEnd1[k] is not necessarily equal to rowBegin1[k+1]). Indices in this range of j1[] are sorted, 6137 but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1. 6138 6139 Similarly for Set2. 6140 6141 This routine merges the two sets of nonzeros row by row and removes repeats. 6142 6143 Output Parameters: (memory is allocated by the caller) 6144 6145 i[],j[]: the CSR of the merged matrix, which has m rows. 6146 imap1[]: the k-th unique nonzero in Set1 (k=0,1,...)
corresponds to imap1[k]-th unique nonzero in the merged matrix. 6147 imap2[]: similar to imap1[], but for Set2. 6148 Note we order nonzeros row-by-row and from left to right. 6149 */ 6150 static PetscErrorCode MatMergeEntries_Internal(Mat mat, const PetscInt j1[], const PetscInt j2[], const PetscCount rowBegin1[], const PetscCount rowEnd1[], const PetscCount rowBegin2[], const PetscCount rowEnd2[], const PetscCount jmap1[], const PetscCount jmap2[], PetscCount imap1[], PetscCount imap2[], PetscInt i[], PetscInt j[]) 6151 { 6152 PetscInt r, m; /* Row index of mat */ 6153 PetscCount t, t1, t2, b1, e1, b2, e2; 6154 6155 PetscFunctionBegin; 6156 PetscCall(MatGetLocalSize(mat, &m, NULL)); 6157 t1 = t2 = t = 0; /* Count unique nonzeros in Set1, Set2 and the merged matrix, respectively */ 6158 i[0] = 0; 6159 for (r = 0; r < m; r++) { /* Do row by row merging */ 6160 b1 = rowBegin1[r]; 6161 e1 = rowEnd1[r]; 6162 b2 = rowBegin2[r]; 6163 e2 = rowEnd2[r]; 6164 while (b1 < e1 && b2 < e2) { 6165 if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */ 6166 j[t] = j1[b1]; 6167 imap1[t1] = t; 6168 imap2[t2] = t; 6169 b1 += jmap1[t1 + 1] - jmap1[t1]; /* Jump to the next unique nonzero in Set1 */ 6170 b2 += jmap2[t2 + 1] - jmap2[t2]; /* Jump to the next unique nonzero in Set2 */ 6171 t1++; 6172 t2++; 6173 t++; 6174 } else if (j1[b1] < j2[b2]) { 6175 j[t] = j1[b1]; 6176 imap1[t1] = t; 6177 b1 += jmap1[t1 + 1] - jmap1[t1]; 6178 t1++; 6179 t++; 6180 } else { 6181 j[t] = j2[b2]; 6182 imap2[t2] = t; 6183 b2 += jmap2[t2 + 1] - jmap2[t2]; 6184 t2++; 6185 t++; 6186 } 6187 } 6188 /* Merge the remaining in either j1[] or j2[] */ 6189 while (b1 < e1) { 6190 j[t] = j1[b1]; 6191 imap1[t1] = t; 6192 b1 += jmap1[t1 + 1] - jmap1[t1]; 6193 t1++; 6194 t++; 6195 } 6196 while (b2 < e2) { 6197 j[t] = j2[b2]; 6198 imap2[t2] = t; 6199 b2 += jmap2[t2 + 1] - jmap2[t2]; 6200 t2++; 6201 t++; 6202 } 6203 i[r + 1] = t; 6204 } 6205 PetscFunctionReturn(PETSC_SUCCESS); 6206 } 6207 6208 /* 6209 Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block 6210 6211 Input Parameters: 6212 mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m. 6213 n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[] 6214 respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n. 6215 6216 i[] is already sorted, but within a row, j[] is not sorted and might have repeats. 6217 i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting. 6218 6219 Output Parameters: 6220 j[],perm[]: the routine needs to sort j[] within each row along with perm[]. 6221 rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller. 6222 They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block, 6223 and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block. 6224 6225 Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine. 6226 Atot: number of entries belonging to the diagonal block. 6227 Annz: number of unique nonzeros belonging to the diagonal block. 6228 Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count 6229 repeats (i.e., same 'i,j' pair).
6230 Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. More precisely, Ajmap[t+1] - Ajmap[t] 6231 is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0. 6232 6233 Atot: number of entries belonging to the diagonal block 6234 Annz: number of unique nonzeros belonging to the diagonal block. 6235 6236 Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block. 6237 6238 Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1(). 6239 */ 6240 static PetscErrorCode MatSplitEntries_Internal(Mat mat, PetscCount n, const PetscInt i[], PetscInt j[], PetscCount perm[], PetscCount rowBegin[], PetscCount rowMid[], PetscCount rowEnd[], PetscCount *Atot_, PetscCount **Aperm_, PetscCount *Annz_, PetscCount **Ajmap_, PetscCount *Btot_, PetscCount **Bperm_, PetscCount *Bnnz_, PetscCount **Bjmap_) 6241 { 6242 PetscInt cstart, cend, rstart, rend, row, col; 6243 PetscCount Atot = 0, Btot = 0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */ 6244 PetscCount Annz = 0, Bnnz = 0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */ 6245 PetscCount k, m, p, q, r, s, mid; 6246 PetscCount *Aperm, *Bperm, *Ajmap, *Bjmap; 6247 6248 PetscFunctionBegin; 6249 PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend)); 6250 PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend)); 6251 m = rend - rstart; 6252 6253 /* Skip negative rows */ 6254 for (k = 0; k < n; k++) 6255 if (i[k] >= 0) break; 6256 6257 /* Process [k,n): sort and partition each local row into diag and offdiag portions, 6258 fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz. 6259 */ 6260 while (k < n) { 6261 row = i[k]; 6262 /* Entries in [k,s) are in one row. 
Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */ 6263 for (s = k; s < n; s++) 6264 if (i[s] != row) break; 6265 6266 /* Shift diag columns to range of [-PETSC_MAX_INT, -1] */ 6267 for (p = k; p < s; p++) { 6268 if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_MAX_INT; 6269 else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index %" PetscInt_FMT " is out of range", j[p]); 6270 } 6271 PetscCall(PetscSortIntWithCountArray(s - k, j + k, perm + k)); 6272 PetscCall(PetscSortedIntUpperBound(j, k, s, -1, &mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */ 6273 rowBegin[row - rstart] = k; 6274 rowMid[row - rstart] = mid; 6275 rowEnd[row - rstart] = s; 6276 6277 /* Count nonzeros of this diag/offdiag row, which might have repeats */ 6278 Atot += mid - k; 6279 Btot += s - mid; 6280 6281 /* Count unique nonzeros of this diag row */ 6282 for (p = k; p < mid;) { 6283 col = j[p]; 6284 do { 6285 j[p] += PETSC_MAX_INT; /* Revert the modified diagonal indices */ 6286 p++; 6287 } while (p < mid && j[p] == col); 6288 Annz++; 6289 } 6290 6291 /* Count unique nonzeros of this offdiag row */ 6292 for (p = mid; p < s;) { 6293 col = j[p]; 6294 do { 6295 p++; 6296 } while (p < s && j[p] == col); 6297 Bnnz++; 6298 } 6299 k = s; 6300 } 6301 6302 /* Allocation according to Atot, Btot, Annz, Bnnz */ 6303 PetscCall(PetscMalloc1(Atot, &Aperm)); 6304 PetscCall(PetscMalloc1(Btot, &Bperm)); 6305 PetscCall(PetscMalloc1(Annz + 1, &Ajmap)); 6306 PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap)); 6307 6308 /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */ 6309 Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0; 6310 for (r = 0; r < m; r++) { 6311 k = rowBegin[r]; 6312 mid = rowMid[r]; 6313 s = rowEnd[r]; 6314 PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Aperm, Atot), PetscSafePointerPlusOffset(perm, k), mid - k)); 6315 PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Bperm, Btot), PetscSafePointerPlusOffset(perm, mid), s - mid)); 6316 Atot += mid - k; 6317 Btot += s - mid; 6318 6319 /* Scan column indices in this row and find out how many repeats each unique nonzero has */ 6320 for (p = k; p < mid;) { 6321 col = j[p]; 6322 q = p; 6323 do { 6324 p++; 6325 } while (p < mid && j[p] == col); 6326 Ajmap[Annz + 1] = Ajmap[Annz] + (p - q); 6327 Annz++; 6328 } 6329 6330 for (p = mid; p < s;) { 6331 col = j[p]; 6332 q = p; 6333 do { 6334 p++; 6335 } while (p < s && j[p] == col); 6336 Bjmap[Bnnz + 1] = Bjmap[Bnnz] + (p - q); 6337 Bnnz++; 6338 } 6339 } 6340 /* Output */ 6341 *Aperm_ = Aperm; 6342 *Annz_ = Annz; 6343 *Atot_ = Atot; 6344 *Ajmap_ = Ajmap; 6345 *Bperm_ = Bperm; 6346 *Bnnz_ = Bnnz; 6347 *Btot_ = Btot; 6348 *Bjmap_ = Bjmap; 6349 PetscFunctionReturn(PETSC_SUCCESS); 6350 } 6351 6352 /* 6353 Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix 6354 6355 Input Parameters: 6356 nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[] 6357 nnz: number of unique nonzeros in the merged matrix 6358 imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix 6359 jmap[nnz1+1]: i-th nonzero in the set has jmap[i+1] - jmap[i] repeats in the set 6360 6361 Output Parameter: (memory is allocated by the caller) 6362 jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set 6363 6364 Example: 6365 nnz1 = 4 6366 nnz = 6 6367 imap = [1,3,4,5] 6368 
jmap = [0,3,5,6,7] 6369 then, 6370 jmap_new = [0,0,3,3,5,6,7] 6371 */ 6372 static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1, PetscCount nnz, const PetscCount imap[], const PetscCount jmap[], PetscCount jmap_new[]) 6373 { 6374 PetscCount k, p; 6375 6376 PetscFunctionBegin; 6377 jmap_new[0] = 0; 6378 p = nnz; /* p loops over jmap_new[] backwards */ 6379 for (k = nnz1 - 1; k >= 0; k--) { /* k loops over imap[] */ 6380 for (; p > imap[k]; p--) jmap_new[p] = jmap[k + 1]; 6381 } 6382 for (; p >= 0; p--) jmap_new[p] = jmap[0]; 6383 PetscFunctionReturn(PETSC_SUCCESS); 6384 } 6385 6386 static PetscErrorCode MatCOOStructDestroy_MPIAIJ(void *data) 6387 { 6388 MatCOOStruct_MPIAIJ *coo = (MatCOOStruct_MPIAIJ *)data; 6389 6390 PetscFunctionBegin; 6391 PetscCall(PetscSFDestroy(&coo->sf)); 6392 PetscCall(PetscFree(coo->Aperm1)); 6393 PetscCall(PetscFree(coo->Bperm1)); 6394 PetscCall(PetscFree(coo->Ajmap1)); 6395 PetscCall(PetscFree(coo->Bjmap1)); 6396 PetscCall(PetscFree(coo->Aimap2)); 6397 PetscCall(PetscFree(coo->Bimap2)); 6398 PetscCall(PetscFree(coo->Aperm2)); 6399 PetscCall(PetscFree(coo->Bperm2)); 6400 PetscCall(PetscFree(coo->Ajmap2)); 6401 PetscCall(PetscFree(coo->Bjmap2)); 6402 PetscCall(PetscFree(coo->Cperm1)); 6403 PetscCall(PetscFree2(coo->sendbuf, coo->recvbuf)); 6404 PetscCall(PetscFree(coo)); 6405 PetscFunctionReturn(PETSC_SUCCESS); 6406 } 6407 6408 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, PetscInt coo_i[], PetscInt coo_j[]) 6409 { 6410 MPI_Comm comm; 6411 PetscMPIInt rank, size; 6412 PetscInt m, n, M, N, rstart, rend, cstart, cend; /* Sizes, indices of row/col, therefore with type PetscInt */ 6413 PetscCount k, p, q, rem; /* Loop variables over coo arrays */ 6414 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)mat->data; 6415 PetscContainer container; 6416 MatCOOStruct_MPIAIJ *coo; 6417 6418 PetscFunctionBegin; 6419 PetscCall(PetscFree(mpiaij->garray)); 6420 PetscCall(VecDestroy(&mpiaij->lvec)); 6421 #if defined(PETSC_USE_CTABLE) 6422 PetscCall(PetscHMapIDestroy(&mpiaij->colmap)); 6423 #else 6424 PetscCall(PetscFree(mpiaij->colmap)); 6425 #endif 6426 PetscCall(VecScatterDestroy(&mpiaij->Mvctx)); 6427 mat->assembled = PETSC_FALSE; 6428 mat->was_assembled = PETSC_FALSE; 6429 6430 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 6431 PetscCallMPI(MPI_Comm_size(comm, &size)); 6432 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 6433 PetscCall(PetscLayoutSetUp(mat->rmap)); 6434 PetscCall(PetscLayoutSetUp(mat->cmap)); 6435 PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend)); 6436 PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend)); 6437 PetscCall(MatGetLocalSize(mat, &m, &n)); 6438 PetscCall(MatGetSize(mat, &M, &N)); 6439 6440 /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */ 6441 /* entries come first, then local rows, then remote rows. */ 6442 PetscCount n1 = coo_n, *perm1; 6443 PetscInt *i1 = coo_i, *j1 = coo_j; 6444 6445 PetscCall(PetscMalloc1(n1, &perm1)); 6446 for (k = 0; k < n1; k++) perm1[k] = k; 6447 6448 /* Manipulate indices so that entries with negative row or col indices will have smallest 6449 row indices, local entries will have greater but negative row indices, and remote entries 6450 will have positive row indices. 
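     For illustration (made-up values; assuming 32-bit PetscInt, so PETSC_MAX_INT = 2^31-1 and PETSC_MIN_INT = -2^31) with local rows [rstart,rend) = [100,200):
       an entry with a negative row or column index -> i1[k] = PETSC_MIN_INT        (most negative, sorts first)
       a local entry with row index 150             -> i1[k] = 150 - PETSC_MAX_INT  (negative, but greater than PETSC_MIN_INT)
       a remote entry with row index 300            -> i1[k] = 300                  (non-negative, sorts last)
     so a single sort by the manipulated row index groups ignored, local and remote entries in exactly that order.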
6451 */ 6452 for (k = 0; k < n1; k++) { 6453 if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_MIN_INT; /* e.g., -2^31, minimal to move them ahead */ 6454 else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_MAX_INT; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_MAX_INT, -1] */ 6455 else { 6456 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_USER_INPUT, "MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows"); 6457 if (mpiaij->donotstash) i1[k] = PETSC_MIN_INT; /* Ignore offproc entries as if they had negative indices */ 6458 } 6459 } 6460 6461 /* Sort by row; after that, [0,k) have ignored entries, [k,rem) have local rows and [rem,n1) have remote rows */ 6462 PetscCall(PetscSortIntWithIntCountArrayPair(n1, i1, j1, perm1)); 6463 6464 /* Advance k to the first entry we need to take care of */ 6465 for (k = 0; k < n1; k++) 6466 if (i1[k] > PETSC_MIN_INT) break; 6467 PetscInt i1start = k; 6468 6469 PetscCall(PetscSortedIntUpperBound(i1, k, n1, rend - 1 - PETSC_MAX_INT, &rem)); /* rem is upper bound of the last local row */ 6470 for (; k < rem; k++) i1[k] += PETSC_MAX_INT; /* Revert row indices of local rows*/ 6471 6472 /* Send remote rows to their owner */ 6473 /* Find which rows should be sent to which remote ranks*/ 6474 PetscInt nsend = 0; /* Number of MPI ranks to send data to */ 6475 PetscMPIInt *sendto; /* [nsend], storing remote ranks */ 6476 PetscInt *nentries; /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */ 6477 const PetscInt *ranges; 6478 PetscInt maxNsend = size >= 128 ? 128 : size; /* Assume max 128 neighbors; realloc when needed */ 6479 6480 PetscCall(PetscLayoutGetRanges(mat->rmap, &ranges)); 6481 PetscCall(PetscMalloc2(maxNsend, &sendto, maxNsend, &nentries)); 6482 for (k = rem; k < n1;) { 6483 PetscMPIInt owner; 6484 PetscInt firstRow, lastRow; 6485 6486 /* Locate a row range */ 6487 firstRow = i1[k]; /* first row of this owner */ 6488 PetscCall(PetscLayoutFindOwner(mat->rmap, firstRow, &owner)); 6489 lastRow = ranges[owner + 1] - 1; /* last row of this owner */ 6490 6491 /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */ 6492 PetscCall(PetscSortedIntUpperBound(i1, k, n1, lastRow, &p)); 6493 6494 /* All entries in [k,p) belong to this remote owner */ 6495 if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */ 6496 PetscMPIInt *sendto2; 6497 PetscInt *nentries2; 6498 PetscInt maxNsend2 = (maxNsend <= size / 2) ? 
maxNsend * 2 : size; 6499 6500 PetscCall(PetscMalloc2(maxNsend2, &sendto2, maxNsend2, &nentries2)); 6501 PetscCall(PetscArraycpy(sendto2, sendto, maxNsend)); 6502 PetscCall(PetscArraycpy(nentries2, nentries, maxNsend)); 6503 PetscCall(PetscFree2(sendto, nentries)); 6504 sendto = sendto2; 6505 nentries = nentries2; 6506 maxNsend = maxNsend2; 6507 } 6508 sendto[nsend] = owner; 6509 nentries[nsend] = p - k; 6510 PetscCall(PetscCountCast(p - k, &nentries[nsend])); 6511 nsend++; 6512 k = p; 6513 } 6514 6515 /* Build 1st SF to know offsets on remote to send data */ 6516 PetscSF sf1; 6517 PetscInt nroots = 1, nroots2 = 0; 6518 PetscInt nleaves = nsend, nleaves2 = 0; 6519 PetscInt *offsets; 6520 PetscSFNode *iremote; 6521 6522 PetscCall(PetscSFCreate(comm, &sf1)); 6523 PetscCall(PetscMalloc1(nsend, &iremote)); 6524 PetscCall(PetscMalloc1(nsend, &offsets)); 6525 for (k = 0; k < nsend; k++) { 6526 iremote[k].rank = sendto[k]; 6527 iremote[k].index = 0; 6528 nleaves2 += nentries[k]; 6529 PetscCheck(nleaves2 >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF leaves is too large for PetscInt"); 6530 } 6531 PetscCall(PetscSFSetGraph(sf1, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 6532 PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1, MPIU_INT, PETSC_MEMTYPE_HOST, &nroots2 /*rootdata*/, PETSC_MEMTYPE_HOST, nentries /*leafdata*/, PETSC_MEMTYPE_HOST, offsets /*leafupdate*/, MPI_SUM)); 6533 PetscCall(PetscSFFetchAndOpEnd(sf1, MPIU_INT, &nroots2, nentries, offsets, MPI_SUM)); /* Would nroots2 overflow, we check offsets[] below */ 6534 PetscCall(PetscSFDestroy(&sf1)); 6535 PetscAssert(nleaves2 == n1 - rem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT, nleaves2, n1 - rem); 6536 6537 /* Build 2nd SF to send remote COOs to their owner */ 6538 PetscSF sf2; 6539 nroots = nroots2; 6540 nleaves = nleaves2; 6541 PetscCall(PetscSFCreate(comm, &sf2)); 6542 PetscCall(PetscSFSetFromOptions(sf2)); 6543 PetscCall(PetscMalloc1(nleaves, &iremote)); 6544 p = 0; 6545 for (k = 0; k < nsend; k++) { 6546 PetscCheck(offsets[k] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF roots is too large for PetscInt"); 6547 for (q = 0; q < nentries[k]; q++, p++) { 6548 iremote[p].rank = sendto[k]; 6549 iremote[p].index = offsets[k] + q; 6550 } 6551 } 6552 PetscCall(PetscSFSetGraph(sf2, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 6553 6554 /* Send the remote COOs to their owner */ 6555 PetscInt n2 = nroots, *i2, *j2; /* Buffers for received COOs from other ranks, along with a permutation array */ 6556 PetscCount *perm2; /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */ 6557 PetscCall(PetscMalloc3(n2, &i2, n2, &j2, n2, &perm2)); 6558 PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, i1 + rem, PETSC_MEMTYPE_HOST, i2, MPI_REPLACE)); 6559 PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, i1 + rem, i2, MPI_REPLACE)); 6560 PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, j1 + rem, PETSC_MEMTYPE_HOST, j2, MPI_REPLACE)); 6561 PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, j1 + rem, j2, MPI_REPLACE)); 6562 6563 PetscCall(PetscFree(offsets)); 6564 PetscCall(PetscFree2(sendto, nentries)); 6565 6566 /* Sort received COOs by row along with the permutation array */ 6567 for (k = 0; k < n2; k++) perm2[k] = k; 6568 PetscCall(PetscSortIntWithIntCountArrayPair(n2, i2, j2, perm2)); 6569 6570 /* sf2 only
sends contiguous leafdata to contiguous rootdata. We record the permutation which will be used to fill leafdata */ 6571 PetscCount *Cperm1; 6572 PetscCall(PetscMalloc1(nleaves, &Cperm1)); 6573 PetscCall(PetscArraycpy(Cperm1, perm1 + rem, nleaves)); 6574 6575 /* Support for HYPRE matrices, kind of a hack. 6576 Swap min column with diagonal so that diagonal values will go first */ 6577 PetscBool hypre; 6578 const char *name; 6579 PetscCall(PetscObjectGetName((PetscObject)mat, &name)); 6580 PetscCall(PetscStrcmp("_internal_COO_mat_for_hypre", name, &hypre)); 6581 if (hypre) { 6582 PetscInt *minj; 6583 PetscBT hasdiag; 6584 6585 PetscCall(PetscBTCreate(m, &hasdiag)); 6586 PetscCall(PetscMalloc1(m, &minj)); 6587 for (k = 0; k < m; k++) minj[k] = PETSC_MAX_INT; 6588 for (k = i1start; k < rem; k++) { 6589 if (j1[k] < cstart || j1[k] >= cend) continue; 6590 const PetscInt rindex = i1[k] - rstart; 6591 if ((j1[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex)); 6592 minj[rindex] = PetscMin(minj[rindex], j1[k]); 6593 } 6594 for (k = 0; k < n2; k++) { 6595 if (j2[k] < cstart || j2[k] >= cend) continue; 6596 const PetscInt rindex = i2[k] - rstart; 6597 if ((j2[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex)); 6598 minj[rindex] = PetscMin(minj[rindex], j2[k]); 6599 } 6600 for (k = i1start; k < rem; k++) { 6601 const PetscInt rindex = i1[k] - rstart; 6602 if (j1[k] < cstart || j1[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue; 6603 if (j1[k] == minj[rindex]) j1[k] = i1[k] + (cstart - rstart); 6604 else if ((j1[k] - cstart) == rindex) j1[k] = minj[rindex]; 6605 } 6606 for (k = 0; k < n2; k++) { 6607 const PetscInt rindex = i2[k] - rstart; 6608 if (j2[k] < cstart || j2[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue; 6609 if (j2[k] == minj[rindex]) j2[k] = i2[k] + (cstart - rstart); 6610 else if ((j2[k] - cstart) == rindex) j2[k] = minj[rindex]; 6611 } 6612 PetscCall(PetscBTDestroy(&hasdiag)); 6613 PetscCall(PetscFree(minj)); 6614 } 6615 6616 /* Split local COOs and received COOs into diag/offdiag portions */ 6617 PetscCount *rowBegin1, *rowMid1, *rowEnd1; 6618 PetscCount *Ajmap1, *Aperm1, *Bjmap1, *Bperm1; 6619 PetscCount Annz1, Bnnz1, Atot1, Btot1; 6620 PetscCount *rowBegin2, *rowMid2, *rowEnd2; 6621 PetscCount *Ajmap2, *Aperm2, *Bjmap2, *Bperm2; 6622 PetscCount Annz2, Bnnz2, Atot2, Btot2; 6623 6624 PetscCall(PetscCalloc3(m, &rowBegin1, m, &rowMid1, m, &rowEnd1)); 6625 PetscCall(PetscCalloc3(m, &rowBegin2, m, &rowMid2, m, &rowEnd2)); 6626 PetscCall(MatSplitEntries_Internal(mat, rem, i1, j1, perm1, rowBegin1, rowMid1, rowEnd1, &Atot1, &Aperm1, &Annz1, &Ajmap1, &Btot1, &Bperm1, &Bnnz1, &Bjmap1)); 6627 PetscCall(MatSplitEntries_Internal(mat, n2, i2, j2, perm2, rowBegin2, rowMid2, rowEnd2, &Atot2, &Aperm2, &Annz2, &Ajmap2, &Btot2, &Bperm2, &Bnnz2, &Bjmap2)); 6628 6629 /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */ 6630 PetscInt *Ai, *Bi; 6631 PetscInt *Aj, *Bj; 6632 6633 PetscCall(PetscMalloc1(m + 1, &Ai)); 6634 PetscCall(PetscMalloc1(m + 1, &Bi)); 6635 PetscCall(PetscMalloc1(Annz1 + Annz2, &Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */ 6636 PetscCall(PetscMalloc1(Bnnz1 + Bnnz2, &Bj)); 6637 6638 PetscCount *Aimap1, *Bimap1, *Aimap2, *Bimap2; 6639 PetscCall(PetscMalloc1(Annz1, &Aimap1)); 6640 PetscCall(PetscMalloc1(Bnnz1, &Bimap1)); 6641 PetscCall(PetscMalloc1(Annz2, &Aimap2)); 6642 PetscCall(PetscMalloc1(Bnnz2, &Bimap2)); 6643 6644 PetscCall(MatMergeEntries_Internal(mat, j1, j2, 
rowBegin1, rowMid1, rowBegin2, rowMid2, Ajmap1, Ajmap2, Aimap1, Aimap2, Ai, Aj)); 6645 PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowMid1, rowEnd1, rowMid2, rowEnd2, Bjmap1, Bjmap2, Bimap1, Bimap2, Bi, Bj)); 6646 6647 /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we */ 6648 /* expect nonzeros in A/B most likely have local contributing entries */ 6649 PetscInt Annz = Ai[m]; 6650 PetscInt Bnnz = Bi[m]; 6651 PetscCount *Ajmap1_new, *Bjmap1_new; 6652 6653 PetscCall(PetscMalloc1(Annz + 1, &Ajmap1_new)); 6654 PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap1_new)); 6655 6656 PetscCall(ExpandJmap_Internal(Annz1, Annz, Aimap1, Ajmap1, Ajmap1_new)); 6657 PetscCall(ExpandJmap_Internal(Bnnz1, Bnnz, Bimap1, Bjmap1, Bjmap1_new)); 6658 6659 PetscCall(PetscFree(Aimap1)); 6660 PetscCall(PetscFree(Ajmap1)); 6661 PetscCall(PetscFree(Bimap1)); 6662 PetscCall(PetscFree(Bjmap1)); 6663 PetscCall(PetscFree3(rowBegin1, rowMid1, rowEnd1)); 6664 PetscCall(PetscFree3(rowBegin2, rowMid2, rowEnd2)); 6665 PetscCall(PetscFree(perm1)); 6666 PetscCall(PetscFree3(i2, j2, perm2)); 6667 6668 Ajmap1 = Ajmap1_new; 6669 Bjmap1 = Bjmap1_new; 6670 6671 /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */ 6672 if (Annz < Annz1 + Annz2) { 6673 PetscInt *Aj_new; 6674 PetscCall(PetscMalloc1(Annz, &Aj_new)); 6675 PetscCall(PetscArraycpy(Aj_new, Aj, Annz)); 6676 PetscCall(PetscFree(Aj)); 6677 Aj = Aj_new; 6678 } 6679 6680 if (Bnnz < Bnnz1 + Bnnz2) { 6681 PetscInt *Bj_new; 6682 PetscCall(PetscMalloc1(Bnnz, &Bj_new)); 6683 PetscCall(PetscArraycpy(Bj_new, Bj, Bnnz)); 6684 PetscCall(PetscFree(Bj)); 6685 Bj = Bj_new; 6686 } 6687 6688 /* Create new submatrices for on-process and off-process coupling */ 6689 PetscScalar *Aa, *Ba; 6690 MatType rtype; 6691 Mat_SeqAIJ *a, *b; 6692 PetscObjectState state; 6693 PetscCall(PetscCalloc1(Annz, &Aa)); /* Zero matrix on device */ 6694 PetscCall(PetscCalloc1(Bnnz, &Ba)); 6695 /* make Aj[] local, i.e, based off the start column of the diagonal portion */ 6696 if (cstart) { 6697 for (k = 0; k < Annz; k++) Aj[k] -= cstart; 6698 } 6699 6700 PetscCall(MatGetRootType_Private(mat, &rtype)); 6701 6702 MatSeqXAIJGetOptions_Private(mpiaij->A); 6703 PetscCall(MatDestroy(&mpiaij->A)); 6704 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, Ai, Aj, Aa, &mpiaij->A)); 6705 PetscCall(MatSetBlockSizesFromMats(mpiaij->A, mat, mat)); 6706 MatSeqXAIJRestoreOptions_Private(mpiaij->A); 6707 6708 MatSeqXAIJGetOptions_Private(mpiaij->B); 6709 PetscCall(MatDestroy(&mpiaij->B)); 6710 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, mat->cmap->N, Bi, Bj, Ba, &mpiaij->B)); 6711 PetscCall(MatSetBlockSizesFromMats(mpiaij->B, mat, mat)); 6712 MatSeqXAIJRestoreOptions_Private(mpiaij->B); 6713 6714 PetscCall(MatSetUpMultiply_MPIAIJ(mat)); 6715 mat->was_assembled = PETSC_TRUE; // was_assembled in effect means the Mvctx is built; doing so avoids redundant MatSetUpMultiply_MPIAIJ 6716 state = mpiaij->A->nonzerostate + mpiaij->B->nonzerostate; 6717 PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat))); 6718 6719 a = (Mat_SeqAIJ *)mpiaij->A->data; 6720 b = (Mat_SeqAIJ *)mpiaij->B->data; 6721 a->singlemalloc = b->singlemalloc = PETSC_FALSE; /* Let newmat own Ai,Aj,Aa,Bi,Bj,Ba */ 6722 a->free_a = b->free_a = PETSC_TRUE; 6723 a->free_ij = b->free_ij = PETSC_TRUE; 6724 6725 /* conversion must happen AFTER multiply setup */ 6726 PetscCall(MatConvert(mpiaij->A, rtype, MAT_INPLACE_MATRIX, &mpiaij->A)); 6727 
PetscCall(MatConvert(mpiaij->B, rtype, MAT_INPLACE_MATRIX, &mpiaij->B)); 6728 PetscCall(VecDestroy(&mpiaij->lvec)); 6729 PetscCall(MatCreateVecs(mpiaij->B, &mpiaij->lvec, NULL)); 6730 6731 // Put the COO struct in a container and then attach that to the matrix 6732 PetscCall(PetscMalloc1(1, &coo)); 6733 coo->n = coo_n; 6734 coo->sf = sf2; 6735 coo->sendlen = nleaves; 6736 coo->recvlen = nroots; 6737 coo->Annz = Annz; 6738 coo->Bnnz = Bnnz; 6739 coo->Annz2 = Annz2; 6740 coo->Bnnz2 = Bnnz2; 6741 coo->Atot1 = Atot1; 6742 coo->Atot2 = Atot2; 6743 coo->Btot1 = Btot1; 6744 coo->Btot2 = Btot2; 6745 coo->Ajmap1 = Ajmap1; 6746 coo->Aperm1 = Aperm1; 6747 coo->Bjmap1 = Bjmap1; 6748 coo->Bperm1 = Bperm1; 6749 coo->Aimap2 = Aimap2; 6750 coo->Ajmap2 = Ajmap2; 6751 coo->Aperm2 = Aperm2; 6752 coo->Bimap2 = Bimap2; 6753 coo->Bjmap2 = Bjmap2; 6754 coo->Bperm2 = Bperm2; 6755 coo->Cperm1 = Cperm1; 6756 // Allocate in preallocation. If not used, it has zero cost on host 6757 PetscCall(PetscMalloc2(coo->sendlen, &coo->sendbuf, coo->recvlen, &coo->recvbuf)); 6758 PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container)); 6759 PetscCall(PetscContainerSetPointer(container, coo)); 6760 PetscCall(PetscContainerSetUserDestroy(container, MatCOOStructDestroy_MPIAIJ)); 6761 PetscCall(PetscObjectCompose((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject)container)); 6762 PetscCall(PetscContainerDestroy(&container)); 6763 PetscFunctionReturn(PETSC_SUCCESS); 6764 } 6765 6766 static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat, const PetscScalar v[], InsertMode imode) 6767 { 6768 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)mat->data; 6769 Mat A = mpiaij->A, B = mpiaij->B; 6770 PetscScalar *Aa, *Ba; 6771 PetscScalar *sendbuf, *recvbuf; 6772 const PetscCount *Ajmap1, *Ajmap2, *Aimap2; 6773 const PetscCount *Bjmap1, *Bjmap2, *Bimap2; 6774 const PetscCount *Aperm1, *Aperm2, *Bperm1, *Bperm2; 6775 const PetscCount *Cperm1; 6776 PetscContainer container; 6777 MatCOOStruct_MPIAIJ *coo; 6778 6779 PetscFunctionBegin; 6780 PetscCall(PetscObjectQuery((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject *)&container)); 6781 PetscCheck(container, PetscObjectComm((PetscObject)mat), PETSC_ERR_PLIB, "Not found MatCOOStruct on this matrix"); 6782 PetscCall(PetscContainerGetPointer(container, (void **)&coo)); 6783 sendbuf = coo->sendbuf; 6784 recvbuf = coo->recvbuf; 6785 Ajmap1 = coo->Ajmap1; 6786 Ajmap2 = coo->Ajmap2; 6787 Aimap2 = coo->Aimap2; 6788 Bjmap1 = coo->Bjmap1; 6789 Bjmap2 = coo->Bjmap2; 6790 Bimap2 = coo->Bimap2; 6791 Aperm1 = coo->Aperm1; 6792 Aperm2 = coo->Aperm2; 6793 Bperm1 = coo->Bperm1; 6794 Bperm2 = coo->Bperm2; 6795 Cperm1 = coo->Cperm1; 6796 6797 PetscCall(MatSeqAIJGetArray(A, &Aa)); /* Might read and write matrix values */ 6798 PetscCall(MatSeqAIJGetArray(B, &Ba)); 6799 6800 /* Pack entries to be sent to remote */ 6801 for (PetscCount i = 0; i < coo->sendlen; i++) sendbuf[i] = v[Cperm1[i]]; 6802 6803 /* Send remote entries to their owner and overlap the communication with local computation */ 6804 PetscCall(PetscSFReduceWithMemTypeBegin(coo->sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, sendbuf, PETSC_MEMTYPE_HOST, recvbuf, MPI_REPLACE)); 6805 /* Add local entries to A and B */ 6806 for (PetscCount i = 0; i < coo->Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */ 6807 PetscScalar sum = 0.0; /* Do partial summation first to improve numerical stability */ 6808 for (PetscCount k = Ajmap1[i]; k < Ajmap1[i + 1]; k++) sum += v[Aperm1[k]]; 6809 Aa[i] = (imode == INSERT_VALUES ? 
0.0 : Aa[i]) + sum; 6810 } 6811 for (PetscCount i = 0; i < coo->Bnnz; i++) { 6812 PetscScalar sum = 0.0; 6813 for (PetscCount k = Bjmap1[i]; k < Bjmap1[i + 1]; k++) sum += v[Bperm1[k]]; 6814 Ba[i] = (imode == INSERT_VALUES ? 0.0 : Ba[i]) + sum; 6815 } 6816 PetscCall(PetscSFReduceEnd(coo->sf, MPIU_SCALAR, sendbuf, recvbuf, MPI_REPLACE)); 6817 6818 /* Add received remote entries to A and B */ 6819 for (PetscCount i = 0; i < coo->Annz2; i++) { 6820 for (PetscCount k = Ajmap2[i]; k < Ajmap2[i + 1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]]; 6821 } 6822 for (PetscCount i = 0; i < coo->Bnnz2; i++) { 6823 for (PetscCount k = Bjmap2[i]; k < Bjmap2[i + 1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]]; 6824 } 6825 PetscCall(MatSeqAIJRestoreArray(A, &Aa)); 6826 PetscCall(MatSeqAIJRestoreArray(B, &Ba)); 6827 PetscFunctionReturn(PETSC_SUCCESS); 6828 } 6829 6830 /*MC 6831 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 6832 6833 Options Database Keys: 6834 . -mat_type mpiaij - sets the matrix type to `MATMPIAIJ` during a call to `MatSetFromOptions()` 6835 6836 Level: beginner 6837 6838 Notes: 6839 `MatSetValues()` may be called for this matrix type with a `NULL` argument for the numerical values, 6840 in this case the values associated with the rows and columns one passes in are set to zero 6841 in the matrix 6842 6843 `MatSetOptions`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this no 6844 space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored 6845 6846 .seealso: [](ch_matrices), `Mat`, `MATSEQAIJ`, `MATAIJ`, `MatCreateAIJ()` 6847 M*/ 6848 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 6849 { 6850 Mat_MPIAIJ *b; 6851 PetscMPIInt size; 6852 6853 PetscFunctionBegin; 6854 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 6855 6856 PetscCall(PetscNew(&b)); 6857 B->data = (void *)b; 6858 B->ops[0] = MatOps_Values; 6859 B->assembled = PETSC_FALSE; 6860 B->insertmode = NOT_SET_VALUES; 6861 b->size = size; 6862 6863 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank)); 6864 6865 /* build cache for off array entries formed */ 6866 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash)); 6867 6868 b->donotstash = PETSC_FALSE; 6869 b->colmap = NULL; 6870 b->garray = NULL; 6871 b->roworiented = PETSC_TRUE; 6872 6873 /* stuff used for matrix vector multiply */ 6874 b->lvec = NULL; 6875 b->Mvctx = NULL; 6876 6877 /* stuff for MatGetRow() */ 6878 b->rowindices = NULL; 6879 b->rowvalues = NULL; 6880 b->getrowactive = PETSC_FALSE; 6881 6882 /* flexible pointer used in CUSPARSE classes */ 6883 b->spptr = NULL; 6884 6885 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetUseScalableIncreaseOverlap_C", MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ)); 6886 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIAIJ)); 6887 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIAIJ)); 6888 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_MPIAIJ)); 6889 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocation_C", MatMPIAIJSetPreallocation_MPIAIJ)); 6890 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetPreallocation_C", MatResetPreallocation_MPIAIJ)); 6891 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocationCSR_C", MatMPIAIJSetPreallocationCSR_MPIAIJ)); 6892 
PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIAIJ)); 6893 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijperm_C", MatConvert_MPIAIJ_MPIAIJPERM)); 6894 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijsell_C", MatConvert_MPIAIJ_MPIAIJSELL)); 6895 #if defined(PETSC_HAVE_CUDA) 6896 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcusparse_C", MatConvert_MPIAIJ_MPIAIJCUSPARSE)); 6897 #endif 6898 #if defined(PETSC_HAVE_HIP) 6899 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijhipsparse_C", MatConvert_MPIAIJ_MPIAIJHIPSPARSE)); 6900 #endif 6901 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 6902 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijkokkos_C", MatConvert_MPIAIJ_MPIAIJKokkos)); 6903 #endif 6904 #if defined(PETSC_HAVE_MKL_SPARSE) 6905 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijmkl_C", MatConvert_MPIAIJ_MPIAIJMKL)); 6906 #endif 6907 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcrl_C", MatConvert_MPIAIJ_MPIAIJCRL)); 6908 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpibaij_C", MatConvert_MPIAIJ_MPIBAIJ)); 6909 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisbaij_C", MatConvert_MPIAIJ_MPISBAIJ)); 6910 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense)); 6911 #if defined(PETSC_HAVE_ELEMENTAL) 6912 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_elemental_C", MatConvert_MPIAIJ_Elemental)); 6913 #endif 6914 #if defined(PETSC_HAVE_SCALAPACK) 6915 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_scalapack_C", MatConvert_AIJ_ScaLAPACK)); 6916 #endif 6917 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_is_C", MatConvert_XAIJ_IS)); 6918 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisell_C", MatConvert_MPIAIJ_MPISELL)); 6919 #if defined(PETSC_HAVE_HYPRE) 6920 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_hypre_C", MatConvert_AIJ_HYPRE)); 6921 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", MatProductSetFromOptions_Transpose_AIJ_AIJ)); 6922 #endif 6923 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_is_mpiaij_C", MatProductSetFromOptions_IS_XAIJ)); 6924 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpiaij_C", MatProductSetFromOptions_MPIAIJ)); 6925 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetPreallocationCOO_C", MatSetPreallocationCOO_MPIAIJ)); 6926 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetValuesCOO_C", MatSetValuesCOO_MPIAIJ)); 6927 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIAIJ)); 6928 PetscFunctionReturn(PETSC_SUCCESS); 6929 } 6930 6931 /*@C 6932 MatCreateMPIAIJWithSplitArrays - creates a `MATMPIAIJ` matrix using arrays that contain the "diagonal" 6933 and "off-diagonal" part of the matrix in CSR format. 6934 6935 Collective 6936 6937 Input Parameters: 6938 + comm - MPI communicator 6939 . m - number of local rows (Cannot be `PETSC_DECIDE`) 6940 . n - This value should be the same as the local size used in creating the 6941 x vector for the matrix-vector product $y = Ax$. 
(or `PETSC_DECIDE` to have 6942 calculated if `N` is given) For square matrices `n` is almost always `m`. 6943 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given) 6944 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given) 6945 . i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 6946 . j - column indices, which must be local, i.e., based off the start column of the diagonal portion 6947 . a - matrix values 6948 . oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix 6949 . oj - column indices, which must be global, representing global columns in the `MATMPIAIJ` matrix 6950 - oa - matrix values 6951 6952 Output Parameter: 6953 . mat - the matrix 6954 6955 Level: advanced 6956 6957 Notes: 6958 The `i`, `j`, and `a` arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 6959 must free the arrays once the matrix has been destroyed and not before. 6960 6961 The `i` and `j` indices are 0 based 6962 6963 See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix 6964 6965 This sets local rows and cannot be used to set off-processor values. 6966 6967 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 6968 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 6969 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 6970 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 6971 keep track of the underlying array. Use `MatSetOption`(A,`MAT_NO_OFF_PROC_ENTRIES`,`PETSC_TRUE`) to disable all 6972 communication if it is known that only local entries will be set. 
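   Example Usage:
   A minimal sketch (with made-up numerical values) for rank 0 of a two-process run assembling a 4 x 4 matrix in which this rank owns
   rows 0-1 and columns 0-1, so its "diagonal" block covers global columns 0-1; rank 1 would pass the analogous arrays for rows 2-3.
.vb
   PetscInt    i[]  = {0, 1, 2}, j[]  = {0, 1}; // diagonal block entries A(0,0) and A(1,1); j[] holds local column indices
   PetscScalar a[]  = {1.0, 3.0};
   PetscInt    oi[] = {0, 1, 1}, oj[] = {3};    // off-diagonal block entry A(0,3); oj[] holds global column indices
   PetscScalar oa[] = {2.0};
   Mat         A;

   PetscCall(MatCreateMPIAIJWithSplitArrays(PETSC_COMM_WORLD, 2, 2, 4, 4, i, j, a, oi, oj, oa, &A));
   // ... use A; the arrays above are not copied and must remain valid until A has been destroyed ...
   PetscCall(MatDestroy(&A));
.ve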
6973 6974 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 6975 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()` 6976 @*/ 6977 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt i[], PetscInt j[], PetscScalar a[], PetscInt oi[], PetscInt oj[], PetscScalar oa[], Mat *mat) 6978 { 6979 Mat_MPIAIJ *maij; 6980 6981 PetscFunctionBegin; 6982 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 6983 PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 6984 PetscCheck(oi[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "oi (row indices) must start with 0"); 6985 PetscCall(MatCreate(comm, mat)); 6986 PetscCall(MatSetSizes(*mat, m, n, M, N)); 6987 PetscCall(MatSetType(*mat, MATMPIAIJ)); 6988 maij = (Mat_MPIAIJ *)(*mat)->data; 6989 6990 (*mat)->preallocated = PETSC_TRUE; 6991 6992 PetscCall(PetscLayoutSetUp((*mat)->rmap)); 6993 PetscCall(PetscLayoutSetUp((*mat)->cmap)); 6994 6995 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, i, j, a, &maij->A)); 6996 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, (*mat)->cmap->N, oi, oj, oa, &maij->B)); 6997 6998 PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 6999 PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY)); 7000 PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY)); 7001 PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE)); 7002 PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 7003 PetscFunctionReturn(PETSC_SUCCESS); 7004 } 7005 7006 typedef struct { 7007 Mat *mp; /* intermediate products */ 7008 PetscBool *mptmp; /* is the intermediate product temporary ? */ 7009 PetscInt cp; /* number of intermediate products */ 7010 7011 /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */ 7012 PetscInt *startsj_s, *startsj_r; 7013 PetscScalar *bufa; 7014 Mat P_oth; 7015 7016 /* may take advantage of merging product->B */ 7017 Mat Bloc; /* B-local by merging diag and off-diag */ 7018 7019 /* cusparse does not have support to split between symbolic and numeric phases. 7020 When api_user is true, we don't need to update the numerical values 7021 of the temporary storage */ 7022 PetscBool reusesym; 7023 7024 /* support for COO values insertion */ 7025 PetscScalar *coo_v, *coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */ 7026 PetscInt **own; /* own[i] points to address of on-process COO indices for Mat mp[i] */ 7027 PetscInt **off; /* off[i] points to address of off-process COO indices for Mat mp[i] */ 7028 PetscBool hasoffproc; /* if true, have off-process values insertion (i.e. 
AtB or PtAP) */ 7029 PetscSF sf; /* used for non-local values insertion and memory malloc */ 7030 PetscMemType mtype; 7031 7032 /* customization */ 7033 PetscBool abmerge; 7034 PetscBool P_oth_bind; 7035 } MatMatMPIAIJBACKEND; 7036 7037 static PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data) 7038 { 7039 MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND *)data; 7040 PetscInt i; 7041 7042 PetscFunctionBegin; 7043 PetscCall(PetscFree2(mmdata->startsj_s, mmdata->startsj_r)); 7044 PetscCall(PetscFree(mmdata->bufa)); 7045 PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_v)); 7046 PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_w)); 7047 PetscCall(MatDestroy(&mmdata->P_oth)); 7048 PetscCall(MatDestroy(&mmdata->Bloc)); 7049 PetscCall(PetscSFDestroy(&mmdata->sf)); 7050 for (i = 0; i < mmdata->cp; i++) PetscCall(MatDestroy(&mmdata->mp[i])); 7051 PetscCall(PetscFree2(mmdata->mp, mmdata->mptmp)); 7052 PetscCall(PetscFree(mmdata->own[0])); 7053 PetscCall(PetscFree(mmdata->own)); 7054 PetscCall(PetscFree(mmdata->off[0])); 7055 PetscCall(PetscFree(mmdata->off)); 7056 PetscCall(PetscFree(mmdata)); 7057 PetscFunctionReturn(PETSC_SUCCESS); 7058 } 7059 7060 /* Copy selected n entries with indices in idx[] of A to v[]. 7061 If idx is NULL, copy the whole data array of A to v[] 7062 */ 7063 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[]) 7064 { 7065 PetscErrorCode (*f)(Mat, PetscInt, const PetscInt[], PetscScalar[]); 7066 7067 PetscFunctionBegin; 7068 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJCopySubArray_C", &f)); 7069 if (f) { 7070 PetscCall((*f)(A, n, idx, v)); 7071 } else { 7072 const PetscScalar *vv; 7073 7074 PetscCall(MatSeqAIJGetArrayRead(A, &vv)); 7075 if (n && idx) { 7076 PetscScalar *w = v; 7077 const PetscInt *oi = idx; 7078 PetscInt j; 7079 7080 for (j = 0; j < n; j++) *w++ = vv[*oi++]; 7081 } else { 7082 PetscCall(PetscArraycpy(v, vv, n)); 7083 } 7084 PetscCall(MatSeqAIJRestoreArrayRead(A, &vv)); 7085 } 7086 PetscFunctionReturn(PETSC_SUCCESS); 7087 } 7088 7089 static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C) 7090 { 7091 MatMatMPIAIJBACKEND *mmdata; 7092 PetscInt i, n_d, n_o; 7093 7094 PetscFunctionBegin; 7095 MatCheckProduct(C, 1); 7096 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 7097 mmdata = (MatMatMPIAIJBACKEND *)C->product->data; 7098 if (!mmdata->reusesym) { /* update temporary matrices */ 7099 if (mmdata->P_oth) PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A, C->product->B, MAT_REUSE_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7100 if (mmdata->Bloc) PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B, MAT_REUSE_MATRIX, NULL, &mmdata->Bloc)); 7101 } 7102 mmdata->reusesym = PETSC_FALSE; 7103 7104 for (i = 0; i < mmdata->cp; i++) { 7105 PetscCheck(mmdata->mp[i]->ops->productnumeric, PetscObjectComm((PetscObject)mmdata->mp[i]), PETSC_ERR_PLIB, "Missing numeric op for %s", MatProductTypes[mmdata->mp[i]->product->type]); 7106 PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i])); 7107 } 7108 for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) { 7109 PetscInt noff = mmdata->off[i + 1] - mmdata->off[i]; 7110 7111 if (mmdata->mptmp[i]) continue; 7112 if (noff) { 7113 PetscInt nown = mmdata->own[i + 1] - mmdata->own[i]; 7114 7115 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], noff, mmdata->off[i], mmdata->coo_w + n_o)); 7116 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], nown, 
mmdata->own[i], mmdata->coo_v + n_d)); 7117 n_o += noff; 7118 n_d += nown; 7119 } else { 7120 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mmdata->mp[i]->data; 7121 7122 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], mm->nz, NULL, mmdata->coo_v + n_d)); 7123 n_d += mm->nz; 7124 } 7125 } 7126 if (mmdata->hasoffproc) { /* offprocess insertion */ 7127 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d)); 7128 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d)); 7129 } 7130 PetscCall(MatSetValuesCOO(C, mmdata->coo_v, INSERT_VALUES)); 7131 PetscFunctionReturn(PETSC_SUCCESS); 7132 } 7133 7134 /* Support for Pt * A, A * P, or Pt * A * P */ 7135 #define MAX_NUMBER_INTERMEDIATE 4 7136 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C) 7137 { 7138 Mat_Product *product = C->product; 7139 Mat A, P, mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */ 7140 Mat_MPIAIJ *a, *p; 7141 MatMatMPIAIJBACKEND *mmdata; 7142 ISLocalToGlobalMapping P_oth_l2g = NULL; 7143 IS glob = NULL; 7144 const char *prefix; 7145 char pprefix[256]; 7146 const PetscInt *globidx, *P_oth_idx; 7147 PetscInt i, j, cp, m, n, M, N, *coo_i, *coo_j; 7148 PetscCount ncoo, ncoo_d, ncoo_o, ncoo_oown; 7149 PetscInt cmapt[MAX_NUMBER_INTERMEDIATE], rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. */ 7150 /* type-0: consecutive, start from 0; type-1: consecutive with */ 7151 /* a base offset; type-2: sparse with a local to global map table */ 7152 const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE], *rmapa[MAX_NUMBER_INTERMEDIATE]; /* col/row local to global map array (table) for type-2 map type */ 7153 7154 MatProductType ptype; 7155 PetscBool mptmp[MAX_NUMBER_INTERMEDIATE], hasoffproc = PETSC_FALSE, iscuda, iship, iskokk; 7156 PetscMPIInt size; 7157 7158 PetscFunctionBegin; 7159 MatCheckProduct(C, 1); 7160 PetscCheck(!product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 7161 ptype = product->type; 7162 if (product->A->symmetric == PETSC_BOOL3_TRUE && ptype == MATPRODUCT_AtB) { 7163 ptype = MATPRODUCT_AB; 7164 product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE; 7165 } 7166 switch (ptype) { 7167 case MATPRODUCT_AB: 7168 A = product->A; 7169 P = product->B; 7170 m = A->rmap->n; 7171 n = P->cmap->n; 7172 M = A->rmap->N; 7173 N = P->cmap->N; 7174 hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */ 7175 break; 7176 case MATPRODUCT_AtB: 7177 P = product->A; 7178 A = product->B; 7179 m = P->cmap->n; 7180 n = A->cmap->n; 7181 M = P->cmap->N; 7182 N = A->cmap->N; 7183 hasoffproc = PETSC_TRUE; 7184 break; 7185 case MATPRODUCT_PtAP: 7186 A = product->A; 7187 P = product->B; 7188 m = P->cmap->n; 7189 n = P->cmap->n; 7190 M = P->cmap->N; 7191 N = P->cmap->N; 7192 hasoffproc = PETSC_TRUE; 7193 break; 7194 default: 7195 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]); 7196 } 7197 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C), &size)); 7198 if (size == 1) hasoffproc = PETSC_FALSE; 7199 7200 /* defaults */ 7201 for (i = 0; i < MAX_NUMBER_INTERMEDIATE; i++) { 7202 mp[i] = NULL; 7203 mptmp[i] = PETSC_FALSE; 7204 rmapt[i] = -1; 7205 cmapt[i] = -1; 7206 rmapa[i] = NULL; 7207 cmapa[i] = NULL; 7208 } 7209 7210 /* customization */ 7211 PetscCall(PetscNew(&mmdata)); 7212 mmdata->reusesym = product->api_user; 7213 if (ptype == MATPRODUCT_AB) { 7214 if (product->api_user) { 7215 
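/* Customization of the AB backend path. Illustrative command line usage (assuming the MatMatMult()
         user API and no options prefix on C; the values shown are made up):
           -matmatmult_backend_mergeB 1 -matmatmult_backend_pothbind 1
         The MatProduct API branch below reads the -mat_product_algorithm_backend_* equivalents instead. */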
PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatMult", "Mat"); 7216 PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL)); 7217 PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7218 PetscOptionsEnd(); 7219 } else { 7220 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat"); 7221 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL)); 7222 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7223 PetscOptionsEnd(); 7224 } 7225 } else if (ptype == MATPRODUCT_PtAP) { 7226 if (product->api_user) { 7227 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatPtAP", "Mat"); 7228 PetscCall(PetscOptionsBool("-matptap_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7229 PetscOptionsEnd(); 7230 } else { 7231 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat"); 7232 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7233 PetscOptionsEnd(); 7234 } 7235 } 7236 a = (Mat_MPIAIJ *)A->data; 7237 p = (Mat_MPIAIJ *)P->data; 7238 PetscCall(MatSetSizes(C, m, n, M, N)); 7239 PetscCall(PetscLayoutSetUp(C->rmap)); 7240 PetscCall(PetscLayoutSetUp(C->cmap)); 7241 PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 7242 PetscCall(MatGetOptionsPrefix(C, &prefix)); 7243 7244 cp = 0; 7245 switch (ptype) { 7246 case MATPRODUCT_AB: /* A * P */ 7247 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7248 7249 /* A_diag * P_local (merged or not) */ 7250 if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */ 7251 /* P is product->B */ 7252 PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7253 PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp])); 7254 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7255 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7256 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7257 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7258 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7259 mp[cp]->product->api_user = product->api_user; 7260 PetscCall(MatProductSetFromOptions(mp[cp])); 7261 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7262 PetscCall(ISGetIndices(glob, &globidx)); 7263 rmapt[cp] = 1; 7264 cmapt[cp] = 2; 7265 cmapa[cp] = globidx; 7266 mptmp[cp] = PETSC_FALSE; 7267 cp++; 7268 } else { /* A_diag * P_diag and A_diag * P_off */ 7269 PetscCall(MatProductCreate(a->A, p->A, NULL, &mp[cp])); 7270 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7271 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7272 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7273 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7274 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7275 mp[cp]->product->api_user = 
product->api_user; 7276 PetscCall(MatProductSetFromOptions(mp[cp])); 7277 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7278 rmapt[cp] = 1; 7279 cmapt[cp] = 1; 7280 mptmp[cp] = PETSC_FALSE; 7281 cp++; 7282 PetscCall(MatProductCreate(a->A, p->B, NULL, &mp[cp])); 7283 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7284 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7285 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7286 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7287 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7288 mp[cp]->product->api_user = product->api_user; 7289 PetscCall(MatProductSetFromOptions(mp[cp])); 7290 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7291 rmapt[cp] = 1; 7292 cmapt[cp] = 2; 7293 cmapa[cp] = p->garray; 7294 mptmp[cp] = PETSC_FALSE; 7295 cp++; 7296 } 7297 7298 /* A_off * P_other */ 7299 if (mmdata->P_oth) { 7300 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); /* make P_oth use local col ids */ 7301 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx)); 7302 PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)a->B)->type_name)); 7303 PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind)); 7304 PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp])); 7305 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7306 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7307 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7308 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7309 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7310 mp[cp]->product->api_user = product->api_user; 7311 PetscCall(MatProductSetFromOptions(mp[cp])); 7312 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7313 rmapt[cp] = 1; 7314 cmapt[cp] = 2; 7315 cmapa[cp] = P_oth_idx; 7316 mptmp[cp] = PETSC_FALSE; 7317 cp++; 7318 } 7319 break; 7320 7321 case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */ 7322 /* A is product->B */ 7323 PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7324 if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */ 7325 PetscCall(MatProductCreate(mmdata->Bloc, mmdata->Bloc, NULL, &mp[cp])); 7326 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7327 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7328 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7329 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7330 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7331 mp[cp]->product->api_user = product->api_user; 7332 PetscCall(MatProductSetFromOptions(mp[cp])); 7333 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7334 PetscCall(ISGetIndices(glob, &globidx)); 7335 rmapt[cp] = 2; 7336 rmapa[cp] = globidx; 7337 cmapt[cp] = 2; 7338 cmapa[cp] = globidx; 7339 mptmp[cp] = PETSC_FALSE; 7340 cp++; 7341 } else { 7342 PetscCall(MatProductCreate(p->A, mmdata->Bloc, NULL, &mp[cp])); 7343 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7344 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7345 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7346 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7347 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7348 mp[cp]->product->api_user = product->api_user; 7349 PetscCall(MatProductSetFromOptions(mp[cp])); 7350 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7351 PetscCall(ISGetIndices(glob, &globidx)); 
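/* For P_diag^T * Bloc the result rows are the locally owned rows of C, so the row map is consecutive
       (type-1), while the result columns live in Bloc's merged local column space and are translated to
       global indices through glob (type-2) */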
7352 rmapt[cp] = 1; 7353 cmapt[cp] = 2; 7354 cmapa[cp] = globidx; 7355 mptmp[cp] = PETSC_FALSE; 7356 cp++; 7357 PetscCall(MatProductCreate(p->B, mmdata->Bloc, NULL, &mp[cp])); 7358 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7359 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7360 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7361 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7362 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7363 mp[cp]->product->api_user = product->api_user; 7364 PetscCall(MatProductSetFromOptions(mp[cp])); 7365 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7366 rmapt[cp] = 2; 7367 rmapa[cp] = p->garray; 7368 cmapt[cp] = 2; 7369 cmapa[cp] = globidx; 7370 mptmp[cp] = PETSC_FALSE; 7371 cp++; 7372 } 7373 break; 7374 case MATPRODUCT_PtAP: 7375 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7376 /* P is product->B */ 7377 PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7378 PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp])); 7379 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_PtAP)); 7380 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7381 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7382 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7383 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7384 mp[cp]->product->api_user = product->api_user; 7385 PetscCall(MatProductSetFromOptions(mp[cp])); 7386 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7387 PetscCall(ISGetIndices(glob, &globidx)); 7388 rmapt[cp] = 2; 7389 rmapa[cp] = globidx; 7390 cmapt[cp] = 2; 7391 cmapa[cp] = globidx; 7392 mptmp[cp] = PETSC_FALSE; 7393 cp++; 7394 if (mmdata->P_oth) { 7395 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); 7396 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx)); 7397 PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)a->B)->type_name)); 7398 PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind)); 7399 PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp])); 7400 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7401 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7402 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7403 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7404 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7405 mp[cp]->product->api_user = product->api_user; 7406 PetscCall(MatProductSetFromOptions(mp[cp])); 7407 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7408 mptmp[cp] = PETSC_TRUE; 7409 cp++; 7410 PetscCall(MatProductCreate(mmdata->Bloc, mp[1], NULL, &mp[cp])); 7411 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7412 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7413 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7414 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7415 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7416 mp[cp]->product->api_user = product->api_user; 7417 PetscCall(MatProductSetFromOptions(mp[cp])); 7418 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7419 rmapt[cp] = 2; 7420 rmapa[cp] = globidx; 7421 cmapt[cp] = 2; 7422 cmapa[cp] = P_oth_idx; 7423 mptmp[cp] = PETSC_FALSE; 7424 cp++; 7425 } 7426 break; 7427 default: 7428 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", 
MatProductTypes[ptype]); 7429 } 7430 /* sanity check */ 7431 if (size > 1) 7432 for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected offproc map type for product %" PetscInt_FMT, i); 7433 7434 PetscCall(PetscMalloc2(cp, &mmdata->mp, cp, &mmdata->mptmp)); 7435 for (i = 0; i < cp; i++) { 7436 mmdata->mp[i] = mp[i]; 7437 mmdata->mptmp[i] = mptmp[i]; 7438 } 7439 mmdata->cp = cp; 7440 C->product->data = mmdata; 7441 C->product->destroy = MatDestroy_MatMatMPIAIJBACKEND; 7442 C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND; 7443 7444 /* memory type */ 7445 mmdata->mtype = PETSC_MEMTYPE_HOST; 7446 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iscuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, "")); 7447 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iship, MATSEQAIJHIPSPARSE, MATMPIAIJHIPSPARSE, "")); 7448 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iskokk, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, "")); 7449 if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA; 7450 else if (iship) mmdata->mtype = PETSC_MEMTYPE_HIP; 7451 else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS; 7452 7453 /* prepare coo coordinates for values insertion */ 7454 7455 /* count total nonzeros of the intermediate seqaij Mats 7456 ncoo_d: # of nonzeros of matrices that do not have offproc entries 7457 ncoo_o: # of nonzeros (of matrices that might have offproc entries) that will be inserted on remote procs 7458 ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally 7459 */ 7460 for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) { 7461 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7462 if (mptmp[cp]) continue; 7463 if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scattered to all processes (might include self) */ 7464 const PetscInt *rmap = rmapa[cp]; 7465 const PetscInt mr = mp[cp]->rmap->n; 7466 const PetscInt rs = C->rmap->rstart; 7467 const PetscInt re = C->rmap->rend; 7468 const PetscInt *ii = mm->i; 7469 for (i = 0; i < mr; i++) { 7470 const PetscInt gr = rmap[i]; 7471 const PetscInt nz = ii[i + 1] - ii[i]; 7472 if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */ 7473 else ncoo_oown += nz; /* this row is local */ 7474 } 7475 } else ncoo_d += mm->nz; 7476 } 7477 7478 /* 7479 ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc 7480 7481 ncoo = ncoo_d + ncoo_oown + ncoo2, where ncoo2 is the number of nonzeros inserted on this proc by other procs. 7482 7483 off[0] points to a big index array, which is shared by off[1,2,...]. Similarly, for own[0]. 7484 7485 off[p]: points to the segment for matrix mp[p], storing the locations of nonzeros that mp[p] will insert on other procs 7486 own[p]: points to the segment for matrix mp[p], storing the locations of nonzeros that mp[p] will insert locally 7487 so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to other procs. 7488 7489 coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc. 7490 Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores the row indices of local nonzeros, and the remaining part stores the row indices of nonzeros this proc will receive.
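     An illustrative layout (made-up numbers): with cp = 2, where mp[0] has only locally owned rows (rmapt[0] == 1) and 4 nonzeros,
     and mp[1] has a sparse row map (rmapt[1] == 2) with 3 nonzeros in off-process rows and 2 nonzeros in locally owned rows,
     one gets ncoo_d = 4, ncoo_o = 3, ncoo_oown = 2, off[1] = off[0] (mp[0] sends nothing), off[2] = off[1] + 3,
     own[1] = own[0], own[2] = own[1] + 2; coo_v[] then holds the 4 + 2 local values first, followed by the ncoo2 values
     received from other procs.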
7491 */ 7492 PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->off)); /* +1 to make a csr-like data structure */ 7493 PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->own)); 7494 7495 /* gather (i,j) of nonzeros inserted by remote procs */ 7496 if (hasoffproc) { 7497 PetscSF msf; 7498 PetscInt ncoo2, *coo_i2, *coo_j2; 7499 7500 PetscCall(PetscMalloc1(ncoo_o, &mmdata->off[0])); 7501 PetscCall(PetscMalloc1(ncoo_oown, &mmdata->own[0])); 7502 PetscCall(PetscMalloc2(ncoo_o, &coo_i, ncoo_o, &coo_j)); /* to collect (i,j) of entries to be sent to others */ 7503 7504 for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) { 7505 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7506 PetscInt *idxoff = mmdata->off[cp]; 7507 PetscInt *idxown = mmdata->own[cp]; 7508 if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */ 7509 const PetscInt *rmap = rmapa[cp]; 7510 const PetscInt *cmap = cmapa[cp]; 7511 const PetscInt *ii = mm->i; 7512 PetscInt *coi = coo_i + ncoo_o; 7513 PetscInt *coj = coo_j + ncoo_o; 7514 const PetscInt mr = mp[cp]->rmap->n; 7515 const PetscInt rs = C->rmap->rstart; 7516 const PetscInt re = C->rmap->rend; 7517 const PetscInt cs = C->cmap->rstart; 7518 for (i = 0; i < mr; i++) { 7519 const PetscInt *jj = mm->j + ii[i]; 7520 const PetscInt gr = rmap[i]; 7521 const PetscInt nz = ii[i + 1] - ii[i]; 7522 if (gr < rs || gr >= re) { /* this is an offproc row */ 7523 for (j = ii[i]; j < ii[i + 1]; j++) { 7524 *coi++ = gr; 7525 *idxoff++ = j; 7526 } 7527 if (!cmapt[cp]) { /* already global */ 7528 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7529 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7530 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7531 } else { /* offdiag */ 7532 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7533 } 7534 ncoo_o += nz; 7535 } else { /* this is a local row */ 7536 for (j = ii[i]; j < ii[i + 1]; j++) *idxown++ = j; 7537 } 7538 } 7539 } 7540 mmdata->off[cp + 1] = idxoff; 7541 mmdata->own[cp + 1] = idxown; 7542 } 7543 7544 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf)); 7545 PetscCall(PetscSFSetGraphLayout(mmdata->sf, C->rmap, ncoo_o /*nleaves*/, NULL /*ilocal*/, PETSC_OWN_POINTER, coo_i)); 7546 PetscCall(PetscSFGetMultiSF(mmdata->sf, &msf)); 7547 PetscCall(PetscSFGetGraph(msf, &ncoo2 /*nroots*/, NULL, NULL, NULL)); 7548 ncoo = ncoo_d + ncoo_oown + ncoo2; 7549 PetscCall(PetscMalloc2(ncoo, &coo_i2, ncoo, &coo_j2)); 7550 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */ 7551 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); 7552 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown)); 7553 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown)); 7554 PetscCall(PetscFree2(coo_i, coo_j)); 7555 /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */ 7556 PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo_o * sizeof(PetscScalar), (void **)&mmdata->coo_w)); 7557 coo_i = coo_i2; 7558 coo_j = coo_j2; 7559 } else { /* no offproc values insertion */ 7560 ncoo = ncoo_d; 7561 PetscCall(PetscMalloc2(ncoo, &coo_i, ncoo, &coo_j)); 7562 7563 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf)); 7564 PetscCall(PetscSFSetGraph(mmdata->sf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER)); 7565 PetscCall(PetscSFSetUp(mmdata->sf)); 7566 } 7567 mmdata->hasoffproc = hasoffproc; 7568 7569 /* gather (i,j) of nonzeros 
inserted locally */ 7570 for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) { 7571 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7572 PetscInt *coi = coo_i + ncoo_d; 7573 PetscInt *coj = coo_j + ncoo_d; 7574 const PetscInt *jj = mm->j; 7575 const PetscInt *ii = mm->i; 7576 const PetscInt *cmap = cmapa[cp]; 7577 const PetscInt *rmap = rmapa[cp]; 7578 const PetscInt mr = mp[cp]->rmap->n; 7579 const PetscInt rs = C->rmap->rstart; 7580 const PetscInt re = C->rmap->rend; 7581 const PetscInt cs = C->cmap->rstart; 7582 7583 if (mptmp[cp]) continue; 7584 if (rmapt[cp] == 1) { /* consecutive rows */ 7585 /* fill coo_i */ 7586 for (i = 0; i < mr; i++) { 7587 const PetscInt gr = i + rs; 7588 for (j = ii[i]; j < ii[i + 1]; j++) coi[j] = gr; 7589 } 7590 /* fill coo_j */ 7591 if (!cmapt[cp]) { /* type-0, already global */ 7592 PetscCall(PetscArraycpy(coj, jj, mm->nz)); 7593 } else if (cmapt[cp] == 1) { /* type-1, local to global for consecutive columns of C */ 7594 for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */ 7595 } else { /* type-2, local to global for sparse columns */ 7596 for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]]; 7597 } 7598 ncoo_d += mm->nz; 7599 } else if (rmapt[cp] == 2) { /* sparse rows */ 7600 for (i = 0; i < mr; i++) { 7601 const PetscInt *jj = mm->j + ii[i]; 7602 const PetscInt gr = rmap[i]; 7603 const PetscInt nz = ii[i + 1] - ii[i]; 7604 if (gr >= rs && gr < re) { /* local rows */ 7605 for (j = ii[i]; j < ii[i + 1]; j++) *coi++ = gr; 7606 if (!cmapt[cp]) { /* type-0, already global */ 7607 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7608 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7609 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7610 } else { /* type-2, local to global for sparse columns */ 7611 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7612 } 7613 ncoo_d += nz; 7614 } 7615 } 7616 } 7617 } 7618 if (glob) PetscCall(ISRestoreIndices(glob, &globidx)); 7619 PetscCall(ISDestroy(&glob)); 7620 if (P_oth_l2g) PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g, &P_oth_idx)); 7621 PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g)); 7622 /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */ 7623 PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo * sizeof(PetscScalar), (void **)&mmdata->coo_v)); 7624 7625 /* preallocate with COO data */ 7626 PetscCall(MatSetPreallocationCOO(C, ncoo, coo_i, coo_j)); 7627 PetscCall(PetscFree2(coo_i, coo_j)); 7628 PetscFunctionReturn(PETSC_SUCCESS); 7629 } 7630 7631 PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat) 7632 { 7633 Mat_Product *product = mat->product; 7634 #if defined(PETSC_HAVE_DEVICE) 7635 PetscBool match = PETSC_FALSE; 7636 PetscBool usecpu = PETSC_FALSE; 7637 #else 7638 PetscBool match = PETSC_TRUE; 7639 #endif 7640 7641 PetscFunctionBegin; 7642 MatCheckProduct(mat, 1); 7643 #if defined(PETSC_HAVE_DEVICE) 7644 if (!product->A->boundtocpu && !product->B->boundtocpu) PetscCall(PetscObjectTypeCompare((PetscObject)product->B, ((PetscObject)product->A)->type_name, &match)); 7645 if (match) { /* we can always fallback to the CPU if requested */ 7646 switch (product->type) { 7647 case MATPRODUCT_AB: 7648 if (product->api_user) { 7649 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatMatMult", "Mat"); 7650 PetscCall(PetscOptionsBool("-matmatmult_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL)); 7651 PetscOptionsEnd(); 7652 } else { 7653 
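/* Illustrative: when C comes through the MatProduct API (MatProductCreate()/MatProductSetFromOptions())
         instead of MatMatMult(), the CPU fallback is requested with -mat_product_algorithm_backend_cpu */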
PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AB", "Mat"); 7654 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL)); 7655 PetscOptionsEnd(); 7656 } 7657 break; 7658 case MATPRODUCT_AtB: 7659 if (product->api_user) { 7660 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatTransposeMatMult", "Mat"); 7661 PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL)); 7662 PetscOptionsEnd(); 7663 } else { 7664 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AtB", "Mat"); 7665 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL)); 7666 PetscOptionsEnd(); 7667 } 7668 break; 7669 case MATPRODUCT_PtAP: 7670 if (product->api_user) { 7671 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatPtAP", "Mat"); 7672 PetscCall(PetscOptionsBool("-matptap_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL)); 7673 PetscOptionsEnd(); 7674 } else { 7675 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_PtAP", "Mat"); 7676 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL)); 7677 PetscOptionsEnd(); 7678 } 7679 break; 7680 default: 7681 break; 7682 } 7683 match = (PetscBool)!usecpu; 7684 } 7685 #endif 7686 if (match) { 7687 switch (product->type) { 7688 case MATPRODUCT_AB: 7689 case MATPRODUCT_AtB: 7690 case MATPRODUCT_PtAP: 7691 mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND; 7692 break; 7693 default: 7694 break; 7695 } 7696 } 7697 /* fallback to MPIAIJ ops */ 7698 if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat)); 7699 PetscFunctionReturn(PETSC_SUCCESS); 7700 } 7701 7702 /* 7703 Produces a set of block column indices of the matrix row, one for each block represented in the original row 7704 7705 n - the number of block indices in cc[] 7706 cc - the block indices (must be large enough to contain the indices) 7707 */ 7708 static inline PetscErrorCode MatCollapseRow(Mat Amat, PetscInt row, PetscInt bs, PetscInt *n, PetscInt *cc) 7709 { 7710 PetscInt cnt = -1, nidx, j; 7711 const PetscInt *idx; 7712 7713 PetscFunctionBegin; 7714 PetscCall(MatGetRow(Amat, row, &nidx, &idx, NULL)); 7715 if (nidx) { 7716 cnt = 0; 7717 cc[cnt] = idx[0] / bs; 7718 for (j = 1; j < nidx; j++) { 7719 if (cc[cnt] < idx[j] / bs) cc[++cnt] = idx[j] / bs; 7720 } 7721 } 7722 PetscCall(MatRestoreRow(Amat, row, &nidx, &idx, NULL)); 7723 *n = cnt + 1; 7724 PetscFunctionReturn(PETSC_SUCCESS); 7725 } 7726 7727 /* 7728 Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows 7729 7730 ncollapsed - the number of block indices 7731 collapsed - the block indices (must be large enough to contain the indices) 7732 */ 7733 static inline PetscErrorCode MatCollapseRows(Mat Amat, PetscInt start, PetscInt bs, PetscInt *w0, PetscInt *w1, PetscInt *w2, PetscInt *ncollapsed, PetscInt **collapsed) 7734 { 7735 PetscInt i, nprev, *cprev = w0, ncur = 0, *ccur = w1, *merged = w2, *cprevtmp; 7736 7737 PetscFunctionBegin; 7738 PetscCall(MatCollapseRow(Amat, start, bs, &nprev, cprev)); 7739 for (i = start + 1; i < start + bs; i++) { 7740 PetscCall(MatCollapseRow(Amat, i, bs, &ncur, ccur)); 
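/* merge this row's block indices into the running union, then swap buffers so that cprev always holds
       the union accumulated so far and merged can be reused as scratch */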
7741 PetscCall(PetscMergeIntArray(nprev, cprev, ncur, ccur, &nprev, &merged)); 7742 cprevtmp = cprev; 7743 cprev = merged; 7744 merged = cprevtmp; 7745 } 7746 *ncollapsed = nprev; 7747 if (collapsed) *collapsed = cprev; 7748 PetscFunctionReturn(PETSC_SUCCESS); 7749 } 7750 7751 /* 7752 MatCreateGraph_Simple_AIJ - create simple scalar matrix (graph) from potentially blocked matrix 7753 7754 Input Parameter: 7755 . Amat - matrix 7756 - symmetrize - make the result symmetric 7757 + scale - scale with diagonal 7758 7759 Output Parameter: 7760 . a_Gmat - output scalar graph >= 0 7761 7762 */ 7763 PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, PetscReal filter, PetscInt index_size, PetscInt index[], Mat *a_Gmat) 7764 { 7765 PetscInt Istart, Iend, Ii, jj, kk, ncols, nloc, NN, MM, bs; 7766 MPI_Comm comm; 7767 Mat Gmat; 7768 PetscBool ismpiaij, isseqaij; 7769 Mat a, b, c; 7770 MatType jtype; 7771 7772 PetscFunctionBegin; 7773 PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm)); 7774 PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend)); 7775 PetscCall(MatGetSize(Amat, &MM, &NN)); 7776 PetscCall(MatGetBlockSize(Amat, &bs)); 7777 nloc = (Iend - Istart) / bs; 7778 7779 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATSEQAIJ, &isseqaij)); 7780 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATMPIAIJ, &ismpiaij)); 7781 PetscCheck(isseqaij || ismpiaij, comm, PETSC_ERR_USER, "Require (MPI)AIJ matrix type"); 7782 7783 /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */ 7784 /* A solution consists in providing a new API, MatAIJGetCollapsedAIJ, and each class can provide a fast 7785 implementation */ 7786 if (bs > 1) { 7787 PetscCall(MatGetType(Amat, &jtype)); 7788 PetscCall(MatCreate(comm, &Gmat)); 7789 PetscCall(MatSetType(Gmat, jtype)); 7790 PetscCall(MatSetSizes(Gmat, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE)); 7791 PetscCall(MatSetBlockSizes(Gmat, 1, 1)); 7792 if (isseqaij || ((Mat_MPIAIJ *)Amat->data)->garray) { 7793 PetscInt *d_nnz, *o_nnz; 7794 MatScalar *aa, val, *AA; 7795 PetscInt *aj, *ai, *AJ, nc, nmax = 0; 7796 if (isseqaij) { 7797 a = Amat; 7798 b = NULL; 7799 } else { 7800 Mat_MPIAIJ *d = (Mat_MPIAIJ *)Amat->data; 7801 a = d->A; 7802 b = d->B; 7803 } 7804 PetscCall(PetscInfo(Amat, "New bs>1 Graph. nloc=%" PetscInt_FMT "\n", nloc)); 7805 PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 0 : nloc, &o_nnz)); 7806 for (c = a, kk = 0; c && kk < 2; c = b, kk++) { 7807 PetscInt *nnz = (c == a) ? 
d_nnz : o_nnz; 7808 const PetscInt *cols1, *cols2; 7809 for (PetscInt brow = 0, nc1, nc2, ok = 1; brow < nloc * bs; brow += bs) { // block rows 7810 PetscCall(MatGetRow(c, brow, &nc2, &cols2, NULL)); 7811 nnz[brow / bs] = nc2 / bs; 7812 if (nc2 % bs) ok = 0; 7813 if (nnz[brow / bs] > nmax) nmax = nnz[brow / bs]; 7814 for (PetscInt ii = 1; ii < bs; ii++) { // check for non-dense blocks 7815 PetscCall(MatGetRow(c, brow + ii, &nc1, &cols1, NULL)); 7816 if (nc1 != nc2) ok = 0; 7817 else { 7818 for (PetscInt jj = 0; jj < nc1 && ok == 1; jj++) { 7819 if (cols1[jj] != cols2[jj]) ok = 0; 7820 if (cols1[jj] % bs != jj % bs) ok = 0; 7821 } 7822 } 7823 PetscCall(MatRestoreRow(c, brow + ii, &nc1, &cols1, NULL)); 7824 } 7825 PetscCall(MatRestoreRow(c, brow, &nc2, &cols2, NULL)); 7826 if (!ok) { 7827 PetscCall(PetscFree2(d_nnz, o_nnz)); 7828 PetscCall(PetscInfo(Amat, "Found sparse blocks - revert to slow method\n")); 7829 goto old_bs; 7830 } 7831 } 7832 } 7833 PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz)); 7834 PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz)); 7835 PetscCall(PetscFree2(d_nnz, o_nnz)); 7836 PetscCall(PetscMalloc2(nmax, &AA, nmax, &AJ)); 7837 // diag 7838 for (PetscInt brow = 0, n, grow; brow < nloc * bs; brow += bs) { // block rows 7839 Mat_SeqAIJ *aseq = (Mat_SeqAIJ *)a->data; 7840 ai = aseq->i; 7841 n = ai[brow + 1] - ai[brow]; 7842 aj = aseq->j + ai[brow]; 7843 for (int k = 0; k < n; k += bs) { // block columns 7844 AJ[k / bs] = aj[k] / bs + Istart / bs; // diag starts at (Istart,Istart) 7845 val = 0; 7846 if (index_size == 0) { 7847 for (int ii = 0; ii < bs; ii++) { // rows in block 7848 aa = aseq->a + ai[brow + ii] + k; 7849 for (int jj = 0; jj < bs; jj++) { // columns in block 7850 val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm 7851 } 7852 } 7853 } else { // use (index,index) value if provided 7854 for (int iii = 0; iii < index_size; iii++) { // rows in block 7855 int ii = index[iii]; 7856 aa = aseq->a + ai[brow + ii] + k; 7857 for (int jjj = 0; jjj < index_size; jjj++) { // columns in block 7858 int jj = index[jjj]; 7859 val += PetscAbs(PetscRealPart(aa[jj])); 7860 } 7861 } 7862 } 7863 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%d) >= nmax (%d)", (int)(k / bs), (int)nmax); 7864 AA[k / bs] = val; 7865 } 7866 grow = Istart / bs + brow / bs; 7867 PetscCall(MatSetValues(Gmat, 1, &grow, n / bs, AJ, AA, INSERT_VALUES)); 7868 } 7869 // off-diag 7870 if (ismpiaij) { 7871 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Amat->data; 7872 const PetscScalar *vals; 7873 const PetscInt *cols, *garray = aij->garray; 7874 PetscCheck(garray, PETSC_COMM_SELF, PETSC_ERR_USER, "No garray ?"); 7875 for (PetscInt brow = 0, grow; brow < nloc * bs; brow += bs) { // block rows 7876 PetscCall(MatGetRow(b, brow, &ncols, &cols, NULL)); 7877 for (int k = 0, cidx = 0; k < ncols; k += bs, cidx++) { 7878 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs >= nmax"); 7879 AA[k / bs] = 0; 7880 AJ[cidx] = garray[cols[k]] / bs; 7881 } 7882 nc = ncols / bs; 7883 PetscCall(MatRestoreRow(b, brow, &ncols, &cols, NULL)); 7884 if (index_size == 0) { 7885 for (int ii = 0; ii < bs; ii++) { // rows in block 7886 PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals)); 7887 for (int k = 0; k < ncols; k += bs) { 7888 for (int jj = 0; jj < bs; jj++) { // cols in block 7889 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%d) >= nmax (%d)", (int)(k / bs), (int)nmax); 7890 AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj])); 7891 } 7892 } 7893 PetscCall(MatRestoreRow(b, brow + ii, 
&ncols, &cols, &vals)); 7894 } 7895 } else { // use (index,index) value if provided 7896 for (int iii = 0; iii < index_size; iii++) { // rows in block 7897 int ii = index[iii]; 7898 PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals)); 7899 for (int k = 0; k < ncols; k += bs) { 7900 for (int jjj = 0; jjj < index_size; jjj++) { // cols in block 7901 int jj = index[jjj]; 7902 AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj])); 7903 } 7904 } 7905 PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals)); 7906 } 7907 } 7908 grow = Istart / bs + brow / bs; 7909 PetscCall(MatSetValues(Gmat, 1, &grow, nc, AJ, AA, INSERT_VALUES)); 7910 } 7911 } 7912 PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY)); 7913 PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY)); 7914 PetscCall(PetscFree2(AA, AJ)); 7915 } else { 7916 const PetscScalar *vals; 7917 const PetscInt *idx; 7918 PetscInt *d_nnz, *o_nnz, *w0, *w1, *w2; 7919 old_bs: 7920 /* 7921 Determine the preallocation needed for the scalar matrix derived from the vector matrix. 7922 */ 7923 PetscCall(PetscInfo(Amat, "OLD bs>1 CreateGraph\n")); 7924 PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 0 : nloc, &o_nnz)); 7925 if (isseqaij) { 7926 PetscInt max_d_nnz; 7927 /* 7928 Determine exact preallocation count for (sequential) scalar matrix 7929 */ 7930 PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat, &max_d_nnz)); 7931 max_d_nnz = PetscMin(nloc, bs * max_d_nnz); 7932 PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2)); 7933 for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) PetscCall(MatCollapseRows(Amat, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL)); 7934 PetscCall(PetscFree3(w0, w1, w2)); 7935 } else if (ismpiaij) { 7936 Mat Daij, Oaij; 7937 const PetscInt *garray; 7938 PetscInt max_d_nnz; 7939 PetscCall(MatMPIAIJGetSeqAIJ(Amat, &Daij, &Oaij, &garray)); 7940 /* 7941 Determine exact preallocation count for diagonal block portion of scalar matrix 7942 */ 7943 PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij, &max_d_nnz)); 7944 max_d_nnz = PetscMin(nloc, bs * max_d_nnz); 7945 PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2)); 7946 for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) PetscCall(MatCollapseRows(Daij, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL)); 7947 PetscCall(PetscFree3(w0, w1, w2)); 7948 /* 7949 Over estimate (usually grossly over), preallocation count for off-diagonal portion of scalar matrix 7950 */ 7951 for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) { 7952 o_nnz[jj] = 0; 7953 for (kk = 0; kk < bs; kk++) { /* rows that get collapsed to a single row */ 7954 PetscCall(MatGetRow(Oaij, Ii + kk, &ncols, NULL, NULL)); 7955 o_nnz[jj] += ncols; 7956 PetscCall(MatRestoreRow(Oaij, Ii + kk, &ncols, NULL, NULL)); 7957 } 7958 if (o_nnz[jj] > (NN / bs - nloc)) o_nnz[jj] = NN / bs - nloc; 7959 } 7960 } else SETERRQ(comm, PETSC_ERR_USER, "Require AIJ matrix type"); 7961 /* get scalar copy (norms) of matrix */ 7962 PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz)); 7963 PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz)); 7964 PetscCall(PetscFree2(d_nnz, o_nnz)); 7965 for (Ii = Istart; Ii < Iend; Ii++) { 7966 PetscInt dest_row = Ii / bs; 7967 PetscCall(MatGetRow(Amat, Ii, &ncols, &idx, &vals)); 7968 for (jj = 0; jj < ncols; jj++) { 7969 PetscInt dest_col = idx[jj] / bs; 7970 PetscScalar sv = PetscAbs(PetscRealPart(vals[jj])); 7971 PetscCall(MatSetValues(Gmat, 1, &dest_row, 1, &dest_col, &sv, ADD_VALUES)); 7972 } 7973 PetscCall(MatRestoreRow(Amat, Ii, &ncols, &idx, &vals)); 7974 } 7975 
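/* all scalar entries of each bs x bs block have been accumulated into a single block (row,col) location
       with ADD_VALUES; assemble the scalar graph */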
PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY)); 7976 PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY)); 7977 } 7978 } else { 7979 if (symmetrize || filter >= 0 || scale) PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat)); 7980 else { 7981 Gmat = Amat; 7982 PetscCall(PetscObjectReference((PetscObject)Gmat)); 7983 } 7984 if (isseqaij) { 7985 a = Gmat; 7986 b = NULL; 7987 } else { 7988 Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data; 7989 a = d->A; 7990 b = d->B; 7991 } 7992 if (filter >= 0 || scale) { 7993 /* take absolute value of each entry */ 7994 for (c = a, kk = 0; c && kk < 2; c = b, kk++) { 7995 MatInfo info; 7996 PetscScalar *avals; 7997 PetscCall(MatGetInfo(c, MAT_LOCAL, &info)); 7998 PetscCall(MatSeqAIJGetArray(c, &avals)); 7999 for (int jj = 0; jj < info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]); 8000 PetscCall(MatSeqAIJRestoreArray(c, &avals)); 8001 } 8002 } 8003 } 8004 if (symmetrize) { 8005 PetscBool isset, issym; 8006 PetscCall(MatIsSymmetricKnown(Amat, &isset, &issym)); 8007 if (!isset || !issym) { 8008 Mat matTrans; 8009 PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans)); 8010 PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric == PETSC_BOOL3_TRUE ? SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN)); 8011 PetscCall(MatDestroy(&matTrans)); 8012 } 8013 PetscCall(MatSetOption(Gmat, MAT_SYMMETRIC, PETSC_TRUE)); 8014 } else if (Amat != Gmat) PetscCall(MatPropagateSymmetryOptions(Amat, Gmat)); 8015 if (scale) { 8016 /* scale c for all diagonal values = 1 or -1 */ 8017 Vec diag; 8018 PetscCall(MatCreateVecs(Gmat, &diag, NULL)); 8019 PetscCall(MatGetDiagonal(Gmat, diag)); 8020 PetscCall(VecReciprocal(diag)); 8021 PetscCall(VecSqrtAbs(diag)); 8022 PetscCall(MatDiagonalScale(Gmat, diag, diag)); 8023 PetscCall(VecDestroy(&diag)); 8024 } 8025 PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view")); 8026 8027 if (filter >= 0) { 8028 PetscCall(MatFilter(Gmat, filter, PETSC_TRUE, PETSC_TRUE)); 8029 PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_filter_graph_view")); 8030 } 8031 *a_Gmat = Gmat; 8032 PetscFunctionReturn(PETSC_SUCCESS); 8033 } 8034 8035 /* 8036 Special version for direct calls from Fortran 8037 */ 8038 #include <petsc/private/fortranimpl.h> 8039 8040 /* Change these macros so can be used in void function */ 8041 /* Identical to PetscCallVoid, except it assigns to *_ierr */ 8042 #undef PetscCall 8043 #define PetscCall(...) \ 8044 do { \ 8045 PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \ 8046 if (PetscUnlikely(ierr_msv_mpiaij)) { \ 8047 *_ierr = PetscError(PETSC_COMM_SELF, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr_msv_mpiaij, PETSC_ERROR_REPEAT, " "); \ 8048 return; \ 8049 } \ 8050 } while (0) 8051 8052 #undef SETERRQ 8053 #define SETERRQ(comm, ierr, ...) 
\ 8054 do { \ 8055 *_ierr = PetscError(comm, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr, PETSC_ERROR_INITIAL, __VA_ARGS__); \ 8056 return; \ 8057 } while (0) 8058 8059 #if defined(PETSC_HAVE_FORTRAN_CAPS) 8060 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 8061 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 8062 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 8063 #else 8064 #endif 8065 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat, PetscInt *mm, const PetscInt im[], PetscInt *mn, const PetscInt in[], const PetscScalar v[], InsertMode *maddv, PetscErrorCode *_ierr) 8066 { 8067 Mat mat = *mmat; 8068 PetscInt m = *mm, n = *mn; 8069 InsertMode addv = *maddv; 8070 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 8071 PetscScalar value; 8072 8073 MatCheckPreallocated(mat, 1); 8074 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 8075 else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values"); 8076 { 8077 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend; 8078 PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col; 8079 PetscBool roworiented = aij->roworiented; 8080 8081 /* Some Variables required in the macro */ 8082 Mat A = aij->A; 8083 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 8084 PetscInt *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j; 8085 MatScalar *aa; 8086 PetscBool ignorezeroentries = ((a->ignorezeroentries && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 8087 Mat B = aij->B; 8088 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 8089 PetscInt *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n; 8090 MatScalar *ba; 8091 /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we 8092 * cannot use "#if defined" inside a macro. 
*/ 8093 PETSC_UNUSED PetscBool inserted = PETSC_FALSE; 8094 8095 PetscInt *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2; 8096 PetscInt nonew = a->nonew; 8097 MatScalar *ap1, *ap2; 8098 8099 PetscFunctionBegin; 8100 PetscCall(MatSeqAIJGetArray(A, &aa)); 8101 PetscCall(MatSeqAIJGetArray(B, &ba)); 8102 for (i = 0; i < m; i++) { 8103 if (im[i] < 0) continue; 8104 PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1); 8105 if (im[i] >= rstart && im[i] < rend) { 8106 row = im[i] - rstart; 8107 lastcol1 = -1; 8108 rp1 = aj + ai[row]; 8109 ap1 = aa + ai[row]; 8110 rmax1 = aimax[row]; 8111 nrow1 = ailen[row]; 8112 low1 = 0; 8113 high1 = nrow1; 8114 lastcol2 = -1; 8115 rp2 = bj + bi[row]; 8116 ap2 = ba + bi[row]; 8117 rmax2 = bimax[row]; 8118 nrow2 = bilen[row]; 8119 low2 = 0; 8120 high2 = nrow2; 8121 8122 for (j = 0; j < n; j++) { 8123 if (roworiented) value = v[i * n + j]; 8124 else value = v[i + j * m]; 8125 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 8126 if (in[j] >= cstart && in[j] < cend) { 8127 col = in[j] - cstart; 8128 MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]); 8129 } else if (in[j] < 0) continue; 8130 else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) { 8131 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1); 8132 } else { 8133 if (mat->was_assembled) { 8134 if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 8135 #if defined(PETSC_USE_CTABLE) 8136 PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); 8137 col--; 8138 #else 8139 col = aij->colmap[in[j]] - 1; 8140 #endif 8141 if (col < 0 && !((Mat_SeqAIJ *)aij->A->data)->nonew) { 8142 PetscCall(MatDisAssemble_MPIAIJ(mat)); 8143 col = in[j]; 8144 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 8145 B = aij->B; 8146 b = (Mat_SeqAIJ *)B->data; 8147 bimax = b->imax; 8148 bi = b->i; 8149 bilen = b->ilen; 8150 bj = b->j; 8151 rp2 = bj + bi[row]; 8152 ap2 = ba + bi[row]; 8153 rmax2 = bimax[row]; 8154 nrow2 = bilen[row]; 8155 low2 = 0; 8156 high2 = nrow2; 8157 bm = aij->B->rmap->n; 8158 ba = b->a; 8159 inserted = PETSC_FALSE; 8160 } 8161 } else col = in[j]; 8162 MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]); 8163 } 8164 } 8165 } else if (!aij->donotstash) { 8166 if (roworiented) { 8167 PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 8168 } else { 8169 PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 8170 } 8171 } 8172 } 8173 PetscCall(MatSeqAIJRestoreArray(A, &aa)); 8174 PetscCall(MatSeqAIJRestoreArray(B, &ba)); 8175 } 8176 PetscFunctionReturnVoid(); 8177 } 8178 8179 /* Undefining these here since they were redefined from their original definition above! No 8180 * other PETSc functions should be defined past this point, as it is impossible to recover the 8181 * original definitions */ 8182 #undef PetscCall 8183 #undef SETERRQ 8184
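/*
  Illustrative sketch, kept inside a comment on purpose since PetscCall()/SETERRQ() are undefined just above
  and no further PETSc functions should be compiled in this file: one possible use of
  MatCreateMPIAIJWithSplitArrays() from earlier in this file. The helper name BuildSplitExample, the array
  contents, and the 2-rows-per-rank layout are made up for the example; the CSR arrays are not copied by
  PETSc, so they are given static storage duration here to outlive the Mat.

  static PetscErrorCode BuildSplitExample(MPI_Comm comm, Mat *C)
  {
    static PetscInt    di[] = {0, 1, 2};   // diagonal-block CSR row pointers: one entry per local row
    static PetscInt    dj[] = {0, 1};      // local column indices of the diagonal block
    static PetscScalar da[] = {2.0, 2.0};  // values of the diagonal block
    static PetscInt    oi[] = {0, 0, 0};   // off-diagonal block is empty on every rank
    static PetscInt    oj[] = {0};         // unused, kept non-NULL
    static PetscScalar oa[] = {0.0};       // unused, kept non-NULL

    PetscFunctionBegin;
    PetscCall(MatCreateMPIAIJWithSplitArrays(comm, 2, 2, PETSC_DETERMINE, PETSC_DETERMINE, di, dj, da, oi, oj, oa, C));
    PetscFunctionReturn(PETSC_SUCCESS);
  }
*/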