#include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
#include <petsc/private/vecimpl.h>
#include <petsc/private/sfimpl.h>
#include <petsc/private/isimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>
#include <petsc/private/hashmapi.h>

PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
  PetscCall(MatStashDestroy_Private(&mat->stash));
  PetscCall(VecDestroy(&aij->diag));
  PetscCall(MatDestroy(&aij->A));
  PetscCall(MatDestroy(&aij->B));
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&aij->colmap));
#else
  PetscCall(PetscFree(aij->colmap));
#endif
  PetscCall(PetscFree(aij->garray));
  PetscCall(VecDestroy(&aij->lvec));
  PetscCall(VecScatterDestroy(&aij->Mvctx));
  PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));
  PetscCall(PetscFree(aij->ld));

  PetscCall(PetscFree(mat->data));

  /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
  PetscCall(PetscObjectCompose((PetscObject)mat, "MatMergeSeqsToMPI", NULL));

  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatIsTranspose_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocationCSR_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpibaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisbaij_C", NULL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcusparse_C", NULL));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijhipsparse_C", NULL));
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijkokkos_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_elemental_C", NULL));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_scalapack_C", NULL));
#endif
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_hypre_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_is_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetUseScalableIncreaseOverlap_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijperm_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijsell_C", NULL));
#if defined(PETSC_HAVE_MKL_SPARSE)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijmkl_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcrl_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisell_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetPreallocationCOO_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetValuesCOO_C", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and MatAssemblyEnd_MPI_Hash() */
#define TYPE AIJ
#define TYPE_AIJ
#include "../src/mat/impls/aij/mpi/mpihashmat.h"
#undef TYPE
#undef TYPE_AIJ

static PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
{
  Mat B;

  PetscFunctionBegin;
  PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &B));
  PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject)B));
  PetscCall(MatGetRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
  PetscCall(MatDestroy(&B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
{
  Mat B;

  PetscFunctionBegin;
  PetscCall(PetscObjectQuery((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject *)&B));
  PetscCall(MatRestoreRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
  PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to `MATSEQAIJ` when constructed with a single process communicator,
   and `MATMPIAIJ` otherwise. As a result, for single process communicators,
   `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity; a brief usage sketch follows this manual page.

   Options Database Key:
. -mat_type aij - sets the matrix type to `MATAIJ` during a call to `MatSetFromOptions()`

   Level: beginner

   Developer Note:
   Subclasses include `MATAIJCUSPARSE`, `MATAIJPERM`, `MATAIJSELL`, `MATAIJMKL`, `MATAIJCRL`, and `MATAIJKOKKOS`. The type also
   automatically switches over to use inodes when enough of them exist.

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateAIJ()`, `MatCreateSeqAIJ()`
M*/
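/*
  Usage sketch for the recommendation in the MATAIJ manual page above (illustrative only;
  the sizes and nonzero estimates below are made-up placeholders). Creating the matrix as
  MATAIJ and calling both preallocation routines lets the same code run on one or many
  MPI processes:

    Mat A;

    PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
    PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, 100, 100));
    PetscCall(MatSetType(A, MATAIJ));
    PetscCall(MatSetFromOptions(A));
    PetscCall(MatSeqAIJSetPreallocation(A, 5, NULL));
    PetscCall(MatMPIAIJSetPreallocation(A, 5, NULL, 2, NULL));
*/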
/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to `MATSEQAIJCRL` when constructed with a single process communicator,
   and `MATMPIAIJCRL` otherwise. As a result, for single process communicators,
   `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Key:
. -mat_type aijcrl - sets the matrix type to `MATAIJCRL` during a call to `MatSetFromOptions()`

   Level: beginner

.seealso: [](ch_matrices), `Mat`, `MatCreateMPIAIJCRL()`, `MATSEQAIJCRL`, `MATMPIAIJCRL`
M*/

static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A, PetscBool flg)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) || defined(PETSC_HAVE_VIENNACL)
  A->boundtocpu = flg;
#endif
  if (a->A) PetscCall(MatBindToCPU(a->A, flg));
  if (a->B) PetscCall(MatBindToCPU(a->B, flg));

  /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
   * This may seem a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
   * to differ from that of the parent matrix. */
  if (a->lvec) PetscCall(VecBindToCPU(a->lvec, flg));
  if (a->diag) PetscCall(VecBindToCPU(a->diag, flg));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data;

  PetscFunctionBegin;
  if (mat->A) {
    PetscCall(MatSetBlockSizes(mat->A, rbs, cbs));
    PetscCall(MatSetBlockSizes(mat->B, rbs, 1));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M, IS *keptrows)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data;
  Mat_SeqAIJ *a = (Mat_SeqAIJ *)mat->A->data;
  Mat_SeqAIJ *b = (Mat_SeqAIJ *)mat->B->data;
  const PetscInt *ia, *ib;
  const MatScalar *aa, *bb, *aav, *bav;
  PetscInt na, nb, i, j, *rows, cnt = 0, n0rows;
  PetscInt m = M->rmap->n, rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = NULL;

  ia = a->i;
  ib = b->i;
  PetscCall(MatSeqAIJGetArrayRead(mat->A, &aav));
  PetscCall(MatSeqAIJGetArrayRead(mat->B, &bav));
  for (i = 0; i < m; i++) {
    na = ia[i + 1] - ia[i];
    nb = ib[i + 1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = aav + ia[i];
    for (j = 0; j < na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = PetscSafePointerPlusOffset(bav, ib[i]);
    for (j = 0; j < nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
  ok1:;
  }
  PetscCall(MPIU_Allreduce(&cnt, &n0rows, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)M)));
  if (!n0rows) {
    PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
    PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  PetscCall(PetscMalloc1(M->rmap->n - cnt, &rows));
  cnt = 0;
  for (i = 0; i < m; i++) {
    na = ia[i + 1] - ia[i];
    nb = ib[i + 1] - ib[i];
    if (!na && !nb) continue;
    aa = aav + ia[i];
    for (j = 0; j < na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = PetscSafePointerPlusOffset(bav, ib[i]);
    for (j = 0; j < nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
  ok2:;
  }
PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), cnt, rows, PETSC_OWN_POINTER, keptrows)); 245 PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav)); 246 PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav)); 247 PetscFunctionReturn(PETSC_SUCCESS); 248 } 249 250 static PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y, Vec D, InsertMode is) 251 { 252 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Y->data; 253 PetscBool cong; 254 255 PetscFunctionBegin; 256 PetscCall(MatHasCongruentLayouts(Y, &cong)); 257 if (Y->assembled && cong) { 258 PetscCall(MatDiagonalSet(aij->A, D, is)); 259 } else { 260 PetscCall(MatDiagonalSet_Default(Y, D, is)); 261 } 262 PetscFunctionReturn(PETSC_SUCCESS); 263 } 264 265 static PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M, IS *zrows) 266 { 267 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)M->data; 268 PetscInt i, rstart, nrows, *rows; 269 270 PetscFunctionBegin; 271 *zrows = NULL; 272 PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A, &nrows, &rows)); 273 PetscCall(MatGetOwnershipRange(M, &rstart, NULL)); 274 for (i = 0; i < nrows; i++) rows[i] += rstart; 275 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), nrows, rows, PETSC_OWN_POINTER, zrows)); 276 PetscFunctionReturn(PETSC_SUCCESS); 277 } 278 279 static PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A, PetscInt type, PetscReal *reductions) 280 { 281 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)A->data; 282 PetscInt i, m, n, *garray = aij->garray; 283 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ *)aij->A->data; 284 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ *)aij->B->data; 285 PetscReal *work; 286 const PetscScalar *dummy; 287 288 PetscFunctionBegin; 289 PetscCall(MatGetSize(A, &m, &n)); 290 PetscCall(PetscCalloc1(n, &work)); 291 PetscCall(MatSeqAIJGetArrayRead(aij->A, &dummy)); 292 PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &dummy)); 293 PetscCall(MatSeqAIJGetArrayRead(aij->B, &dummy)); 294 PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &dummy)); 295 if (type == NORM_2) { 296 for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i] * a_aij->a[i]); 297 for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i] * b_aij->a[i]); 298 } else if (type == NORM_1) { 299 for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 300 for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 301 } else if (type == NORM_INFINITY) { 302 for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 303 for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]), work[garray[b_aij->j[i]]]); 304 } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) { 305 for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]); 306 for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]); 307 } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) { 308 for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]); 309 for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]); 310 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type"); 311 
  if (type == NORM_INFINITY) {
    PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
  } else {
    PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscCall(PetscFree(work));
  if (type == NORM_2) {
    for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < n; i++) reductions[i] /= m;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A, IS *is)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  IS sis, gis;
  const PetscInt *isis, *igis;
  PetscInt n, *iis, nsis, ngis, rstart, i;

  PetscFunctionBegin;
  PetscCall(MatFindOffBlockDiagonalEntries(a->A, &sis));
  PetscCall(MatFindNonzeroRows(a->B, &gis));
  PetscCall(ISGetSize(gis, &ngis));
  PetscCall(ISGetSize(sis, &nsis));
  PetscCall(ISGetIndices(sis, &isis));
  PetscCall(ISGetIndices(gis, &igis));

  PetscCall(PetscMalloc1(ngis + nsis, &iis));
  PetscCall(PetscArraycpy(iis, igis, ngis));
  PetscCall(PetscArraycpy(iis + ngis, isis, nsis));
  n = ngis + nsis;
  PetscCall(PetscSortRemoveDupsInt(&n, iis));
  PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
  for (i = 0; i < n; i++) iis[i] += rstart;
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A), n, iis, PETSC_OWN_POINTER, is));

  PetscCall(ISRestoreIndices(sis, &isis));
  PetscCall(ISRestoreIndices(gis, &igis));
  PetscCall(ISDestroy(&sis));
  PetscCall(ISDestroy(&gis));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix. When PETSC_USE_CTABLE is used this is scalable,
  at a slightly higher hash-table lookup cost; without it, it is not scalable
  (each process stores an order-N integer array) but access is fast.
362 */ 363 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 364 { 365 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 366 PetscInt n = aij->B->cmap->n, i; 367 368 PetscFunctionBegin; 369 PetscCheck(!n || aij->garray, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MPIAIJ Matrix was assembled but is missing garray"); 370 #if defined(PETSC_USE_CTABLE) 371 PetscCall(PetscHMapICreateWithSize(n, &aij->colmap)); 372 for (i = 0; i < n; i++) PetscCall(PetscHMapISet(aij->colmap, aij->garray[i] + 1, i + 1)); 373 #else 374 PetscCall(PetscCalloc1(mat->cmap->N + 1, &aij->colmap)); 375 for (i = 0; i < n; i++) aij->colmap[aij->garray[i]] = i + 1; 376 #endif 377 PetscFunctionReturn(PETSC_SUCCESS); 378 } 379 380 #define MatSetValues_SeqAIJ_A_Private(row, col, value, addv, orow, ocol) \ 381 do { \ 382 if (col <= lastcol1) low1 = 0; \ 383 else high1 = nrow1; \ 384 lastcol1 = col; \ 385 while (high1 - low1 > 5) { \ 386 t = (low1 + high1) / 2; \ 387 if (rp1[t] > col) high1 = t; \ 388 else low1 = t; \ 389 } \ 390 for (_i = low1; _i < high1; _i++) { \ 391 if (rp1[_i] > col) break; \ 392 if (rp1[_i] == col) { \ 393 if (addv == ADD_VALUES) { \ 394 ap1[_i] += value; \ 395 /* Not sure LogFlops will slow dow the code or not */ \ 396 (void)PetscLogFlops(1.0); \ 397 } else ap1[_i] = value; \ 398 goto a_noinsert; \ 399 } \ 400 } \ 401 if (value == 0.0 && ignorezeroentries && row != col) { \ 402 low1 = 0; \ 403 high1 = nrow1; \ 404 goto a_noinsert; \ 405 } \ 406 if (nonew == 1) { \ 407 low1 = 0; \ 408 high1 = nrow1; \ 409 goto a_noinsert; \ 410 } \ 411 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 412 MatSeqXAIJReallocateAIJ(A, am, 1, nrow1, row, col, rmax1, aa, ai, aj, rp1, ap1, aimax, nonew, MatScalar); \ 413 N = nrow1++ - 1; \ 414 a->nz++; \ 415 high1++; \ 416 /* shift up all the later entries in this row */ \ 417 PetscCall(PetscArraymove(rp1 + _i + 1, rp1 + _i, N - _i + 1)); \ 418 PetscCall(PetscArraymove(ap1 + _i + 1, ap1 + _i, N - _i + 1)); \ 419 rp1[_i] = col; \ 420 ap1[_i] = value; \ 421 a_noinsert:; \ 422 ailen[row] = nrow1; \ 423 } while (0) 424 425 #define MatSetValues_SeqAIJ_B_Private(row, col, value, addv, orow, ocol) \ 426 do { \ 427 if (col <= lastcol2) low2 = 0; \ 428 else high2 = nrow2; \ 429 lastcol2 = col; \ 430 while (high2 - low2 > 5) { \ 431 t = (low2 + high2) / 2; \ 432 if (rp2[t] > col) high2 = t; \ 433 else low2 = t; \ 434 } \ 435 for (_i = low2; _i < high2; _i++) { \ 436 if (rp2[_i] > col) break; \ 437 if (rp2[_i] == col) { \ 438 if (addv == ADD_VALUES) { \ 439 ap2[_i] += value; \ 440 (void)PetscLogFlops(1.0); \ 441 } else ap2[_i] = value; \ 442 goto b_noinsert; \ 443 } \ 444 } \ 445 if (value == 0.0 && ignorezeroentries) { \ 446 low2 = 0; \ 447 high2 = nrow2; \ 448 goto b_noinsert; \ 449 } \ 450 if (nonew == 1) { \ 451 low2 = 0; \ 452 high2 = nrow2; \ 453 goto b_noinsert; \ 454 } \ 455 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 456 MatSeqXAIJReallocateAIJ(B, bm, 1, nrow2, row, col, rmax2, ba, bi, bj, rp2, ap2, bimax, nonew, MatScalar); \ 457 N = nrow2++ - 1; \ 458 b->nz++; \ 459 high2++; \ 460 /* shift up all the later entries in this row */ \ 461 PetscCall(PetscArraymove(rp2 + _i + 1, rp2 + _i, N - _i + 1)); \ 462 PetscCall(PetscArraymove(ap2 + _i + 1, ap2 + _i, N - _i + 1)); \ 463 rp2[_i] = col; \ 464 ap2[_i] = value; \ 465 b_noinsert:; 
\ 466 bilen[row] = nrow2; \ 467 } while (0) 468 469 static PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A, PetscInt row, const PetscScalar v[]) 470 { 471 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 472 Mat_SeqAIJ *a = (Mat_SeqAIJ *)mat->A->data, *b = (Mat_SeqAIJ *)mat->B->data; 473 PetscInt l, *garray = mat->garray, diag; 474 PetscScalar *aa, *ba; 475 476 PetscFunctionBegin; 477 /* code only works for square matrices A */ 478 479 /* find size of row to the left of the diagonal part */ 480 PetscCall(MatGetOwnershipRange(A, &diag, NULL)); 481 row = row - diag; 482 for (l = 0; l < b->i[row + 1] - b->i[row]; l++) { 483 if (garray[b->j[b->i[row] + l]] > diag) break; 484 } 485 if (l) { 486 PetscCall(MatSeqAIJGetArray(mat->B, &ba)); 487 PetscCall(PetscArraycpy(ba + b->i[row], v, l)); 488 PetscCall(MatSeqAIJRestoreArray(mat->B, &ba)); 489 } 490 491 /* diagonal part */ 492 if (a->i[row + 1] - a->i[row]) { 493 PetscCall(MatSeqAIJGetArray(mat->A, &aa)); 494 PetscCall(PetscArraycpy(aa + a->i[row], v + l, (a->i[row + 1] - a->i[row]))); 495 PetscCall(MatSeqAIJRestoreArray(mat->A, &aa)); 496 } 497 498 /* right of diagonal part */ 499 if (b->i[row + 1] - b->i[row] - l) { 500 PetscCall(MatSeqAIJGetArray(mat->B, &ba)); 501 PetscCall(PetscArraycpy(ba + b->i[row] + l, v + l + a->i[row + 1] - a->i[row], b->i[row + 1] - b->i[row] - l)); 502 PetscCall(MatSeqAIJRestoreArray(mat->B, &ba)); 503 } 504 PetscFunctionReturn(PETSC_SUCCESS); 505 } 506 507 PetscErrorCode MatSetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) 508 { 509 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 510 PetscScalar value = 0.0; 511 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend; 512 PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col; 513 PetscBool roworiented = aij->roworiented; 514 515 /* Some Variables required in the macro */ 516 Mat A = aij->A; 517 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 518 PetscInt *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j; 519 PetscBool ignorezeroentries = a->ignorezeroentries; 520 Mat B = aij->B; 521 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 522 PetscInt *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n; 523 MatScalar *aa, *ba; 524 PetscInt *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2; 525 PetscInt nonew; 526 MatScalar *ap1, *ap2; 527 528 PetscFunctionBegin; 529 PetscCall(MatSeqAIJGetArray(A, &aa)); 530 PetscCall(MatSeqAIJGetArray(B, &ba)); 531 for (i = 0; i < m; i++) { 532 if (im[i] < 0) continue; 533 PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1); 534 if (im[i] >= rstart && im[i] < rend) { 535 row = im[i] - rstart; 536 lastcol1 = -1; 537 rp1 = PetscSafePointerPlusOffset(aj, ai[row]); 538 ap1 = PetscSafePointerPlusOffset(aa, ai[row]); 539 rmax1 = aimax[row]; 540 nrow1 = ailen[row]; 541 low1 = 0; 542 high1 = nrow1; 543 lastcol2 = -1; 544 rp2 = PetscSafePointerPlusOffset(bj, bi[row]); 545 ap2 = PetscSafePointerPlusOffset(ba, bi[row]); 546 rmax2 = bimax[row]; 547 nrow2 = bilen[row]; 548 low2 = 0; 549 high2 = nrow2; 550 551 for (j = 0; j < n; j++) { 552 if (v) value = roworiented ? 
        v[i * n + j] : v[i + j * m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
        if (in[j] >= cstart && in[j] < cend) {
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
        } else if (in[j] < 0) {
          continue;
        } else {
          PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          if (mat->was_assembled) {
            if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); /* map global col ids to local ones */
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ *)aij->B->data)->nonew) { /* col < 0 means in[j] is a new col for B */
              PetscCall(MatDisAssemble_MPIAIJ(mat));               /* Change aij->B from reduced/local format to expanded/global format */
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ *)B->data;
              bimax = b->imax;
              bi    = b->i;
              bilen = b->ilen;
              bj    = b->j;
              ba    = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
              if (1 == ((Mat_SeqAIJ *)aij->B->data)->nonew) {
                PetscCall(PetscInfo(mat, "Skipping insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n", (double)PetscRealPart(value), im[i], in[j]));
              } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
            }
          } else col = in[j];
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
        }
      }
    } else {
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i * n), (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i), m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
  }
  PetscCall(MatSeqAIJRestoreArray(A, &aa)); /* aa and ba might have been freed due to reallocation above, but we do not access them here */
  PetscCall(MatSeqAIJRestoreArray(B, &ba));
  PetscFunctionReturn(PETSC_SUCCESS);
}
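/*
  Illustrative sketch (not part of this file's implementation; sizes and values below are
  made up). The usual user-level path into MatSetValues_MPIAIJ() above is MatSetValues()
  with global indices: locally owned entries go directly into the diagonal/off-diagonal
  blocks, while rows owned by other processes are stashed and communicated during assembly.

    Mat         A;
    PetscInt    row = 0, cols[2] = {0, 5};
    PetscScalar vals[2] = {1.0, -2.0};

    PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
    PetscCall(MatSetSizes(A, PETSC_DECIDE, PETSC_DECIDE, 10, 10));
    PetscCall(MatSetType(A, MATAIJ));
    PetscCall(MatSetUp(A));
    PetscCall(MatSetValues(A, 1, &row, 2, cols, vals, ADD_VALUES));
    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatDestroy(&A));
*/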
/*
  This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ matrix.
  The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
  No off-processor parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  Mat A = aij->A; /* diagonal part of the matrix */
  Mat B = aij->B; /* off-diagonal part of the matrix */
  Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data;
  PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, col;
  PetscInt *ailen = a->ilen, *aj = a->j;
  PetscInt *bilen = b->ilen, *bj = b->j;
  PetscInt am = aij->A->rmap->n, j;
  PetscInt diag_so_far = 0, dnz;
  PetscInt offd_so_far = 0, onz;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j = 0; j < am; j++) {
    dnz = onz = 0;
    /* Iterate over all non-zero columns of the current row */
    for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
      /* If the column is in the diagonal block */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[diag_so_far++] = mat_j[col] - cstart;
        dnz++;
      } else { /* off-diagonal entries */
        bj[offd_so_far++] = mat_j[col];
        onz++;
      }
    }
    ailen[j] = dnz;
    bilen[j] = onz;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ matrix.
  The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
  No off-processor parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ().
  Also, mat->was_assembled has to be false; otherwise the statement aj[rowstart_diag + dnz_row] = mat_j[col] - cstart;
  would not be true and the more complex MatSetValues_MPIAIJ() has to be used.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[], const PetscScalar mat_a[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  Mat A = aij->A; /* diagonal part of the matrix */
  Mat B = aij->B; /* off-diagonal part of the matrix */
  Mat_SeqAIJ *aijd = (Mat_SeqAIJ *)aij->A->data, *aijo = (Mat_SeqAIJ *)aij->B->data;
  Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data;
  PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend;
  PetscInt *ailen = a->ilen, *aj = a->j;
  PetscInt *bilen = b->ilen, *bj = b->j;
  PetscInt am = aij->A->rmap->n, j;
  PetscInt *full_diag_i = aijd->i, *full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point.
*/ 676 PetscInt col, dnz_row, onz_row, rowstart_diag, rowstart_offd; 677 PetscScalar *aa = a->a, *ba = b->a; 678 679 PetscFunctionBegin; 680 /* Iterate over all rows of the matrix */ 681 for (j = 0; j < am; j++) { 682 dnz_row = onz_row = 0; 683 rowstart_offd = full_offd_i[j]; 684 rowstart_diag = full_diag_i[j]; 685 /* Iterate over all non-zero columns of the current row */ 686 for (col = mat_i[j]; col < mat_i[j + 1]; col++) { 687 /* If column is in the diagonal */ 688 if (mat_j[col] >= cstart && mat_j[col] < cend) { 689 aj[rowstart_diag + dnz_row] = mat_j[col] - cstart; 690 aa[rowstart_diag + dnz_row] = mat_a[col]; 691 dnz_row++; 692 } else { /* off-diagonal entries */ 693 bj[rowstart_offd + onz_row] = mat_j[col]; 694 ba[rowstart_offd + onz_row] = mat_a[col]; 695 onz_row++; 696 } 697 } 698 ailen[j] = dnz_row; 699 bilen[j] = onz_row; 700 } 701 PetscFunctionReturn(PETSC_SUCCESS); 702 } 703 704 static PetscErrorCode MatGetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[]) 705 { 706 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 707 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend; 708 PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col; 709 710 PetscFunctionBegin; 711 for (i = 0; i < m; i++) { 712 if (idxm[i] < 0) continue; /* negative row */ 713 PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1); 714 PetscCheck(idxm[i] >= rstart && idxm[i] < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported, row requested %" PetscInt_FMT " range [%" PetscInt_FMT " %" PetscInt_FMT ")", idxm[i], rstart, rend); 715 row = idxm[i] - rstart; 716 for (j = 0; j < n; j++) { 717 if (idxn[j] < 0) continue; /* negative column */ 718 PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1); 719 if (idxn[j] >= cstart && idxn[j] < cend) { 720 col = idxn[j] - cstart; 721 PetscCall(MatGetValues(aij->A, 1, &row, 1, &col, v + i * n + j)); 722 } else { 723 if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 724 #if defined(PETSC_USE_CTABLE) 725 PetscCall(PetscHMapIGetWithDefault(aij->colmap, idxn[j] + 1, 0, &col)); 726 col--; 727 #else 728 col = aij->colmap[idxn[j]] - 1; 729 #endif 730 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v + i * n + j) = 0.0; 731 else PetscCall(MatGetValues(aij->B, 1, &row, 1, &col, v + i * n + j)); 732 } 733 } 734 } 735 PetscFunctionReturn(PETSC_SUCCESS); 736 } 737 738 static PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat, MatAssemblyType mode) 739 { 740 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 741 PetscInt nstash, reallocs; 742 743 PetscFunctionBegin; 744 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS); 745 746 PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range)); 747 PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs)); 748 PetscCall(PetscInfo(aij->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs)); 749 PetscFunctionReturn(PETSC_SUCCESS); 750 } 751 752 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat, MatAssemblyType mode) 753 { 754 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 755 PetscMPIInt n; 756 PetscInt i, j, rstart, ncols, flg; 757 PetscInt *row, *col; 758 PetscBool other_disassembled; 759 PetscScalar *val; 760 761 /* do not use 'b 
= (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 762 763 PetscFunctionBegin; 764 if (!aij->donotstash && !mat->nooffprocentries) { 765 while (1) { 766 PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg)); 767 if (!flg) break; 768 769 for (i = 0; i < n;) { 770 /* Now identify the consecutive vals belonging to the same row */ 771 for (j = i, rstart = row[j]; j < n; j++) { 772 if (row[j] != rstart) break; 773 } 774 if (j < n) ncols = j - i; 775 else ncols = n - i; 776 /* Now assemble all these values with a single function call */ 777 PetscCall(MatSetValues_MPIAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode)); 778 i = j; 779 } 780 } 781 PetscCall(MatStashScatterEnd_Private(&mat->stash)); 782 } 783 #if defined(PETSC_HAVE_DEVICE) 784 if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU; 785 /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */ 786 if (mat->boundtocpu) { 787 PetscCall(MatBindToCPU(aij->A, PETSC_TRUE)); 788 PetscCall(MatBindToCPU(aij->B, PETSC_TRUE)); 789 } 790 #endif 791 PetscCall(MatAssemblyBegin(aij->A, mode)); 792 PetscCall(MatAssemblyEnd(aij->A, mode)); 793 794 /* determine if any processor has disassembled, if so we must 795 also disassemble ourself, in order that we may reassemble. */ 796 /* 797 if nonzero structure of submatrix B cannot change then we know that 798 no processor disassembled thus we can skip this stuff 799 */ 800 if (!((Mat_SeqAIJ *)aij->B->data)->nonew) { 801 PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 802 if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globally it does not */ 803 PetscCall(MatDisAssemble_MPIAIJ(mat)); 804 } 805 } 806 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIAIJ(mat)); 807 PetscCall(MatSetOption(aij->B, MAT_USE_INODES, PETSC_FALSE)); 808 #if defined(PETSC_HAVE_DEVICE) 809 if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU; 810 #endif 811 PetscCall(MatAssemblyBegin(aij->B, mode)); 812 PetscCall(MatAssemblyEnd(aij->B, mode)); 813 814 PetscCall(PetscFree2(aij->rowvalues, aij->rowindices)); 815 816 aij->rowvalues = NULL; 817 818 PetscCall(VecDestroy(&aij->diag)); 819 820 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 821 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ *)aij->A->data)->nonew) { 822 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 823 PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat))); 824 } 825 #if defined(PETSC_HAVE_DEVICE) 826 mat->offloadmask = PETSC_OFFLOAD_BOTH; 827 #endif 828 PetscFunctionReturn(PETSC_SUCCESS); 829 } 830 831 static PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 832 { 833 Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data; 834 835 PetscFunctionBegin; 836 PetscCall(MatZeroEntries(l->A)); 837 PetscCall(MatZeroEntries(l->B)); 838 PetscFunctionReturn(PETSC_SUCCESS); 839 } 840 841 static PetscErrorCode MatZeroRows_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 842 { 843 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 844 PetscInt *lrows; 845 PetscInt r, len; 846 PetscBool cong; 847 848 
PetscFunctionBegin; 849 /* get locally owned rows */ 850 PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows)); 851 PetscCall(MatHasCongruentLayouts(A, &cong)); 852 /* fix right-hand side if needed */ 853 if (x && b) { 854 const PetscScalar *xx; 855 PetscScalar *bb; 856 857 PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout"); 858 PetscCall(VecGetArrayRead(x, &xx)); 859 PetscCall(VecGetArray(b, &bb)); 860 for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]]; 861 PetscCall(VecRestoreArrayRead(x, &xx)); 862 PetscCall(VecRestoreArray(b, &bb)); 863 } 864 865 if (diag != 0.0 && cong) { 866 PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL)); 867 PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 868 } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */ 869 Mat_SeqAIJ *aijA = (Mat_SeqAIJ *)mat->A->data; 870 Mat_SeqAIJ *aijB = (Mat_SeqAIJ *)mat->B->data; 871 PetscInt nnwA, nnwB; 872 PetscBool nnzA, nnzB; 873 874 nnwA = aijA->nonew; 875 nnwB = aijB->nonew; 876 nnzA = aijA->keepnonzeropattern; 877 nnzB = aijB->keepnonzeropattern; 878 if (!nnzA) { 879 PetscCall(PetscInfo(mat->A, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n")); 880 aijA->nonew = 0; 881 } 882 if (!nnzB) { 883 PetscCall(PetscInfo(mat->B, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n")); 884 aijB->nonew = 0; 885 } 886 /* Must zero here before the next loop */ 887 PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL)); 888 PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 889 for (r = 0; r < len; ++r) { 890 const PetscInt row = lrows[r] + A->rmap->rstart; 891 if (row >= A->cmap->N) continue; 892 PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES)); 893 } 894 aijA->nonew = nnwA; 895 aijB->nonew = nnwB; 896 } else { 897 PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL)); 898 PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 899 } 900 PetscCall(PetscFree(lrows)); 901 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 902 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 903 904 /* only change matrix nonzero state if pattern was allowed to be changed */ 905 if (!((Mat_SeqAIJ *)mat->A->data)->keepnonzeropattern || !((Mat_SeqAIJ *)mat->A->data)->nonew) { 906 PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate; 907 PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A))); 908 } 909 PetscFunctionReturn(PETSC_SUCCESS); 910 } 911 912 static PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 913 { 914 Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data; 915 PetscMPIInt n = A->rmap->n; 916 PetscInt i, j, r, m, len = 0; 917 PetscInt *lrows, *owners = A->rmap->range; 918 PetscMPIInt p = 0; 919 PetscSFNode *rrows; 920 PetscSF sf; 921 const PetscScalar *xx; 922 PetscScalar *bb, *mask, *aij_a; 923 Vec xmask, lmask; 924 Mat_SeqAIJ *aij = (Mat_SeqAIJ *)l->B->data; 925 const PetscInt *aj, *ii, *ridx; 926 PetscScalar *aa; 927 928 PetscFunctionBegin; 929 /* Create SF where leaves are input rows and roots are owned rows */ 930 PetscCall(PetscMalloc1(n, &lrows)); 931 for (r = 0; r < n; ++r) lrows[r] = -1; 932 PetscCall(PetscMalloc1(N, &rrows)); 933 for (r = 0; r < N; ++r) { 934 const PetscInt idx = 
rows[r]; 935 PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N); 936 if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */ 937 PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p)); 938 } 939 rrows[r].rank = p; 940 rrows[r].index = rows[r] - owners[p]; 941 } 942 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 943 PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER)); 944 /* Collect flags for rows to be zeroed */ 945 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR)); 946 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR)); 947 PetscCall(PetscSFDestroy(&sf)); 948 /* Compress and put in row numbers */ 949 for (r = 0; r < n; ++r) 950 if (lrows[r] >= 0) lrows[len++] = r; 951 /* zero diagonal part of matrix */ 952 PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b)); 953 /* handle off-diagonal part of matrix */ 954 PetscCall(MatCreateVecs(A, &xmask, NULL)); 955 PetscCall(VecDuplicate(l->lvec, &lmask)); 956 PetscCall(VecGetArray(xmask, &bb)); 957 for (i = 0; i < len; i++) bb[lrows[i]] = 1; 958 PetscCall(VecRestoreArray(xmask, &bb)); 959 PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD)); 960 PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD)); 961 PetscCall(VecDestroy(&xmask)); 962 if (x && b) { /* this code is buggy when the row and column layout don't match */ 963 PetscBool cong; 964 965 PetscCall(MatHasCongruentLayouts(A, &cong)); 966 PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout"); 967 PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD)); 968 PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD)); 969 PetscCall(VecGetArrayRead(l->lvec, &xx)); 970 PetscCall(VecGetArray(b, &bb)); 971 } 972 PetscCall(VecGetArray(lmask, &mask)); 973 /* remove zeroed rows of off-diagonal matrix */ 974 PetscCall(MatSeqAIJGetArray(l->B, &aij_a)); 975 ii = aij->i; 976 for (i = 0; i < len; i++) PetscCall(PetscArrayzero(PetscSafePointerPlusOffset(aij_a, ii[lrows[i]]), ii[lrows[i] + 1] - ii[lrows[i]])); 977 /* loop over all elements of off process part of matrix zeroing removed columns*/ 978 if (aij->compressedrow.use) { 979 m = aij->compressedrow.nrows; 980 ii = aij->compressedrow.i; 981 ridx = aij->compressedrow.rindex; 982 for (i = 0; i < m; i++) { 983 n = ii[i + 1] - ii[i]; 984 aj = aij->j + ii[i]; 985 aa = aij_a + ii[i]; 986 987 for (j = 0; j < n; j++) { 988 if (PetscAbsScalar(mask[*aj])) { 989 if (b) bb[*ridx] -= *aa * xx[*aj]; 990 *aa = 0.0; 991 } 992 aa++; 993 aj++; 994 } 995 ridx++; 996 } 997 } else { /* do not use compressed row format */ 998 m = l->B->rmap->n; 999 for (i = 0; i < m; i++) { 1000 n = ii[i + 1] - ii[i]; 1001 aj = aij->j + ii[i]; 1002 aa = aij_a + ii[i]; 1003 for (j = 0; j < n; j++) { 1004 if (PetscAbsScalar(mask[*aj])) { 1005 if (b) bb[i] -= *aa * xx[*aj]; 1006 *aa = 0.0; 1007 } 1008 aa++; 1009 aj++; 1010 } 1011 } 1012 } 1013 if (x && b) { 1014 PetscCall(VecRestoreArray(b, &bb)); 1015 PetscCall(VecRestoreArrayRead(l->lvec, &xx)); 1016 } 1017 PetscCall(MatSeqAIJRestoreArray(l->B, &aij_a)); 1018 PetscCall(VecRestoreArray(lmask, &mask)); 1019 PetscCall(VecDestroy(&lmask)); 1020 PetscCall(PetscFree(lrows)); 1021 1022 /* only change matrix nonzero state if pattern was allowed to be 
changed */ 1023 if (!((Mat_SeqAIJ *)l->A->data)->nonew) { 1024 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1025 PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A))); 1026 } 1027 PetscFunctionReturn(PETSC_SUCCESS); 1028 } 1029 1030 static PetscErrorCode MatMult_MPIAIJ(Mat A, Vec xx, Vec yy) 1031 { 1032 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1033 PetscInt nt; 1034 VecScatter Mvctx = a->Mvctx; 1035 1036 PetscFunctionBegin; 1037 PetscCall(VecGetLocalSize(xx, &nt)); 1038 PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")", A->cmap->n, nt); 1039 PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1040 PetscUseTypeMethod(a->A, mult, xx, yy); 1041 PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1042 PetscUseTypeMethod(a->B, multadd, a->lvec, yy, yy); 1043 PetscFunctionReturn(PETSC_SUCCESS); 1044 } 1045 1046 static PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A, Vec bb, Vec xx) 1047 { 1048 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1049 1050 PetscFunctionBegin; 1051 PetscCall(MatMultDiagonalBlock(a->A, bb, xx)); 1052 PetscFunctionReturn(PETSC_SUCCESS); 1053 } 1054 1055 static PetscErrorCode MatMultAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz) 1056 { 1057 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1058 VecScatter Mvctx = a->Mvctx; 1059 1060 PetscFunctionBegin; 1061 PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1062 PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz)); 1063 PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1064 PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz)); 1065 PetscFunctionReturn(PETSC_SUCCESS); 1066 } 1067 1068 static PetscErrorCode MatMultTranspose_MPIAIJ(Mat A, Vec xx, Vec yy) 1069 { 1070 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1071 1072 PetscFunctionBegin; 1073 /* do nondiagonal part */ 1074 PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec)); 1075 /* do local part */ 1076 PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy)); 1077 /* add partial results together */ 1078 PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE)); 1079 PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE)); 1080 PetscFunctionReturn(PETSC_SUCCESS); 1081 } 1082 1083 static PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat, Mat Bmat, PetscReal tol, PetscBool *f) 1084 { 1085 MPI_Comm comm; 1086 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)Amat->data, *Bij = (Mat_MPIAIJ *)Bmat->data; 1087 Mat Adia = Aij->A, Bdia = Bij->A, Aoff, Boff, *Aoffs, *Boffs; 1088 IS Me, Notme; 1089 PetscInt M, N, first, last, *notme, i; 1090 PetscBool lf; 1091 PetscMPIInt size; 1092 1093 PetscFunctionBegin; 1094 /* Easy test: symmetric diagonal block */ 1095 PetscCall(MatIsTranspose(Adia, Bdia, tol, &lf)); 1096 PetscCall(MPIU_Allreduce(&lf, f, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)Amat))); 1097 if (!*f) PetscFunctionReturn(PETSC_SUCCESS); 1098 PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm)); 1099 PetscCallMPI(MPI_Comm_size(comm, &size)); 1100 if (size == 1) PetscFunctionReturn(PETSC_SUCCESS); 1101 1102 /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. 
*/ 1103 PetscCall(MatGetSize(Amat, &M, &N)); 1104 PetscCall(MatGetOwnershipRange(Amat, &first, &last)); 1105 PetscCall(PetscMalloc1(N - last + first, ¬me)); 1106 for (i = 0; i < first; i++) notme[i] = i; 1107 for (i = last; i < M; i++) notme[i - last + first] = i; 1108 PetscCall(ISCreateGeneral(MPI_COMM_SELF, N - last + first, notme, PETSC_COPY_VALUES, &Notme)); 1109 PetscCall(ISCreateStride(MPI_COMM_SELF, last - first, first, 1, &Me)); 1110 PetscCall(MatCreateSubMatrices(Amat, 1, &Me, &Notme, MAT_INITIAL_MATRIX, &Aoffs)); 1111 Aoff = Aoffs[0]; 1112 PetscCall(MatCreateSubMatrices(Bmat, 1, &Notme, &Me, MAT_INITIAL_MATRIX, &Boffs)); 1113 Boff = Boffs[0]; 1114 PetscCall(MatIsTranspose(Aoff, Boff, tol, f)); 1115 PetscCall(MatDestroyMatrices(1, &Aoffs)); 1116 PetscCall(MatDestroyMatrices(1, &Boffs)); 1117 PetscCall(ISDestroy(&Me)); 1118 PetscCall(ISDestroy(&Notme)); 1119 PetscCall(PetscFree(notme)); 1120 PetscFunctionReturn(PETSC_SUCCESS); 1121 } 1122 1123 static PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz) 1124 { 1125 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1126 1127 PetscFunctionBegin; 1128 /* do nondiagonal part */ 1129 PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec)); 1130 /* do local part */ 1131 PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz)); 1132 /* add partial results together */ 1133 PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE)); 1134 PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE)); 1135 PetscFunctionReturn(PETSC_SUCCESS); 1136 } 1137 1138 /* 1139 This only works correctly for square matrices where the subblock A->A is the 1140 diagonal block 1141 */ 1142 static PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A, Vec v) 1143 { 1144 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1145 1146 PetscFunctionBegin; 1147 PetscCheck(A->rmap->N == A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block"); 1148 PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "row partition must equal col partition"); 1149 PetscCall(MatGetDiagonal(a->A, v)); 1150 PetscFunctionReturn(PETSC_SUCCESS); 1151 } 1152 1153 static PetscErrorCode MatScale_MPIAIJ(Mat A, PetscScalar aa) 1154 { 1155 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1156 1157 PetscFunctionBegin; 1158 PetscCall(MatScale(a->A, aa)); 1159 PetscCall(MatScale(a->B, aa)); 1160 PetscFunctionReturn(PETSC_SUCCESS); 1161 } 1162 1163 static PetscErrorCode MatView_MPIAIJ_Binary(Mat mat, PetscViewer viewer) 1164 { 1165 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1166 Mat_SeqAIJ *A = (Mat_SeqAIJ *)aij->A->data; 1167 Mat_SeqAIJ *B = (Mat_SeqAIJ *)aij->B->data; 1168 const PetscInt *garray = aij->garray; 1169 const PetscScalar *aa, *ba; 1170 PetscInt header[4], M, N, m, rs, cs, cnt, i, ja, jb; 1171 PetscInt64 nz, hnz; 1172 PetscInt *rowlens; 1173 PetscInt *colidxs; 1174 PetscScalar *matvals; 1175 PetscMPIInt rank; 1176 1177 PetscFunctionBegin; 1178 PetscCall(PetscViewerSetUp(viewer)); 1179 1180 M = mat->rmap->N; 1181 N = mat->cmap->N; 1182 m = mat->rmap->n; 1183 rs = mat->rmap->rstart; 1184 cs = mat->cmap->rstart; 1185 nz = A->nz + B->nz; 1186 1187 /* write matrix header */ 1188 header[0] = MAT_FILE_CLASSID; 1189 header[1] = M; 1190 header[2] = N; 1191 PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_INT64, MPI_SUM, 0, PetscObjectComm((PetscObject)mat))); 1192 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 1193 if (rank == 0) { 1194 if 
(hnz > PETSC_MAX_INT) header[3] = PETSC_MAX_INT; 1195 else header[3] = (PetscInt)hnz; 1196 } 1197 PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT)); 1198 1199 /* fill in and store row lengths */ 1200 PetscCall(PetscMalloc1(m, &rowlens)); 1201 for (i = 0; i < m; i++) rowlens[i] = A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i]; 1202 PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT)); 1203 PetscCall(PetscFree(rowlens)); 1204 1205 /* fill in and store column indices */ 1206 PetscCall(PetscMalloc1(nz, &colidxs)); 1207 for (cnt = 0, i = 0; i < m; i++) { 1208 for (jb = B->i[i]; jb < B->i[i + 1]; jb++) { 1209 if (garray[B->j[jb]] > cs) break; 1210 colidxs[cnt++] = garray[B->j[jb]]; 1211 } 1212 for (ja = A->i[i]; ja < A->i[i + 1]; ja++) colidxs[cnt++] = A->j[ja] + cs; 1213 for (; jb < B->i[i + 1]; jb++) colidxs[cnt++] = garray[B->j[jb]]; 1214 } 1215 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz); 1216 PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 1217 PetscCall(PetscFree(colidxs)); 1218 1219 /* fill in and store nonzero values */ 1220 PetscCall(MatSeqAIJGetArrayRead(aij->A, &aa)); 1221 PetscCall(MatSeqAIJGetArrayRead(aij->B, &ba)); 1222 PetscCall(PetscMalloc1(nz, &matvals)); 1223 for (cnt = 0, i = 0; i < m; i++) { 1224 for (jb = B->i[i]; jb < B->i[i + 1]; jb++) { 1225 if (garray[B->j[jb]] > cs) break; 1226 matvals[cnt++] = ba[jb]; 1227 } 1228 for (ja = A->i[i]; ja < A->i[i + 1]; ja++) matvals[cnt++] = aa[ja]; 1229 for (; jb < B->i[i + 1]; jb++) matvals[cnt++] = ba[jb]; 1230 } 1231 PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &aa)); 1232 PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &ba)); 1233 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz); 1234 PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 1235 PetscCall(PetscFree(matvals)); 1236 1237 /* write block size option to the viewer's .info file */ 1238 PetscCall(MatView_Binary_BlockSizes(mat, viewer)); 1239 PetscFunctionReturn(PETSC_SUCCESS); 1240 } 1241 1242 #include <petscdraw.h> 1243 static PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer) 1244 { 1245 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1246 PetscMPIInt rank = aij->rank, size = aij->size; 1247 PetscBool isdraw, iascii, isbinary; 1248 PetscViewer sviewer; 1249 PetscViewerFormat format; 1250 1251 PetscFunctionBegin; 1252 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1253 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 1254 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1255 if (iascii) { 1256 PetscCall(PetscViewerGetFormat(viewer, &format)); 1257 if (format == PETSC_VIEWER_LOAD_BALANCE) { 1258 PetscInt i, nmax = 0, nmin = PETSC_MAX_INT, navg = 0, *nz, nzlocal = ((Mat_SeqAIJ *)aij->A->data)->nz + ((Mat_SeqAIJ *)aij->B->data)->nz; 1259 PetscCall(PetscMalloc1(size, &nz)); 1260 PetscCallMPI(MPI_Allgather(&nzlocal, 1, MPIU_INT, nz, 1, MPIU_INT, PetscObjectComm((PetscObject)mat))); 1261 for (i = 0; i < (PetscInt)size; i++) { 1262 nmax = PetscMax(nmax, nz[i]); 1263 nmin = PetscMin(nmin, nz[i]); 1264 navg += nz[i]; 1265 } 1266 PetscCall(PetscFree(nz)); 1267 navg = navg / size; 1268 PetscCall(PetscViewerASCIIPrintf(viewer, "Load Balance - 
Nonzeros: Min %" PetscInt_FMT " avg %" PetscInt_FMT " max %" PetscInt_FMT "\n", nmin, navg, nmax)); 1269 PetscFunctionReturn(PETSC_SUCCESS); 1270 } 1271 PetscCall(PetscViewerGetFormat(viewer, &format)); 1272 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1273 MatInfo info; 1274 PetscInt *inodes = NULL; 1275 1276 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 1277 PetscCall(MatGetInfo(mat, MAT_LOCAL, &info)); 1278 PetscCall(MatInodeGetInodeSizes(aij->A, NULL, &inodes, NULL)); 1279 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 1280 if (!inodes) { 1281 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, 1282 (double)info.memory)); 1283 } else { 1284 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, 1285 (double)info.memory)); 1286 } 1287 PetscCall(MatGetInfo(aij->A, MAT_LOCAL, &info)); 1288 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used)); 1289 PetscCall(MatGetInfo(aij->B, MAT_LOCAL, &info)); 1290 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used)); 1291 PetscCall(PetscViewerFlush(viewer)); 1292 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 1293 PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n")); 1294 PetscCall(VecScatterView(aij->Mvctx, viewer)); 1295 PetscFunctionReturn(PETSC_SUCCESS); 1296 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1297 PetscInt inodecount, inodelimit, *inodes; 1298 PetscCall(MatInodeGetInodeSizes(aij->A, &inodecount, &inodes, &inodelimit)); 1299 if (inodes) { 1300 PetscCall(PetscViewerASCIIPrintf(viewer, "using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n", inodecount, inodelimit)); 1301 } else { 1302 PetscCall(PetscViewerASCIIPrintf(viewer, "not using I-node (on process 0) routines\n")); 1303 } 1304 PetscFunctionReturn(PETSC_SUCCESS); 1305 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1306 PetscFunctionReturn(PETSC_SUCCESS); 1307 } 1308 } else if (isbinary) { 1309 if (size == 1) { 1310 PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name)); 1311 PetscCall(MatView(aij->A, viewer)); 1312 } else { 1313 PetscCall(MatView_MPIAIJ_Binary(mat, viewer)); 1314 } 1315 PetscFunctionReturn(PETSC_SUCCESS); 1316 } else if (iascii && size == 1) { 1317 PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name)); 1318 PetscCall(MatView(aij->A, viewer)); 1319 PetscFunctionReturn(PETSC_SUCCESS); 1320 } else if (isdraw) { 1321 PetscDraw draw; 1322 PetscBool isnull; 1323 PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); 1324 PetscCall(PetscDrawIsNull(draw, &isnull)); 1325 if (isnull) PetscFunctionReturn(PETSC_SUCCESS); 1326 } 1327 1328 { /* assemble the entire matrix onto first processor */ 1329 Mat A = NULL, Av; 1330 IS isrow, iscol; 1331 1332 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->rmap->N : 0, 0, 1, &isrow)); 1333 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? 
mat->cmap->N : 0, 0, 1, &iscol)); 1334 PetscCall(MatCreateSubMatrix(mat, isrow, iscol, MAT_INITIAL_MATRIX, &A)); 1335 PetscCall(MatMPIAIJGetSeqAIJ(A, &Av, NULL, NULL)); 1336 /* The commented code uses MatCreateSubMatrices instead */ 1337 /* 1338 Mat *AA, A = NULL, Av; 1339 IS isrow,iscol; 1340 1341 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow)); 1342 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol)); 1343 PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA)); 1344 if (rank == 0) { 1345 PetscCall(PetscObjectReference((PetscObject)AA[0])); 1346 A = AA[0]; 1347 Av = AA[0]; 1348 } 1349 PetscCall(MatDestroySubMatrices(1,&AA)); 1350 */ 1351 PetscCall(ISDestroy(&iscol)); 1352 PetscCall(ISDestroy(&isrow)); 1353 /* 1354 Everyone has to call to draw the matrix since the graphics waits are 1355 synchronized across all processors that share the PetscDraw object 1356 */ 1357 PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 1358 if (rank == 0) { 1359 if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)Av, ((PetscObject)mat)->name)); 1360 PetscCall(MatView_SeqAIJ(Av, sviewer)); 1361 } 1362 PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 1363 PetscCall(MatDestroy(&A)); 1364 } 1365 PetscFunctionReturn(PETSC_SUCCESS); 1366 } 1367 1368 PetscErrorCode MatView_MPIAIJ(Mat mat, PetscViewer viewer) 1369 { 1370 PetscBool iascii, isdraw, issocket, isbinary; 1371 1372 PetscFunctionBegin; 1373 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 1374 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1375 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1376 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket)); 1377 if (iascii || isdraw || isbinary || issocket) PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat, viewer)); 1378 PetscFunctionReturn(PETSC_SUCCESS); 1379 } 1380 1381 static PetscErrorCode MatSOR_MPIAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) 1382 { 1383 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data; 1384 Vec bb1 = NULL; 1385 PetscBool hasop; 1386 1387 PetscFunctionBegin; 1388 if (flag == SOR_APPLY_UPPER) { 1389 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1390 PetscFunctionReturn(PETSC_SUCCESS); 1391 } 1392 1393 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) PetscCall(VecDuplicate(bb, &bb1)); 1394 1395 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1396 if (flag & SOR_ZERO_INITIAL_GUESS) { 1397 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1398 its--; 1399 } 1400 1401 while (its--) { 1402 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1403 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1404 1405 /* update rhs: bb1 = bb - B*x */ 1406 PetscCall(VecScale(mat->lvec, -1.0)); 1407 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 1408 1409 /* local sweep */ 1410 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx)); 1411 } 1412 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1413 if (flag & SOR_ZERO_INITIAL_GUESS) { 1414 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, 
xx)); 1415 its--; 1416 } 1417 while (its--) { 1418 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1419 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1420 1421 /* update rhs: bb1 = bb - B*x */ 1422 PetscCall(VecScale(mat->lvec, -1.0)); 1423 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 1424 1425 /* local sweep */ 1426 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx)); 1427 } 1428 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1429 if (flag & SOR_ZERO_INITIAL_GUESS) { 1430 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1431 its--; 1432 } 1433 while (its--) { 1434 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1435 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1436 1437 /* update rhs: bb1 = bb - B*x */ 1438 PetscCall(VecScale(mat->lvec, -1.0)); 1439 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 1440 1441 /* local sweep */ 1442 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx)); 1443 } 1444 } else if (flag & SOR_EISENSTAT) { 1445 Vec xx1; 1446 1447 PetscCall(VecDuplicate(bb, &xx1)); 1448 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx)); 1449 1450 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1451 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1452 if (!mat->diag) { 1453 PetscCall(MatCreateVecs(matin, &mat->diag, NULL)); 1454 PetscCall(MatGetDiagonal(matin, mat->diag)); 1455 } 1456 PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop)); 1457 if (hasop) { 1458 PetscCall(MatMultDiagonalBlock(matin, xx, bb1)); 1459 } else { 1460 PetscCall(VecPointwiseMult(bb1, mat->diag, xx)); 1461 } 1462 PetscCall(VecAYPX(bb1, (omega - 2.0) / omega, bb)); 1463 1464 PetscCall(MatMultAdd(mat->B, mat->lvec, bb1, bb1)); 1465 1466 /* local sweep */ 1467 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1)); 1468 PetscCall(VecAXPY(xx, 1.0, xx1)); 1469 PetscCall(VecDestroy(&xx1)); 1470 } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel SOR not supported"); 1471 1472 PetscCall(VecDestroy(&bb1)); 1473 1474 matin->factorerrortype = mat->A->factorerrortype; 1475 PetscFunctionReturn(PETSC_SUCCESS); 1476 } 1477 1478 static PetscErrorCode MatPermute_MPIAIJ(Mat A, IS rowp, IS colp, Mat *B) 1479 { 1480 Mat aA, aB, Aperm; 1481 const PetscInt *rwant, *cwant, *gcols, *ai, *bi, *aj, *bj; 1482 PetscScalar *aa, *ba; 1483 PetscInt i, j, m, n, ng, anz, bnz, *dnnz, *onnz, *tdnnz, *tonnz, *rdest, *cdest, *work, *gcdest; 1484 PetscSF rowsf, sf; 1485 IS parcolp = NULL; 1486 PetscBool done; 1487 1488 PetscFunctionBegin; 1489 PetscCall(MatGetLocalSize(A, &m, &n)); 1490 PetscCall(ISGetIndices(rowp, &rwant)); 1491 PetscCall(ISGetIndices(colp, &cwant)); 1492 PetscCall(PetscMalloc3(PetscMax(m, n), &work, m, &rdest, n, &cdest)); 1493 1494 /* Invert row permutation to find out where my rows should go */ 1495 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &rowsf)); 1496 PetscCall(PetscSFSetGraphLayout(rowsf, A->rmap, A->rmap->n, NULL, PETSC_OWN_POINTER, rwant)); 1497 PetscCall(PetscSFSetFromOptions(rowsf)); 1498 for (i = 0; i < m; i++) work[i] = 
A->rmap->rstart + i; 1499 PetscCall(PetscSFReduceBegin(rowsf, MPIU_INT, work, rdest, MPI_REPLACE)); 1500 PetscCall(PetscSFReduceEnd(rowsf, MPIU_INT, work, rdest, MPI_REPLACE)); 1501 1502 /* Invert column permutation to find out where my columns should go */ 1503 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1504 PetscCall(PetscSFSetGraphLayout(sf, A->cmap, A->cmap->n, NULL, PETSC_OWN_POINTER, cwant)); 1505 PetscCall(PetscSFSetFromOptions(sf)); 1506 for (i = 0; i < n; i++) work[i] = A->cmap->rstart + i; 1507 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, work, cdest, MPI_REPLACE)); 1508 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, work, cdest, MPI_REPLACE)); 1509 PetscCall(PetscSFDestroy(&sf)); 1510 1511 PetscCall(ISRestoreIndices(rowp, &rwant)); 1512 PetscCall(ISRestoreIndices(colp, &cwant)); 1513 PetscCall(MatMPIAIJGetSeqAIJ(A, &aA, &aB, &gcols)); 1514 1515 /* Find out where my gcols should go */ 1516 PetscCall(MatGetSize(aB, NULL, &ng)); 1517 PetscCall(PetscMalloc1(ng, &gcdest)); 1518 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1519 PetscCall(PetscSFSetGraphLayout(sf, A->cmap, ng, NULL, PETSC_OWN_POINTER, gcols)); 1520 PetscCall(PetscSFSetFromOptions(sf)); 1521 PetscCall(PetscSFBcastBegin(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE)); 1522 PetscCall(PetscSFBcastEnd(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE)); 1523 PetscCall(PetscSFDestroy(&sf)); 1524 1525 PetscCall(PetscCalloc4(m, &dnnz, m, &onnz, m, &tdnnz, m, &tonnz)); 1526 PetscCall(MatGetRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done)); 1527 PetscCall(MatGetRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done)); 1528 for (i = 0; i < m; i++) { 1529 PetscInt row = rdest[i]; 1530 PetscMPIInt rowner; 1531 PetscCall(PetscLayoutFindOwner(A->rmap, row, &rowner)); 1532 for (j = ai[i]; j < ai[i + 1]; j++) { 1533 PetscInt col = cdest[aj[j]]; 1534 PetscMPIInt cowner; 1535 PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); /* Could build an index for the columns to eliminate this search */ 1536 if (rowner == cowner) dnnz[i]++; 1537 else onnz[i]++; 1538 } 1539 for (j = bi[i]; j < bi[i + 1]; j++) { 1540 PetscInt col = gcdest[bj[j]]; 1541 PetscMPIInt cowner; 1542 PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); 1543 if (rowner == cowner) dnnz[i]++; 1544 else onnz[i]++; 1545 } 1546 } 1547 PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE)); 1548 PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE)); 1549 PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE)); 1550 PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE)); 1551 PetscCall(PetscSFDestroy(&rowsf)); 1552 1553 PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A), A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N, 0, tdnnz, 0, tonnz, &Aperm)); 1554 PetscCall(MatSeqAIJGetArray(aA, &aa)); 1555 PetscCall(MatSeqAIJGetArray(aB, &ba)); 1556 for (i = 0; i < m; i++) { 1557 PetscInt *acols = dnnz, *bcols = onnz; /* Repurpose now-unneeded arrays */ 1558 PetscInt j0, rowlen; 1559 rowlen = ai[i + 1] - ai[i]; 1560 for (j0 = j = 0; j < rowlen; j0 = j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1561 for (; j < PetscMin(rowlen, j0 + m); j++) acols[j - j0] = cdest[aj[ai[i] + j]]; 1562 PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, acols, aa + ai[i] + j0, INSERT_VALUES)); 1563 } 1564 rowlen = bi[i + 1] - bi[i]; 1565 for (j0 = j = 0; j < rowlen; j0 = j) { 1566 for (; j < PetscMin(rowlen, j0 + m); j++) bcols[j - j0] = gcdest[bj[bi[i] + j]]; 1567 
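/* insert this batch of the permuted off-diagonal row: bcols[] holds the new global column indices (mapped through gcdest[]); entries owned by other ranks are stashed until assembly */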
PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, bcols, ba + bi[i] + j0, INSERT_VALUES)); 1568 } 1569 } 1570 PetscCall(MatAssemblyBegin(Aperm, MAT_FINAL_ASSEMBLY)); 1571 PetscCall(MatAssemblyEnd(Aperm, MAT_FINAL_ASSEMBLY)); 1572 PetscCall(MatRestoreRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done)); 1573 PetscCall(MatRestoreRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done)); 1574 PetscCall(MatSeqAIJRestoreArray(aA, &aa)); 1575 PetscCall(MatSeqAIJRestoreArray(aB, &ba)); 1576 PetscCall(PetscFree4(dnnz, onnz, tdnnz, tonnz)); 1577 PetscCall(PetscFree3(work, rdest, cdest)); 1578 PetscCall(PetscFree(gcdest)); 1579 if (parcolp) PetscCall(ISDestroy(&colp)); 1580 *B = Aperm; 1581 PetscFunctionReturn(PETSC_SUCCESS); 1582 } 1583 1584 static PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[]) 1585 { 1586 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1587 1588 PetscFunctionBegin; 1589 PetscCall(MatGetSize(aij->B, NULL, nghosts)); 1590 if (ghosts) *ghosts = aij->garray; 1591 PetscFunctionReturn(PETSC_SUCCESS); 1592 } 1593 1594 static PetscErrorCode MatGetInfo_MPIAIJ(Mat matin, MatInfoType flag, MatInfo *info) 1595 { 1596 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data; 1597 Mat A = mat->A, B = mat->B; 1598 PetscLogDouble isend[5], irecv[5]; 1599 1600 PetscFunctionBegin; 1601 info->block_size = 1.0; 1602 PetscCall(MatGetInfo(A, MAT_LOCAL, info)); 1603 1604 isend[0] = info->nz_used; 1605 isend[1] = info->nz_allocated; 1606 isend[2] = info->nz_unneeded; 1607 isend[3] = info->memory; 1608 isend[4] = info->mallocs; 1609 1610 PetscCall(MatGetInfo(B, MAT_LOCAL, info)); 1611 1612 isend[0] += info->nz_used; 1613 isend[1] += info->nz_allocated; 1614 isend[2] += info->nz_unneeded; 1615 isend[3] += info->memory; 1616 isend[4] += info->mallocs; 1617 if (flag == MAT_LOCAL) { 1618 info->nz_used = isend[0]; 1619 info->nz_allocated = isend[1]; 1620 info->nz_unneeded = isend[2]; 1621 info->memory = isend[3]; 1622 info->mallocs = isend[4]; 1623 } else if (flag == MAT_GLOBAL_MAX) { 1624 PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin))); 1625 1626 info->nz_used = irecv[0]; 1627 info->nz_allocated = irecv[1]; 1628 info->nz_unneeded = irecv[2]; 1629 info->memory = irecv[3]; 1630 info->mallocs = irecv[4]; 1631 } else if (flag == MAT_GLOBAL_SUM) { 1632 PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin))); 1633 1634 info->nz_used = irecv[0]; 1635 info->nz_allocated = irecv[1]; 1636 info->nz_unneeded = irecv[2]; 1637 info->memory = irecv[3]; 1638 info->mallocs = irecv[4]; 1639 } 1640 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1641 info->fill_ratio_needed = 0; 1642 info->factor_mallocs = 0; 1643 PetscFunctionReturn(PETSC_SUCCESS); 1644 } 1645 1646 PetscErrorCode MatSetOption_MPIAIJ(Mat A, MatOption op, PetscBool flg) 1647 { 1648 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1649 1650 PetscFunctionBegin; 1651 switch (op) { 1652 case MAT_NEW_NONZERO_LOCATIONS: 1653 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1654 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1655 case MAT_KEEP_NONZERO_PATTERN: 1656 case MAT_NEW_NONZERO_LOCATION_ERR: 1657 case MAT_USE_INODES: 1658 case MAT_IGNORE_ZERO_ENTRIES: 1659 case MAT_FORM_EXPLICIT_TRANSPOSE: 1660 MatCheckPreallocated(A, 1); 1661 PetscCall(MatSetOption(a->A, op, flg)); 1662 PetscCall(MatSetOption(a->B, op, flg)); 1663 break; 1664 case MAT_ROW_ORIENTED: 1665 MatCheckPreallocated(A, 1); 1666 a->roworiented = flg; 1667 1668 
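/* propagate the orientation to both the diagonal (A) and off-diagonal (B) sequential blocks */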
PetscCall(MatSetOption(a->A, op, flg)); 1669 PetscCall(MatSetOption(a->B, op, flg)); 1670 break; 1671 case MAT_FORCE_DIAGONAL_ENTRIES: 1672 case MAT_SORTED_FULL: 1673 PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); 1674 break; 1675 case MAT_IGNORE_OFF_PROC_ENTRIES: 1676 a->donotstash = flg; 1677 break; 1678 /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */ 1679 case MAT_SPD: 1680 case MAT_SYMMETRIC: 1681 case MAT_STRUCTURALLY_SYMMETRIC: 1682 case MAT_HERMITIAN: 1683 case MAT_SYMMETRY_ETERNAL: 1684 case MAT_STRUCTURAL_SYMMETRY_ETERNAL: 1685 case MAT_SPD_ETERNAL: 1686 /* if the diagonal matrix is square it inherits some of the properties above */ 1687 break; 1688 case MAT_SUBMAT_SINGLEIS: 1689 A->submat_singleis = flg; 1690 break; 1691 case MAT_STRUCTURE_ONLY: 1692 /* The option is handled directly by MatSetOption() */ 1693 break; 1694 default: 1695 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op); 1696 } 1697 PetscFunctionReturn(PETSC_SUCCESS); 1698 } 1699 1700 PetscErrorCode MatGetRow_MPIAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1701 { 1702 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data; 1703 PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p; 1704 PetscInt i, *cworkA, *cworkB, **pcA, **pcB, cstart = matin->cmap->rstart; 1705 PetscInt nztot, nzA, nzB, lrow, rstart = matin->rmap->rstart, rend = matin->rmap->rend; 1706 PetscInt *cmap, *idx_p; 1707 1708 PetscFunctionBegin; 1709 PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active"); 1710 mat->getrowactive = PETSC_TRUE; 1711 1712 if (!mat->rowvalues && (idx || v)) { 1713 /* 1714 allocate enough space to hold information from the longest row. 1715 */ 1716 Mat_SeqAIJ *Aa = (Mat_SeqAIJ *)mat->A->data, *Ba = (Mat_SeqAIJ *)mat->B->data; 1717 PetscInt max = 1, tmp; 1718 for (i = 0; i < matin->rmap->n; i++) { 1719 tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i]; 1720 if (max < tmp) max = tmp; 1721 } 1722 PetscCall(PetscMalloc2(max, &mat->rowvalues, max, &mat->rowindices)); 1723 } 1724 1725 PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows"); 1726 lrow = row - rstart; 1727 1728 pvA = &vworkA; 1729 pcA = &cworkA; 1730 pvB = &vworkB; 1731 pcB = &cworkB; 1732 if (!v) { 1733 pvA = NULL; 1734 pvB = NULL; 1735 } 1736 if (!idx) { 1737 pcA = NULL; 1738 if (!v) pcB = NULL; 1739 } 1740 PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA)); 1741 PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB)); 1742 nztot = nzA + nzB; 1743 1744 cmap = mat->garray; 1745 if (v || idx) { 1746 if (nztot) { 1747 /* Sort by increasing column numbers, assuming A and B already sorted */ 1748 PetscInt imark = -1; 1749 if (v) { 1750 *v = v_p = mat->rowvalues; 1751 for (i = 0; i < nzB; i++) { 1752 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1753 else break; 1754 } 1755 imark = i; 1756 for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i]; 1757 for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i]; 1758 } 1759 if (idx) { 1760 *idx = idx_p = mat->rowindices; 1761 if (imark > -1) { 1762 for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i]]; 1763 } else { 1764 for (i = 0; i < nzB; i++) { 1765 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1766 else break; 1767 } 1768 imark = i; 1769 } 1770 for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart + cworkA[i]; 1771 for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i]]; 1772 } 1773 } else { 1774 if 
(idx) *idx = NULL; 1775 if (v) *v = NULL; 1776 } 1777 } 1778 *nz = nztot; 1779 PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA)); 1780 PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB)); 1781 PetscFunctionReturn(PETSC_SUCCESS); 1782 } 1783 1784 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1785 { 1786 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1787 1788 PetscFunctionBegin; 1789 PetscCheck(aij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first"); 1790 aij->getrowactive = PETSC_FALSE; 1791 PetscFunctionReturn(PETSC_SUCCESS); 1792 } 1793 1794 static PetscErrorCode MatNorm_MPIAIJ(Mat mat, NormType type, PetscReal *norm) 1795 { 1796 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1797 Mat_SeqAIJ *amat = (Mat_SeqAIJ *)aij->A->data, *bmat = (Mat_SeqAIJ *)aij->B->data; 1798 PetscInt i, j, cstart = mat->cmap->rstart; 1799 PetscReal sum = 0.0; 1800 const MatScalar *v, *amata, *bmata; 1801 1802 PetscFunctionBegin; 1803 if (aij->size == 1) { 1804 PetscCall(MatNorm(aij->A, type, norm)); 1805 } else { 1806 PetscCall(MatSeqAIJGetArrayRead(aij->A, &amata)); 1807 PetscCall(MatSeqAIJGetArrayRead(aij->B, &bmata)); 1808 if (type == NORM_FROBENIUS) { 1809 v = amata; 1810 for (i = 0; i < amat->nz; i++) { 1811 sum += PetscRealPart(PetscConj(*v) * (*v)); 1812 v++; 1813 } 1814 v = bmata; 1815 for (i = 0; i < bmat->nz; i++) { 1816 sum += PetscRealPart(PetscConj(*v) * (*v)); 1817 v++; 1818 } 1819 PetscCall(MPIU_Allreduce(&sum, norm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 1820 *norm = PetscSqrtReal(*norm); 1821 PetscCall(PetscLogFlops(2.0 * amat->nz + 2.0 * bmat->nz)); 1822 } else if (type == NORM_1) { /* max column norm */ 1823 PetscReal *tmp, *tmp2; 1824 PetscInt *jj, *garray = aij->garray; 1825 PetscCall(PetscCalloc1(mat->cmap->N + 1, &tmp)); 1826 PetscCall(PetscMalloc1(mat->cmap->N + 1, &tmp2)); 1827 *norm = 0.0; 1828 v = amata; 1829 jj = amat->j; 1830 for (j = 0; j < amat->nz; j++) { 1831 tmp[cstart + *jj++] += PetscAbsScalar(*v); 1832 v++; 1833 } 1834 v = bmata; 1835 jj = bmat->j; 1836 for (j = 0; j < bmat->nz; j++) { 1837 tmp[garray[*jj++]] += PetscAbsScalar(*v); 1838 v++; 1839 } 1840 PetscCall(MPIU_Allreduce(tmp, tmp2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 1841 for (j = 0; j < mat->cmap->N; j++) { 1842 if (tmp2[j] > *norm) *norm = tmp2[j]; 1843 } 1844 PetscCall(PetscFree(tmp)); 1845 PetscCall(PetscFree(tmp2)); 1846 PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0))); 1847 } else if (type == NORM_INFINITY) { /* max row norm */ 1848 PetscReal ntemp = 0.0; 1849 for (j = 0; j < aij->A->rmap->n; j++) { 1850 v = PetscSafePointerPlusOffset(amata, amat->i[j]); 1851 sum = 0.0; 1852 for (i = 0; i < amat->i[j + 1] - amat->i[j]; i++) { 1853 sum += PetscAbsScalar(*v); 1854 v++; 1855 } 1856 v = PetscSafePointerPlusOffset(bmata, bmat->i[j]); 1857 for (i = 0; i < bmat->i[j + 1] - bmat->i[j]; i++) { 1858 sum += PetscAbsScalar(*v); 1859 v++; 1860 } 1861 if (sum > ntemp) ntemp = sum; 1862 } 1863 PetscCall(MPIU_Allreduce(&ntemp, norm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat))); 1864 PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0))); 1865 } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for two norm"); 1866 PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &amata)); 1867 PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &bmata)); 1868 } 1869 PetscFunctionReturn(PETSC_SUCCESS); 1870 } 1871 
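/*
   The norms above are assembled from the per-rank diagonal (A) and off-diagonal (B) blocks:
   NORM_FROBENIUS sums |a_ij|^2 over both blocks and reduces with MPIU_SUM before taking the
   square root, NORM_1 reduces the per-column absolute sums and then takes the largest column
   sum, NORM_INFINITY reduces the per-row absolute sums with MPIU_MAX, and NORM_2 is not
   supported for this type. A minimal usage sketch (the assembled MATMPIAIJ matrix mat is a
   hypothetical variable, not defined in this file):

     PetscReal nrm;
     PetscCall(MatNorm(mat, NORM_FROBENIUS, &nrm));
     PetscCall(PetscPrintf(PetscObjectComm((PetscObject)mat), "Frobenius norm %g\n", (double)nrm));
*/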
1872 static PetscErrorCode MatTranspose_MPIAIJ(Mat A, MatReuse reuse, Mat *matout) 1873 { 1874 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *b; 1875 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *)a->A->data, *Bloc = (Mat_SeqAIJ *)a->B->data, *sub_B_diag; 1876 PetscInt M = A->rmap->N, N = A->cmap->N, ma, na, mb, nb, row, *cols, *cols_tmp, *B_diag_ilen, i, ncol, A_diag_ncol; 1877 const PetscInt *ai, *aj, *bi, *bj, *B_diag_i; 1878 Mat B, A_diag, *B_diag; 1879 const MatScalar *pbv, *bv; 1880 1881 PetscFunctionBegin; 1882 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout)); 1883 ma = A->rmap->n; 1884 na = A->cmap->n; 1885 mb = a->B->rmap->n; 1886 nb = a->B->cmap->n; 1887 ai = Aloc->i; 1888 aj = Aloc->j; 1889 bi = Bloc->i; 1890 bj = Bloc->j; 1891 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1892 PetscInt *d_nnz, *g_nnz, *o_nnz; 1893 PetscSFNode *oloc; 1894 PETSC_UNUSED PetscSF sf; 1895 1896 PetscCall(PetscMalloc4(na, &d_nnz, na, &o_nnz, nb, &g_nnz, nb, &oloc)); 1897 /* compute d_nnz for preallocation */ 1898 PetscCall(PetscArrayzero(d_nnz, na)); 1899 for (i = 0; i < ai[ma]; i++) d_nnz[aj[i]]++; 1900 /* compute local off-diagonal contributions */ 1901 PetscCall(PetscArrayzero(g_nnz, nb)); 1902 for (i = 0; i < bi[ma]; i++) g_nnz[bj[i]]++; 1903 /* map those to global */ 1904 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1905 PetscCall(PetscSFSetGraphLayout(sf, A->cmap, nb, NULL, PETSC_USE_POINTER, a->garray)); 1906 PetscCall(PetscSFSetFromOptions(sf)); 1907 PetscCall(PetscArrayzero(o_nnz, na)); 1908 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM)); 1909 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM)); 1910 PetscCall(PetscSFDestroy(&sf)); 1911 1912 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 1913 PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M)); 1914 PetscCall(MatSetBlockSizes(B, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs))); 1915 PetscCall(MatSetType(B, ((PetscObject)A)->type_name)); 1916 PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz)); 1917 PetscCall(PetscFree4(d_nnz, o_nnz, g_nnz, oloc)); 1918 } else { 1919 B = *matout; 1920 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 1921 } 1922 1923 b = (Mat_MPIAIJ *)B->data; 1924 A_diag = a->A; 1925 B_diag = &b->A; 1926 sub_B_diag = (Mat_SeqAIJ *)(*B_diag)->data; 1927 A_diag_ncol = A_diag->cmap->N; 1928 B_diag_ilen = sub_B_diag->ilen; 1929 B_diag_i = sub_B_diag->i; 1930 1931 /* Set ilen for diagonal of B */ 1932 for (i = 0; i < A_diag_ncol; i++) B_diag_ilen[i] = B_diag_i[i + 1] - B_diag_i[i]; 1933 1934 /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done 1935 very quickly (=without using MatSetValues), because all writes are local. 
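The off-diagonal part below is instead fed to MatSetValues() one row of B at a time: local row i of B contributes to column rstart+i of the transpose, with its original global column indices (recovered through garray[]) used as the destination rows, so entries owned by other ranks are communicated during assembly.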
*/ 1936 PetscCall(MatTransposeSetPrecursor(A_diag, *B_diag)); 1937 PetscCall(MatTranspose(A_diag, MAT_REUSE_MATRIX, B_diag)); 1938 1939 /* copy over the B part */ 1940 PetscCall(PetscMalloc1(bi[mb], &cols)); 1941 PetscCall(MatSeqAIJGetArrayRead(a->B, &bv)); 1942 pbv = bv; 1943 row = A->rmap->rstart; 1944 for (i = 0; i < bi[mb]; i++) cols[i] = a->garray[bj[i]]; 1945 cols_tmp = cols; 1946 for (i = 0; i < mb; i++) { 1947 ncol = bi[i + 1] - bi[i]; 1948 PetscCall(MatSetValues(B, ncol, cols_tmp, 1, &row, pbv, INSERT_VALUES)); 1949 row++; 1950 if (pbv) pbv += ncol; 1951 if (cols_tmp) cols_tmp += ncol; 1952 } 1953 PetscCall(PetscFree(cols)); 1954 PetscCall(MatSeqAIJRestoreArrayRead(a->B, &bv)); 1955 1956 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 1957 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 1958 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1959 *matout = B; 1960 } else { 1961 PetscCall(MatHeaderMerge(A, &B)); 1962 } 1963 PetscFunctionReturn(PETSC_SUCCESS); 1964 } 1965 1966 static PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat, Vec ll, Vec rr) 1967 { 1968 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1969 Mat a = aij->A, b = aij->B; 1970 PetscInt s1, s2, s3; 1971 1972 PetscFunctionBegin; 1973 PetscCall(MatGetLocalSize(mat, &s2, &s3)); 1974 if (rr) { 1975 PetscCall(VecGetLocalSize(rr, &s1)); 1976 PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size"); 1977 /* Overlap communication with computation. */ 1978 PetscCall(VecScatterBegin(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1979 } 1980 if (ll) { 1981 PetscCall(VecGetLocalSize(ll, &s1)); 1982 PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size"); 1983 PetscUseTypeMethod(b, diagonalscale, ll, NULL); 1984 } 1985 /* scale the diagonal block */ 1986 PetscUseTypeMethod(a, diagonalscale, ll, rr); 1987 1988 if (rr) { 1989 /* Do a scatter end and then right scale the off-diagonal block */ 1990 PetscCall(VecScatterEnd(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1991 PetscUseTypeMethod(b, diagonalscale, NULL, aij->lvec); 1992 } 1993 PetscFunctionReturn(PETSC_SUCCESS); 1994 } 1995 1996 static PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 1997 { 1998 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1999 2000 PetscFunctionBegin; 2001 PetscCall(MatSetUnfactored(a->A)); 2002 PetscFunctionReturn(PETSC_SUCCESS); 2003 } 2004 2005 static PetscErrorCode MatEqual_MPIAIJ(Mat A, Mat B, PetscBool *flag) 2006 { 2007 Mat_MPIAIJ *matB = (Mat_MPIAIJ *)B->data, *matA = (Mat_MPIAIJ *)A->data; 2008 Mat a, b, c, d; 2009 PetscBool flg; 2010 2011 PetscFunctionBegin; 2012 a = matA->A; 2013 b = matA->B; 2014 c = matB->A; 2015 d = matB->B; 2016 2017 PetscCall(MatEqual(a, c, &flg)); 2018 if (flg) PetscCall(MatEqual(b, d, &flg)); 2019 PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A))); 2020 PetscFunctionReturn(PETSC_SUCCESS); 2021 } 2022 2023 static PetscErrorCode MatCopy_MPIAIJ(Mat A, Mat B, MatStructure str) 2024 { 2025 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2026 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2027 2028 PetscFunctionBegin; 2029 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
*/ 2030 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2031 /* because of the column compression in the off-processor part of the matrix a->B, 2032 the number of columns in a->B and b->B may be different, hence we cannot call 2033 the MatCopy() directly on the two parts. If need be, we can provide a more 2034 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2035 then copying the submatrices */ 2036 PetscCall(MatCopy_Basic(A, B, str)); 2037 } else { 2038 PetscCall(MatCopy(a->A, b->A, str)); 2039 PetscCall(MatCopy(a->B, b->B, str)); 2040 } 2041 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 2042 PetscFunctionReturn(PETSC_SUCCESS); 2043 } 2044 2045 /* 2046 Computes the number of nonzeros per row needed for preallocation when X and Y 2047 have different nonzero structure. 2048 */ 2049 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m, const PetscInt *xi, const PetscInt *xj, const PetscInt *xltog, const PetscInt *yi, const PetscInt *yj, const PetscInt *yltog, PetscInt *nnz) 2050 { 2051 PetscInt i, j, k, nzx, nzy; 2052 2053 PetscFunctionBegin; 2054 /* Set the number of nonzeros in the new matrix */ 2055 for (i = 0; i < m; i++) { 2056 const PetscInt *xjj = PetscSafePointerPlusOffset(xj, xi[i]), *yjj = PetscSafePointerPlusOffset(yj, yi[i]); 2057 nzx = xi[i + 1] - xi[i]; 2058 nzy = yi[i + 1] - yi[i]; 2059 nnz[i] = 0; 2060 for (j = 0, k = 0; j < nzx; j++) { /* Point in X */ 2061 for (; k < nzy && yltog[yjj[k]] < xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2062 if (k < nzy && yltog[yjj[k]] == xltog[xjj[j]]) k++; /* Skip duplicate */ 2063 nnz[i]++; 2064 } 2065 for (; k < nzy; k++) nnz[i]++; 2066 } 2067 PetscFunctionReturn(PETSC_SUCCESS); 2068 } 2069 2070 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2071 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz) 2072 { 2073 PetscInt m = Y->rmap->N; 2074 Mat_SeqAIJ *x = (Mat_SeqAIJ *)X->data; 2075 Mat_SeqAIJ *y = (Mat_SeqAIJ *)Y->data; 2076 2077 PetscFunctionBegin; 2078 PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz)); 2079 PetscFunctionReturn(PETSC_SUCCESS); 2080 } 2081 2082 static PetscErrorCode MatAXPY_MPIAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str) 2083 { 2084 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data, *yy = (Mat_MPIAIJ *)Y->data; 2085 2086 PetscFunctionBegin; 2087 if (str == SAME_NONZERO_PATTERN) { 2088 PetscCall(MatAXPY(yy->A, a, xx->A, str)); 2089 PetscCall(MatAXPY(yy->B, a, xx->B, str)); 2090 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2091 PetscCall(MatAXPY_Basic(Y, a, X, str)); 2092 } else { 2093 Mat B; 2094 PetscInt *nnz_d, *nnz_o; 2095 2096 PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d)); 2097 PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o)); 2098 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B)); 2099 PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name)); 2100 PetscCall(MatSetLayouts(B, Y->rmap, Y->cmap)); 2101 PetscCall(MatSetType(B, ((PetscObject)Y)->type_name)); 2102 PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A, xx->A, nnz_d)); 2103 PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o)); 2104 PetscCall(MatMPIAIJSetPreallocation(B, 0, nnz_d, 0, nnz_o)); 2105 PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str)); 2106 PetscCall(MatHeaderMerge(Y, &B)); 2107 PetscCall(PetscFree(nnz_d)); 
2108 PetscCall(PetscFree(nnz_o)); 2109 } 2110 PetscFunctionReturn(PETSC_SUCCESS); 2111 } 2112 2113 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat); 2114 2115 static PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2116 { 2117 PetscFunctionBegin; 2118 if (PetscDefined(USE_COMPLEX)) { 2119 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2120 2121 PetscCall(MatConjugate_SeqAIJ(aij->A)); 2122 PetscCall(MatConjugate_SeqAIJ(aij->B)); 2123 } 2124 PetscFunctionReturn(PETSC_SUCCESS); 2125 } 2126 2127 static PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2128 { 2129 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2130 2131 PetscFunctionBegin; 2132 PetscCall(MatRealPart(a->A)); 2133 PetscCall(MatRealPart(a->B)); 2134 PetscFunctionReturn(PETSC_SUCCESS); 2135 } 2136 2137 static PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2138 { 2139 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2140 2141 PetscFunctionBegin; 2142 PetscCall(MatImaginaryPart(a->A)); 2143 PetscCall(MatImaginaryPart(a->B)); 2144 PetscFunctionReturn(PETSC_SUCCESS); 2145 } 2146 2147 static PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2148 { 2149 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2150 PetscInt i, *idxb = NULL, m = A->rmap->n; 2151 PetscScalar *va, *vv; 2152 Vec vB, vA; 2153 const PetscScalar *vb; 2154 2155 PetscFunctionBegin; 2156 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vA)); 2157 PetscCall(MatGetRowMaxAbs(a->A, vA, idx)); 2158 2159 PetscCall(VecGetArrayWrite(vA, &va)); 2160 if (idx) { 2161 for (i = 0; i < m; i++) { 2162 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2163 } 2164 } 2165 2166 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vB)); 2167 PetscCall(PetscMalloc1(m, &idxb)); 2168 PetscCall(MatGetRowMaxAbs(a->B, vB, idxb)); 2169 2170 PetscCall(VecGetArrayWrite(v, &vv)); 2171 PetscCall(VecGetArrayRead(vB, &vb)); 2172 for (i = 0; i < m; i++) { 2173 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2174 vv[i] = vb[i]; 2175 if (idx) idx[i] = a->garray[idxb[i]]; 2176 } else { 2177 vv[i] = va[i]; 2178 if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) idx[i] = a->garray[idxb[i]]; 2179 } 2180 } 2181 PetscCall(VecRestoreArrayWrite(vA, &vv)); 2182 PetscCall(VecRestoreArrayWrite(vA, &va)); 2183 PetscCall(VecRestoreArrayRead(vB, &vb)); 2184 PetscCall(PetscFree(idxb)); 2185 PetscCall(VecDestroy(&vA)); 2186 PetscCall(VecDestroy(&vB)); 2187 PetscFunctionReturn(PETSC_SUCCESS); 2188 } 2189 2190 static PetscErrorCode MatGetRowSumAbs_MPIAIJ(Mat A, Vec v) 2191 { 2192 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2193 PetscInt m = A->rmap->n; 2194 Vec vB, vA; 2195 2196 PetscFunctionBegin; 2197 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vA)); 2198 PetscCall(MatGetRowSumAbs(a->A, vA)); 2199 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vB)); 2200 PetscCall(MatGetRowSumAbs(a->B, vB)); 2201 PetscCall(VecAXPY(vA, 1.0, vB)); 2202 PetscCall(VecDestroy(&vB)); 2203 PetscCall(VecCopy(vA, v)); 2204 PetscCall(VecDestroy(&vA)); 2205 PetscFunctionReturn(PETSC_SUCCESS); 2206 } 2207 2208 static PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2209 { 2210 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 2211 PetscInt m = A->rmap->n, n = A->cmap->n; 2212 PetscInt cstart = A->cmap->rstart, cend = A->cmap->rend; 2213 PetscInt *cmap = mat->garray; 2214 PetscInt *diagIdx, *offdiagIdx; 2215 Vec diagV, offdiagV; 2216 PetscScalar *a, *diagA, *offdiagA; 2217 const PetscScalar *ba, *bav; 2218 PetscInt r, j, col, ncols, *bi, *bj; 2219 Mat B = mat->B; 2220 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 2221 2222 
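/*
     Row-wise minimum in absolute value: for each local row take the smaller of the minimum over
     the diagonal block A and the minimum over the off-diagonal block B, where B's implicit zeros
     count as entries (B stores compressed columns, so a row with fewer than N-n stored entries
     contains an implicit 0.0 whose global column index is the first "hole" in cmap[] for that row).
  */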
PetscFunctionBegin; 2223 /* When a process holds entire A and other processes have no entry */ 2224 if (A->cmap->N == n) { 2225 PetscCall(VecGetArrayWrite(v, &diagA)); 2226 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV)); 2227 PetscCall(MatGetRowMinAbs(mat->A, diagV, idx)); 2228 PetscCall(VecDestroy(&diagV)); 2229 PetscCall(VecRestoreArrayWrite(v, &diagA)); 2230 PetscFunctionReturn(PETSC_SUCCESS); 2231 } else if (n == 0) { 2232 if (m) { 2233 PetscCall(VecGetArrayWrite(v, &a)); 2234 for (r = 0; r < m; r++) { 2235 a[r] = 0.0; 2236 if (idx) idx[r] = -1; 2237 } 2238 PetscCall(VecRestoreArrayWrite(v, &a)); 2239 } 2240 PetscFunctionReturn(PETSC_SUCCESS); 2241 } 2242 2243 PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx)); 2244 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2245 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2246 PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx)); 2247 2248 /* Get offdiagIdx[] for implicit 0.0 */ 2249 PetscCall(MatSeqAIJGetArrayRead(B, &bav)); 2250 ba = bav; 2251 bi = b->i; 2252 bj = b->j; 2253 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2254 for (r = 0; r < m; r++) { 2255 ncols = bi[r + 1] - bi[r]; 2256 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2257 offdiagA[r] = *ba; 2258 offdiagIdx[r] = cmap[0]; 2259 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2260 offdiagA[r] = 0.0; 2261 2262 /* Find first hole in the cmap */ 2263 for (j = 0; j < ncols; j++) { 2264 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2265 if (col > j && j < cstart) { 2266 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2267 break; 2268 } else if (col > j + n && j >= cstart) { 2269 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2270 break; 2271 } 2272 } 2273 if (j == ncols && ncols < A->cmap->N - n) { 2274 /* a hole is outside compressed Bcols */ 2275 if (ncols == 0) { 2276 if (cstart) { 2277 offdiagIdx[r] = 0; 2278 } else offdiagIdx[r] = cend; 2279 } else { /* ncols > 0 */ 2280 offdiagIdx[r] = cmap[ncols - 1] + 1; 2281 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2282 } 2283 } 2284 } 2285 2286 for (j = 0; j < ncols; j++) { 2287 if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) { 2288 offdiagA[r] = *ba; 2289 offdiagIdx[r] = cmap[*bj]; 2290 } 2291 ba++; 2292 bj++; 2293 } 2294 } 2295 2296 PetscCall(VecGetArrayWrite(v, &a)); 2297 PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA)); 2298 for (r = 0; r < m; ++r) { 2299 if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) { 2300 a[r] = diagA[r]; 2301 if (idx) idx[r] = cstart + diagIdx[r]; 2302 } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) { 2303 a[r] = diagA[r]; 2304 if (idx) { 2305 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2306 idx[r] = cstart + diagIdx[r]; 2307 } else idx[r] = offdiagIdx[r]; 2308 } 2309 } else { 2310 a[r] = offdiagA[r]; 2311 if (idx) idx[r] = offdiagIdx[r]; 2312 } 2313 } 2314 PetscCall(MatSeqAIJRestoreArrayRead(B, &bav)); 2315 PetscCall(VecRestoreArrayWrite(v, &a)); 2316 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA)); 2317 PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 2318 PetscCall(VecDestroy(&diagV)); 2319 PetscCall(VecDestroy(&offdiagV)); 2320 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2321 PetscFunctionReturn(PETSC_SUCCESS); 2322 } 2323 2324 static PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2325 { 2326 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 2327 PetscInt m = A->rmap->n, n = 
A->cmap->n; 2328 PetscInt cstart = A->cmap->rstart, cend = A->cmap->rend; 2329 PetscInt *cmap = mat->garray; 2330 PetscInt *diagIdx, *offdiagIdx; 2331 Vec diagV, offdiagV; 2332 PetscScalar *a, *diagA, *offdiagA; 2333 const PetscScalar *ba, *bav; 2334 PetscInt r, j, col, ncols, *bi, *bj; 2335 Mat B = mat->B; 2336 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 2337 2338 PetscFunctionBegin; 2339 /* When a process holds entire A and other processes have no entry */ 2340 if (A->cmap->N == n) { 2341 PetscCall(VecGetArrayWrite(v, &diagA)); 2342 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV)); 2343 PetscCall(MatGetRowMin(mat->A, diagV, idx)); 2344 PetscCall(VecDestroy(&diagV)); 2345 PetscCall(VecRestoreArrayWrite(v, &diagA)); 2346 PetscFunctionReturn(PETSC_SUCCESS); 2347 } else if (n == 0) { 2348 if (m) { 2349 PetscCall(VecGetArrayWrite(v, &a)); 2350 for (r = 0; r < m; r++) { 2351 a[r] = PETSC_MAX_REAL; 2352 if (idx) idx[r] = -1; 2353 } 2354 PetscCall(VecRestoreArrayWrite(v, &a)); 2355 } 2356 PetscFunctionReturn(PETSC_SUCCESS); 2357 } 2358 2359 PetscCall(PetscCalloc2(m, &diagIdx, m, &offdiagIdx)); 2360 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2361 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2362 PetscCall(MatGetRowMin(mat->A, diagV, diagIdx)); 2363 2364 /* Get offdiagIdx[] for implicit 0.0 */ 2365 PetscCall(MatSeqAIJGetArrayRead(B, &bav)); 2366 ba = bav; 2367 bi = b->i; 2368 bj = b->j; 2369 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2370 for (r = 0; r < m; r++) { 2371 ncols = bi[r + 1] - bi[r]; 2372 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2373 offdiagA[r] = *ba; 2374 offdiagIdx[r] = cmap[0]; 2375 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2376 offdiagA[r] = 0.0; 2377 2378 /* Find first hole in the cmap */ 2379 for (j = 0; j < ncols; j++) { 2380 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2381 if (col > j && j < cstart) { 2382 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2383 break; 2384 } else if (col > j + n && j >= cstart) { 2385 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2386 break; 2387 } 2388 } 2389 if (j == ncols && ncols < A->cmap->N - n) { 2390 /* a hole is outside compressed Bcols */ 2391 if (ncols == 0) { 2392 if (cstart) { 2393 offdiagIdx[r] = 0; 2394 } else offdiagIdx[r] = cend; 2395 } else { /* ncols > 0 */ 2396 offdiagIdx[r] = cmap[ncols - 1] + 1; 2397 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2398 } 2399 } 2400 } 2401 2402 for (j = 0; j < ncols; j++) { 2403 if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) { 2404 offdiagA[r] = *ba; 2405 offdiagIdx[r] = cmap[*bj]; 2406 } 2407 ba++; 2408 bj++; 2409 } 2410 } 2411 2412 PetscCall(VecGetArrayWrite(v, &a)); 2413 PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA)); 2414 for (r = 0; r < m; ++r) { 2415 if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) { 2416 a[r] = diagA[r]; 2417 if (idx) idx[r] = cstart + diagIdx[r]; 2418 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2419 a[r] = diagA[r]; 2420 if (idx) { 2421 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2422 idx[r] = cstart + diagIdx[r]; 2423 } else idx[r] = offdiagIdx[r]; 2424 } 2425 } else { 2426 a[r] = offdiagA[r]; 2427 if (idx) idx[r] = offdiagIdx[r]; 2428 } 2429 } 2430 PetscCall(MatSeqAIJRestoreArrayRead(B, &bav)); 2431 PetscCall(VecRestoreArrayWrite(v, &a)); 2432 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA)); 2433 PetscCall(VecRestoreArrayWrite(offdiagV, 
&offdiagA)); 2434 PetscCall(VecDestroy(&diagV)); 2435 PetscCall(VecDestroy(&offdiagV)); 2436 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2437 PetscFunctionReturn(PETSC_SUCCESS); 2438 } 2439 2440 static PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2441 { 2442 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 2443 PetscInt m = A->rmap->n, n = A->cmap->n; 2444 PetscInt cstart = A->cmap->rstart, cend = A->cmap->rend; 2445 PetscInt *cmap = mat->garray; 2446 PetscInt *diagIdx, *offdiagIdx; 2447 Vec diagV, offdiagV; 2448 PetscScalar *a, *diagA, *offdiagA; 2449 const PetscScalar *ba, *bav; 2450 PetscInt r, j, col, ncols, *bi, *bj; 2451 Mat B = mat->B; 2452 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 2453 2454 PetscFunctionBegin; 2455 /* When a process holds entire A and other processes have no entry */ 2456 if (A->cmap->N == n) { 2457 PetscCall(VecGetArrayWrite(v, &diagA)); 2458 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV)); 2459 PetscCall(MatGetRowMax(mat->A, diagV, idx)); 2460 PetscCall(VecDestroy(&diagV)); 2461 PetscCall(VecRestoreArrayWrite(v, &diagA)); 2462 PetscFunctionReturn(PETSC_SUCCESS); 2463 } else if (n == 0) { 2464 if (m) { 2465 PetscCall(VecGetArrayWrite(v, &a)); 2466 for (r = 0; r < m; r++) { 2467 a[r] = PETSC_MIN_REAL; 2468 if (idx) idx[r] = -1; 2469 } 2470 PetscCall(VecRestoreArrayWrite(v, &a)); 2471 } 2472 PetscFunctionReturn(PETSC_SUCCESS); 2473 } 2474 2475 PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx)); 2476 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2477 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2478 PetscCall(MatGetRowMax(mat->A, diagV, diagIdx)); 2479 2480 /* Get offdiagIdx[] for implicit 0.0 */ 2481 PetscCall(MatSeqAIJGetArrayRead(B, &bav)); 2482 ba = bav; 2483 bi = b->i; 2484 bj = b->j; 2485 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2486 for (r = 0; r < m; r++) { 2487 ncols = bi[r + 1] - bi[r]; 2488 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2489 offdiagA[r] = *ba; 2490 offdiagIdx[r] = cmap[0]; 2491 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2492 offdiagA[r] = 0.0; 2493 2494 /* Find first hole in the cmap */ 2495 for (j = 0; j < ncols; j++) { 2496 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2497 if (col > j && j < cstart) { 2498 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2499 break; 2500 } else if (col > j + n && j >= cstart) { 2501 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2502 break; 2503 } 2504 } 2505 if (j == ncols && ncols < A->cmap->N - n) { 2506 /* a hole is outside compressed Bcols */ 2507 if (ncols == 0) { 2508 if (cstart) { 2509 offdiagIdx[r] = 0; 2510 } else offdiagIdx[r] = cend; 2511 } else { /* ncols > 0 */ 2512 offdiagIdx[r] = cmap[ncols - 1] + 1; 2513 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2514 } 2515 } 2516 } 2517 2518 for (j = 0; j < ncols; j++) { 2519 if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) { 2520 offdiagA[r] = *ba; 2521 offdiagIdx[r] = cmap[*bj]; 2522 } 2523 ba++; 2524 bj++; 2525 } 2526 } 2527 2528 PetscCall(VecGetArrayWrite(v, &a)); 2529 PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA)); 2530 for (r = 0; r < m; ++r) { 2531 if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) { 2532 a[r] = diagA[r]; 2533 if (idx) idx[r] = cstart + diagIdx[r]; 2534 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2535 a[r] = diagA[r]; 2536 if (idx) { 2537 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2538 idx[r] = cstart + 
diagIdx[r]; 2539 } else idx[r] = offdiagIdx[r]; 2540 } 2541 } else { 2542 a[r] = offdiagA[r]; 2543 if (idx) idx[r] = offdiagIdx[r]; 2544 } 2545 } 2546 PetscCall(MatSeqAIJRestoreArrayRead(B, &bav)); 2547 PetscCall(VecRestoreArrayWrite(v, &a)); 2548 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA)); 2549 PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 2550 PetscCall(VecDestroy(&diagV)); 2551 PetscCall(VecDestroy(&offdiagV)); 2552 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2553 PetscFunctionReturn(PETSC_SUCCESS); 2554 } 2555 2556 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat, Mat *newmat) 2557 { 2558 Mat *dummy; 2559 2560 PetscFunctionBegin; 2561 PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat, MAT_DO_NOT_GET_VALUES, MAT_INITIAL_MATRIX, &dummy)); 2562 *newmat = *dummy; 2563 PetscCall(PetscFree(dummy)); 2564 PetscFunctionReturn(PETSC_SUCCESS); 2565 } 2566 2567 static PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A, const PetscScalar **values) 2568 { 2569 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2570 2571 PetscFunctionBegin; 2572 PetscCall(MatInvertBlockDiagonal(a->A, values)); 2573 A->factorerrortype = a->A->factorerrortype; 2574 PetscFunctionReturn(PETSC_SUCCESS); 2575 } 2576 2577 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x, PetscRandom rctx) 2578 { 2579 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)x->data; 2580 2581 PetscFunctionBegin; 2582 PetscCheck(x->assembled || x->preallocated, PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed"); 2583 PetscCall(MatSetRandom(aij->A, rctx)); 2584 if (x->assembled) { 2585 PetscCall(MatSetRandom(aij->B, rctx)); 2586 } else { 2587 PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B, x->cmap->rstart, x->cmap->rend, rctx)); 2588 } 2589 PetscCall(MatAssemblyBegin(x, MAT_FINAL_ASSEMBLY)); 2590 PetscCall(MatAssemblyEnd(x, MAT_FINAL_ASSEMBLY)); 2591 PetscFunctionReturn(PETSC_SUCCESS); 2592 } 2593 2594 static PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A, PetscBool sc) 2595 { 2596 PetscFunctionBegin; 2597 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2598 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2599 PetscFunctionReturn(PETSC_SUCCESS); 2600 } 2601 2602 /*@ 2603 MatMPIAIJGetNumberNonzeros - gets the number of nonzeros in the matrix on this MPI rank 2604 2605 Not Collective 2606 2607 Input Parameter: 2608 . A - the matrix 2609 2610 Output Parameter: 2611 . 
nz - the number of nonzeros 2612 2613 Level: advanced 2614 2615 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ` 2616 @*/ 2617 PetscErrorCode MatMPIAIJGetNumberNonzeros(Mat A, PetscCount *nz) 2618 { 2619 Mat_MPIAIJ *maij = (Mat_MPIAIJ *)A->data; 2620 Mat_SeqAIJ *aaij = (Mat_SeqAIJ *)maij->A->data, *baij = (Mat_SeqAIJ *)maij->B->data; 2621 PetscBool isaij; 2622 2623 PetscFunctionBegin; 2624 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIAIJ, &isaij)); 2625 PetscCheck(isaij, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)A)->type_name); 2626 *nz = aaij->i[A->rmap->n] + baij->i[A->rmap->n]; 2627 PetscFunctionReturn(PETSC_SUCCESS); 2628 } 2629 2630 /*@ 2631 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2632 2633 Collective 2634 2635 Input Parameters: 2636 + A - the matrix 2637 - sc - `PETSC_TRUE` indicates use the scalable algorithm (default is not to use the scalable algorithm) 2638 2639 Level: advanced 2640 2641 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ` 2642 @*/ 2643 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A, PetscBool sc) 2644 { 2645 PetscFunctionBegin; 2646 PetscTryMethod(A, "MatMPIAIJSetUseScalableIncreaseOverlap_C", (Mat, PetscBool), (A, sc)); 2647 PetscFunctionReturn(PETSC_SUCCESS); 2648 } 2649 2650 PetscErrorCode MatSetFromOptions_MPIAIJ(Mat A, PetscOptionItems *PetscOptionsObject) 2651 { 2652 PetscBool sc = PETSC_FALSE, flg; 2653 2654 PetscFunctionBegin; 2655 PetscOptionsHeadBegin(PetscOptionsObject, "MPIAIJ options"); 2656 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2657 PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable", "Use a scalable algorithm to compute the overlap", "MatIncreaseOverlap", sc, &sc, &flg)); 2658 if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, sc)); 2659 PetscOptionsHeadEnd(); 2660 PetscFunctionReturn(PETSC_SUCCESS); 2661 } 2662 2663 static PetscErrorCode MatShift_MPIAIJ(Mat Y, PetscScalar a) 2664 { 2665 Mat_MPIAIJ *maij = (Mat_MPIAIJ *)Y->data; 2666 Mat_SeqAIJ *aij = (Mat_SeqAIJ *)maij->A->data; 2667 2668 PetscFunctionBegin; 2669 if (!Y->preallocated) { 2670 PetscCall(MatMPIAIJSetPreallocation(Y, 1, NULL, 0, NULL)); 2671 } else if (!aij->nz) { /* It does not matter if diagonals of Y only partially lie in maij->A. We just need an estimated preallocation. 
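Requesting a single nonzero per local row of the diagonal block below is enough for the shift; the caller's nonew setting is saved and restored so the error-on-new-nonzero behavior is unchanged.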
*/ 2672 PetscInt nonew = aij->nonew; 2673 PetscCall(MatSeqAIJSetPreallocation(maij->A, 1, NULL)); 2674 aij->nonew = nonew; 2675 } 2676 PetscCall(MatShift_Basic(Y, a)); 2677 PetscFunctionReturn(PETSC_SUCCESS); 2678 } 2679 2680 static PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A, PetscBool *missing, PetscInt *d) 2681 { 2682 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2683 2684 PetscFunctionBegin; 2685 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices"); 2686 PetscCall(MatMissingDiagonal(a->A, missing, d)); 2687 if (d) { 2688 PetscInt rstart; 2689 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 2690 *d += rstart; 2691 } 2692 PetscFunctionReturn(PETSC_SUCCESS); 2693 } 2694 2695 static PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A, PetscInt nblocks, const PetscInt *bsizes, PetscScalar *diag) 2696 { 2697 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2698 2699 PetscFunctionBegin; 2700 PetscCall(MatInvertVariableBlockDiagonal(a->A, nblocks, bsizes, diag)); 2701 PetscFunctionReturn(PETSC_SUCCESS); 2702 } 2703 2704 static PetscErrorCode MatEliminateZeros_MPIAIJ(Mat A, PetscBool keep) 2705 { 2706 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2707 2708 PetscFunctionBegin; 2709 PetscCall(MatEliminateZeros_SeqAIJ(a->A, keep)); // possibly keep zero diagonal coefficients 2710 PetscCall(MatEliminateZeros_SeqAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients 2711 PetscFunctionReturn(PETSC_SUCCESS); 2712 } 2713 2714 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2715 MatGetRow_MPIAIJ, 2716 MatRestoreRow_MPIAIJ, 2717 MatMult_MPIAIJ, 2718 /* 4*/ MatMultAdd_MPIAIJ, 2719 MatMultTranspose_MPIAIJ, 2720 MatMultTransposeAdd_MPIAIJ, 2721 NULL, 2722 NULL, 2723 NULL, 2724 /*10*/ NULL, 2725 NULL, 2726 NULL, 2727 MatSOR_MPIAIJ, 2728 MatTranspose_MPIAIJ, 2729 /*15*/ MatGetInfo_MPIAIJ, 2730 MatEqual_MPIAIJ, 2731 MatGetDiagonal_MPIAIJ, 2732 MatDiagonalScale_MPIAIJ, 2733 MatNorm_MPIAIJ, 2734 /*20*/ MatAssemblyBegin_MPIAIJ, 2735 MatAssemblyEnd_MPIAIJ, 2736 MatSetOption_MPIAIJ, 2737 MatZeroEntries_MPIAIJ, 2738 /*24*/ MatZeroRows_MPIAIJ, 2739 NULL, 2740 NULL, 2741 NULL, 2742 NULL, 2743 /*29*/ MatSetUp_MPI_Hash, 2744 NULL, 2745 NULL, 2746 MatGetDiagonalBlock_MPIAIJ, 2747 NULL, 2748 /*34*/ MatDuplicate_MPIAIJ, 2749 NULL, 2750 NULL, 2751 NULL, 2752 NULL, 2753 /*39*/ MatAXPY_MPIAIJ, 2754 MatCreateSubMatrices_MPIAIJ, 2755 MatIncreaseOverlap_MPIAIJ, 2756 MatGetValues_MPIAIJ, 2757 MatCopy_MPIAIJ, 2758 /*44*/ MatGetRowMax_MPIAIJ, 2759 MatScale_MPIAIJ, 2760 MatShift_MPIAIJ, 2761 MatDiagonalSet_MPIAIJ, 2762 MatZeroRowsColumns_MPIAIJ, 2763 /*49*/ MatSetRandom_MPIAIJ, 2764 MatGetRowIJ_MPIAIJ, 2765 MatRestoreRowIJ_MPIAIJ, 2766 NULL, 2767 NULL, 2768 /*54*/ MatFDColoringCreate_MPIXAIJ, 2769 NULL, 2770 MatSetUnfactored_MPIAIJ, 2771 MatPermute_MPIAIJ, 2772 NULL, 2773 /*59*/ MatCreateSubMatrix_MPIAIJ, 2774 MatDestroy_MPIAIJ, 2775 MatView_MPIAIJ, 2776 NULL, 2777 NULL, 2778 /*64*/ NULL, 2779 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2780 NULL, 2781 NULL, 2782 NULL, 2783 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2784 MatGetRowMinAbs_MPIAIJ, 2785 NULL, 2786 NULL, 2787 NULL, 2788 NULL, 2789 /*75*/ MatFDColoringApply_AIJ, 2790 MatSetFromOptions_MPIAIJ, 2791 NULL, 2792 NULL, 2793 MatFindZeroDiagonals_MPIAIJ, 2794 /*80*/ NULL, 2795 NULL, 2796 NULL, 2797 /*83*/ MatLoad_MPIAIJ, 2798 NULL, 2799 NULL, 2800 NULL, 2801 NULL, 2802 NULL, 2803 /*89*/ NULL, 2804 NULL, 2805 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2806 NULL, 2807 NULL, 2808 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2809 NULL, 2810 NULL, 2811 
NULL, 2812 MatBindToCPU_MPIAIJ, 2813 /*99*/ MatProductSetFromOptions_MPIAIJ, 2814 NULL, 2815 NULL, 2816 MatConjugate_MPIAIJ, 2817 NULL, 2818 /*104*/ MatSetValuesRow_MPIAIJ, 2819 MatRealPart_MPIAIJ, 2820 MatImaginaryPart_MPIAIJ, 2821 NULL, 2822 NULL, 2823 /*109*/ NULL, 2824 NULL, 2825 MatGetRowMin_MPIAIJ, 2826 NULL, 2827 MatMissingDiagonal_MPIAIJ, 2828 /*114*/ MatGetSeqNonzeroStructure_MPIAIJ, 2829 NULL, 2830 MatGetGhosts_MPIAIJ, 2831 NULL, 2832 NULL, 2833 /*119*/ MatMultDiagonalBlock_MPIAIJ, 2834 NULL, 2835 NULL, 2836 NULL, 2837 MatGetMultiProcBlock_MPIAIJ, 2838 /*124*/ MatFindNonzeroRows_MPIAIJ, 2839 MatGetColumnReductions_MPIAIJ, 2840 MatInvertBlockDiagonal_MPIAIJ, 2841 MatInvertVariableBlockDiagonal_MPIAIJ, 2842 MatCreateSubMatricesMPI_MPIAIJ, 2843 /*129*/ NULL, 2844 NULL, 2845 NULL, 2846 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2847 NULL, 2848 /*134*/ NULL, 2849 NULL, 2850 NULL, 2851 NULL, 2852 NULL, 2853 /*139*/ MatSetBlockSizes_MPIAIJ, 2854 NULL, 2855 NULL, 2856 MatFDColoringSetUp_MPIXAIJ, 2857 MatFindOffBlockDiagonalEntries_MPIAIJ, 2858 MatCreateMPIMatConcatenateSeqMat_MPIAIJ, 2859 /*145*/ NULL, 2860 NULL, 2861 NULL, 2862 MatCreateGraph_Simple_AIJ, 2863 NULL, 2864 /*150*/ NULL, 2865 MatEliminateZeros_MPIAIJ, 2866 MatGetRowSumAbs_MPIAIJ}; 2867 2868 static PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2869 { 2870 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2871 2872 PetscFunctionBegin; 2873 PetscCall(MatStoreValues(aij->A)); 2874 PetscCall(MatStoreValues(aij->B)); 2875 PetscFunctionReturn(PETSC_SUCCESS); 2876 } 2877 2878 static PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2879 { 2880 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2881 2882 PetscFunctionBegin; 2883 PetscCall(MatRetrieveValues(aij->A)); 2884 PetscCall(MatRetrieveValues(aij->B)); 2885 PetscFunctionReturn(PETSC_SUCCESS); 2886 } 2887 2888 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[]) 2889 { 2890 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2891 PetscMPIInt size; 2892 2893 PetscFunctionBegin; 2894 if (B->hash_active) { 2895 B->ops[0] = b->cops; 2896 B->hash_active = PETSC_FALSE; 2897 } 2898 PetscCall(PetscLayoutSetUp(B->rmap)); 2899 PetscCall(PetscLayoutSetUp(B->cmap)); 2900 2901 #if defined(PETSC_USE_CTABLE) 2902 PetscCall(PetscHMapIDestroy(&b->colmap)); 2903 #else 2904 PetscCall(PetscFree(b->colmap)); 2905 #endif 2906 PetscCall(PetscFree(b->garray)); 2907 PetscCall(VecDestroy(&b->lvec)); 2908 PetscCall(VecScatterDestroy(&b->Mvctx)); 2909 2910 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 2911 2912 MatSeqXAIJGetOptions_Private(b->B); 2913 PetscCall(MatDestroy(&b->B)); 2914 PetscCall(MatCreate(PETSC_COMM_SELF, &b->B)); 2915 PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? 
B->cmap->N : 0)); 2916 PetscCall(MatSetBlockSizesFromMats(b->B, B, B)); 2917 PetscCall(MatSetType(b->B, MATSEQAIJ)); 2918 MatSeqXAIJRestoreOptions_Private(b->B); 2919 2920 MatSeqXAIJGetOptions_Private(b->A); 2921 PetscCall(MatDestroy(&b->A)); 2922 PetscCall(MatCreate(PETSC_COMM_SELF, &b->A)); 2923 PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n)); 2924 PetscCall(MatSetBlockSizesFromMats(b->A, B, B)); 2925 PetscCall(MatSetType(b->A, MATSEQAIJ)); 2926 MatSeqXAIJRestoreOptions_Private(b->A); 2927 2928 PetscCall(MatSeqAIJSetPreallocation(b->A, d_nz, d_nnz)); 2929 PetscCall(MatSeqAIJSetPreallocation(b->B, o_nz, o_nnz)); 2930 B->preallocated = PETSC_TRUE; 2931 B->was_assembled = PETSC_FALSE; 2932 B->assembled = PETSC_FALSE; 2933 PetscFunctionReturn(PETSC_SUCCESS); 2934 } 2935 2936 static PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) 2937 { 2938 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2939 2940 PetscFunctionBegin; 2941 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 2942 PetscCall(PetscLayoutSetUp(B->rmap)); 2943 PetscCall(PetscLayoutSetUp(B->cmap)); 2944 2945 #if defined(PETSC_USE_CTABLE) 2946 PetscCall(PetscHMapIDestroy(&b->colmap)); 2947 #else 2948 PetscCall(PetscFree(b->colmap)); 2949 #endif 2950 PetscCall(PetscFree(b->garray)); 2951 PetscCall(VecDestroy(&b->lvec)); 2952 PetscCall(VecScatterDestroy(&b->Mvctx)); 2953 2954 PetscCall(MatResetPreallocation(b->A)); 2955 PetscCall(MatResetPreallocation(b->B)); 2956 B->preallocated = PETSC_TRUE; 2957 B->was_assembled = PETSC_FALSE; 2958 B->assembled = PETSC_FALSE; 2959 PetscFunctionReturn(PETSC_SUCCESS); 2960 } 2961 2962 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat) 2963 { 2964 Mat mat; 2965 Mat_MPIAIJ *a, *oldmat = (Mat_MPIAIJ *)matin->data; 2966 2967 PetscFunctionBegin; 2968 *newmat = NULL; 2969 PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat)); 2970 PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N)); 2971 PetscCall(MatSetBlockSizesFromMats(mat, matin, matin)); 2972 PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name)); 2973 a = (Mat_MPIAIJ *)mat->data; 2974 2975 mat->factortype = matin->factortype; 2976 mat->assembled = matin->assembled; 2977 mat->insertmode = NOT_SET_VALUES; 2978 2979 a->size = oldmat->size; 2980 a->rank = oldmat->rank; 2981 a->donotstash = oldmat->donotstash; 2982 a->roworiented = oldmat->roworiented; 2983 a->rowindices = NULL; 2984 a->rowvalues = NULL; 2985 a->getrowactive = PETSC_FALSE; 2986 2987 PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap)); 2988 PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap)); 2989 if (matin->hash_active) { 2990 PetscCall(MatSetUp(mat)); 2991 } else { 2992 mat->preallocated = matin->preallocated; 2993 if (oldmat->colmap) { 2994 #if defined(PETSC_USE_CTABLE) 2995 PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap)); 2996 #else 2997 PetscCall(PetscMalloc1(mat->cmap->N, &a->colmap)); 2998 PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, mat->cmap->N)); 2999 #endif 3000 } else a->colmap = NULL; 3001 if (oldmat->garray) { 3002 PetscInt len; 3003 len = oldmat->B->cmap->n; 3004 PetscCall(PetscMalloc1(len + 1, &a->garray)); 3005 if (len) PetscCall(PetscArraycpy(a->garray, oldmat->garray, len)); 3006 } else a->garray = NULL; 3007 3008 /* It may happen MatDuplicate is called with a non-assembled matrix 3009 In fact, MatDuplicate only requires the matrix to be preallocated 3010 This may happen inside a DMCreateMatrix_Shell */ 3011 if (oldmat->lvec) 
PetscCall(VecDuplicate(oldmat->lvec, &a->lvec)); 3012 if (oldmat->Mvctx) PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx)); 3013 PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A)); 3014 PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B)); 3015 } 3016 PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist)); 3017 *newmat = mat; 3018 PetscFunctionReturn(PETSC_SUCCESS); 3019 } 3020 3021 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3022 { 3023 PetscBool isbinary, ishdf5; 3024 3025 PetscFunctionBegin; 3026 PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1); 3027 PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2); 3028 /* force binary viewer to load .info file if it has not yet done so */ 3029 PetscCall(PetscViewerSetUp(viewer)); 3030 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 3031 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5)); 3032 if (isbinary) { 3033 PetscCall(MatLoad_MPIAIJ_Binary(newMat, viewer)); 3034 } else if (ishdf5) { 3035 #if defined(PETSC_HAVE_HDF5) 3036 PetscCall(MatLoad_AIJ_HDF5(newMat, viewer)); 3037 #else 3038 SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5"); 3039 #endif 3040 } else { 3041 SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name); 3042 } 3043 PetscFunctionReturn(PETSC_SUCCESS); 3044 } 3045 3046 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer) 3047 { 3048 PetscInt header[4], M, N, m, nz, rows, cols, sum, i; 3049 PetscInt *rowidxs, *colidxs; 3050 PetscScalar *matvals; 3051 3052 PetscFunctionBegin; 3053 PetscCall(PetscViewerSetUp(viewer)); 3054 3055 /* read in matrix header */ 3056 PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT)); 3057 PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file"); 3058 M = header[1]; 3059 N = header[2]; 3060 nz = header[3]; 3061 PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M); 3062 PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N); 3063 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIAIJ"); 3064 3065 /* set block sizes from the viewer's .info file */ 3066 PetscCall(MatLoad_Binary_BlockSizes(mat, viewer)); 3067 /* set global sizes if not set already */ 3068 if (mat->rmap->N < 0) mat->rmap->N = M; 3069 if (mat->cmap->N < 0) mat->cmap->N = N; 3070 PetscCall(PetscLayoutSetUp(mat->rmap)); 3071 PetscCall(PetscLayoutSetUp(mat->cmap)); 3072 3073 /* check if the matrix sizes are correct */ 3074 PetscCall(MatGetSize(mat, &rows, &cols)); 3075 PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols); 3076 3077 /* read in row lengths and build row indices */ 3078 PetscCall(MatGetLocalSize(mat, &m, NULL)); 3079 PetscCall(PetscMalloc1(m + 1, &rowidxs)); 3080 PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, 
PETSC_INT)); 3081 rowidxs[0] = 0; 3082 for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i]; 3083 if (nz != PETSC_MAX_INT) { 3084 PetscCall(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer))); 3085 PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum); 3086 } 3087 3088 /* read in column indices and matrix values */ 3089 PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals)); 3090 PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 3091 PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 3092 /* store matrix indices and values */ 3093 PetscCall(MatMPIAIJSetPreallocationCSR(mat, rowidxs, colidxs, matvals)); 3094 PetscCall(PetscFree(rowidxs)); 3095 PetscCall(PetscFree2(colidxs, matvals)); 3096 PetscFunctionReturn(PETSC_SUCCESS); 3097 } 3098 3099 /* Not scalable because of ISAllGather() unless getting all columns. */ 3100 static PetscErrorCode ISGetSeqIS_Private(Mat mat, IS iscol, IS *isseq) 3101 { 3102 IS iscol_local; 3103 PetscBool isstride; 3104 PetscMPIInt lisstride = 0, gisstride; 3105 3106 PetscFunctionBegin; 3107 /* check if we are grabbing all columns*/ 3108 PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &isstride)); 3109 3110 if (isstride) { 3111 PetscInt start, len, mstart, mlen; 3112 PetscCall(ISStrideGetInfo(iscol, &start, NULL)); 3113 PetscCall(ISGetLocalSize(iscol, &len)); 3114 PetscCall(MatGetOwnershipRangeColumn(mat, &mstart, &mlen)); 3115 if (mstart == start && mlen - mstart == len) lisstride = 1; 3116 } 3117 3118 PetscCall(MPIU_Allreduce(&lisstride, &gisstride, 1, MPI_INT, MPI_MIN, PetscObjectComm((PetscObject)mat))); 3119 if (gisstride) { 3120 PetscInt N; 3121 PetscCall(MatGetSize(mat, NULL, &N)); 3122 PetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &iscol_local)); 3123 PetscCall(ISSetIdentity(iscol_local)); 3124 PetscCall(PetscInfo(mat, "Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n")); 3125 } else { 3126 PetscInt cbs; 3127 PetscCall(ISGetBlockSize(iscol, &cbs)); 3128 PetscCall(ISAllGather(iscol, &iscol_local)); 3129 PetscCall(ISSetBlockSize(iscol_local, cbs)); 3130 } 3131 3132 *isseq = iscol_local; 3133 PetscFunctionReturn(PETSC_SUCCESS); 3134 } 3135 3136 /* 3137 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local 3138 (see MatCreateSubMatrix_MPIAIJ_nonscalable) 3139 3140 Input Parameters: 3141 + mat - matrix 3142 . isrow - parallel row index set; its local indices are a subset of local columns of `mat`, 3143 i.e., mat->rstart <= isrow[i] < mat->rend 3144 - iscol - parallel column index set; its local indices are a subset of local columns of `mat`, 3145 i.e., mat->cstart <= iscol[i] < mat->cend 3146 3147 Output Parameters: 3148 + isrow_d - sequential row index set for retrieving mat->A 3149 . iscol_d - sequential column index set for retrieving mat->A 3150 . 
iscol_o - sequential column index set for retrieving mat->B 3151 - garray - column map; garray[i] indicates global location of iscol_o[i] in `iscol` 3152 */ 3153 static PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat, IS isrow, IS iscol, IS *isrow_d, IS *iscol_d, IS *iscol_o, const PetscInt *garray[]) 3154 { 3155 Vec x, cmap; 3156 const PetscInt *is_idx; 3157 PetscScalar *xarray, *cmaparray; 3158 PetscInt ncols, isstart, *idx, m, rstart, *cmap1, count; 3159 Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data; 3160 Mat B = a->B; 3161 Vec lvec = a->lvec, lcmap; 3162 PetscInt i, cstart, cend, Bn = B->cmap->N; 3163 MPI_Comm comm; 3164 VecScatter Mvctx = a->Mvctx; 3165 3166 PetscFunctionBegin; 3167 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3168 PetscCall(ISGetLocalSize(iscol, &ncols)); 3169 3170 /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */ 3171 PetscCall(MatCreateVecs(mat, &x, NULL)); 3172 PetscCall(VecSet(x, -1.0)); 3173 PetscCall(VecDuplicate(x, &cmap)); 3174 PetscCall(VecSet(cmap, -1.0)); 3175 3176 /* Get start indices */ 3177 PetscCallMPI(MPI_Scan(&ncols, &isstart, 1, MPIU_INT, MPI_SUM, comm)); 3178 isstart -= ncols; 3179 PetscCall(MatGetOwnershipRangeColumn(mat, &cstart, &cend)); 3180 3181 PetscCall(ISGetIndices(iscol, &is_idx)); 3182 PetscCall(VecGetArray(x, &xarray)); 3183 PetscCall(VecGetArray(cmap, &cmaparray)); 3184 PetscCall(PetscMalloc1(ncols, &idx)); 3185 for (i = 0; i < ncols; i++) { 3186 xarray[is_idx[i] - cstart] = (PetscScalar)is_idx[i]; 3187 cmaparray[is_idx[i] - cstart] = i + isstart; /* global index of iscol[i] */ 3188 idx[i] = is_idx[i] - cstart; /* local index of iscol[i] */ 3189 } 3190 PetscCall(VecRestoreArray(x, &xarray)); 3191 PetscCall(VecRestoreArray(cmap, &cmaparray)); 3192 PetscCall(ISRestoreIndices(iscol, &is_idx)); 3193 3194 /* Get iscol_d */ 3195 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, iscol_d)); 3196 PetscCall(ISGetBlockSize(iscol, &i)); 3197 PetscCall(ISSetBlockSize(*iscol_d, i)); 3198 3199 /* Get isrow_d */ 3200 PetscCall(ISGetLocalSize(isrow, &m)); 3201 rstart = mat->rmap->rstart; 3202 PetscCall(PetscMalloc1(m, &idx)); 3203 PetscCall(ISGetIndices(isrow, &is_idx)); 3204 for (i = 0; i < m; i++) idx[i] = is_idx[i] - rstart; 3205 PetscCall(ISRestoreIndices(isrow, &is_idx)); 3206 3207 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idx, PETSC_OWN_POINTER, isrow_d)); 3208 PetscCall(ISGetBlockSize(isrow, &i)); 3209 PetscCall(ISSetBlockSize(*isrow_d, i)); 3210 3211 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3212 PetscCall(VecScatterBegin(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD)); 3213 PetscCall(VecScatterEnd(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD)); 3214 3215 PetscCall(VecDuplicate(lvec, &lcmap)); 3216 3217 PetscCall(VecScatterBegin(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD)); 3218 PetscCall(VecScatterEnd(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD)); 3219 3220 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3221 /* off-process column indices */ 3222 count = 0; 3223 PetscCall(PetscMalloc1(Bn, &idx)); 3224 PetscCall(PetscMalloc1(Bn, &cmap1)); 3225 3226 PetscCall(VecGetArray(lvec, &xarray)); 3227 PetscCall(VecGetArray(lcmap, &cmaparray)); 3228 for (i = 0; i < Bn; i++) { 3229 if (PetscRealPart(xarray[i]) > -1.0) { 3230 idx[count] = i; /* local column index in off-diagonal part B */ 3231 cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3232 
count++; 3233 } 3234 } 3235 PetscCall(VecRestoreArray(lvec, &xarray)); 3236 PetscCall(VecRestoreArray(lcmap, &cmaparray)); 3237 3238 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_COPY_VALUES, iscol_o)); 3239 /* cannot ensure iscol_o has same blocksize as iscol! */ 3240 3241 PetscCall(PetscFree(idx)); 3242 *garray = cmap1; 3243 3244 PetscCall(VecDestroy(&x)); 3245 PetscCall(VecDestroy(&cmap)); 3246 PetscCall(VecDestroy(&lcmap)); 3247 PetscFunctionReturn(PETSC_SUCCESS); 3248 } 3249 3250 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3251 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *submat) 3252 { 3253 Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data, *asub; 3254 Mat M = NULL; 3255 MPI_Comm comm; 3256 IS iscol_d, isrow_d, iscol_o; 3257 Mat Asub = NULL, Bsub = NULL; 3258 PetscInt n; 3259 3260 PetscFunctionBegin; 3261 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3262 3263 if (call == MAT_REUSE_MATRIX) { 3264 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3265 PetscCall(PetscObjectQuery((PetscObject)*submat, "isrow_d", (PetscObject *)&isrow_d)); 3266 PetscCheck(isrow_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "isrow_d passed in was not used before, cannot reuse"); 3267 3268 PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_d", (PetscObject *)&iscol_d)); 3269 PetscCheck(iscol_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_d passed in was not used before, cannot reuse"); 3270 3271 PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_o", (PetscObject *)&iscol_o)); 3272 PetscCheck(iscol_o, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_o passed in was not used before, cannot reuse"); 3273 3274 /* Update diagonal and off-diagonal portions of submat */ 3275 asub = (Mat_MPIAIJ *)(*submat)->data; 3276 PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->A)); 3277 PetscCall(ISGetLocalSize(iscol_o, &n)); 3278 if (n) PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->B)); 3279 PetscCall(MatAssemblyBegin(*submat, MAT_FINAL_ASSEMBLY)); 3280 PetscCall(MatAssemblyEnd(*submat, MAT_FINAL_ASSEMBLY)); 3281 3282 } else { /* call == MAT_INITIAL_MATRIX) */ 3283 const PetscInt *garray; 3284 PetscInt BsubN; 3285 3286 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) 
*/ 3287 PetscCall(ISGetSeqIS_SameColDist_Private(mat, isrow, iscol, &isrow_d, &iscol_d, &iscol_o, &garray)); 3288 3289 /* Create local submatrices Asub and Bsub */ 3290 PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Asub)); 3291 PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Bsub)); 3292 3293 /* Create submatrix M */ 3294 PetscCall(MatCreateMPIAIJWithSeqAIJ(comm, Asub, Bsub, garray, &M)); 3295 3296 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3297 asub = (Mat_MPIAIJ *)M->data; 3298 3299 PetscCall(ISGetLocalSize(iscol_o, &BsubN)); 3300 n = asub->B->cmap->N; 3301 if (BsubN > n) { 3302 /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */ 3303 const PetscInt *idx; 3304 PetscInt i, j, *idx_new, *subgarray = asub->garray; 3305 PetscCall(PetscInfo(M, "submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n", n, BsubN)); 3306 3307 PetscCall(PetscMalloc1(n, &idx_new)); 3308 j = 0; 3309 PetscCall(ISGetIndices(iscol_o, &idx)); 3310 for (i = 0; i < n; i++) { 3311 if (j >= BsubN) break; 3312 while (subgarray[i] > garray[j]) j++; 3313 3314 if (subgarray[i] == garray[j]) { 3315 idx_new[i] = idx[j++]; 3316 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT, i, subgarray[i], j, garray[j]); 3317 } 3318 PetscCall(ISRestoreIndices(iscol_o, &idx)); 3319 3320 PetscCall(ISDestroy(&iscol_o)); 3321 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx_new, PETSC_OWN_POINTER, &iscol_o)); 3322 3323 } else if (BsubN < n) { 3324 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")", BsubN, asub->B->cmap->N); 3325 } 3326 3327 PetscCall(PetscFree(garray)); 3328 *submat = M; 3329 3330 /* Save isrow_d, iscol_d and iscol_o used in processor for next request */ 3331 PetscCall(PetscObjectCompose((PetscObject)M, "isrow_d", (PetscObject)isrow_d)); 3332 PetscCall(ISDestroy(&isrow_d)); 3333 3334 PetscCall(PetscObjectCompose((PetscObject)M, "iscol_d", (PetscObject)iscol_d)); 3335 PetscCall(ISDestroy(&iscol_d)); 3336 3337 PetscCall(PetscObjectCompose((PetscObject)M, "iscol_o", (PetscObject)iscol_o)); 3338 PetscCall(ISDestroy(&iscol_o)); 3339 } 3340 PetscFunctionReturn(PETSC_SUCCESS); 3341 } 3342 3343 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat) 3344 { 3345 IS iscol_local = NULL, isrow_d; 3346 PetscInt csize; 3347 PetscInt n, i, j, start, end; 3348 PetscBool sameRowDist = PETSC_FALSE, sameDist[2], tsameDist[2]; 3349 MPI_Comm comm; 3350 3351 PetscFunctionBegin; 3352 /* If isrow has same processor distribution as mat, 3353 call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */ 3354 if (call == MAT_REUSE_MATRIX) { 3355 PetscCall(PetscObjectQuery((PetscObject)*newmat, "isrow_d", (PetscObject *)&isrow_d)); 3356 if (isrow_d) { 3357 sameRowDist = PETSC_TRUE; 3358 tsameDist[1] = PETSC_TRUE; /* sameColDist */ 3359 } else { 3360 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_local)); 3361 if (iscol_local) { 3362 sameRowDist = PETSC_TRUE; 3363 tsameDist[1] = PETSC_FALSE; /* !sameColDist */ 3364 } 3365 } 3366 } else { 3367 /* Check if isrow has same processor distribution as mat */ 3368 sameDist[0] = PETSC_FALSE; 3369 
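/* an empty local piece of isrow trivially matches this rank's row distribution; otherwise every local index must lie inside this rank's ownership range [start, end) */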
PetscCall(ISGetLocalSize(isrow, &n)); 3370 if (!n) { 3371 sameDist[0] = PETSC_TRUE; 3372 } else { 3373 PetscCall(ISGetMinMax(isrow, &i, &j)); 3374 PetscCall(MatGetOwnershipRange(mat, &start, &end)); 3375 if (i >= start && j < end) sameDist[0] = PETSC_TRUE; 3376 } 3377 3378 /* Check if iscol has same processor distribution as mat */ 3379 sameDist[1] = PETSC_FALSE; 3380 PetscCall(ISGetLocalSize(iscol, &n)); 3381 if (!n) { 3382 sameDist[1] = PETSC_TRUE; 3383 } else { 3384 PetscCall(ISGetMinMax(iscol, &i, &j)); 3385 PetscCall(MatGetOwnershipRangeColumn(mat, &start, &end)); 3386 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3387 } 3388 3389 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3390 PetscCall(MPIU_Allreduce(&sameDist, &tsameDist, 2, MPIU_BOOL, MPI_LAND, comm)); 3391 sameRowDist = tsameDist[0]; 3392 } 3393 3394 if (sameRowDist) { 3395 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3396 /* isrow and iscol have same processor distribution as mat */ 3397 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat, isrow, iscol, call, newmat)); 3398 PetscFunctionReturn(PETSC_SUCCESS); 3399 } else { /* sameRowDist */ 3400 /* isrow has same processor distribution as mat */ 3401 if (call == MAT_INITIAL_MATRIX) { 3402 PetscBool sorted; 3403 PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local)); 3404 PetscCall(ISGetLocalSize(iscol_local, &n)); /* local size of iscol_local = global columns of newmat */ 3405 PetscCall(ISGetSize(iscol, &i)); 3406 PetscCheck(n == i, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT, n, i); 3407 3408 PetscCall(ISSorted(iscol_local, &sorted)); 3409 if (sorted) { 3410 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3411 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, iscol_local, MAT_INITIAL_MATRIX, newmat)); 3412 PetscFunctionReturn(PETSC_SUCCESS); 3413 } 3414 } else { /* call == MAT_REUSE_MATRIX */ 3415 IS iscol_sub; 3416 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub)); 3417 if (iscol_sub) { 3418 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, NULL, call, newmat)); 3419 PetscFunctionReturn(PETSC_SUCCESS); 3420 } 3421 } 3422 } 3423 } 3424 3425 /* General case: iscol -> iscol_local which has global size of iscol */ 3426 if (call == MAT_REUSE_MATRIX) { 3427 PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local)); 3428 PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 3429 } else { 3430 if (!iscol_local) PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local)); 3431 } 3432 3433 PetscCall(ISGetLocalSize(iscol, &csize)); 3434 PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat, isrow, iscol_local, csize, call, newmat)); 3435 3436 if (call == MAT_INITIAL_MATRIX) { 3437 PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local)); 3438 PetscCall(ISDestroy(&iscol_local)); 3439 } 3440 PetscFunctionReturn(PETSC_SUCCESS); 3441 } 3442 3443 /*@C 3444 MatCreateMPIAIJWithSeqAIJ - creates a `MATMPIAIJ` matrix using `MATSEQAIJ` matrices that contain the "diagonal" 3445 and "off-diagonal" part of the matrix in CSR format. 3446 3447 Collective 3448 3449 Input Parameters: 3450 + comm - MPI communicator 3451 . A - "diagonal" portion of matrix 3452 . 
B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3453 - garray - global index of `B` columns 3454 3455 Output Parameter: 3456 . mat - the matrix, with input `A` as its local diagonal matrix 3457 3458 Level: advanced 3459 3460 Notes: 3461 See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3462 3463 `A` becomes part of output mat, `B` is destroyed by this routine. The user cannot use `A` and `B` anymore. 3464 3465 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateMPIAIJWithSplitArrays()` 3466 @*/ 3467 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm, Mat A, Mat B, const PetscInt garray[], Mat *mat) 3468 { 3469 Mat_MPIAIJ *maij; 3470 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data, *bnew; 3471 PetscInt *oi = b->i, *oj = b->j, i, nz, col; 3472 const PetscScalar *oa; 3473 Mat Bnew; 3474 PetscInt m, n, N; 3475 MatType mpi_mat_type; 3476 3477 PetscFunctionBegin; 3478 PetscCall(MatCreate(comm, mat)); 3479 PetscCall(MatGetSize(A, &m, &n)); 3480 PetscCheck(m == B->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Am %" PetscInt_FMT " != Bm %" PetscInt_FMT, m, B->rmap->N); 3481 PetscCheck(PetscAbs(A->rmap->bs) == PetscAbs(B->rmap->bs), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT, A->rmap->bs, B->rmap->bs); 3482 /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */ 3483 /* PetscCheck(A->cmap->bs == B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */ 3484 3485 /* Get global columns of mat */ 3486 PetscCall(MPIU_Allreduce(&n, &N, 1, MPIU_INT, MPI_SUM, comm)); 3487 3488 PetscCall(MatSetSizes(*mat, m, n, PETSC_DECIDE, N)); 3489 /* Determine the type of MPI matrix that should be created from the type of matrix A, which holds the "diagonal" portion. 
*/ 3490 PetscCall(MatGetMPIMatType_Private(A, &mpi_mat_type)); 3491 PetscCall(MatSetType(*mat, mpi_mat_type)); 3492 3493 if (A->rmap->bs > 1 || A->cmap->bs > 1) PetscCall(MatSetBlockSizes(*mat, A->rmap->bs, A->cmap->bs)); 3494 maij = (Mat_MPIAIJ *)(*mat)->data; 3495 3496 (*mat)->preallocated = PETSC_TRUE; 3497 3498 PetscCall(PetscLayoutSetUp((*mat)->rmap)); 3499 PetscCall(PetscLayoutSetUp((*mat)->cmap)); 3500 3501 /* Set A as diagonal portion of *mat */ 3502 maij->A = A; 3503 3504 nz = oi[m]; 3505 for (i = 0; i < nz; i++) { 3506 col = oj[i]; 3507 oj[i] = garray[col]; 3508 } 3509 3510 /* Set Bnew as off-diagonal portion of *mat */ 3511 PetscCall(MatSeqAIJGetArrayRead(B, &oa)); 3512 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, N, oi, oj, (PetscScalar *)oa, &Bnew)); 3513 PetscCall(MatSeqAIJRestoreArrayRead(B, &oa)); 3514 bnew = (Mat_SeqAIJ *)Bnew->data; 3515 bnew->maxnz = b->maxnz; /* allocated nonzeros of B */ 3516 maij->B = Bnew; 3517 3518 PetscCheck(B->rmap->N == Bnew->rmap->N, PETSC_COMM_SELF, PETSC_ERR_PLIB, "BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT, B->rmap->N, Bnew->rmap->N); 3519 3520 b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */ 3521 b->free_a = PETSC_FALSE; 3522 b->free_ij = PETSC_FALSE; 3523 PetscCall(MatDestroy(&B)); 3524 3525 bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */ 3526 bnew->free_a = PETSC_TRUE; 3527 bnew->free_ij = PETSC_TRUE; 3528 3529 /* condense columns of maij->B */ 3530 PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 3531 PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY)); 3532 PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY)); 3533 PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE)); 3534 PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 3535 PetscFunctionReturn(PETSC_SUCCESS); 3536 } 3537 3538 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat, PetscInt, const IS[], const IS[], MatReuse, PetscBool, Mat *); 3539 3540 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat, IS isrow, IS iscol, IS iscol_local, MatReuse call, Mat *newmat) 3541 { 3542 PetscInt i, m, n, rstart, row, rend, nz, j, bs, cbs; 3543 PetscInt *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal; 3544 Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data; 3545 Mat M, Msub, B = a->B; 3546 MatScalar *aa; 3547 Mat_SeqAIJ *aij; 3548 PetscInt *garray = a->garray, *colsub, Ncols; 3549 PetscInt count, Bn = B->cmap->N, cstart = mat->cmap->rstart, cend = mat->cmap->rend; 3550 IS iscol_sub, iscmap; 3551 const PetscInt *is_idx, *cmap; 3552 PetscBool allcolumns = PETSC_FALSE; 3553 MPI_Comm comm; 3554 3555 PetscFunctionBegin; 3556 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3557 if (call == MAT_REUSE_MATRIX) { 3558 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub)); 3559 PetscCheck(iscol_sub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "SubIScol passed in was not used before, cannot reuse"); 3560 PetscCall(ISGetLocalSize(iscol_sub, &count)); 3561 3562 PetscCall(PetscObjectQuery((PetscObject)*newmat, "Subcmap", (PetscObject *)&iscmap)); 3563 PetscCheck(iscmap, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Subcmap passed in was not used before, cannot reuse"); 3564 3565 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Msub)); 3566 PetscCheck(Msub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 3567 3568 
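/* recompute the local sequential submatrix in place, reusing the cached Msub and the cached column index set iscol_sub from the MAT_INITIAL_MATRIX call */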
PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_REUSE_MATRIX, PETSC_FALSE, &Msub)); 3569 3570 } else { /* call == MAT_INITIAL_MATRIX) */ 3571 PetscBool flg; 3572 3573 PetscCall(ISGetLocalSize(iscol, &n)); 3574 PetscCall(ISGetSize(iscol, &Ncols)); 3575 3576 /* (1) iscol -> nonscalable iscol_local */ 3577 /* Check for special case: each processor gets entire matrix columns */ 3578 PetscCall(ISIdentity(iscol_local, &flg)); 3579 if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3580 PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 3581 if (allcolumns) { 3582 iscol_sub = iscol_local; 3583 PetscCall(PetscObjectReference((PetscObject)iscol_local)); 3584 PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &iscmap)); 3585 3586 } else { 3587 /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */ 3588 PetscInt *idx, *cmap1, k; 3589 PetscCall(PetscMalloc1(Ncols, &idx)); 3590 PetscCall(PetscMalloc1(Ncols, &cmap1)); 3591 PetscCall(ISGetIndices(iscol_local, &is_idx)); 3592 count = 0; 3593 k = 0; 3594 for (i = 0; i < Ncols; i++) { 3595 j = is_idx[i]; 3596 if (j >= cstart && j < cend) { 3597 /* diagonal part of mat */ 3598 idx[count] = j; 3599 cmap1[count++] = i; /* column index in submat */ 3600 } else if (Bn) { 3601 /* off-diagonal part of mat */ 3602 if (j == garray[k]) { 3603 idx[count] = j; 3604 cmap1[count++] = i; /* column index in submat */ 3605 } else if (j > garray[k]) { 3606 while (j > garray[k] && k < Bn - 1) k++; 3607 if (j == garray[k]) { 3608 idx[count] = j; 3609 cmap1[count++] = i; /* column index in submat */ 3610 } 3611 } 3612 } 3613 } 3614 PetscCall(ISRestoreIndices(iscol_local, &is_idx)); 3615 3616 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_OWN_POINTER, &iscol_sub)); 3617 PetscCall(ISGetBlockSize(iscol, &cbs)); 3618 PetscCall(ISSetBlockSize(iscol_sub, cbs)); 3619 3620 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local), count, cmap1, PETSC_OWN_POINTER, &iscmap)); 3621 } 3622 3623 /* (3) Create sequential Msub */ 3624 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_INITIAL_MATRIX, allcolumns, &Msub)); 3625 } 3626 3627 PetscCall(ISGetLocalSize(iscol_sub, &count)); 3628 aij = (Mat_SeqAIJ *)(Msub)->data; 3629 ii = aij->i; 3630 PetscCall(ISGetIndices(iscmap, &cmap)); 3631 3632 /* 3633 m - number of local rows 3634 Ncols - number of columns (same on all processors) 3635 rstart - first row in new global matrix generated 3636 */ 3637 PetscCall(MatGetSize(Msub, &m, NULL)); 3638 3639 if (call == MAT_INITIAL_MATRIX) { 3640 /* (4) Create parallel newmat */ 3641 PetscMPIInt rank, size; 3642 PetscInt csize; 3643 3644 PetscCallMPI(MPI_Comm_size(comm, &size)); 3645 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 3646 3647 /* 3648 Determine the number of non-zeros in the diagonal and off-diagonal 3649 portions of the matrix in order to do correct preallocation 3650 */ 3651 3652 /* first get start and end of "diagonal" columns */ 3653 PetscCall(ISGetLocalSize(iscol, &csize)); 3654 if (csize == PETSC_DECIDE) { 3655 PetscCall(ISGetSize(isrow, &mglobal)); 3656 if (mglobal == Ncols) { /* square matrix */ 3657 nlocal = m; 3658 } else { 3659 nlocal = Ncols / size + ((Ncols % size) > rank); 3660 } 3661 } else { 3662 nlocal = csize; 3663 } 3664 PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm)); 3665 rstart = rend - nlocal; 3666 PetscCheck(rank != size - 1 
|| rend == Ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, Ncols); 3667 3668 /* next, compute all the lengths */ 3669 jj = aij->j; 3670 PetscCall(PetscMalloc1(2 * m + 1, &dlens)); 3671 olens = dlens + m; 3672 for (i = 0; i < m; i++) { 3673 jend = ii[i + 1] - ii[i]; 3674 olen = 0; 3675 dlen = 0; 3676 for (j = 0; j < jend; j++) { 3677 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 3678 else dlen++; 3679 jj++; 3680 } 3681 olens[i] = olen; 3682 dlens[i] = dlen; 3683 } 3684 3685 PetscCall(ISGetBlockSize(isrow, &bs)); 3686 PetscCall(ISGetBlockSize(iscol, &cbs)); 3687 3688 PetscCall(MatCreate(comm, &M)); 3689 PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, Ncols)); 3690 PetscCall(MatSetBlockSizes(M, bs, cbs)); 3691 PetscCall(MatSetType(M, ((PetscObject)mat)->type_name)); 3692 PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens)); 3693 PetscCall(PetscFree(dlens)); 3694 3695 } else { /* call == MAT_REUSE_MATRIX */ 3696 M = *newmat; 3697 PetscCall(MatGetLocalSize(M, &i, NULL)); 3698 PetscCheck(i == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request"); 3699 PetscCall(MatZeroEntries(M)); 3700 /* 3701 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3702 rather than the slower MatSetValues(). 3703 */ 3704 M->was_assembled = PETSC_TRUE; 3705 M->assembled = PETSC_FALSE; 3706 } 3707 3708 /* (5) Set values of Msub to *newmat */ 3709 PetscCall(PetscMalloc1(count, &colsub)); 3710 PetscCall(MatGetOwnershipRange(M, &rstart, NULL)); 3711 3712 jj = aij->j; 3713 PetscCall(MatSeqAIJGetArrayRead(Msub, (const PetscScalar **)&aa)); 3714 for (i = 0; i < m; i++) { 3715 row = rstart + i; 3716 nz = ii[i + 1] - ii[i]; 3717 for (j = 0; j < nz; j++) colsub[j] = cmap[jj[j]]; 3718 PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, colsub, aa, INSERT_VALUES)); 3719 jj += nz; 3720 aa += nz; 3721 } 3722 PetscCall(MatSeqAIJRestoreArrayRead(Msub, (const PetscScalar **)&aa)); 3723 PetscCall(ISRestoreIndices(iscmap, &cmap)); 3724 3725 PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY)); 3726 PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY)); 3727 3728 PetscCall(PetscFree(colsub)); 3729 3730 /* save Msub, iscol_sub and iscmap used in processor for next request */ 3731 if (call == MAT_INITIAL_MATRIX) { 3732 *newmat = M; 3733 PetscCall(PetscObjectCompose((PetscObject)*newmat, "SubMatrix", (PetscObject)Msub)); 3734 PetscCall(MatDestroy(&Msub)); 3735 3736 PetscCall(PetscObjectCompose((PetscObject)*newmat, "SubIScol", (PetscObject)iscol_sub)); 3737 PetscCall(ISDestroy(&iscol_sub)); 3738 3739 PetscCall(PetscObjectCompose((PetscObject)*newmat, "Subcmap", (PetscObject)iscmap)); 3740 PetscCall(ISDestroy(&iscmap)); 3741 3742 if (iscol_local) { 3743 PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local)); 3744 PetscCall(ISDestroy(&iscol_local)); 3745 } 3746 } 3747 PetscFunctionReturn(PETSC_SUCCESS); 3748 } 3749 3750 /* 3751 Not great since it makes two copies of the submatrix, first an SeqAIJ 3752 in local and then by concatenating the local matrices the end result. 3753 Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3754 3755 This requires a sequential iscol with all indices. 
3756 */ 3757 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat) 3758 { 3759 PetscMPIInt rank, size; 3760 PetscInt i, m, n, rstart, row, rend, nz, *cwork, j, bs, cbs; 3761 PetscInt *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal; 3762 Mat M, Mreuse; 3763 MatScalar *aa, *vwork; 3764 MPI_Comm comm; 3765 Mat_SeqAIJ *aij; 3766 PetscBool colflag, allcolumns = PETSC_FALSE; 3767 3768 PetscFunctionBegin; 3769 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3770 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 3771 PetscCallMPI(MPI_Comm_size(comm, &size)); 3772 3773 /* Check for special case: each processor gets entire matrix columns */ 3774 PetscCall(ISIdentity(iscol, &colflag)); 3775 PetscCall(ISGetLocalSize(iscol, &n)); 3776 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3777 PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 3778 3779 if (call == MAT_REUSE_MATRIX) { 3780 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse)); 3781 PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 3782 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_REUSE_MATRIX, allcolumns, &Mreuse)); 3783 } else { 3784 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_INITIAL_MATRIX, allcolumns, &Mreuse)); 3785 } 3786 3787 /* 3788 m - number of local rows 3789 n - number of columns (same on all processors) 3790 rstart - first row in new global matrix generated 3791 */ 3792 PetscCall(MatGetSize(Mreuse, &m, &n)); 3793 PetscCall(MatGetBlockSizes(Mreuse, &bs, &cbs)); 3794 if (call == MAT_INITIAL_MATRIX) { 3795 aij = (Mat_SeqAIJ *)(Mreuse)->data; 3796 ii = aij->i; 3797 jj = aij->j; 3798 3799 /* 3800 Determine the number of non-zeros in the diagonal and off-diagonal 3801 portions of the matrix in order to do correct preallocation 3802 */ 3803 3804 /* first get start and end of "diagonal" columns */ 3805 if (csize == PETSC_DECIDE) { 3806 PetscCall(ISGetSize(isrow, &mglobal)); 3807 if (mglobal == n) { /* square matrix */ 3808 nlocal = m; 3809 } else { 3810 nlocal = n / size + ((n % size) > rank); 3811 } 3812 } else { 3813 nlocal = csize; 3814 } 3815 PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm)); 3816 rstart = rend - nlocal; 3817 PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n); 3818 3819 /* next, compute all the lengths */ 3820 PetscCall(PetscMalloc1(2 * m + 1, &dlens)); 3821 olens = dlens + m; 3822 for (i = 0; i < m; i++) { 3823 jend = ii[i + 1] - ii[i]; 3824 olen = 0; 3825 dlen = 0; 3826 for (j = 0; j < jend; j++) { 3827 if (*jj < rstart || *jj >= rend) olen++; 3828 else dlen++; 3829 jj++; 3830 } 3831 olens[i] = olen; 3832 dlens[i] = dlen; 3833 } 3834 PetscCall(MatCreate(comm, &M)); 3835 PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, n)); 3836 PetscCall(MatSetBlockSizes(M, bs, cbs)); 3837 PetscCall(MatSetType(M, ((PetscObject)mat)->type_name)); 3838 PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens)); 3839 PetscCall(PetscFree(dlens)); 3840 } else { 3841 PetscInt ml, nl; 3842 3843 M = *newmat; 3844 PetscCall(MatGetLocalSize(M, &ml, &nl)); 3845 PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as 
request"); 3846 PetscCall(MatZeroEntries(M)); 3847 /* 3848 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3849 rather than the slower MatSetValues(). 3850 */ 3851 M->was_assembled = PETSC_TRUE; 3852 M->assembled = PETSC_FALSE; 3853 } 3854 PetscCall(MatGetOwnershipRange(M, &rstart, &rend)); 3855 aij = (Mat_SeqAIJ *)(Mreuse)->data; 3856 ii = aij->i; 3857 jj = aij->j; 3858 3859 /* trigger copy to CPU if needed */ 3860 PetscCall(MatSeqAIJGetArrayRead(Mreuse, (const PetscScalar **)&aa)); 3861 for (i = 0; i < m; i++) { 3862 row = rstart + i; 3863 nz = ii[i + 1] - ii[i]; 3864 cwork = jj; 3865 jj = PetscSafePointerPlusOffset(jj, nz); 3866 vwork = aa; 3867 aa = PetscSafePointerPlusOffset(aa, nz); 3868 PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES)); 3869 } 3870 PetscCall(MatSeqAIJRestoreArrayRead(Mreuse, (const PetscScalar **)&aa)); 3871 3872 PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY)); 3873 PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY)); 3874 *newmat = M; 3875 3876 /* save submatrix used in processor for next request */ 3877 if (call == MAT_INITIAL_MATRIX) { 3878 PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse)); 3879 PetscCall(MatDestroy(&Mreuse)); 3880 } 3881 PetscFunctionReturn(PETSC_SUCCESS); 3882 } 3883 3884 static PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[]) 3885 { 3886 PetscInt m, cstart, cend, j, nnz, i, d, *ld; 3887 PetscInt *d_nnz, *o_nnz, nnz_max = 0, rstart, ii; 3888 const PetscInt *JJ; 3889 PetscBool nooffprocentries; 3890 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)B->data; 3891 3892 PetscFunctionBegin; 3893 PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Ii[0] must be 0 it is %" PetscInt_FMT, Ii[0]); 3894 3895 PetscCall(PetscLayoutSetUp(B->rmap)); 3896 PetscCall(PetscLayoutSetUp(B->cmap)); 3897 m = B->rmap->n; 3898 cstart = B->cmap->rstart; 3899 cend = B->cmap->rend; 3900 rstart = B->rmap->rstart; 3901 3902 PetscCall(PetscCalloc2(m, &d_nnz, m, &o_nnz)); 3903 3904 if (PetscDefined(USE_DEBUG)) { 3905 for (i = 0; i < m; i++) { 3906 nnz = Ii[i + 1] - Ii[i]; 3907 JJ = PetscSafePointerPlusOffset(J, Ii[i]); 3908 PetscCheck(nnz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns", i, nnz); 3909 PetscCheck(!nnz || !(JJ[0] < 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT, i, JJ[0]); 3910 PetscCheck(!nnz || !(JJ[nnz - 1] >= B->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")", i, JJ[nnz - 1], B->cmap->N); 3911 } 3912 } 3913 3914 for (i = 0; i < m; i++) { 3915 nnz = Ii[i + 1] - Ii[i]; 3916 JJ = PetscSafePointerPlusOffset(J, Ii[i]); 3917 nnz_max = PetscMax(nnz_max, nnz); 3918 d = 0; 3919 for (j = 0; j < nnz; j++) { 3920 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3921 } 3922 d_nnz[i] = d; 3923 o_nnz[i] = nnz - d; 3924 } 3925 PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz)); 3926 PetscCall(PetscFree2(d_nnz, o_nnz)); 3927 3928 for (i = 0; i < m; i++) { 3929 ii = i + rstart; 3930 PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, Ii[i + 1] - Ii[i], PetscSafePointerPlusOffset(J, Ii[i]), PetscSafePointerPlusOffset(v, Ii[i]), INSERT_VALUES)); 3931 } 3932 nooffprocentries = B->nooffprocentries; 3933 B->nooffprocentries = PETSC_TRUE; 3934 
PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 3935 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 3936 B->nooffprocentries = nooffprocentries; 3937 3938 /* count number of entries below block diagonal */ 3939 PetscCall(PetscFree(Aij->ld)); 3940 PetscCall(PetscCalloc1(m, &ld)); 3941 Aij->ld = ld; 3942 for (i = 0; i < m; i++) { 3943 nnz = Ii[i + 1] - Ii[i]; 3944 j = 0; 3945 while (j < nnz && J[j] < cstart) j++; 3946 ld[i] = j; 3947 if (J) J += nnz; 3948 } 3949 3950 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 3951 PetscFunctionReturn(PETSC_SUCCESS); 3952 } 3953 3954 /*@ 3955 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in `MATAIJ` format 3956 (the default parallel PETSc format). 3957 3958 Collective 3959 3960 Input Parameters: 3961 + B - the matrix 3962 . i - the indices into `j` for the start of each local row (indices start with zero) 3963 . j - the column indices for each local row (indices start with zero) 3964 - v - optional values in the matrix 3965 3966 Level: developer 3967 3968 Notes: 3969 The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc; 3970 thus you CANNOT change the matrix entries by changing the values of `v` after you have 3971 called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays. 3972 3973 The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array. 3974 3975 A convenience routine for this functionality is `MatCreateMPIAIJWithArrays()`. 3976 3977 You can update the matrix with new numerical values using `MatUpdateMPIAIJWithArrays()` after this call if the column indices in `j` are sorted. 3978 3979 If you do **not** use `MatUpdateMPIAIJWithArrays()`, the column indices in `j` do not need to be sorted. If you will use 3980 `MatUpdateMPIAIJWithArrays()`, the column indices **must** be sorted. 3981 3982 The format which is used for the sparse matrix input, is equivalent to a 3983 row-major ordering.. i.e for the following matrix, the input data expected is 3984 as shown 3985 .vb 3986 1 0 0 3987 2 0 3 P0 3988 ------- 3989 4 5 6 P1 3990 3991 Process0 [P0] rows_owned=[0,1] 3992 i = {0,1,3} [size = nrow+1 = 2+1] 3993 j = {0,0,2} [size = 3] 3994 v = {1,2,3} [size = 3] 3995 3996 Process1 [P1] rows_owned=[2] 3997 i = {0,3} [size = nrow+1 = 1+1] 3998 j = {0,1,2} [size = 3] 3999 v = {4,5,6} [size = 3] 4000 .ve 4001 4002 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`, 4003 `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()`, `MatCreateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4004 @*/ 4005 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B, const PetscInt i[], const PetscInt j[], const PetscScalar v[]) 4006 { 4007 PetscFunctionBegin; 4008 PetscTryMethod(B, "MatMPIAIJSetPreallocationCSR_C", (Mat, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, i, j, v)); 4009 PetscFunctionReturn(PETSC_SUCCESS); 4010 } 4011 4012 /*@C 4013 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in `MATMPIAIJ` format 4014 (the default parallel PETSc format). For good matrix assembly performance 4015 the user should preallocate the matrix storage by setting the parameters 4016 `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). 4017 4018 Collective 4019 4020 Input Parameters: 4021 + B - the matrix 4022 . 
d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4023 (same value is used for all local rows) 4024 . d_nnz - array containing the number of nonzeros in the various rows of the 4025 DIAGONAL portion of the local submatrix (possibly different for each row) 4026 or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `d_nz` is used to specify the nonzero structure. 4027 The size of this array is equal to the number of local rows, i.e. 'm'. 4028 For matrices that will be factored, you must leave room for (and set) 4029 the diagonal entry even if it is zero. 4030 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4031 submatrix (same value is used for all local rows). 4032 - o_nnz - array containing the number of nonzeros in the various rows of the 4033 OFF-DIAGONAL portion of the local submatrix (possibly different for 4034 each row) or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `o_nz` is used to specify the nonzero 4035 structure. The size of this array is equal to the number 4036 of local rows, i.e. 'm'. 4037 4038 Example Usage: 4039 Consider the following 8x8 matrix with 34 non-zero values, that is 4040 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 4041 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4042 as follows 4043 4044 .vb 4045 1 2 0 | 0 3 0 | 0 4 4046 Proc0 0 5 6 | 7 0 0 | 8 0 4047 9 0 10 | 11 0 0 | 12 0 4048 ------------------------------------- 4049 13 0 14 | 15 16 17 | 0 0 4050 Proc1 0 18 0 | 19 20 21 | 0 0 4051 0 0 0 | 22 23 0 | 24 0 4052 ------------------------------------- 4053 Proc2 25 26 27 | 0 0 28 | 29 0 4054 30 0 0 | 31 32 33 | 0 34 4055 .ve 4056 4057 This can be represented as a collection of submatrices as 4058 .vb 4059 A B C 4060 D E F 4061 G H I 4062 .ve 4063 4064 Where the submatrices A,B,C are owned by proc0, D,E,F are 4065 owned by proc1, G,H,I are owned by proc2. 4066 4067 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4068 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4069 The 'M','N' parameters are 8,8, and have the same values on all procs. 4070 4071 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4072 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4073 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4074 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4075 part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ` 4076 matrix, and [DF] as another `MATSEQAIJ` matrix. 4077 4078 When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are 4079 allocated for every row of the local diagonal submatrix, and `o_nz` 4080 storage locations are allocated for every row of the OFF-DIAGONAL submat. 4081 One way to choose `d_nz` and `o_nz` is to use the max nonzeros per local 4082 row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4083 In this case, the values of `d_nz`, `o_nz` are 4084 .vb 4085 proc0 d_nz = 2, o_nz = 2 4086 proc1 d_nz = 3, o_nz = 2 4087 proc2 d_nz = 1, o_nz = 4 4088 .ve 4089 We are allocating `m`*(`d_nz`+`o_nz`) storage locations for every proc. This 4090 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4091 for proc2, i.e., we are using 12+15+10=37 storage locations to store 4092 34 values. 4093 4094 When `d_nnz`, `o_nnz` parameters are specified, the storage is specified 4095 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
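(As a rough calling sketch, assuming arrays `d_nnz`[] and `o_nnz`[] of length m have been filled with the per-row counts, each process would call
.vb
     MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz);
.ve
with the scalar `d_nz` and `o_nz` arguments passed as 0 and ignored.)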
4096 In the above case the values for `d_nnz`, `o_nnz` are 4097 .vb 4098 proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2] 4099 proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1] 4100 proc2 d_nnz = [1,1] and o_nnz = [4,4] 4101 .ve 4102 Here the space allocated is the sum of all the above values, i.e., 34, and 4103 hence pre-allocation is perfect. 4104 4105 Level: intermediate 4106 4107 Notes: 4108 If the *_nnz parameter is given then the *_nz parameter is ignored 4109 4110 The `MATAIJ` format, also called compressed row storage (CSR), is compatible with standard Fortran 4111 storage. The stored row and column indices begin with zero. 4112 See [Sparse Matrices](sec_matsparse) for details. 4113 4114 The parallel matrix is partitioned such that the first m0 rows belong to 4115 process 0, the next m1 rows belong to process 1, the next m2 rows belong 4116 to process 2, etc., where m0,m1,m2... are the input parameter 'm'. 4117 4118 The DIAGONAL portion of the local submatrix of a processor can be defined 4119 as the submatrix which is obtained by extracting the part corresponding to 4120 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 4121 first row that belongs to the processor, r2 is the last row belonging to 4122 this processor, and c1-c2 is the range of indices of the local part of a 4123 vector suitable for applying the matrix to. This is an mxn matrix. In the 4124 common case of a square matrix, the row and column ranges are the same and 4125 the DIAGONAL part is also square. The remaining portion of the local 4126 submatrix (mxN) constitutes the OFF-DIAGONAL portion. 4127 4128 If `o_nnz` and `d_nnz` are specified, then `o_nz` and `d_nz` are ignored. 4129 4130 You can call `MatGetInfo()` to get information on how effective the preallocation was; 4131 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 4132 You can also run with the option `-info` and look for messages with the string 4133 malloc in them to see if additional memory allocation was needed. 4134 4135 .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MATMPIAIJ`, `MATAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`, 4136 `MatGetInfo()`, `PetscSplitOwnership()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4137 @*/ 4138 PetscErrorCode MatMPIAIJSetPreallocation(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[]) 4139 { 4140 PetscFunctionBegin; 4141 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 4142 PetscValidType(B, 1); 4143 PetscTryMethod(B, "MatMPIAIJSetPreallocation_C", (Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, d_nz, d_nnz, o_nz, o_nnz)); 4144 PetscFunctionReturn(PETSC_SUCCESS); 4145 } 4146 4147 /*@ 4148 MatCreateMPIAIJWithArrays - creates a `MATMPIAIJ` matrix using arrays that contain the local rows in standard 4149 CSR format. 4150 4151 Collective 4152 4153 Input Parameters: 4154 + comm - MPI communicator 4155 . m - number of local rows (Cannot be `PETSC_DECIDE`) 4156 . n - This value should be the same as the local size used in creating the 4157 x vector for the matrix-vector product $ y = Ax$. (or `PETSC_DECIDE` to have 4158 calculated if `N` is given) For square matrices n is almost always `m`. 4159 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given) 4160 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given) 4161 .
i - row indices (of length m+1); that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 4162 . j - global column indices 4163 - a - optional matrix values 4164 4165 Output Parameter: 4166 . mat - the matrix 4167 4168 Level: intermediate 4169 4170 Notes: 4171 The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc; 4172 thus you CANNOT change the matrix entries by changing the values of `a[]` after you have 4173 called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays. 4174 4175 The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array. 4176 4177 Once you have created the matrix you can update it with new numerical values using `MatUpdateMPIAIJWithArray()` 4178 4179 If you do **not** use `MatUpdateMPIAIJWithArray()`, the column indices in `j` do not need to be sorted. If you will use 4180 `MatUpdateMPIAIJWithArrays()`, the column indices **must** be sorted. 4181 4182 The format which is used for the sparse matrix input, is equivalent to a 4183 row-major ordering, i.e., for the following matrix, the input data expected is 4184 as shown 4185 .vb 4186 1 0 0 4187 2 0 3 P0 4188 ------- 4189 4 5 6 P1 4190 4191 Process0 [P0] rows_owned=[0,1] 4192 i = {0,1,3} [size = nrow+1 = 2+1] 4193 j = {0,0,2} [size = 3] 4194 v = {1,2,3} [size = 3] 4195 4196 Process1 [P1] rows_owned=[2] 4197 i = {0,3} [size = nrow+1 = 1+1] 4198 j = {0,1,2} [size = 3] 4199 v = {4,5,6} [size = 3] 4200 .ve 4201 4202 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4203 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4204 @*/ 4205 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat) 4206 { 4207 PetscFunctionBegin; 4208 PetscCheck(!i || !i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 4209 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4210 PetscCall(MatCreate(comm, mat)); 4211 PetscCall(MatSetSizes(*mat, m, n, M, N)); 4212 /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */ 4213 PetscCall(MatSetType(*mat, MATMPIAIJ)); 4214 PetscCall(MatMPIAIJSetPreallocationCSR(*mat, i, j, a)); 4215 PetscFunctionReturn(PETSC_SUCCESS); 4216 } 4217 4218 /*@ 4219 MatUpdateMPIAIJWithArrays - updates a `MATMPIAIJ` matrix using arrays that contain in standard 4220 CSR format for the local rows. Only the numerical values are updated the other arrays must be identical to what was passed 4221 from `MatCreateMPIAIJWithArrays()` 4222 4223 Deprecated: Use `MatUpdateMPIAIJWithArray()` 4224 4225 Collective 4226 4227 Input Parameters: 4228 + mat - the matrix 4229 . m - number of local rows (Cannot be `PETSC_DECIDE`) 4230 . n - This value should be the same as the local size used in creating the 4231 x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have 4232 calculated if N is given) For square matrices n is almost always m. 4233 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 4234 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 4235 . 
Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix 4236 . J - column indices 4237 - v - matrix values 4238 4239 Level: deprecated 4240 4241 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4242 `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4243 @*/ 4244 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[]) 4245 { 4246 PetscInt nnz, i; 4247 PetscBool nooffprocentries; 4248 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)mat->data; 4249 Mat_SeqAIJ *Ad = (Mat_SeqAIJ *)Aij->A->data; 4250 PetscScalar *ad, *ao; 4251 PetscInt ldi, Iii, md; 4252 const PetscInt *Adi = Ad->i; 4253 PetscInt *ld = Aij->ld; 4254 4255 PetscFunctionBegin; 4256 PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 4257 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4258 PetscCheck(m == mat->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()"); 4259 PetscCheck(n == mat->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()"); 4260 4261 PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad)); 4262 PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao)); 4263 4264 for (i = 0; i < m; i++) { 4265 if (PetscDefined(USE_DEBUG)) { 4266 for (PetscInt j = Ii[i] + 1; j < Ii[i + 1]; ++j) { 4267 PetscCheck(J[j] >= J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is not sorted", j - Ii[i], J[j], i); 4268 PetscCheck(J[j] != J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is identical to previous entry", j - Ii[i], J[j], i); 4269 } 4270 } 4271 nnz = Ii[i + 1] - Ii[i]; 4272 Iii = Ii[i]; 4273 ldi = ld[i]; 4274 md = Adi[i + 1] - Adi[i]; 4275 PetscCall(PetscArraycpy(ao, v + Iii, ldi)); 4276 PetscCall(PetscArraycpy(ad, v + Iii + ldi, md)); 4277 PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md)); 4278 ad += md; 4279 ao += nnz - md; 4280 } 4281 nooffprocentries = mat->nooffprocentries; 4282 mat->nooffprocentries = PETSC_TRUE; 4283 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad)); 4284 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao)); 4285 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A)); 4286 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B)); 4287 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 4288 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 4289 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 4290 mat->nooffprocentries = nooffprocentries; 4291 PetscFunctionReturn(PETSC_SUCCESS); 4292 } 4293 4294 /*@ 4295 MatUpdateMPIAIJWithArray - updates an `MATMPIAIJ` matrix using an array that contains the nonzero values 4296 4297 Collective 4298 4299 Input Parameters: 4300 + mat - the matrix 4301 - v - matrix values, stored by row 4302 4303 Level: intermediate 4304 4305 Notes: 4306 The matrix must have been obtained with `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` 4307 4308 The 
column indices in the call to `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` must have been sorted for this call to work correctly 4309 4310 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4311 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4312 @*/ 4313 PetscErrorCode MatUpdateMPIAIJWithArray(Mat mat, const PetscScalar v[]) 4314 { 4315 PetscInt nnz, i, m; 4316 PetscBool nooffprocentries; 4317 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)mat->data; 4318 Mat_SeqAIJ *Ad = (Mat_SeqAIJ *)Aij->A->data; 4319 Mat_SeqAIJ *Ao = (Mat_SeqAIJ *)Aij->B->data; 4320 PetscScalar *ad, *ao; 4321 const PetscInt *Adi = Ad->i, *Adj = Ao->i; 4322 PetscInt ldi, Iii, md; 4323 PetscInt *ld = Aij->ld; 4324 4325 PetscFunctionBegin; 4326 m = mat->rmap->n; 4327 4328 PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad)); 4329 PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao)); 4330 Iii = 0; 4331 for (i = 0; i < m; i++) { 4332 nnz = Adi[i + 1] - Adi[i] + Adj[i + 1] - Adj[i]; 4333 ldi = ld[i]; 4334 md = Adi[i + 1] - Adi[i]; 4335 PetscCall(PetscArraycpy(ad, v + Iii + ldi, md)); 4336 ad += md; 4337 if (ao) { 4338 PetscCall(PetscArraycpy(ao, v + Iii, ldi)); 4339 PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md)); 4340 ao += nnz - md; 4341 } 4342 Iii += nnz; 4343 } 4344 nooffprocentries = mat->nooffprocentries; 4345 mat->nooffprocentries = PETSC_TRUE; 4346 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad)); 4347 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao)); 4348 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A)); 4349 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B)); 4350 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 4351 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 4352 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 4353 mat->nooffprocentries = nooffprocentries; 4354 PetscFunctionReturn(PETSC_SUCCESS); 4355 } 4356 4357 /*@C 4358 MatCreateAIJ - Creates a sparse parallel matrix in `MATAIJ` format 4359 (the default parallel PETSc format). For good matrix assembly performance 4360 the user should preallocate the matrix storage by setting the parameters 4361 `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). 4362 4363 Collective 4364 4365 Input Parameters: 4366 + comm - MPI communicator 4367 . m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given) 4368 This value should be the same as the local size used in creating the 4369 y vector for the matrix-vector product y = Ax. 4370 . n - This value should be the same as the local size used in creating the 4371 x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have 4372 calculated if N is given) For square matrices n is almost always m. 4373 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 4374 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 4375 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4376 (same value is used for all local rows) 4377 . d_nnz - array containing the number of nonzeros in the various rows of the 4378 DIAGONAL portion of the local submatrix (possibly different for each row) 4379 or `NULL`, if `d_nz` is used to specify the nonzero structure. 4380 The size of this array is equal to the number of local rows, i.e 'm'. 4381 . 
o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4382 submatrix (same value is used for all local rows). 4383 - o_nnz - array containing the number of nonzeros in the various rows of the 4384 OFF-DIAGONAL portion of the local submatrix (possibly different for 4385 each row) or `NULL`, if `o_nz` is used to specify the nonzero 4386 structure. The size of this array is equal to the number 4387 of local rows, i.e., 'm'. 4388 4389 Output Parameter: 4390 . A - the matrix 4391 4392 Options Database Keys: 4393 + -mat_no_inode - Do not use inodes 4394 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4395 - -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in `MatMult()` of sparse parallel matrices. 4396 See viewer types in the manual page of `MatView()`. Of them, ascii_matlab, draw or binary cause the `VecScatter` 4397 to be viewed as a matrix. Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one `MatMult()` call. 4398 4399 Level: intermediate 4400 4401 Notes: 4402 It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, 4403 MatXXXXSetPreallocation() paradigm instead of this routine directly. 4404 [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`] 4405 4406 If the *_nnz parameter is given then the *_nz parameter is ignored 4407 4408 The `m`,`n`,`M`,`N` parameters specify the size of the matrix, and its partitioning across 4409 processors, while `d_nz`,`d_nnz`,`o_nz`,`o_nnz` parameters specify the approximate 4410 storage requirements for this matrix. 4411 4412 If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one 4413 processor then it must be used on all processors that share the object for 4414 that argument. 4415 4416 If `m` and `n` are not `PETSC_DECIDE`, then the values determine the `PetscLayout` of the matrix and the ranges returned by 4417 `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, and `MatGetOwnershipRangesColumn()`. 4418 4419 The user MUST specify either the local or global matrix dimensions 4420 (possibly both). 4421 4422 The parallel matrix is partitioned across processors such that the 4423 first `m0` rows belong to process 0, the next `m1` rows belong to 4424 process 1, the next `m2` rows belong to process 2, etc., where 4425 `m0`, `m1`, `m2`... are the input parameter `m` on each MPI process. I.e., each MPI process stores 4426 values corresponding to an [m x N] submatrix. 4427 4428 The columns are logically partitioned with the n0 columns belonging 4429 to the 0th partition, the next n1 columns belonging to the next 4430 partition, etc., where n0,n1,n2... are the input parameter 'n'. 4431 4432 The DIAGONAL portion of the local submatrix on any given processor 4433 is the submatrix corresponding to the rows and columns m,n 4434 corresponding to the given processor, i.e., the diagonal matrix on 4435 process 0 is [m0 x n0], the diagonal matrix on process 1 is [m1 x n1], 4436 etc. The remaining portion of the local submatrix [m x (N-n)] 4437 constitutes the OFF-DIAGONAL portion. The example below better 4438 illustrates this concept. 4439 4440 For a square global matrix we define each processor's diagonal portion 4441 to be its local rows and the corresponding columns (a square submatrix); 4442 each processor's off-diagonal portion encompasses the remainder of the 4443 local matrix (a rectangular submatrix).
4444 4445 If `o_nnz`, `d_nnz` are specified, then `o_nz`, and `d_nz` are ignored. 4446 4447 When calling this routine with a single process communicator, a matrix of 4448 type `MATSEQAIJ` is returned. If a matrix of type `MATMPIAIJ` is desired for this 4449 type of communicator, use the construction mechanism 4450 .vb 4451 MatCreate(..., &A); 4452 MatSetType(A, MATMPIAIJ); 4453 MatSetSizes(A, m, n, M, N); 4454 MatMPIAIJSetPreallocation(A, ...); 4455 .ve 4456 4457 By default, this format uses inodes (identical nodes) when possible. 4458 We search for consecutive rows with the same nonzero structure, thereby 4459 reusing matrix information to achieve increased efficiency. 4460 4461 Example Usage: 4462 Consider the following 8x8 matrix with 34 non-zero values, that is 4463 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 4464 proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown 4465 as follows 4466 4467 .vb 4468 1 2 0 | 0 3 0 | 0 4 4469 Proc0 0 5 6 | 7 0 0 | 8 0 4470 9 0 10 | 11 0 0 | 12 0 4471 ------------------------------------- 4472 13 0 14 | 15 16 17 | 0 0 4473 Proc1 0 18 0 | 19 20 21 | 0 0 4474 0 0 0 | 22 23 0 | 24 0 4475 ------------------------------------- 4476 Proc2 25 26 27 | 0 0 28 | 29 0 4477 30 0 0 | 31 32 33 | 0 34 4478 .ve 4479 4480 This can be represented as a collection of submatrices as 4481 4482 .vb 4483 A B C 4484 D E F 4485 G H I 4486 .ve 4487 4488 Where the submatrices A,B,C are owned by proc0, D,E,F are 4489 owned by proc1, and G,H,I are owned by proc2. 4490 4491 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4492 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4493 The 'M','N' parameters are 8,8, and have the same values on all procs. 4494 4495 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4496 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4497 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4498 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4499 part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ` 4500 matrix, and [DF] as another `MATSEQAIJ` matrix. 4501 4502 When the `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are 4503 allocated for every row of the local diagonal submatrix, and `o_nz` 4504 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 4505 One way to choose `d_nz` and `o_nz` is to use the maximum number of nonzeros per 4506 row of the local DIAGONAL and OFF-DIAGONAL submatrices, respectively. 4507 In this case, the values of `d_nz`,`o_nz` are 4508 .vb 4509 proc0 d_nz = 2, o_nz = 2 4510 proc1 d_nz = 3, o_nz = 2 4511 proc2 d_nz = 1, o_nz = 4 4512 .ve 4513 We are allocating m*(`d_nz`+`o_nz`) storage locations for every proc. This 4514 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10 4515 for proc2, i.e., we are using 12+15+10=37 storage locations to store 4516 34 values. 4517 4518 When `d_nnz`, `o_nnz` parameters are specified, the storage is specified 4519 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4520 In the above case the values for d_nnz,o_nnz are 4521 .vb 4522 proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2] 4523 proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1] 4524 proc2 d_nnz = [1,1] and o_nnz = [4,4] 4525 .ve 4526 Here the space allocated is the sum of all the above values, i.e., 34, and 4527 hence the preallocation is perfect.
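   With the `d_nnz`/`o_nnz` values above, each process calls this routine collectively with its own local
   sizes and counts. A minimal sketch of the call made by proc1 (illustrative only; it assumes the matrix
   lives on `PETSC_COMM_WORLD`, and value insertion and assembly are omitted)
.vb
   Mat      A;
   PetscInt d_nnz[3] = {3, 3, 2}, o_nnz[3] = {2, 1, 1};

   MatCreateAIJ(PETSC_COMM_WORLD, 3, 3, 8, 8, 0, d_nnz, 0, o_nnz, &A);
.ve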
4528 4529 .seealso: [](ch_matrices), `Mat`, [Sparse Matrix Creation](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4530 `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()`, `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, 4531 `MatGetOwnershipRangesColumn()`, `PetscLayout` 4532 @*/ 4533 PetscErrorCode MatCreateAIJ(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A) 4534 { 4535 PetscMPIInt size; 4536 4537 PetscFunctionBegin; 4538 PetscCall(MatCreate(comm, A)); 4539 PetscCall(MatSetSizes(*A, m, n, M, N)); 4540 PetscCallMPI(MPI_Comm_size(comm, &size)); 4541 if (size > 1) { 4542 PetscCall(MatSetType(*A, MATMPIAIJ)); 4543 PetscCall(MatMPIAIJSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz)); 4544 } else { 4545 PetscCall(MatSetType(*A, MATSEQAIJ)); 4546 PetscCall(MatSeqAIJSetPreallocation(*A, d_nz, d_nnz)); 4547 } 4548 PetscFunctionReturn(PETSC_SUCCESS); 4549 } 4550 4551 /*MC 4552 MatMPIAIJGetSeqAIJF90 - Returns the local pieces of this distributed matrix 4553 4554 Synopsis: 4555 MatMPIAIJGetSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr) 4556 4557 Not Collective 4558 4559 Input Parameter: 4560 . A - the `MATMPIAIJ` matrix 4561 4562 Output Parameters: 4563 + Ad - the diagonal portion of the matrix 4564 . Ao - the off-diagonal portion of the matrix 4565 . colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix 4566 - ierr - error code 4567 4568 Level: advanced 4569 4570 Note: 4571 Use `MatMPIAIJRestoreSeqAIJF90()` when you no longer need access to the matrices and `colmap` 4572 4573 .seealso: [](ch_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJRestoreSeqAIJF90()` 4574 M*/ 4575 4576 /*MC 4577 MatMPIAIJRestoreSeqAIJF90 - call after `MatMPIAIJGetSeqAIJF90()` when you no longer need access to the matrices and `colmap` 4578 4579 Synopsis: 4580 MatMPIAIJRestoreSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr) 4581 4582 Not Collective 4583 4584 Input Parameters: 4585 + A - the `MATMPIAIJ` matrix 4586 . Ad - the diagonal portion of the matrix 4587 . Ao - the off-diagonal portion of the matrix 4588 . colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix 4589 - ierr - error code 4590 4591 Level: advanced 4592 4593 .seealso: [](ch_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJGetSeqAIJF90()` 4594 M*/ 4595 4596 /*@C 4597 MatMPIAIJGetSeqAIJ - Returns the local pieces of this distributed matrix 4598 4599 Not Collective 4600 4601 Input Parameter: 4602 . A - The `MATMPIAIJ` matrix 4603 4604 Output Parameters: 4605 + Ad - The local diagonal block as a `MATSEQAIJ` matrix 4606 . Ao - The local off-diagonal block as a `MATSEQAIJ` matrix 4607 - colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix 4608 4609 Level: intermediate 4610 4611 Note: 4612 The rows in `Ad` and `Ao` are in [0, Nr), where Nr is the number of local rows on this process. The columns 4613 in `Ad` are in [0, Nc) where Nc is the number of local columns. The columns in `Ao` are in [0, Nco), where Nco is 4614 the number of nonzero columns in the local off-diagonal piece of the matrix `A`.
The array colmap maps these 4615 local column numbers to global column numbers in the original matrix. 4616 4617 Fortran Notes: 4618 `MatMPIAIJGetSeqAIJ()` Fortran binding is deprecated (since PETSc 3.19), use `MatMPIAIJGetSeqAIJF90()` 4619 4620 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJF90()`, `MatMPIAIJRestoreSeqAIJF90()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATSEQAIJ` 4621 @*/ 4622 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[]) 4623 { 4624 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 4625 PetscBool flg; 4626 4627 PetscFunctionBegin; 4628 PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &flg)); 4629 PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIAIJ matrix as input"); 4630 if (Ad) *Ad = a->A; 4631 if (Ao) *Ao = a->B; 4632 if (colmap) *colmap = a->garray; 4633 PetscFunctionReturn(PETSC_SUCCESS); 4634 } 4635 4636 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 4637 { 4638 PetscInt m, N, i, rstart, nnz, Ii; 4639 PetscInt *indx; 4640 PetscScalar *values; 4641 MatType rootType; 4642 4643 PetscFunctionBegin; 4644 PetscCall(MatGetSize(inmat, &m, &N)); 4645 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4646 PetscInt *dnz, *onz, sum, bs, cbs; 4647 4648 if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N)); 4649 /* Check sum(n) = N */ 4650 PetscCall(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm)); 4651 PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N); 4652 4653 PetscCallMPI(MPI_Scan(&m, &rstart, 1, MPIU_INT, MPI_SUM, comm)); 4654 rstart -= m; 4655 4656 MatPreallocateBegin(comm, m, n, dnz, onz); 4657 for (i = 0; i < m; i++) { 4658 PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, NULL)); 4659 PetscCall(MatPreallocateSet(i + rstart, nnz, indx, dnz, onz)); 4660 PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, NULL)); 4661 } 4662 4663 PetscCall(MatCreate(comm, outmat)); 4664 PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 4665 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 4666 PetscCall(MatSetBlockSizes(*outmat, bs, cbs)); 4667 PetscCall(MatGetRootType_Private(inmat, &rootType)); 4668 PetscCall(MatSetType(*outmat, rootType)); 4669 PetscCall(MatSeqAIJSetPreallocation(*outmat, 0, dnz)); 4670 PetscCall(MatMPIAIJSetPreallocation(*outmat, 0, dnz, 0, onz)); 4671 MatPreallocateEnd(dnz, onz); 4672 PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 4673 } 4674 4675 /* numeric phase */ 4676 PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL)); 4677 for (i = 0; i < m; i++) { 4678 PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, &values)); 4679 Ii = i + rstart; 4680 PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES)); 4681 PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, &values)); 4682 } 4683 PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY)); 4684 PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY)); 4685 PetscFunctionReturn(PETSC_SUCCESS); 4686 } 4687 4688 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data) 4689 { 4690 Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data; 4691 4692 PetscFunctionBegin; 4693 if (!merge) PetscFunctionReturn(PETSC_SUCCESS); 4694 PetscCall(PetscFree(merge->id_r)); 4695 PetscCall(PetscFree(merge->len_s)); 4696 
PetscCall(PetscFree(merge->len_r)); 4697 PetscCall(PetscFree(merge->bi)); 4698 PetscCall(PetscFree(merge->bj)); 4699 PetscCall(PetscFree(merge->buf_ri[0])); 4700 PetscCall(PetscFree(merge->buf_ri)); 4701 PetscCall(PetscFree(merge->buf_rj[0])); 4702 PetscCall(PetscFree(merge->buf_rj)); 4703 PetscCall(PetscFree(merge->coi)); 4704 PetscCall(PetscFree(merge->coj)); 4705 PetscCall(PetscFree(merge->owners_co)); 4706 PetscCall(PetscLayoutDestroy(&merge->rowmap)); 4707 PetscCall(PetscFree(merge)); 4708 PetscFunctionReturn(PETSC_SUCCESS); 4709 } 4710 4711 #include <../src/mat/utils/freespace.h> 4712 #include <petscbt.h> 4713 4714 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat, Mat mpimat) 4715 { 4716 MPI_Comm comm; 4717 Mat_SeqAIJ *a = (Mat_SeqAIJ *)seqmat->data; 4718 PetscMPIInt size, rank, taga, *len_s; 4719 PetscInt N = mpimat->cmap->N, i, j, *owners, *ai = a->i, *aj; 4720 PetscInt proc, m; 4721 PetscInt **buf_ri, **buf_rj; 4722 PetscInt k, anzi, *bj_i, *bi, *bj, arow, bnzi, nextaj; 4723 PetscInt nrows, **buf_ri_k, **nextrow, **nextai; 4724 MPI_Request *s_waits, *r_waits; 4725 MPI_Status *status; 4726 const MatScalar *aa, *a_a; 4727 MatScalar **abuf_r, *ba_i; 4728 Mat_Merge_SeqsToMPI *merge; 4729 PetscContainer container; 4730 4731 PetscFunctionBegin; 4732 PetscCall(PetscObjectGetComm((PetscObject)mpimat, &comm)); 4733 PetscCall(PetscLogEventBegin(MAT_Seqstompinum, seqmat, 0, 0, 0)); 4734 4735 PetscCallMPI(MPI_Comm_size(comm, &size)); 4736 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 4737 4738 PetscCall(PetscObjectQuery((PetscObject)mpimat, "MatMergeSeqsToMPI", (PetscObject *)&container)); 4739 PetscCheck(container, PetscObjectComm((PetscObject)mpimat), PETSC_ERR_PLIB, "Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic"); 4740 PetscCall(PetscContainerGetPointer(container, (void **)&merge)); 4741 PetscCall(MatSeqAIJGetArrayRead(seqmat, &a_a)); 4742 aa = a_a; 4743 4744 bi = merge->bi; 4745 bj = merge->bj; 4746 buf_ri = merge->buf_ri; 4747 buf_rj = merge->buf_rj; 4748 4749 PetscCall(PetscMalloc1(size, &status)); 4750 owners = merge->rowmap->range; 4751 len_s = merge->len_s; 4752 4753 /* send and recv matrix values */ 4754 PetscCall(PetscObjectGetNewTag((PetscObject)mpimat, &taga)); 4755 PetscCall(PetscPostIrecvScalar(comm, taga, merge->nrecv, merge->id_r, merge->len_r, &abuf_r, &r_waits)); 4756 4757 PetscCall(PetscMalloc1(merge->nsend + 1, &s_waits)); 4758 for (proc = 0, k = 0; proc < size; proc++) { 4759 if (!len_s[proc]) continue; 4760 i = owners[proc]; 4761 PetscCallMPI(MPI_Isend(aa + ai[i], len_s[proc], MPIU_MATSCALAR, proc, taga, comm, s_waits + k)); 4762 k++; 4763 } 4764 4765 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, r_waits, status)); 4766 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, s_waits, status)); 4767 PetscCall(PetscFree(status)); 4768 4769 PetscCall(PetscFree(s_waits)); 4770 PetscCall(PetscFree(r_waits)); 4771 4772 /* insert mat values of mpimat */ 4773 PetscCall(PetscMalloc1(N, &ba_i)); 4774 PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai)); 4775 4776 for (k = 0; k < merge->nrecv; k++) { 4777 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4778 nrows = *buf_ri_k[k]; 4779 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4780 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4781 } 4782 4783 /* set values of ba */ 4784 m = merge->rowmap->n; 4785 for (i = 0; i < m; i++) { 4786 arow = owners[rank] + i; 
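/* arow is the global index of the i-th locally owned row */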
4787 bj_i = bj + bi[i]; /* col indices of the i-th row of mpimat */ 4788 bnzi = bi[i + 1] - bi[i]; 4789 PetscCall(PetscArrayzero(ba_i, bnzi)); 4790 4791 /* add local non-zero vals of this proc's seqmat into ba */ 4792 anzi = ai[arow + 1] - ai[arow]; 4793 aj = a->j + ai[arow]; 4794 aa = a_a + ai[arow]; 4795 nextaj = 0; 4796 for (j = 0; nextaj < anzi; j++) { 4797 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4798 ba_i[j] += aa[nextaj++]; 4799 } 4800 } 4801 4802 /* add received vals into ba */ 4803 for (k = 0; k < merge->nrecv; k++) { /* k-th received message */ 4804 /* i-th row */ 4805 if (i == *nextrow[k]) { 4806 anzi = *(nextai[k] + 1) - *nextai[k]; 4807 aj = buf_rj[k] + *nextai[k]; 4808 aa = abuf_r[k] + *nextai[k]; 4809 nextaj = 0; 4810 for (j = 0; nextaj < anzi; j++) { 4811 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4812 ba_i[j] += aa[nextaj++]; 4813 } 4814 } 4815 nextrow[k]++; 4816 nextai[k]++; 4817 } 4818 } 4819 PetscCall(MatSetValues(mpimat, 1, &arow, bnzi, bj_i, ba_i, INSERT_VALUES)); 4820 } 4821 PetscCall(MatSeqAIJRestoreArrayRead(seqmat, &a_a)); 4822 PetscCall(MatAssemblyBegin(mpimat, MAT_FINAL_ASSEMBLY)); 4823 PetscCall(MatAssemblyEnd(mpimat, MAT_FINAL_ASSEMBLY)); 4824 4825 PetscCall(PetscFree(abuf_r[0])); 4826 PetscCall(PetscFree(abuf_r)); 4827 PetscCall(PetscFree(ba_i)); 4828 PetscCall(PetscFree3(buf_ri_k, nextrow, nextai)); 4829 PetscCall(PetscLogEventEnd(MAT_Seqstompinum, seqmat, 0, 0, 0)); 4830 PetscFunctionReturn(PETSC_SUCCESS); 4831 } 4832 4833 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, Mat *mpimat) 4834 { 4835 Mat B_mpi; 4836 Mat_SeqAIJ *a = (Mat_SeqAIJ *)seqmat->data; 4837 PetscMPIInt size, rank, tagi, tagj, *len_s, *len_si, *len_ri; 4838 PetscInt **buf_rj, **buf_ri, **buf_ri_k; 4839 PetscInt M = seqmat->rmap->n, N = seqmat->cmap->n, i, *owners, *ai = a->i, *aj = a->j; 4840 PetscInt len, proc, *dnz, *onz, bs, cbs; 4841 PetscInt k, anzi, *bi, *bj, *lnk, nlnk, arow, bnzi; 4842 PetscInt nrows, *buf_s, *buf_si, *buf_si_i, **nextrow, **nextai; 4843 MPI_Request *si_waits, *sj_waits, *ri_waits, *rj_waits; 4844 MPI_Status *status; 4845 PetscFreeSpaceList free_space = NULL, current_space = NULL; 4846 PetscBT lnkbt; 4847 Mat_Merge_SeqsToMPI *merge; 4848 PetscContainer container; 4849 4850 PetscFunctionBegin; 4851 PetscCall(PetscLogEventBegin(MAT_Seqstompisym, seqmat, 0, 0, 0)); 4852 4853 /* make sure it is a PETSc comm */ 4854 PetscCall(PetscCommDuplicate(comm, &comm, NULL)); 4855 PetscCallMPI(MPI_Comm_size(comm, &size)); 4856 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 4857 4858 PetscCall(PetscNew(&merge)); 4859 PetscCall(PetscMalloc1(size, &status)); 4860 4861 /* determine row ownership */ 4862 PetscCall(PetscLayoutCreate(comm, &merge->rowmap)); 4863 PetscCall(PetscLayoutSetLocalSize(merge->rowmap, m)); 4864 PetscCall(PetscLayoutSetSize(merge->rowmap, M)); 4865 PetscCall(PetscLayoutSetBlockSize(merge->rowmap, 1)); 4866 PetscCall(PetscLayoutSetUp(merge->rowmap)); 4867 PetscCall(PetscMalloc1(size, &len_si)); 4868 PetscCall(PetscMalloc1(size, &merge->len_s)); 4869 4870 m = merge->rowmap->n; 4871 owners = merge->rowmap->range; 4872 4873 /* determine the number of messages to send, their lengths */ 4874 len_s = merge->len_s; 4875 4876 len = 0; /* length of buf_si[] */ 4877 merge->nsend = 0; 4878 for (proc = 0; proc < size; proc++) { 4879 len_si[proc] = 0; 4880 if (proc == rank) { 4881 len_s[proc] = 0; 4882 } else { 4883 len_si[proc] = owners[proc + 1] - owners[proc] + 1; 4884 len_s[proc] = ai[owners[proc + 1]] - 
ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4885 } 4886 if (len_s[proc]) { 4887 merge->nsend++; 4888 nrows = 0; 4889 for (i = owners[proc]; i < owners[proc + 1]; i++) { 4890 if (ai[i + 1] > ai[i]) nrows++; 4891 } 4892 len_si[proc] = 2 * (nrows + 1); 4893 len += len_si[proc]; 4894 } 4895 } 4896 4897 /* determine the number and length of messages to receive for ij-structure */ 4898 PetscCall(PetscGatherNumberOfMessages(comm, NULL, len_s, &merge->nrecv)); 4899 PetscCall(PetscGatherMessageLengths2(comm, merge->nsend, merge->nrecv, len_s, len_si, &merge->id_r, &merge->len_r, &len_ri)); 4900 4901 /* post the Irecv of j-structure */ 4902 PetscCall(PetscCommGetNewTag(comm, &tagj)); 4903 PetscCall(PetscPostIrecvInt(comm, tagj, merge->nrecv, merge->id_r, merge->len_r, &buf_rj, &rj_waits)); 4904 4905 /* post the Isend of j-structure */ 4906 PetscCall(PetscMalloc2(merge->nsend, &si_waits, merge->nsend, &sj_waits)); 4907 4908 for (proc = 0, k = 0; proc < size; proc++) { 4909 if (!len_s[proc]) continue; 4910 i = owners[proc]; 4911 PetscCallMPI(MPI_Isend(aj + ai[i], len_s[proc], MPIU_INT, proc, tagj, comm, sj_waits + k)); 4912 k++; 4913 } 4914 4915 /* receives and sends of j-structure are complete */ 4916 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, rj_waits, status)); 4917 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, sj_waits, status)); 4918 4919 /* send and recv i-structure */ 4920 PetscCall(PetscCommGetNewTag(comm, &tagi)); 4921 PetscCall(PetscPostIrecvInt(comm, tagi, merge->nrecv, merge->id_r, len_ri, &buf_ri, &ri_waits)); 4922 4923 PetscCall(PetscMalloc1(len + 1, &buf_s)); 4924 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4925 for (proc = 0, k = 0; proc < size; proc++) { 4926 if (!len_s[proc]) continue; 4927 /* form outgoing message for i-structure: 4928 buf_si[0]: nrows to be sent 4929 [1:nrows]: row index (global) 4930 [nrows+1:2*nrows+1]: i-structure index 4931 */ 4932 nrows = len_si[proc] / 2 - 1; 4933 buf_si_i = buf_si + nrows + 1; 4934 buf_si[0] = nrows; 4935 buf_si_i[0] = 0; 4936 nrows = 0; 4937 for (i = owners[proc]; i < owners[proc + 1]; i++) { 4938 anzi = ai[i + 1] - ai[i]; 4939 if (anzi) { 4940 buf_si_i[nrows + 1] = buf_si_i[nrows] + anzi; /* i-structure */ 4941 buf_si[nrows + 1] = i - owners[proc]; /* local row index */ 4942 nrows++; 4943 } 4944 } 4945 PetscCallMPI(MPI_Isend(buf_si, len_si[proc], MPIU_INT, proc, tagi, comm, si_waits + k)); 4946 k++; 4947 buf_si += len_si[proc]; 4948 } 4949 4950 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, ri_waits, status)); 4951 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, si_waits, status)); 4952 4953 PetscCall(PetscInfo(seqmat, "nsend: %d, nrecv: %d\n", merge->nsend, merge->nrecv)); 4954 for (i = 0; i < merge->nrecv; i++) PetscCall(PetscInfo(seqmat, "recv len_ri=%d, len_rj=%d from [%d]\n", len_ri[i], merge->len_r[i], merge->id_r[i])); 4955 4956 PetscCall(PetscFree(len_si)); 4957 PetscCall(PetscFree(len_ri)); 4958 PetscCall(PetscFree(rj_waits)); 4959 PetscCall(PetscFree2(si_waits, sj_waits)); 4960 PetscCall(PetscFree(ri_waits)); 4961 PetscCall(PetscFree(buf_s)); 4962 PetscCall(PetscFree(status)); 4963 4964 /* compute a local seq matrix in each processor */ 4965 /* allocate bi array and free space for accumulating nonzero column info */ 4966 PetscCall(PetscMalloc1(m + 1, &bi)); 4967 bi[0] = 0; 4968 4969 /* create and initialize a linked list */ 4970 nlnk = N + 1; 4971 PetscCall(PetscLLCreate(N, N, nlnk, lnk, lnkbt)); 4972 4973 /* initial FreeSpace size is 2*(num of 
local nnz(seqmat)) */ 4974 len = ai[owners[rank + 1]] - ai[owners[rank]]; 4975 PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2, len) + 1, &free_space)); 4976 4977 current_space = free_space; 4978 4979 /* determine symbolic info for each local row */ 4980 PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai)); 4981 4982 for (k = 0; k < merge->nrecv; k++) { 4983 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4984 nrows = *buf_ri_k[k]; 4985 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4986 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4987 } 4988 4989 MatPreallocateBegin(comm, m, n, dnz, onz); 4990 len = 0; 4991 for (i = 0; i < m; i++) { 4992 bnzi = 0; 4993 /* add local non-zero cols of this proc's seqmat into lnk */ 4994 arow = owners[rank] + i; 4995 anzi = ai[arow + 1] - ai[arow]; 4996 aj = a->j + ai[arow]; 4997 PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt)); 4998 bnzi += nlnk; 4999 /* add received col data into lnk */ 5000 for (k = 0; k < merge->nrecv; k++) { /* k-th received message */ 5001 if (i == *nextrow[k]) { /* i-th row */ 5002 anzi = *(nextai[k] + 1) - *nextai[k]; 5003 aj = buf_rj[k] + *nextai[k]; 5004 PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt)); 5005 bnzi += nlnk; 5006 nextrow[k]++; 5007 nextai[k]++; 5008 } 5009 } 5010 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 5011 5012 /* if free space is not available, make more free space */ 5013 if (current_space->local_remaining < bnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi, current_space->total_array_size), ¤t_space)); 5014 /* copy data into free space, then initialize lnk */ 5015 PetscCall(PetscLLClean(N, N, bnzi, lnk, current_space->array, lnkbt)); 5016 PetscCall(MatPreallocateSet(i + owners[rank], bnzi, current_space->array, dnz, onz)); 5017 5018 current_space->array += bnzi; 5019 current_space->local_used += bnzi; 5020 current_space->local_remaining -= bnzi; 5021 5022 bi[i + 1] = bi[i] + bnzi; 5023 } 5024 5025 PetscCall(PetscFree3(buf_ri_k, nextrow, nextai)); 5026 5027 PetscCall(PetscMalloc1(bi[m] + 1, &bj)); 5028 PetscCall(PetscFreeSpaceContiguous(&free_space, bj)); 5029 PetscCall(PetscLLDestroy(lnk, lnkbt)); 5030 5031 /* create symbolic parallel matrix B_mpi */ 5032 PetscCall(MatGetBlockSizes(seqmat, &bs, &cbs)); 5033 PetscCall(MatCreate(comm, &B_mpi)); 5034 if (n == PETSC_DECIDE) { 5035 PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, N)); 5036 } else { 5037 PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 5038 } 5039 PetscCall(MatSetBlockSizes(B_mpi, bs, cbs)); 5040 PetscCall(MatSetType(B_mpi, MATMPIAIJ)); 5041 PetscCall(MatMPIAIJSetPreallocation(B_mpi, 0, dnz, 0, onz)); 5042 MatPreallocateEnd(dnz, onz); 5043 PetscCall(MatSetOption(B_mpi, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE)); 5044 5045 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 5046 B_mpi->assembled = PETSC_FALSE; 5047 merge->bi = bi; 5048 merge->bj = bj; 5049 merge->buf_ri = buf_ri; 5050 merge->buf_rj = buf_rj; 5051 merge->coi = NULL; 5052 merge->coj = NULL; 5053 merge->owners_co = NULL; 5054 5055 PetscCall(PetscCommDestroy(&comm)); 5056 5057 /* attach the supporting struct to B_mpi for reuse */ 5058 PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container)); 5059 PetscCall(PetscContainerSetPointer(container, merge)); 5060 PetscCall(PetscContainerSetUserDestroy(container, 
MatDestroy_MPIAIJ_SeqsToMPI)); 5061 PetscCall(PetscObjectCompose((PetscObject)B_mpi, "MatMergeSeqsToMPI", (PetscObject)container)); 5062 PetscCall(PetscContainerDestroy(&container)); 5063 *mpimat = B_mpi; 5064 5065 PetscCall(PetscLogEventEnd(MAT_Seqstompisym, seqmat, 0, 0, 0)); 5066 PetscFunctionReturn(PETSC_SUCCESS); 5067 } 5068 5069 /*@C 5070 MatCreateMPIAIJSumSeqAIJ - Creates a `MATMPIAIJ` matrix by adding sequential 5071 matrices from each processor 5072 5073 Collective 5074 5075 Input Parameters: 5076 + comm - the communicator the parallel matrix will live on 5077 . seqmat - the input sequential matrix 5078 . m - number of local rows (or `PETSC_DECIDE`) 5079 . n - number of local columns (or `PETSC_DECIDE`) 5080 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5081 5082 Output Parameter: 5083 . mpimat - the parallel matrix generated 5084 5085 Level: advanced 5086 5087 Note: 5088 The dimensions of the sequential matrix on each process MUST be the same. 5089 The input `seqmat` is included in the container "Mat_Merge_SeqsToMPI", and will be 5090 destroyed when `mpimat` is destroyed. Call `PetscObjectQuery()` to access `seqmat`. 5091 5092 .seealso: [](ch_matrices), `Mat`, `MatCreateAIJ()` 5093 @*/ 5094 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, MatReuse scall, Mat *mpimat) 5095 { 5096 PetscMPIInt size; 5097 5098 PetscFunctionBegin; 5099 PetscCallMPI(MPI_Comm_size(comm, &size)); 5100 if (size == 1) { 5101 PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0)); 5102 if (scall == MAT_INITIAL_MATRIX) { 5103 PetscCall(MatDuplicate(seqmat, MAT_COPY_VALUES, mpimat)); 5104 } else { 5105 PetscCall(MatCopy(seqmat, *mpimat, SAME_NONZERO_PATTERN)); 5106 } 5107 PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0)); 5108 PetscFunctionReturn(PETSC_SUCCESS); 5109 } 5110 PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0)); 5111 if (scall == MAT_INITIAL_MATRIX) PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm, seqmat, m, n, mpimat)); 5112 PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat, *mpimat)); 5113 PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0)); 5114 PetscFunctionReturn(PETSC_SUCCESS); 5115 } 5116 5117 /*@ 5118 MatAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATAIJ` matrix. 5119 5120 Not Collective 5121 5122 Input Parameter: 5123 . A - the matrix 5124 5125 Output Parameter: 5126 . A_loc - the local sequential matrix generated 5127 5128 Level: developer 5129 5130 Notes: 5131 The matrix is created by taking `A`'s local rows and putting them into a sequential matrix 5132 with `mlocal` rows and `n` columns, where `mlocal` is obtained with `MatGetLocalSize()` and 5133 `n` is the global column count obtained with `MatGetSize()`. 5134 5135 In other words, it combines the two parts of a parallel `MATMPIAIJ` matrix on each process into a single matrix. 5136 5137 For parallel matrices this creates an entirely new matrix. If the matrix is sequential it merely increases the reference count.
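   A minimal usage sketch (illustrative only; error checking omitted)
.vb
   Mat A_loc;

   MatAIJGetLocalMat(A, &A_loc);
.ve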
5138 5139 Destroy the matrix with `MatDestroy()` 5140 5141 .seealso: [](ch_matrices), `Mat`, `MatMPIAIJGetLocalMat()` 5142 @*/ 5143 PetscErrorCode MatAIJGetLocalMat(Mat A, Mat *A_loc) 5144 { 5145 PetscBool mpi; 5146 5147 PetscFunctionBegin; 5148 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &mpi)); 5149 if (mpi) { 5150 PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, A_loc)); 5151 } else { 5152 *A_loc = A; 5153 PetscCall(PetscObjectReference((PetscObject)*A_loc)); 5154 } 5155 PetscFunctionReturn(PETSC_SUCCESS); 5156 } 5157 5158 /*@ 5159 MatMPIAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix. 5160 5161 Not Collective 5162 5163 Input Parameters: 5164 + A - the matrix 5165 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5166 5167 Output Parameter: 5168 . A_loc - the local sequential matrix generated 5169 5170 Level: developer 5171 5172 Notes: 5173 The matrix is created by taking all `A`'s local rows and putting them into a sequential 5174 matrix with `mlocal` rows and `n` columns. `mlocal` is the row count obtained with 5175 `MatGetLocalSize()` and `n` is the global column count obtained with `MatGetSize()`. 5176 5177 In other words, it combines the two parts of a parallel `MATMPIAIJ` matrix on each process into a single matrix. 5178 5179 When `A` is sequential and `MAT_INITIAL_MATRIX` is requested, the matrix returned is the diagonal part of `A` (which contains the entire matrix), 5180 with its reference count increased by one. Hence changing values of `A_loc` changes `A`. If `MAT_REUSE_MATRIX` is requested on a sequential matrix 5181 then `MatCopy`(Adiag,*`A_loc`,`SAME_NONZERO_PATTERN`) is called to fill `A_loc`. Thus one can preallocate the appropriate sequential matrix `A_loc` 5182 and then call this routine with `MAT_REUSE_MATRIX`. In this case, one can modify the values of `A_loc` without affecting the original sequential matrix.
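   A minimal usage sketch (illustrative only; error checking omitted). The first call creates `A_loc`, the second
   refreshes its values after the entries of `A` have changed, and the caller destroys it when no longer needed
.vb
   Mat A_loc;

   MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &A_loc);
   MatMPIAIJGetLocalMat(A, MAT_REUSE_MATRIX, &A_loc);
   MatDestroy(&A_loc);
.ve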
5183 5184 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()` 5185 @*/ 5186 PetscErrorCode MatMPIAIJGetLocalMat(Mat A, MatReuse scall, Mat *A_loc) 5187 { 5188 Mat_MPIAIJ *mpimat = (Mat_MPIAIJ *)A->data; 5189 Mat_SeqAIJ *mat, *a, *b; 5190 PetscInt *ai, *aj, *bi, *bj, *cmap = mpimat->garray; 5191 const PetscScalar *aa, *ba, *aav, *bav; 5192 PetscScalar *ca, *cam; 5193 PetscMPIInt size; 5194 PetscInt am = A->rmap->n, i, j, k, cstart = A->cmap->rstart; 5195 PetscInt *ci, *cj, col, ncols_d, ncols_o, jo; 5196 PetscBool match; 5197 5198 PetscFunctionBegin; 5199 PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &match)); 5200 PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input"); 5201 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 5202 if (size == 1) { 5203 if (scall == MAT_INITIAL_MATRIX) { 5204 PetscCall(PetscObjectReference((PetscObject)mpimat->A)); 5205 *A_loc = mpimat->A; 5206 } else if (scall == MAT_REUSE_MATRIX) { 5207 PetscCall(MatCopy(mpimat->A, *A_loc, SAME_NONZERO_PATTERN)); 5208 } 5209 PetscFunctionReturn(PETSC_SUCCESS); 5210 } 5211 5212 PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0)); 5213 a = (Mat_SeqAIJ *)mpimat->A->data; 5214 b = (Mat_SeqAIJ *)mpimat->B->data; 5215 ai = a->i; 5216 aj = a->j; 5217 bi = b->i; 5218 bj = b->j; 5219 PetscCall(MatSeqAIJGetArrayRead(mpimat->A, &aav)); 5220 PetscCall(MatSeqAIJGetArrayRead(mpimat->B, &bav)); 5221 aa = aav; 5222 ba = bav; 5223 if (scall == MAT_INITIAL_MATRIX) { 5224 PetscCall(PetscMalloc1(1 + am, &ci)); 5225 ci[0] = 0; 5226 for (i = 0; i < am; i++) ci[i + 1] = ci[i] + (ai[i + 1] - ai[i]) + (bi[i + 1] - bi[i]); 5227 PetscCall(PetscMalloc1(1 + ci[am], &cj)); 5228 PetscCall(PetscMalloc1(1 + ci[am], &ca)); 5229 k = 0; 5230 for (i = 0; i < am; i++) { 5231 ncols_o = bi[i + 1] - bi[i]; 5232 ncols_d = ai[i + 1] - ai[i]; 5233 /* off-diagonal portion of A */ 5234 for (jo = 0; jo < ncols_o; jo++) { 5235 col = cmap[*bj]; 5236 if (col >= cstart) break; 5237 cj[k] = col; 5238 bj++; 5239 ca[k++] = *ba++; 5240 } 5241 /* diagonal portion of A */ 5242 for (j = 0; j < ncols_d; j++) { 5243 cj[k] = cstart + *aj++; 5244 ca[k++] = *aa++; 5245 } 5246 /* off-diagonal portion of A */ 5247 for (j = jo; j < ncols_o; j++) { 5248 cj[k] = cmap[*bj++]; 5249 ca[k++] = *ba++; 5250 } 5251 } 5252 /* put together the new matrix */ 5253 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, A->cmap->N, ci, cj, ca, A_loc)); 5254 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5255 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5256 mat = (Mat_SeqAIJ *)(*A_loc)->data; 5257 mat->free_a = PETSC_TRUE; 5258 mat->free_ij = PETSC_TRUE; 5259 mat->nonew = 0; 5260 } else if (scall == MAT_REUSE_MATRIX) { 5261 mat = (Mat_SeqAIJ *)(*A_loc)->data; 5262 ci = mat->i; 5263 cj = mat->j; 5264 PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &cam)); 5265 for (i = 0; i < am; i++) { 5266 /* off-diagonal portion of A */ 5267 ncols_o = bi[i + 1] - bi[i]; 5268 for (jo = 0; jo < ncols_o; jo++) { 5269 col = cmap[*bj]; 5270 if (col >= cstart) break; 5271 *cam++ = *ba++; 5272 bj++; 5273 } 5274 /* diagonal portion of A */ 5275 ncols_d = ai[i + 1] - ai[i]; 5276 for (j = 0; j < ncols_d; j++) *cam++ = *aa++; 5277 /* off-diagonal portion of A */ 5278 for (j = jo; j < ncols_o; j++) { 5279 *cam++ = *ba++; 5280 bj++; 5281 } 5282 } 5283 PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &cam)); 5284 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall); 5285 PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A, &aav)); 5286 PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B, &bav)); 5287 PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0)); 5288 PetscFunctionReturn(PETSC_SUCCESS); 5289 } 5290 5291 /*@ 5292 MatMPIAIJGetLocalMatMerge - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into a sequential matrix with 5293 mlocal rows and n columns. Where n is the sum of the number of columns of the diagonal and off-diagonal part 5294 5295 Not Collective 5296 5297 Input Parameters: 5298 + A - the matrix 5299 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5300 5301 Output Parameters: 5302 + glob - sequential `IS` with global indices associated with the columns of the local sequential matrix generated (can be `NULL`) 5303 - A_loc - the local sequential matrix generated 5304 5305 Level: developer 5306 5307 Note: 5308 This is different from `MatMPIAIJGetLocalMat()` since the first columns in the returning matrix are those associated with the diagonal 5309 part, then those associated with the off-diagonal part (in its local ordering) 5310 5311 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()` 5312 @*/ 5313 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A, MatReuse scall, IS *glob, Mat *A_loc) 5314 { 5315 Mat Ao, Ad; 5316 const PetscInt *cmap; 5317 PetscMPIInt size; 5318 PetscErrorCode (*f)(Mat, MatReuse, IS *, Mat *); 5319 5320 PetscFunctionBegin; 5321 PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &cmap)); 5322 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 5323 if (size == 1) { 5324 if (scall == MAT_INITIAL_MATRIX) { 5325 PetscCall(PetscObjectReference((PetscObject)Ad)); 5326 *A_loc = Ad; 5327 } else if (scall == MAT_REUSE_MATRIX) { 5328 PetscCall(MatCopy(Ad, *A_loc, SAME_NONZERO_PATTERN)); 5329 } 5330 if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad), Ad->cmap->n, Ad->cmap->rstart, 1, glob)); 5331 PetscFunctionReturn(PETSC_SUCCESS); 5332 } 5333 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJGetLocalMatMerge_C", &f)); 5334 PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0)); 5335 if (f) { 5336 PetscCall((*f)(A, scall, glob, A_loc)); 5337 } else { 5338 Mat_SeqAIJ *a = (Mat_SeqAIJ *)Ad->data; 5339 Mat_SeqAIJ *b = (Mat_SeqAIJ *)Ao->data; 5340 Mat_SeqAIJ *c; 5341 PetscInt *ai = a->i, *aj = a->j; 5342 PetscInt *bi = b->i, *bj = b->j; 5343 PetscInt *ci, *cj; 5344 const PetscScalar *aa, *ba; 5345 PetscScalar *ca; 5346 PetscInt i, j, am, dn, on; 5347 5348 
PetscCall(MatGetLocalSize(Ad, &am, &dn)); 5349 PetscCall(MatGetLocalSize(Ao, NULL, &on)); 5350 PetscCall(MatSeqAIJGetArrayRead(Ad, &aa)); 5351 PetscCall(MatSeqAIJGetArrayRead(Ao, &ba)); 5352 if (scall == MAT_INITIAL_MATRIX) { 5353 PetscInt k; 5354 PetscCall(PetscMalloc1(1 + am, &ci)); 5355 PetscCall(PetscMalloc1(ai[am] + bi[am], &cj)); 5356 PetscCall(PetscMalloc1(ai[am] + bi[am], &ca)); 5357 ci[0] = 0; 5358 for (i = 0, k = 0; i < am; i++) { 5359 const PetscInt ncols_o = bi[i + 1] - bi[i]; 5360 const PetscInt ncols_d = ai[i + 1] - ai[i]; 5361 ci[i + 1] = ci[i] + ncols_o + ncols_d; 5362 /* diagonal portion of A */ 5363 for (j = 0; j < ncols_d; j++, k++) { 5364 cj[k] = *aj++; 5365 ca[k] = *aa++; 5366 } 5367 /* off-diagonal portion of A */ 5368 for (j = 0; j < ncols_o; j++, k++) { 5369 cj[k] = dn + *bj++; 5370 ca[k] = *ba++; 5371 } 5372 } 5373 /* put together the new matrix */ 5374 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, dn + on, ci, cj, ca, A_loc)); 5375 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5376 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5377 c = (Mat_SeqAIJ *)(*A_loc)->data; 5378 c->free_a = PETSC_TRUE; 5379 c->free_ij = PETSC_TRUE; 5380 c->nonew = 0; 5381 PetscCall(MatSetType(*A_loc, ((PetscObject)Ad)->type_name)); 5382 } else if (scall == MAT_REUSE_MATRIX) { 5383 PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &ca)); 5384 for (i = 0; i < am; i++) { 5385 const PetscInt ncols_d = ai[i + 1] - ai[i]; 5386 const PetscInt ncols_o = bi[i + 1] - bi[i]; 5387 /* diagonal portion of A */ 5388 for (j = 0; j < ncols_d; j++) *ca++ = *aa++; 5389 /* off-diagonal portion of A */ 5390 for (j = 0; j < ncols_o; j++) *ca++ = *ba++; 5391 } 5392 PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &ca)); 5393 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall); 5394 PetscCall(MatSeqAIJRestoreArrayRead(Ad, &aa)); 5395 PetscCall(MatSeqAIJRestoreArrayRead(Ao, &aa)); 5396 if (glob) { 5397 PetscInt cst, *gidx; 5398 5399 PetscCall(MatGetOwnershipRangeColumn(A, &cst, NULL)); 5400 PetscCall(PetscMalloc1(dn + on, &gidx)); 5401 for (i = 0; i < dn; i++) gidx[i] = cst + i; 5402 for (i = 0; i < on; i++) gidx[i + dn] = cmap[i]; 5403 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad), dn + on, gidx, PETSC_OWN_POINTER, glob)); 5404 } 5405 } 5406 PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0)); 5407 PetscFunctionReturn(PETSC_SUCCESS); 5408 } 5409 5410 /*@C 5411 MatMPIAIJGetLocalMatCondensed - Creates a `MATSEQAIJ` matrix from an `MATMPIAIJ` matrix by taking all its local rows and NON-ZERO columns 5412 5413 Not Collective 5414 5415 Input Parameters: 5416 + A - the matrix 5417 . scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5418 . row - index set of rows to extract (or `NULL`) 5419 - col - index set of columns to extract (or `NULL`) 5420 5421 Output Parameter: 5422 . 
A_loc - the local sequential matrix generated 5423 5424 Level: developer 5425 5426 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()` 5427 @*/ 5428 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A, MatReuse scall, IS *row, IS *col, Mat *A_loc) 5429 { 5430 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5431 PetscInt i, start, end, ncols, nzA, nzB, *cmap, imark, *idx; 5432 IS isrowa, iscola; 5433 Mat *aloc; 5434 PetscBool match; 5435 5436 PetscFunctionBegin; 5437 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &match)); 5438 PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input"); 5439 PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed, A, 0, 0, 0)); 5440 if (!row) { 5441 start = A->rmap->rstart; 5442 end = A->rmap->rend; 5443 PetscCall(ISCreateStride(PETSC_COMM_SELF, end - start, start, 1, &isrowa)); 5444 } else { 5445 isrowa = *row; 5446 } 5447 if (!col) { 5448 start = A->cmap->rstart; 5449 cmap = a->garray; 5450 nzA = a->A->cmap->n; 5451 nzB = a->B->cmap->n; 5452 PetscCall(PetscMalloc1(nzA + nzB, &idx)); 5453 ncols = 0; 5454 for (i = 0; i < nzB; i++) { 5455 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5456 else break; 5457 } 5458 imark = i; 5459 for (i = 0; i < nzA; i++) idx[ncols++] = start + i; 5460 for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; 5461 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &iscola)); 5462 } else { 5463 iscola = *col; 5464 } 5465 if (scall != MAT_INITIAL_MATRIX) { 5466 PetscCall(PetscMalloc1(1, &aloc)); 5467 aloc[0] = *A_loc; 5468 } 5469 PetscCall(MatCreateSubMatrices(A, 1, &isrowa, &iscola, scall, &aloc)); 5470 if (!col) { /* attach global id of condensed columns */ 5471 PetscCall(PetscObjectCompose((PetscObject)aloc[0], "_petsc_GetLocalMatCondensed_iscol", (PetscObject)iscola)); 5472 } 5473 *A_loc = aloc[0]; 5474 PetscCall(PetscFree(aloc)); 5475 if (!row) PetscCall(ISDestroy(&isrowa)); 5476 if (!col) PetscCall(ISDestroy(&iscola)); 5477 PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed, A, 0, 0, 0)); 5478 PetscFunctionReturn(PETSC_SUCCESS); 5479 } 5480 5481 /* 5482 * Create a sequential AIJ matrix based on row indices. a whole column is extracted once a row is matched. 5483 * Row could be local or remote.The routine is designed to be scalable in memory so that nothing is based 5484 * on a global size. 
5485 * */ 5486 static PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P, IS rows, Mat *P_oth) 5487 { 5488 Mat_MPIAIJ *p = (Mat_MPIAIJ *)P->data; 5489 Mat_SeqAIJ *pd = (Mat_SeqAIJ *)p->A->data, *po = (Mat_SeqAIJ *)p->B->data, *p_oth; 5490 PetscInt plocalsize, nrows, *ilocal, *oilocal, i, lidx, *nrcols, *nlcols, ncol; 5491 PetscMPIInt owner; 5492 PetscSFNode *iremote, *oiremote; 5493 const PetscInt *lrowindices; 5494 PetscSF sf, osf; 5495 PetscInt pcstart, *roffsets, *loffsets, *pnnz, j; 5496 PetscInt ontotalcols, dntotalcols, ntotalcols, nout; 5497 MPI_Comm comm; 5498 ISLocalToGlobalMapping mapping; 5499 const PetscScalar *pd_a, *po_a; 5500 5501 PetscFunctionBegin; 5502 PetscCall(PetscObjectGetComm((PetscObject)P, &comm)); 5503 /* plocalsize is the number of roots 5504 * nrows is the number of leaves 5505 * */ 5506 PetscCall(MatGetLocalSize(P, &plocalsize, NULL)); 5507 PetscCall(ISGetLocalSize(rows, &nrows)); 5508 PetscCall(PetscCalloc1(nrows, &iremote)); 5509 PetscCall(ISGetIndices(rows, &lrowindices)); 5510 for (i = 0; i < nrows; i++) { 5511 /* Find a remote index and an owner for a row 5512 * The row could be local or remote 5513 * */ 5514 owner = 0; 5515 lidx = 0; 5516 PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, &lidx)); 5517 iremote[i].index = lidx; 5518 iremote[i].rank = owner; 5519 } 5520 /* Create SF to communicate how many nonzero columns for each row */ 5521 PetscCall(PetscSFCreate(comm, &sf)); 5522 /* SF will figure out the number of nonzero columns for each row, and their 5523 * offsets 5524 * */ 5525 PetscCall(PetscSFSetGraph(sf, plocalsize, nrows, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 5526 PetscCall(PetscSFSetFromOptions(sf)); 5527 PetscCall(PetscSFSetUp(sf)); 5528 5529 PetscCall(PetscCalloc1(2 * (plocalsize + 1), &roffsets)); 5530 PetscCall(PetscCalloc1(2 * plocalsize, &nrcols)); 5531 PetscCall(PetscCalloc1(nrows, &pnnz)); 5532 roffsets[0] = 0; 5533 roffsets[1] = 0; 5534 for (i = 0; i < plocalsize; i++) { 5535 /* diagonal */ 5536 nrcols[i * 2 + 0] = pd->i[i + 1] - pd->i[i]; 5537 /* off-diagonal */ 5538 nrcols[i * 2 + 1] = po->i[i + 1] - po->i[i]; 5539 /* compute offsets so that we relative location for each row */ 5540 roffsets[(i + 1) * 2 + 0] = roffsets[i * 2 + 0] + nrcols[i * 2 + 0]; 5541 roffsets[(i + 1) * 2 + 1] = roffsets[i * 2 + 1] + nrcols[i * 2 + 1]; 5542 } 5543 PetscCall(PetscCalloc1(2 * nrows, &nlcols)); 5544 PetscCall(PetscCalloc1(2 * nrows, &loffsets)); 5545 /* 'r' means root, and 'l' means leaf */ 5546 PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE)); 5547 PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE)); 5548 PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE)); 5549 PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE)); 5550 PetscCall(PetscSFDestroy(&sf)); 5551 PetscCall(PetscFree(roffsets)); 5552 PetscCall(PetscFree(nrcols)); 5553 dntotalcols = 0; 5554 ontotalcols = 0; 5555 ncol = 0; 5556 for (i = 0; i < nrows; i++) { 5557 pnnz[i] = nlcols[i * 2 + 0] + nlcols[i * 2 + 1]; 5558 ncol = PetscMax(pnnz[i], ncol); 5559 /* diagonal */ 5560 dntotalcols += nlcols[i * 2 + 0]; 5561 /* off-diagonal */ 5562 ontotalcols += nlcols[i * 2 + 1]; 5563 } 5564 /* We do not need to figure the right number of columns 5565 * since all the calculations will be done by going through the raw data 5566 * */ 5567 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, nrows, ncol, 0, pnnz, P_oth)); 5568 PetscCall(MatSetUp(*P_oth)); 5569 
PetscCall(PetscFree(pnnz)); 5570 p_oth = (Mat_SeqAIJ *)(*P_oth)->data; 5571 /* diagonal */ 5572 PetscCall(PetscCalloc1(dntotalcols, &iremote)); 5573 /* off-diagonal */ 5574 PetscCall(PetscCalloc1(ontotalcols, &oiremote)); 5575 /* diagonal */ 5576 PetscCall(PetscCalloc1(dntotalcols, &ilocal)); 5577 /* off-diagonal */ 5578 PetscCall(PetscCalloc1(ontotalcols, &oilocal)); 5579 dntotalcols = 0; 5580 ontotalcols = 0; 5581 ntotalcols = 0; 5582 for (i = 0; i < nrows; i++) { 5583 owner = 0; 5584 PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, NULL)); 5585 /* Set iremote for diag matrix */ 5586 for (j = 0; j < nlcols[i * 2 + 0]; j++) { 5587 iremote[dntotalcols].index = loffsets[i * 2 + 0] + j; 5588 iremote[dntotalcols].rank = owner; 5589 /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */ 5590 ilocal[dntotalcols++] = ntotalcols++; 5591 } 5592 /* off-diagonal */ 5593 for (j = 0; j < nlcols[i * 2 + 1]; j++) { 5594 oiremote[ontotalcols].index = loffsets[i * 2 + 1] + j; 5595 oiremote[ontotalcols].rank = owner; 5596 oilocal[ontotalcols++] = ntotalcols++; 5597 } 5598 } 5599 PetscCall(ISRestoreIndices(rows, &lrowindices)); 5600 PetscCall(PetscFree(loffsets)); 5601 PetscCall(PetscFree(nlcols)); 5602 PetscCall(PetscSFCreate(comm, &sf)); 5603 /* P serves as roots and P_oth is leaves 5604 * Diag matrix 5605 * */ 5606 PetscCall(PetscSFSetGraph(sf, pd->i[plocalsize], dntotalcols, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 5607 PetscCall(PetscSFSetFromOptions(sf)); 5608 PetscCall(PetscSFSetUp(sf)); 5609 5610 PetscCall(PetscSFCreate(comm, &osf)); 5611 /* off-diagonal */ 5612 PetscCall(PetscSFSetGraph(osf, po->i[plocalsize], ontotalcols, oilocal, PETSC_OWN_POINTER, oiremote, PETSC_OWN_POINTER)); 5613 PetscCall(PetscSFSetFromOptions(osf)); 5614 PetscCall(PetscSFSetUp(osf)); 5615 PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a)); 5616 PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a)); 5617 /* operate on the matrix internal data to save memory */ 5618 PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5619 PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5620 PetscCall(MatGetOwnershipRangeColumn(P, &pcstart, NULL)); 5621 /* Convert to global indices for diag matrix */ 5622 for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] += pcstart; 5623 PetscCall(PetscSFBcastBegin(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE)); 5624 /* We want P_oth store global indices */ 5625 PetscCall(ISLocalToGlobalMappingCreate(comm, 1, p->B->cmap->n, p->garray, PETSC_COPY_VALUES, &mapping)); 5626 /* Use memory scalable approach */ 5627 PetscCall(ISLocalToGlobalMappingSetType(mapping, ISLOCALTOGLOBALMAPPINGHASH)); 5628 PetscCall(ISLocalToGlobalMappingApply(mapping, po->i[plocalsize], po->j, po->j)); 5629 PetscCall(PetscSFBcastBegin(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE)); 5630 PetscCall(PetscSFBcastEnd(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE)); 5631 /* Convert back to local indices */ 5632 for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] -= pcstart; 5633 PetscCall(PetscSFBcastEnd(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE)); 5634 nout = 0; 5635 PetscCall(ISGlobalToLocalMappingApply(mapping, IS_GTOLM_DROP, po->i[plocalsize], po->j, &nout, po->j)); 5636 PetscCheck(nout == po->i[plocalsize], comm, PETSC_ERR_ARG_INCOMP, "n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ", po->i[plocalsize], nout); 5637 PetscCall(ISLocalToGlobalMappingDestroy(&mapping)); 5638 /* Exchange values */ 5639 PetscCall(PetscSFBcastEnd(sf, 
MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5640 PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5641 PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a)); 5642 PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a)); 5643 /* Stop PETSc from shrinking memory */ 5644 for (i = 0; i < nrows; i++) p_oth->ilen[i] = p_oth->imax[i]; 5645 PetscCall(MatAssemblyBegin(*P_oth, MAT_FINAL_ASSEMBLY)); 5646 PetscCall(MatAssemblyEnd(*P_oth, MAT_FINAL_ASSEMBLY)); 5647 /* Attach PetscSF objects to P_oth so that we can reuse it later */ 5648 PetscCall(PetscObjectCompose((PetscObject)*P_oth, "diagsf", (PetscObject)sf)); 5649 PetscCall(PetscObjectCompose((PetscObject)*P_oth, "offdiagsf", (PetscObject)osf)); 5650 PetscCall(PetscSFDestroy(&sf)); 5651 PetscCall(PetscSFDestroy(&osf)); 5652 PetscFunctionReturn(PETSC_SUCCESS); 5653 } 5654 5655 /* 5656 * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5657 * This supports MPIAIJ and MAIJ 5658 * */ 5659 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A, Mat P, PetscInt dof, MatReuse reuse, Mat *P_oth) 5660 { 5661 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *p = (Mat_MPIAIJ *)P->data; 5662 Mat_SeqAIJ *p_oth; 5663 IS rows, map; 5664 PetscHMapI hamp; 5665 PetscInt i, htsize, *rowindices, off, *mapping, key, count; 5666 MPI_Comm comm; 5667 PetscSF sf, osf; 5668 PetscBool has; 5669 5670 PetscFunctionBegin; 5671 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 5672 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, P, 0, 0)); 5673 /* If it is the first time, create an index set of off-diag nonzero columns of A, 5674 * and then create a submatrix (that often is an overlapping matrix) 5675 * */ 5676 if (reuse == MAT_INITIAL_MATRIX) { 5677 /* Use a hash table to figure out unique keys */ 5678 PetscCall(PetscHMapICreateWithSize(a->B->cmap->n, &hamp)); 5679 PetscCall(PetscCalloc1(a->B->cmap->n, &mapping)); 5680 count = 0; 5681 /* Assume that a->g is sorted, otherwise the following does not make sense */ 5682 for (i = 0; i < a->B->cmap->n; i++) { 5683 key = a->garray[i] / dof; 5684 PetscCall(PetscHMapIHas(hamp, key, &has)); 5685 if (!has) { 5686 mapping[i] = count; 5687 PetscCall(PetscHMapISet(hamp, key, count++)); 5688 } else { 5689 /* Current 'i' has the same value the previous step */ 5690 mapping[i] = count - 1; 5691 } 5692 } 5693 PetscCall(ISCreateGeneral(comm, a->B->cmap->n, mapping, PETSC_OWN_POINTER, &map)); 5694 PetscCall(PetscHMapIGetSize(hamp, &htsize)); 5695 PetscCheck(htsize == count, comm, PETSC_ERR_ARG_INCOMP, " Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT, htsize, count); 5696 PetscCall(PetscCalloc1(htsize, &rowindices)); 5697 off = 0; 5698 PetscCall(PetscHMapIGetKeys(hamp, &off, rowindices)); 5699 PetscCall(PetscHMapIDestroy(&hamp)); 5700 PetscCall(PetscSortInt(htsize, rowindices)); 5701 PetscCall(ISCreateGeneral(comm, htsize, rowindices, PETSC_OWN_POINTER, &rows)); 5702 /* In case, the matrix was already created but users want to recreate the matrix */ 5703 PetscCall(MatDestroy(P_oth)); 5704 PetscCall(MatCreateSeqSubMatrixWithRows_Private(P, rows, P_oth)); 5705 PetscCall(PetscObjectCompose((PetscObject)*P_oth, "aoffdiagtopothmapping", (PetscObject)map)); 5706 PetscCall(ISDestroy(&map)); 5707 PetscCall(ISDestroy(&rows)); 5708 } else if (reuse == MAT_REUSE_MATRIX) { 5709 /* If matrix was already created, we simply update values using SF objects 5710 * that as attached to the matrix earlier. 
5711 */ 5712 const PetscScalar *pd_a, *po_a; 5713 5714 PetscCall(PetscObjectQuery((PetscObject)*P_oth, "diagsf", (PetscObject *)&sf)); 5715 PetscCall(PetscObjectQuery((PetscObject)*P_oth, "offdiagsf", (PetscObject *)&osf)); 5716 PetscCheck(sf && osf, comm, PETSC_ERR_ARG_NULL, "Matrix is not initialized yet"); 5717 p_oth = (Mat_SeqAIJ *)(*P_oth)->data; 5718 /* Update values in place */ 5719 PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a)); 5720 PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a)); 5721 PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5722 PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5723 PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5724 PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5725 PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a)); 5726 PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a)); 5727 } else SETERRQ(comm, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown reuse type"); 5728 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, P, 0, 0)); 5729 PetscFunctionReturn(PETSC_SUCCESS); 5730 } 5731 5732 /*@C 5733 MatGetBrowsOfAcols - Returns `IS` that contain rows of `B` that equal to nonzero columns of local `A` 5734 5735 Collective 5736 5737 Input Parameters: 5738 + A - the first matrix in `MATMPIAIJ` format 5739 . B - the second matrix in `MATMPIAIJ` format 5740 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5741 5742 Output Parameters: 5743 + rowb - On input index sets of rows of B to extract (or `NULL`), modified on output 5744 . colb - On input index sets of columns of B to extract (or `NULL`), modified on output 5745 - B_seq - the sequential matrix generated 5746 5747 Level: developer 5748 5749 .seealso: `Mat`, `MATMPIAIJ`, `IS`, `MatReuse` 5750 @*/ 5751 PetscErrorCode MatGetBrowsOfAcols(Mat A, Mat B, MatReuse scall, IS *rowb, IS *colb, Mat *B_seq) 5752 { 5753 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5754 PetscInt *idx, i, start, ncols, nzA, nzB, *cmap, imark; 5755 IS isrowb, iscolb; 5756 Mat *bseq = NULL; 5757 5758 PetscFunctionBegin; 5759 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 5760 A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 5761 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols, A, B, 0, 0)); 5762 5763 if (scall == MAT_INITIAL_MATRIX) { 5764 start = A->cmap->rstart; 5765 cmap = a->garray; 5766 nzA = a->A->cmap->n; 5767 nzB = a->B->cmap->n; 5768 PetscCall(PetscMalloc1(nzA + nzB, &idx)); 5769 ncols = 0; 5770 for (i = 0; i < nzB; i++) { /* row < local row index */ 5771 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5772 else break; 5773 } 5774 imark = i; 5775 for (i = 0; i < nzA; i++) idx[ncols++] = start + i; /* local rows */ 5776 for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5777 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &isrowb)); 5778 PetscCall(ISCreateStride(PETSC_COMM_SELF, B->cmap->N, 0, 1, &iscolb)); 5779 } else { 5780 PetscCheck(rowb && colb, PETSC_COMM_SELF, PETSC_ERR_SUP, "IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5781 isrowb = *rowb; 5782 iscolb = *colb; 5783 PetscCall(PetscMalloc1(1, &bseq)); 5784 bseq[0] = *B_seq; 5785 } 5786 PetscCall(MatCreateSubMatrices(B, 1, &isrowb, &iscolb, scall, &bseq)); 5787 *B_seq = bseq[0]; 5788 PetscCall(PetscFree(bseq)); 
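/* hand the extracted index sets back to the caller if they were requested, otherwise destroy them */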
5789 if (!rowb) { 5790 PetscCall(ISDestroy(&isrowb)); 5791 } else { 5792 *rowb = isrowb; 5793 } 5794 if (!colb) { 5795 PetscCall(ISDestroy(&iscolb)); 5796 } else { 5797 *colb = iscolb; 5798 } 5799 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols, A, B, 0, 0)); 5800 PetscFunctionReturn(PETSC_SUCCESS); 5801 } 5802 5803 /* 5804 MatGetBrowsOfAoCols_MPIAIJ - Creates a `MATSEQAIJ` matrix by taking rows of B that equal to nonzero columns 5805 of the OFF-DIAGONAL portion of local A 5806 5807 Collective 5808 5809 Input Parameters: 5810 + A,B - the matrices in `MATMPIAIJ` format 5811 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5812 5813 Output Parameter: 5814 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5815 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5816 . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5817 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5818 5819 Developer Note: 5820 This directly accesses information inside the VecScatter associated with the matrix-vector product 5821 for this matrix. This is not desirable.. 5822 5823 Level: developer 5824 5825 */ 5826 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A, Mat B, MatReuse scall, PetscInt **startsj_s, PetscInt **startsj_r, MatScalar **bufa_ptr, Mat *B_oth) 5827 { 5828 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5829 Mat_SeqAIJ *b_oth; 5830 VecScatter ctx; 5831 MPI_Comm comm; 5832 const PetscMPIInt *rprocs, *sprocs; 5833 const PetscInt *srow, *rstarts, *sstarts; 5834 PetscInt *rowlen, *bufj, *bufJ, ncols = 0, aBn = a->B->cmap->n, row, *b_othi, *b_othj, *rvalues = NULL, *svalues = NULL, *cols, sbs, rbs; 5835 PetscInt i, j, k = 0, l, ll, nrecvs, nsends, nrows, *rstartsj = NULL, *sstartsj, len; 5836 PetscScalar *b_otha, *bufa, *bufA, *vals = NULL; 5837 MPI_Request *reqs = NULL, *rwaits = NULL, *swaits = NULL; 5838 PetscMPIInt size, tag, rank, nreqs; 5839 5840 PetscFunctionBegin; 5841 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 5842 PetscCallMPI(MPI_Comm_size(comm, &size)); 5843 5844 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 5845 A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 5846 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, B, 0, 0)); 5847 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 5848 5849 if (size == 1) { 5850 startsj_s = NULL; 5851 bufa_ptr = NULL; 5852 *B_oth = NULL; 5853 PetscFunctionReturn(PETSC_SUCCESS); 5854 } 5855 5856 ctx = a->Mvctx; 5857 tag = ((PetscObject)ctx)->tag; 5858 5859 PetscCall(VecScatterGetRemote_Private(ctx, PETSC_TRUE /*send*/, &nsends, &sstarts, &srow, &sprocs, &sbs)); 5860 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5861 PetscCall(VecScatterGetRemoteOrdered_Private(ctx, PETSC_FALSE /*recv*/, &nrecvs, &rstarts, NULL /*indices not needed*/, &rprocs, &rbs)); 5862 PetscCall(PetscMPIIntCast(nsends + nrecvs, &nreqs)); 5863 PetscCall(PetscMalloc1(nreqs, &reqs)); 5864 rwaits = reqs; 5865 swaits = PetscSafePointerPlusOffset(reqs, nrecvs); 5866 5867 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5868 if (scall == MAT_INITIAL_MATRIX) { 5869 /* i-array */ 5870 /* post receives */ 5871 if (nrecvs) PetscCall(PetscMalloc1(rbs * 
(rstarts[nrecvs] - rstarts[0]), &rvalues)); /* rstarts can be NULL when nrecvs=0 */ 5872 for (i = 0; i < nrecvs; i++) { 5873 rowlen = rvalues + rstarts[i] * rbs; 5874 nrows = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of indices to be received */ 5875 PetscCallMPI(MPI_Irecv(rowlen, nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i)); 5876 } 5877 5878 /* pack the outgoing message */ 5879 PetscCall(PetscMalloc2(nsends + 1, &sstartsj, nrecvs + 1, &rstartsj)); 5880 5881 sstartsj[0] = 0; 5882 rstartsj[0] = 0; 5883 len = 0; /* total length of j or a array to be sent */ 5884 if (nsends) { 5885 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5886 PetscCall(PetscMalloc1(sbs * (sstarts[nsends] - sstarts[0]), &svalues)); 5887 } 5888 for (i = 0; i < nsends; i++) { 5889 rowlen = svalues + (sstarts[i] - sstarts[0]) * sbs; 5890 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5891 for (j = 0; j < nrows; j++) { 5892 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5893 for (l = 0; l < sbs; l++) { 5894 PetscCall(MatGetRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); /* rowlength */ 5895 5896 rowlen[j * sbs + l] = ncols; 5897 5898 len += ncols; 5899 PetscCall(MatRestoreRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); 5900 } 5901 k++; 5902 } 5903 PetscCallMPI(MPI_Isend(rowlen, nrows * sbs, MPIU_INT, sprocs[i], tag, comm, swaits + i)); 5904 5905 sstartsj[i + 1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5906 } 5907 /* recvs and sends of i-array are completed */ 5908 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5909 PetscCall(PetscFree(svalues)); 5910 5911 /* allocate buffers for sending j and a arrays */ 5912 PetscCall(PetscMalloc1(len + 1, &bufj)); 5913 PetscCall(PetscMalloc1(len + 1, &bufa)); 5914 5915 /* create i-array of B_oth */ 5916 PetscCall(PetscMalloc1(aBn + 2, &b_othi)); 5917 5918 b_othi[0] = 0; 5919 len = 0; /* total length of j or a array to be received */ 5920 k = 0; 5921 for (i = 0; i < nrecvs; i++) { 5922 rowlen = rvalues + (rstarts[i] - rstarts[0]) * rbs; 5923 nrows = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of rows to be received */ 5924 for (j = 0; j < nrows; j++) { 5925 b_othi[k + 1] = b_othi[k] + rowlen[j]; 5926 PetscCall(PetscIntSumError(rowlen[j], len, &len)); 5927 k++; 5928 } 5929 rstartsj[i + 1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5930 } 5931 PetscCall(PetscFree(rvalues)); 5932 5933 /* allocate space for j and a arrays of B_oth */ 5934 PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_othj)); 5935 PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_otha)); 5936 5937 /* j-array */ 5938 /* post receives of j-array */ 5939 for (i = 0; i < nrecvs; i++) { 5940 nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */ 5941 PetscCallMPI(MPI_Irecv(b_othj + rstartsj[i], nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i)); 5942 } 5943 5944 /* pack the outgoing message j-array */ 5945 if (nsends) k = sstarts[0]; 5946 for (i = 0; i < nsends; i++) { 5947 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5948 bufJ = bufj + sstartsj[i]; 5949 for (j = 0; j < nrows; j++) { 5950 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5951 for (ll = 0; ll < sbs; ll++) { 5952 PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL)); 5953 for (l = 0; l < ncols; l++) *bufJ++ = cols[l]; 5954 PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL)); 5955 } 5956 } 5957 PetscCallMPI(MPI_Isend(bufj + sstartsj[i], sstartsj[i + 1] - sstartsj[i], 
MPIU_INT, sprocs[i], tag, comm, swaits + i)); 5958 } 5959 5960 /* recvs and sends of j-array are completed */ 5961 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5962 } else if (scall == MAT_REUSE_MATRIX) { 5963 sstartsj = *startsj_s; 5964 rstartsj = *startsj_r; 5965 bufa = *bufa_ptr; 5966 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 5967 PetscCall(MatSeqAIJGetArrayWrite(*B_oth, &b_otha)); 5968 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container"); 5969 5970 /* a-array */ 5971 /* post receives of a-array */ 5972 for (i = 0; i < nrecvs; i++) { 5973 nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */ 5974 PetscCallMPI(MPI_Irecv(b_otha + rstartsj[i], nrows, MPIU_SCALAR, rprocs[i], tag, comm, rwaits + i)); 5975 } 5976 5977 /* pack the outgoing message a-array */ 5978 if (nsends) k = sstarts[0]; 5979 for (i = 0; i < nsends; i++) { 5980 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5981 bufA = bufa + sstartsj[i]; 5982 for (j = 0; j < nrows; j++) { 5983 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5984 for (ll = 0; ll < sbs; ll++) { 5985 PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals)); 5986 for (l = 0; l < ncols; l++) *bufA++ = vals[l]; 5987 PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals)); 5988 } 5989 } 5990 PetscCallMPI(MPI_Isend(bufa + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_SCALAR, sprocs[i], tag, comm, swaits + i)); 5991 } 5992 /* recvs and sends of a-array are completed */ 5993 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5994 PetscCall(PetscFree(reqs)); 5995 5996 if (scall == MAT_INITIAL_MATRIX) { 5997 /* put together the new matrix */ 5998 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, aBn, B->cmap->N, b_othi, b_othj, b_otha, B_oth)); 5999 6000 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 6001 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 6002 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 6003 b_oth->free_a = PETSC_TRUE; 6004 b_oth->free_ij = PETSC_TRUE; 6005 b_oth->nonew = 0; 6006 6007 PetscCall(PetscFree(bufj)); 6008 if (!startsj_s || !bufa_ptr) { 6009 PetscCall(PetscFree2(sstartsj, rstartsj)); 6010 PetscCall(PetscFree(bufa_ptr)); 6011 } else { 6012 *startsj_s = sstartsj; 6013 *startsj_r = rstartsj; 6014 *bufa_ptr = bufa; 6015 } 6016 } else if (scall == MAT_REUSE_MATRIX) { 6017 PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth, &b_otha)); 6018 } 6019 6020 PetscCall(VecScatterRestoreRemote_Private(ctx, PETSC_TRUE, &nsends, &sstarts, &srow, &sprocs, &sbs)); 6021 PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx, PETSC_FALSE, &nrecvs, &rstarts, NULL, &rprocs, &rbs)); 6022 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, B, 0, 0)); 6023 PetscFunctionReturn(PETSC_SUCCESS); 6024 } 6025 6026 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat, MatType, MatReuse, Mat *); 6027 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat, MatType, MatReuse, Mat *); 6028 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat, MatType, MatReuse, Mat *); 6029 #if defined(PETSC_HAVE_MKL_SPARSE) 6030 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat, MatType, MatReuse, Mat *); 6031 #endif 6032 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat, MatType, MatReuse, Mat *); 6033 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *); 6034 #if defined(PETSC_HAVE_ELEMENTAL) 6035 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat, MatType, MatReuse, Mat *); 6036 #endif 6037 #if defined(PETSC_HAVE_SCALAPACK) 6038 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *); 6039 #endif 6040 #if defined(PETSC_HAVE_HYPRE) 6041 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *); 6042 #endif 6043 #if defined(PETSC_HAVE_CUDA) 6044 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat, MatType, MatReuse, Mat *); 6045 #endif 6046 #if defined(PETSC_HAVE_HIP) 6047 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJHIPSPARSE(Mat, MatType, MatReuse, Mat *); 6048 #endif 6049 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 6050 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat, MatType, MatReuse, Mat *); 6051 #endif 6052 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat, MatType, MatReuse, Mat *); 6053 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *); 6054 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat); 6055 6056 /* 6057 Computes (B'*A')' since computing B*A directly is untenable 6058 6059 n p p 6060 [ ] [ ] [ ] 6061 m [ A ] * n [ B ] = m [ C ] 6062 [ ] [ ] [ ] 6063 6064 */ 6065 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A, Mat B, Mat C) 6066 { 6067 Mat At, Bt, Ct; 6068 6069 PetscFunctionBegin; 6070 PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &At)); 6071 PetscCall(MatTranspose(B, MAT_INITIAL_MATRIX, &Bt)); 6072 PetscCall(MatMatMult(Bt, At, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &Ct)); 6073 PetscCall(MatDestroy(&At)); 6074 PetscCall(MatDestroy(&Bt)); 6075 PetscCall(MatTransposeSetPrecursor(Ct, C)); 6076 PetscCall(MatTranspose(Ct, MAT_REUSE_MATRIX, &C)); 6077 PetscCall(MatDestroy(&Ct)); 6078 PetscFunctionReturn(PETSC_SUCCESS); 6079 } 6080 6081 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A, Mat B, PetscReal fill, Mat C) 6082 { 6083 PetscBool cisdense; 6084 6085 PetscFunctionBegin; 6086 PetscCheck(A->cmap->n == B->rmap->n, PETSC_COMM_SELF, 
PETSC_ERR_ARG_SIZ, "A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT, A->cmap->n, B->rmap->n); 6087 PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N)); 6088 PetscCall(MatSetBlockSizesFromMats(C, A, B)); 6089 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, MATMPIDENSEHIP, "")); 6090 if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 6091 PetscCall(MatSetUp(C)); 6092 6093 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 6094 PetscFunctionReturn(PETSC_SUCCESS); 6095 } 6096 6097 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C) 6098 { 6099 Mat_Product *product = C->product; 6100 Mat A = product->A, B = product->B; 6101 6102 PetscFunctionBegin; 6103 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 6104 A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 6105 C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ; 6106 C->ops->productsymbolic = MatProductSymbolic_AB; 6107 PetscFunctionReturn(PETSC_SUCCESS); 6108 } 6109 6110 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C) 6111 { 6112 Mat_Product *product = C->product; 6113 6114 PetscFunctionBegin; 6115 if (product->type == MATPRODUCT_AB) PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C)); 6116 PetscFunctionReturn(PETSC_SUCCESS); 6117 } 6118 6119 /* 6120 Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix 6121 6122 Input Parameters: 6123 6124 j1,rowBegin1,rowEnd1,jmap1: describe the first set of nonzeros (Set1) 6125 j2,rowBegin2,rowEnd2,jmap2: describe the second set of nonzeros (Set2) 6126 6127 mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat 6128 6129 For Set1, j1[] contains column indices of the nonzeros. 6130 For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k 6131 respectively (note rowEnd1[k] is not necessarily equal to rwoBegin1[k+1]). Indices in this range of j1[] are sorted, 6132 but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1. 6133 6134 Similar for Set2. 6135 6136 This routine merges the two sets of nonzeros row by row and removes repeats. 6137 6138 Output Parameters: (memory is allocated by the caller) 6139 6140 i[],j[]: the CSR of the merged matrix, which has m rows. 6141 imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix. 6142 imap2[]: similar to imap1[], but for Set2. 6143 Note we order nonzeros row-by-row and from left to right. 
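
   For illustration, a small made-up example with m = 1 (one local row):
    Set1: j1 = [2,2,5], rowBegin1 = [0], rowEnd1 = [3], jmap1 = [0,2,3]  (unique columns 2 and 5; column 2 appears twice)
    Set2: j2 = [1,5,5], rowBegin2 = [0], rowEnd2 = [3], jmap2 = [0,1,3]  (unique columns 1 and 5; column 5 appears twice)
   then
    i = [0,3], j = [1,2,5], imap1 = [1,2], imap2 = [0,2]
   i.e., the merged row has unique columns {1,2,5}; Set1's unique nonzeros (columns 2 and 5) are the 1st and 2nd unique
   nonzeros of the merged matrix (0-based), and Set2's (columns 1 and 5) are the 0th and 2nd.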
6144 */ 6145 static PetscErrorCode MatMergeEntries_Internal(Mat mat, const PetscInt j1[], const PetscInt j2[], const PetscCount rowBegin1[], const PetscCount rowEnd1[], const PetscCount rowBegin2[], const PetscCount rowEnd2[], const PetscCount jmap1[], const PetscCount jmap2[], PetscCount imap1[], PetscCount imap2[], PetscInt i[], PetscInt j[]) 6146 { 6147 PetscInt r, m; /* Row index of mat */ 6148 PetscCount t, t1, t2, b1, e1, b2, e2; 6149 6150 PetscFunctionBegin; 6151 PetscCall(MatGetLocalSize(mat, &m, NULL)); 6152 t1 = t2 = t = 0; /* Count unique nonzeros of in Set1, Set1 and the merged respectively */ 6153 i[0] = 0; 6154 for (r = 0; r < m; r++) { /* Do row by row merging */ 6155 b1 = rowBegin1[r]; 6156 e1 = rowEnd1[r]; 6157 b2 = rowBegin2[r]; 6158 e2 = rowEnd2[r]; 6159 while (b1 < e1 && b2 < e2) { 6160 if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */ 6161 j[t] = j1[b1]; 6162 imap1[t1] = t; 6163 imap2[t2] = t; 6164 b1 += jmap1[t1 + 1] - jmap1[t1]; /* Jump to next unique local nonzero */ 6165 b2 += jmap2[t2 + 1] - jmap2[t2]; /* Jump to next unique remote nonzero */ 6166 t1++; 6167 t2++; 6168 t++; 6169 } else if (j1[b1] < j2[b2]) { 6170 j[t] = j1[b1]; 6171 imap1[t1] = t; 6172 b1 += jmap1[t1 + 1] - jmap1[t1]; 6173 t1++; 6174 t++; 6175 } else { 6176 j[t] = j2[b2]; 6177 imap2[t2] = t; 6178 b2 += jmap2[t2 + 1] - jmap2[t2]; 6179 t2++; 6180 t++; 6181 } 6182 } 6183 /* Merge the remaining in either j1[] or j2[] */ 6184 while (b1 < e1) { 6185 j[t] = j1[b1]; 6186 imap1[t1] = t; 6187 b1 += jmap1[t1 + 1] - jmap1[t1]; 6188 t1++; 6189 t++; 6190 } 6191 while (b2 < e2) { 6192 j[t] = j2[b2]; 6193 imap2[t2] = t; 6194 b2 += jmap2[t2 + 1] - jmap2[t2]; 6195 t2++; 6196 t++; 6197 } 6198 i[r + 1] = t; 6199 } 6200 PetscFunctionReturn(PETSC_SUCCESS); 6201 } 6202 6203 /* 6204 Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block 6205 6206 Input Parameters: 6207 mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m. 6208 n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[] 6209 respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n. 6210 6211 i[] is already sorted, but within a row, j[] is not sorted and might have repeats. 6212 i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting. 6213 6214 Output Parameters: 6215 j[],perm[]: the routine needs to sort j[] within each row along with perm[]. 6216 rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller. 6217 They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block, 6218 and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block. 6219 6220 Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine. 6221 Atot: number of entries belonging to the diagonal block. 6222 Annz: number of unique nonzeros belonging to the diagonal block. 6223 Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count 6224 repeats (i.e., same 'i,j' pair). 6225 Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. 
More precisely, Ajmap[t+1] - Ajmap[t] 6226 is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0. 6227 6228 Atot: number of entries belonging to the diagonal block 6229 Annz: number of unique nonzeros belonging to the diagonal block. 6230 6231 Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block. 6232 6233 Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1(). 6234 */ 6235 static PetscErrorCode MatSplitEntries_Internal(Mat mat, PetscCount n, const PetscInt i[], PetscInt j[], PetscCount perm[], PetscCount rowBegin[], PetscCount rowMid[], PetscCount rowEnd[], PetscCount *Atot_, PetscCount **Aperm_, PetscCount *Annz_, PetscCount **Ajmap_, PetscCount *Btot_, PetscCount **Bperm_, PetscCount *Bnnz_, PetscCount **Bjmap_) 6236 { 6237 PetscInt cstart, cend, rstart, rend, row, col; 6238 PetscCount Atot = 0, Btot = 0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */ 6239 PetscCount Annz = 0, Bnnz = 0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */ 6240 PetscCount k, m, p, q, r, s, mid; 6241 PetscCount *Aperm, *Bperm, *Ajmap, *Bjmap; 6242 6243 PetscFunctionBegin; 6244 PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend)); 6245 PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend)); 6246 m = rend - rstart; 6247 6248 /* Skip negative rows */ 6249 for (k = 0; k < n; k++) 6250 if (i[k] >= 0) break; 6251 6252 /* Process [k,n): sort and partition each local row into diag and offdiag portions, 6253 fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz. 6254 */ 6255 while (k < n) { 6256 row = i[k]; 6257 /* Entries in [k,s) are in one row. Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */ 6258 for (s = k; s < n; s++) 6259 if (i[s] != row) break; 6260 6261 /* Shift diag columns to range of [-PETSC_MAX_INT, -1] */ 6262 for (p = k; p < s; p++) { 6263 if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_MAX_INT; 6264 else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index %" PetscInt_FMT " is out of range", j[p]); 6265 } 6266 PetscCall(PetscSortIntWithCountArray(s - k, j + k, perm + k)); 6267 PetscCall(PetscSortedIntUpperBound(j, k, s, -1, &mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */ 6268 rowBegin[row - rstart] = k; 6269 rowMid[row - rstart] = mid; 6270 rowEnd[row - rstart] = s; 6271 6272 /* Count nonzeros of this diag/offdiag row, which might have repeats */ 6273 Atot += mid - k; 6274 Btot += s - mid; 6275 6276 /* Count unique nonzeros of this diag row */ 6277 for (p = k; p < mid;) { 6278 col = j[p]; 6279 do { 6280 j[p] += PETSC_MAX_INT; /* Revert the modified diagonal indices */ 6281 p++; 6282 } while (p < mid && j[p] == col); 6283 Annz++; 6284 } 6285 6286 /* Count unique nonzeros of this offdiag row */ 6287 for (p = mid; p < s;) { 6288 col = j[p]; 6289 do { 6290 p++; 6291 } while (p < s && j[p] == col); 6292 Bnnz++; 6293 } 6294 k = s; 6295 } 6296 6297 /* Allocation according to Atot, Btot, Annz, Bnnz */ 6298 PetscCall(PetscMalloc1(Atot, &Aperm)); 6299 PetscCall(PetscMalloc1(Btot, &Bperm)); 6300 PetscCall(PetscMalloc1(Annz + 1, &Ajmap)); 6301 PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap)); 6302 6303 /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */ 6304 Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0; 6305 for (r = 0; r < m; r++) { 6306 k = rowBegin[r]; 6307 mid 
= rowMid[r]; 6308 s = rowEnd[r]; 6309 PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Aperm, Atot), PetscSafePointerPlusOffset(perm, k), mid - k)); 6310 PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Bperm, Btot), PetscSafePointerPlusOffset(perm, mid), s - mid)); 6311 Atot += mid - k; 6312 Btot += s - mid; 6313 6314 /* Scan column indices in this row and find out how many repeats each unique nonzero has */ 6315 for (p = k; p < mid;) { 6316 col = j[p]; 6317 q = p; 6318 do { 6319 p++; 6320 } while (p < mid && j[p] == col); 6321 Ajmap[Annz + 1] = Ajmap[Annz] + (p - q); 6322 Annz++; 6323 } 6324 6325 for (p = mid; p < s;) { 6326 col = j[p]; 6327 q = p; 6328 do { 6329 p++; 6330 } while (p < s && j[p] == col); 6331 Bjmap[Bnnz + 1] = Bjmap[Bnnz] + (p - q); 6332 Bnnz++; 6333 } 6334 } 6335 /* Output */ 6336 *Aperm_ = Aperm; 6337 *Annz_ = Annz; 6338 *Atot_ = Atot; 6339 *Ajmap_ = Ajmap; 6340 *Bperm_ = Bperm; 6341 *Bnnz_ = Bnnz; 6342 *Btot_ = Btot; 6343 *Bjmap_ = Bjmap; 6344 PetscFunctionReturn(PETSC_SUCCESS); 6345 } 6346 6347 /* 6348 Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix 6349 6350 Input Parameters: 6351 nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[] 6352 nnz: number of unique nonzeros in the merged matrix 6353 imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix 6354 jmap[nnz1+1]: i-th nonzero in the set has jmap[i+1] - jmap[i] repeats in the set 6355 6356 Output Parameter: (memory is allocated by the caller) 6357 jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set 6358 6359 Example: 6360 nnz1 = 4 6361 nnz = 6 6362 imap = [1,3,4,5] 6363 jmap = [0,3,5,6,7] 6364 then, 6365 jmap_new = [0,0,3,3,5,6,7] 6366 */ 6367 static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1, PetscCount nnz, const PetscCount imap[], const PetscCount jmap[], PetscCount jmap_new[]) 6368 { 6369 PetscCount k, p; 6370 6371 PetscFunctionBegin; 6372 jmap_new[0] = 0; 6373 p = nnz; /* p loops over jmap_new[] backwards */ 6374 for (k = nnz1 - 1; k >= 0; k--) { /* k loops over imap[] */ 6375 for (; p > imap[k]; p--) jmap_new[p] = jmap[k + 1]; 6376 } 6377 for (; p >= 0; p--) jmap_new[p] = jmap[0]; 6378 PetscFunctionReturn(PETSC_SUCCESS); 6379 } 6380 6381 static PetscErrorCode MatCOOStructDestroy_MPIAIJ(void *data) 6382 { 6383 MatCOOStruct_MPIAIJ *coo = (MatCOOStruct_MPIAIJ *)data; 6384 6385 PetscFunctionBegin; 6386 PetscCall(PetscSFDestroy(&coo->sf)); 6387 PetscCall(PetscFree(coo->Aperm1)); 6388 PetscCall(PetscFree(coo->Bperm1)); 6389 PetscCall(PetscFree(coo->Ajmap1)); 6390 PetscCall(PetscFree(coo->Bjmap1)); 6391 PetscCall(PetscFree(coo->Aimap2)); 6392 PetscCall(PetscFree(coo->Bimap2)); 6393 PetscCall(PetscFree(coo->Aperm2)); 6394 PetscCall(PetscFree(coo->Bperm2)); 6395 PetscCall(PetscFree(coo->Ajmap2)); 6396 PetscCall(PetscFree(coo->Bjmap2)); 6397 PetscCall(PetscFree(coo->Cperm1)); 6398 PetscCall(PetscFree2(coo->sendbuf, coo->recvbuf)); 6399 PetscCall(PetscFree(coo)); 6400 PetscFunctionReturn(PETSC_SUCCESS); 6401 } 6402 6403 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, PetscInt coo_i[], PetscInt coo_j[]) 6404 { 6405 MPI_Comm comm; 6406 PetscMPIInt rank, size; 6407 PetscInt m, n, M, N, rstart, rend, cstart, cend; /* Sizes, indices of row/col, therefore with type PetscInt */ 6408 PetscCount k, p, q, rem; /* Loop variables over coo arrays */ 6409 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)mat->data; 6410 PetscContainer container; 6411 MatCOOStruct_MPIAIJ 
*coo; 6412 6413 PetscFunctionBegin; 6414 PetscCall(PetscFree(mpiaij->garray)); 6415 PetscCall(VecDestroy(&mpiaij->lvec)); 6416 #if defined(PETSC_USE_CTABLE) 6417 PetscCall(PetscHMapIDestroy(&mpiaij->colmap)); 6418 #else 6419 PetscCall(PetscFree(mpiaij->colmap)); 6420 #endif 6421 PetscCall(VecScatterDestroy(&mpiaij->Mvctx)); 6422 mat->assembled = PETSC_FALSE; 6423 mat->was_assembled = PETSC_FALSE; 6424 6425 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 6426 PetscCallMPI(MPI_Comm_size(comm, &size)); 6427 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 6428 PetscCall(PetscLayoutSetUp(mat->rmap)); 6429 PetscCall(PetscLayoutSetUp(mat->cmap)); 6430 PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend)); 6431 PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend)); 6432 PetscCall(MatGetLocalSize(mat, &m, &n)); 6433 PetscCall(MatGetSize(mat, &M, &N)); 6434 6435 /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */ 6436 /* entries come first, then local rows, then remote rows. */ 6437 PetscCount n1 = coo_n, *perm1; 6438 PetscInt *i1 = coo_i, *j1 = coo_j; 6439 6440 PetscCall(PetscMalloc1(n1, &perm1)); 6441 for (k = 0; k < n1; k++) perm1[k] = k; 6442 6443 /* Manipulate indices so that entries with negative row or col indices will have smallest 6444 row indices, local entries will have greater but negative row indices, and remote entries 6445 will have positive row indices. 6446 */ 6447 for (k = 0; k < n1; k++) { 6448 if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_MIN_INT; /* e.g., -2^31, minimal to move them ahead */ 6449 else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_MAX_INT; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_MAX_INT, -1] */ 6450 else { 6451 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_USER_INPUT, "MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows"); 6452 if (mpiaij->donotstash) i1[k] = PETSC_MIN_INT; /* Ignore offproc entries as if they had negative indices */ 6453 } 6454 } 6455 6456 /* Sort by row; after that, [0,k) have ignored entries, [k,rem) have local rows and [rem,n1) have remote rows */ 6457 PetscCall(PetscSortIntWithIntCountArrayPair(n1, i1, j1, perm1)); 6458 6459 /* Advance k to the first entry we need to take care of */ 6460 for (k = 0; k < n1; k++) 6461 if (i1[k] > PETSC_MIN_INT) break; 6462 PetscInt i1start = k; 6463 6464 PetscCall(PetscSortedIntUpperBound(i1, k, n1, rend - 1 - PETSC_MAX_INT, &rem)); /* rem is upper bound of the last local row */ 6465 for (; k < rem; k++) i1[k] += PETSC_MAX_INT; /* Revert row indices of local rows*/ 6466 6467 /* Send remote rows to their owner */ 6468 /* Find which rows should be sent to which remote ranks*/ 6469 PetscInt nsend = 0; /* Number of MPI ranks to send data to */ 6470 PetscMPIInt *sendto; /* [nsend], storing remote ranks */ 6471 PetscInt *nentries; /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */ 6472 const PetscInt *ranges; 6473 PetscInt maxNsend = size >= 128 ? 
128 : size; /* Assume max 128 neighbors; realloc when needed */ 6474 6475 PetscCall(PetscLayoutGetRanges(mat->rmap, &ranges)); 6476 PetscCall(PetscMalloc2(maxNsend, &sendto, maxNsend, &nentries)); 6477 for (k = rem; k < n1;) { 6478 PetscMPIInt owner; 6479 PetscInt firstRow, lastRow; 6480 6481 /* Locate a row range */ 6482 firstRow = i1[k]; /* first row of this owner */ 6483 PetscCall(PetscLayoutFindOwner(mat->rmap, firstRow, &owner)); 6484 lastRow = ranges[owner + 1] - 1; /* last row of this owner */ 6485 6486 /* Find the first index 'p' in [k,n1) with i1[p] belonging to the next owner */ 6487 PetscCall(PetscSortedIntUpperBound(i1, k, n1, lastRow, &p)); 6488 6489 /* All entries in [k,p) belong to this remote owner */ 6490 if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */ 6491 PetscMPIInt *sendto2; 6492 PetscInt *nentries2; 6493 PetscInt maxNsend2 = (maxNsend <= size / 2) ? maxNsend * 2 : size; 6494 6495 PetscCall(PetscMalloc2(maxNsend2, &sendto2, maxNsend2, &nentries2)); 6496 PetscCall(PetscArraycpy(sendto2, sendto, maxNsend)); 6497 PetscCall(PetscArraycpy(nentries2, nentries, maxNsend)); 6498 PetscCall(PetscFree2(sendto, nentries)); 6499 sendto = sendto2; 6500 nentries = nentries2; 6501 maxNsend = maxNsend2; 6502 } 6503 sendto[nsend] = owner; 6504 nentries[nsend] = p - k; 6505 PetscCall(PetscCountCast(p - k, &nentries[nsend])); 6506 nsend++; 6507 k = p; 6508 } 6509 6510 /* Build 1st SF to know offsets on remote to send data */ 6511 PetscSF sf1; 6512 PetscInt nroots = 1, nroots2 = 0; 6513 PetscInt nleaves = nsend, nleaves2 = 0; 6514 PetscInt *offsets; 6515 PetscSFNode *iremote; 6516 6517 PetscCall(PetscSFCreate(comm, &sf1)); 6518 PetscCall(PetscMalloc1(nsend, &iremote)); 6519 PetscCall(PetscMalloc1(nsend, &offsets)); 6520 for (k = 0; k < nsend; k++) { 6521 iremote[k].rank = sendto[k]; 6522 iremote[k].index = 0; 6523 nleaves2 += nentries[k]; 6524 PetscCheck(nleaves2 >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF leaves is too large for PetscInt"); 6525 } 6526 PetscCall(PetscSFSetGraph(sf1, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 6527 PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1, MPIU_INT, PETSC_MEMTYPE_HOST, &nroots2 /*rootdata*/, PETSC_MEMTYPE_HOST, nentries /*leafdata*/, PETSC_MEMTYPE_HOST, offsets /*leafupdate*/, MPI_SUM)); 6528 PetscCall(PetscSFFetchAndOpEnd(sf1, MPIU_INT, &nroots2, nentries, offsets, MPI_SUM)); /* Would nroots2 overflow, we check offsets[] below */ 6529 PetscCall(PetscSFDestroy(&sf1)); 6530 PetscAssert(nleaves2 == n1 - rem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT, nleaves2, n1 - rem); 6531 6532 /* Build 2nd SF to send remote COOs to their owner */ 6533 PetscSF sf2; 6534 nroots = nroots2; 6535 nleaves = nleaves2; 6536 PetscCall(PetscSFCreate(comm, &sf2)); 6537 PetscCall(PetscSFSetFromOptions(sf2)); 6538 PetscCall(PetscMalloc1(nleaves, &iremote)); 6539 p = 0; 6540 for (k = 0; k < nsend; k++) { 6541 PetscCheck(offsets[k] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF roots is too large for PetscInt"); 6542 for (q = 0; q < nentries[k]; q++, p++) { 6543 iremote[p].rank = sendto[k]; 6544 iremote[p].index = offsets[k] + q; 6545 } 6546 } 6547 PetscCall(PetscSFSetGraph(sf2, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 6548 6549 /* Send the remote COOs to their owner */ 6550 PetscInt n2 = nroots, *i2, *j2; /* Buffers for received COOs from other ranks, along with a permutation array */ 6551
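  /* An illustrative (made-up) picture of the two star forests built above: suppose rank 1 owns rows that receive remote
     entries from rank 0 (3 entries) and rank 2 (2 entries). The fetch-and-add over sf1 then leaves nroots2 = 5 on rank 1
     and hands back disjoint offsets to the senders, e.g. offsets = {0} on rank 0 and {3} on rank 2 (which sender gets
     which offset depends on arrival order). sf2 uses these offsets so that each sender's entries land in a contiguous
     slice of rank 1's i2[]/j2[] receive buffers allocated below */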
PetscCount *perm2; /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */ 6552 PetscCall(PetscMalloc3(n2, &i2, n2, &j2, n2, &perm2)); 6553 PetscAssert(rem == 0 || i1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null"); 6554 PetscAssert(rem == 0 || j1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null"); 6555 PetscInt *i1prem = i1 ? i1 + rem : NULL; /* silence ubsan warnings about pointer arithmetic on null pointer */ 6556 PetscInt *j1prem = j1 ? j1 + rem : NULL; 6557 PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, i1prem, PETSC_MEMTYPE_HOST, i2, MPI_REPLACE)); 6558 PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, i1prem, i2, MPI_REPLACE)); 6559 PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, j1prem, PETSC_MEMTYPE_HOST, j2, MPI_REPLACE)); 6560 PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, j1prem, j2, MPI_REPLACE)); 6561 6562 PetscCall(PetscFree(offsets)); 6563 PetscCall(PetscFree2(sendto, nentries)); 6564 6565 /* Sort received COOs by row along with the permutation array */ 6566 for (k = 0; k < n2; k++) perm2[k] = k; 6567 PetscCall(PetscSortIntWithIntCountArrayPair(n2, i2, j2, perm2)); 6568 6569 /* sf2 only sends contiguous leafdata to contiguous rootdata. We record the permutation which will be used to fill leafdata */ 6570 PetscCount *Cperm1; 6571 PetscAssert(rem == 0 || perm1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null"); 6572 PetscCount *perm1prem = perm1 ? perm1 + rem : NULL; 6573 PetscCall(PetscMalloc1(nleaves, &Cperm1)); 6574 PetscCall(PetscArraycpy(Cperm1, perm1prem, nleaves)); 6575 6576 /* Support for HYPRE matrices, kind of a hack. 6577 Swap min column with diagonal so that diagonal values will go first */ 6578 PetscBool hypre; 6579 const char *name; 6580 PetscCall(PetscObjectGetName((PetscObject)mat, &name)); 6581 PetscCall(PetscStrcmp("_internal_COO_mat_for_hypre", name, &hypre)); 6582 if (hypre) { 6583 PetscInt *minj; 6584 PetscBT hasdiag; 6585 6586 PetscCall(PetscBTCreate(m, &hasdiag)); 6587 PetscCall(PetscMalloc1(m, &minj)); 6588 for (k = 0; k < m; k++) minj[k] = PETSC_MAX_INT; 6589 for (k = i1start; k < rem; k++) { 6590 if (j1[k] < cstart || j1[k] >= cend) continue; 6591 const PetscInt rindex = i1[k] - rstart; 6592 if ((j1[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex)); 6593 minj[rindex] = PetscMin(minj[rindex], j1[k]); 6594 } 6595 for (k = 0; k < n2; k++) { 6596 if (j2[k] < cstart || j2[k] >= cend) continue; 6597 const PetscInt rindex = i2[k] - rstart; 6598 if ((j2[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex)); 6599 minj[rindex] = PetscMin(minj[rindex], j2[k]); 6600 } 6601 for (k = i1start; k < rem; k++) { 6602 const PetscInt rindex = i1[k] - rstart; 6603 if (j1[k] < cstart || j1[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue; 6604 if (j1[k] == minj[rindex]) j1[k] = i1[k] + (cstart - rstart); 6605 else if ((j1[k] - cstart) == rindex) j1[k] = minj[rindex]; 6606 } 6607 for (k = 0; k < n2; k++) { 6608 const PetscInt rindex = i2[k] - rstart; 6609 if (j2[k] < cstart || j2[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue; 6610 if (j2[k] == minj[rindex]) j2[k] = i2[k] + (cstart - rstart); 6611 else if ((j2[k] - cstart) == rindex) j2[k] = minj[rindex]; 6612 } 6613 PetscCall(PetscBTDestroy(&hasdiag)); 6614 PetscCall(PetscFree(minj)); 6615 } 6616 6617 /* Split local COOs and received COOs into diag/offdiag portions */ 6618 PetscCount 
*rowBegin1, *rowMid1, *rowEnd1; 6619 PetscCount *Ajmap1, *Aperm1, *Bjmap1, *Bperm1; 6620 PetscCount Annz1, Bnnz1, Atot1, Btot1; 6621 PetscCount *rowBegin2, *rowMid2, *rowEnd2; 6622 PetscCount *Ajmap2, *Aperm2, *Bjmap2, *Bperm2; 6623 PetscCount Annz2, Bnnz2, Atot2, Btot2; 6624 6625 PetscCall(PetscCalloc3(m, &rowBegin1, m, &rowMid1, m, &rowEnd1)); 6626 PetscCall(PetscCalloc3(m, &rowBegin2, m, &rowMid2, m, &rowEnd2)); 6627 PetscCall(MatSplitEntries_Internal(mat, rem, i1, j1, perm1, rowBegin1, rowMid1, rowEnd1, &Atot1, &Aperm1, &Annz1, &Ajmap1, &Btot1, &Bperm1, &Bnnz1, &Bjmap1)); 6628 PetscCall(MatSplitEntries_Internal(mat, n2, i2, j2, perm2, rowBegin2, rowMid2, rowEnd2, &Atot2, &Aperm2, &Annz2, &Ajmap2, &Btot2, &Bperm2, &Bnnz2, &Bjmap2)); 6629 6630 /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */ 6631 PetscInt *Ai, *Bi; 6632 PetscInt *Aj, *Bj; 6633 6634 PetscCall(PetscMalloc1(m + 1, &Ai)); 6635 PetscCall(PetscMalloc1(m + 1, &Bi)); 6636 PetscCall(PetscMalloc1(Annz1 + Annz2, &Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */ 6637 PetscCall(PetscMalloc1(Bnnz1 + Bnnz2, &Bj)); 6638 6639 PetscCount *Aimap1, *Bimap1, *Aimap2, *Bimap2; 6640 PetscCall(PetscMalloc1(Annz1, &Aimap1)); 6641 PetscCall(PetscMalloc1(Bnnz1, &Bimap1)); 6642 PetscCall(PetscMalloc1(Annz2, &Aimap2)); 6643 PetscCall(PetscMalloc1(Bnnz2, &Bimap2)); 6644 6645 PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowBegin1, rowMid1, rowBegin2, rowMid2, Ajmap1, Ajmap2, Aimap1, Aimap2, Ai, Aj)); 6646 PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowMid1, rowEnd1, rowMid2, rowEnd2, Bjmap1, Bjmap2, Bimap1, Bimap2, Bi, Bj)); 6647 6648 /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we */ 6649 /* expect nonzeros in A/B most likely have local contributing entries */ 6650 PetscInt Annz = Ai[m]; 6651 PetscInt Bnnz = Bi[m]; 6652 PetscCount *Ajmap1_new, *Bjmap1_new; 6653 6654 PetscCall(PetscMalloc1(Annz + 1, &Ajmap1_new)); 6655 PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap1_new)); 6656 6657 PetscCall(ExpandJmap_Internal(Annz1, Annz, Aimap1, Ajmap1, Ajmap1_new)); 6658 PetscCall(ExpandJmap_Internal(Bnnz1, Bnnz, Bimap1, Bjmap1, Bjmap1_new)); 6659 6660 PetscCall(PetscFree(Aimap1)); 6661 PetscCall(PetscFree(Ajmap1)); 6662 PetscCall(PetscFree(Bimap1)); 6663 PetscCall(PetscFree(Bjmap1)); 6664 PetscCall(PetscFree3(rowBegin1, rowMid1, rowEnd1)); 6665 PetscCall(PetscFree3(rowBegin2, rowMid2, rowEnd2)); 6666 PetscCall(PetscFree(perm1)); 6667 PetscCall(PetscFree3(i2, j2, perm2)); 6668 6669 Ajmap1 = Ajmap1_new; 6670 Bjmap1 = Bjmap1_new; 6671 6672 /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */ 6673 if (Annz < Annz1 + Annz2) { 6674 PetscInt *Aj_new; 6675 PetscCall(PetscMalloc1(Annz, &Aj_new)); 6676 PetscCall(PetscArraycpy(Aj_new, Aj, Annz)); 6677 PetscCall(PetscFree(Aj)); 6678 Aj = Aj_new; 6679 } 6680 6681 if (Bnnz < Bnnz1 + Bnnz2) { 6682 PetscInt *Bj_new; 6683 PetscCall(PetscMalloc1(Bnnz, &Bj_new)); 6684 PetscCall(PetscArraycpy(Bj_new, Bj, Bnnz)); 6685 PetscCall(PetscFree(Bj)); 6686 Bj = Bj_new; 6687 } 6688 6689 /* Create new submatrices for on-process and off-process coupling */ 6690 PetscScalar *Aa, *Ba; 6691 MatType rtype; 6692 Mat_SeqAIJ *a, *b; 6693 PetscObjectState state; 6694 PetscCall(PetscCalloc1(Annz, &Aa)); /* Zero matrix on device */ 6695 PetscCall(PetscCalloc1(Bnnz, &Ba)); 6696 /* make Aj[] local, i.e, based off the start column of the diagonal portion */ 6697 if (cstart) { 6698 for (k = 0; k < Annz; k++) Aj[k] -= cstart; 6699 } 
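  /* For example (hypothetical numbers): with cstart = 100, a diagonal-block entry at global column 103 is now stored in Aj[]
     with local column index 3. The off-diagonal Bj[] keeps global column indices at this point; MatSetUpMultiply_MPIAIJ()
     below compacts them and builds garray */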
6700 6701 PetscCall(MatGetRootType_Private(mat, &rtype)); 6702 6703 MatSeqXAIJGetOptions_Private(mpiaij->A); 6704 PetscCall(MatDestroy(&mpiaij->A)); 6705 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, Ai, Aj, Aa, &mpiaij->A)); 6706 PetscCall(MatSetBlockSizesFromMats(mpiaij->A, mat, mat)); 6707 MatSeqXAIJRestoreOptions_Private(mpiaij->A); 6708 6709 MatSeqXAIJGetOptions_Private(mpiaij->B); 6710 PetscCall(MatDestroy(&mpiaij->B)); 6711 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, mat->cmap->N, Bi, Bj, Ba, &mpiaij->B)); 6712 PetscCall(MatSetBlockSizesFromMats(mpiaij->B, mat, mat)); 6713 MatSeqXAIJRestoreOptions_Private(mpiaij->B); 6714 6715 PetscCall(MatSetUpMultiply_MPIAIJ(mat)); 6716 mat->was_assembled = PETSC_TRUE; // was_assembled in effect means the Mvctx is built; doing so avoids redundant MatSetUpMultiply_MPIAIJ 6717 state = mpiaij->A->nonzerostate + mpiaij->B->nonzerostate; 6718 PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat))); 6719 6720 a = (Mat_SeqAIJ *)mpiaij->A->data; 6721 b = (Mat_SeqAIJ *)mpiaij->B->data; 6722 a->singlemalloc = b->singlemalloc = PETSC_FALSE; /* Let newmat own Ai,Aj,Aa,Bi,Bj,Ba */ 6723 a->free_a = b->free_a = PETSC_TRUE; 6724 a->free_ij = b->free_ij = PETSC_TRUE; 6725 6726 /* conversion must happen AFTER multiply setup */ 6727 PetscCall(MatConvert(mpiaij->A, rtype, MAT_INPLACE_MATRIX, &mpiaij->A)); 6728 PetscCall(MatConvert(mpiaij->B, rtype, MAT_INPLACE_MATRIX, &mpiaij->B)); 6729 PetscCall(VecDestroy(&mpiaij->lvec)); 6730 PetscCall(MatCreateVecs(mpiaij->B, &mpiaij->lvec, NULL)); 6731 6732 // Put the COO struct in a container and then attach that to the matrix 6733 PetscCall(PetscMalloc1(1, &coo)); 6734 coo->n = coo_n; 6735 coo->sf = sf2; 6736 coo->sendlen = nleaves; 6737 coo->recvlen = nroots; 6738 coo->Annz = Annz; 6739 coo->Bnnz = Bnnz; 6740 coo->Annz2 = Annz2; 6741 coo->Bnnz2 = Bnnz2; 6742 coo->Atot1 = Atot1; 6743 coo->Atot2 = Atot2; 6744 coo->Btot1 = Btot1; 6745 coo->Btot2 = Btot2; 6746 coo->Ajmap1 = Ajmap1; 6747 coo->Aperm1 = Aperm1; 6748 coo->Bjmap1 = Bjmap1; 6749 coo->Bperm1 = Bperm1; 6750 coo->Aimap2 = Aimap2; 6751 coo->Ajmap2 = Ajmap2; 6752 coo->Aperm2 = Aperm2; 6753 coo->Bimap2 = Bimap2; 6754 coo->Bjmap2 = Bjmap2; 6755 coo->Bperm2 = Bperm2; 6756 coo->Cperm1 = Cperm1; 6757 // Allocate in preallocation. 
If not used, it has zero cost on host 6758 PetscCall(PetscMalloc2(coo->sendlen, &coo->sendbuf, coo->recvlen, &coo->recvbuf)); 6759 PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container)); 6760 PetscCall(PetscContainerSetPointer(container, coo)); 6761 PetscCall(PetscContainerSetUserDestroy(container, MatCOOStructDestroy_MPIAIJ)); 6762 PetscCall(PetscObjectCompose((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject)container)); 6763 PetscCall(PetscContainerDestroy(&container)); 6764 PetscFunctionReturn(PETSC_SUCCESS); 6765 } 6766 6767 static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat, const PetscScalar v[], InsertMode imode) 6768 { 6769 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)mat->data; 6770 Mat A = mpiaij->A, B = mpiaij->B; 6771 PetscScalar *Aa, *Ba; 6772 PetscScalar *sendbuf, *recvbuf; 6773 const PetscCount *Ajmap1, *Ajmap2, *Aimap2; 6774 const PetscCount *Bjmap1, *Bjmap2, *Bimap2; 6775 const PetscCount *Aperm1, *Aperm2, *Bperm1, *Bperm2; 6776 const PetscCount *Cperm1; 6777 PetscContainer container; 6778 MatCOOStruct_MPIAIJ *coo; 6779 6780 PetscFunctionBegin; 6781 PetscCall(PetscObjectQuery((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject *)&container)); 6782 PetscCheck(container, PetscObjectComm((PetscObject)mat), PETSC_ERR_PLIB, "Not found MatCOOStruct on this matrix"); 6783 PetscCall(PetscContainerGetPointer(container, (void **)&coo)); 6784 sendbuf = coo->sendbuf; 6785 recvbuf = coo->recvbuf; 6786 Ajmap1 = coo->Ajmap1; 6787 Ajmap2 = coo->Ajmap2; 6788 Aimap2 = coo->Aimap2; 6789 Bjmap1 = coo->Bjmap1; 6790 Bjmap2 = coo->Bjmap2; 6791 Bimap2 = coo->Bimap2; 6792 Aperm1 = coo->Aperm1; 6793 Aperm2 = coo->Aperm2; 6794 Bperm1 = coo->Bperm1; 6795 Bperm2 = coo->Bperm2; 6796 Cperm1 = coo->Cperm1; 6797 6798 PetscCall(MatSeqAIJGetArray(A, &Aa)); /* Might read and write matrix values */ 6799 PetscCall(MatSeqAIJGetArray(B, &Ba)); 6800 6801 /* Pack entries to be sent to remote */ 6802 for (PetscCount i = 0; i < coo->sendlen; i++) sendbuf[i] = v[Cperm1[i]]; 6803 6804 /* Send remote entries to their owner and overlap the communication with local computation */ 6805 PetscCall(PetscSFReduceWithMemTypeBegin(coo->sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, sendbuf, PETSC_MEMTYPE_HOST, recvbuf, MPI_REPLACE)); 6806 /* Add local entries to A and B */ 6807 for (PetscCount i = 0; i < coo->Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */ 6808 PetscScalar sum = 0.0; /* Do partial summation first to improve numerical stability */ 6809 for (PetscCount k = Ajmap1[i]; k < Ajmap1[i + 1]; k++) sum += v[Aperm1[k]]; 6810 Aa[i] = (imode == INSERT_VALUES ? 0.0 : Aa[i]) + sum; 6811 } 6812 for (PetscCount i = 0; i < coo->Bnnz; i++) { 6813 PetscScalar sum = 0.0; 6814 for (PetscCount k = Bjmap1[i]; k < Bjmap1[i + 1]; k++) sum += v[Bperm1[k]]; 6815 Ba[i] = (imode == INSERT_VALUES ? 
0.0 : Ba[i]) + sum; 6816 } 6817 PetscCall(PetscSFReduceEnd(coo->sf, MPIU_SCALAR, sendbuf, recvbuf, MPI_REPLACE)); 6818 6819 /* Add received remote entries to A and B */ 6820 for (PetscCount i = 0; i < coo->Annz2; i++) { 6821 for (PetscCount k = Ajmap2[i]; k < Ajmap2[i + 1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]]; 6822 } 6823 for (PetscCount i = 0; i < coo->Bnnz2; i++) { 6824 for (PetscCount k = Bjmap2[i]; k < Bjmap2[i + 1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]]; 6825 } 6826 PetscCall(MatSeqAIJRestoreArray(A, &Aa)); 6827 PetscCall(MatSeqAIJRestoreArray(B, &Ba)); 6828 PetscFunctionReturn(PETSC_SUCCESS); 6829 } 6830 6831 /*MC 6832 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 6833 6834 Options Database Keys: 6835 . -mat_type mpiaij - sets the matrix type to `MATMPIAIJ` during a call to `MatSetFromOptions()` 6836 6837 Level: beginner 6838 6839 Notes: 6840 `MatSetValues()` may be called for this matrix type with a `NULL` argument for the numerical values, 6841 in this case the values associated with the rows and columns one passes in are set to zero 6842 in the matrix 6843 6844 `MatSetOptions`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this no 6845 space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored 6846 6847 .seealso: [](ch_matrices), `Mat`, `MATSEQAIJ`, `MATAIJ`, `MatCreateAIJ()` 6848 M*/ 6849 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 6850 { 6851 Mat_MPIAIJ *b; 6852 PetscMPIInt size; 6853 6854 PetscFunctionBegin; 6855 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 6856 6857 PetscCall(PetscNew(&b)); 6858 B->data = (void *)b; 6859 B->ops[0] = MatOps_Values; 6860 B->assembled = PETSC_FALSE; 6861 B->insertmode = NOT_SET_VALUES; 6862 b->size = size; 6863 6864 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank)); 6865 6866 /* build cache for off array entries formed */ 6867 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash)); 6868 6869 b->donotstash = PETSC_FALSE; 6870 b->colmap = NULL; 6871 b->garray = NULL; 6872 b->roworiented = PETSC_TRUE; 6873 6874 /* stuff used for matrix vector multiply */ 6875 b->lvec = NULL; 6876 b->Mvctx = NULL; 6877 6878 /* stuff for MatGetRow() */ 6879 b->rowindices = NULL; 6880 b->rowvalues = NULL; 6881 b->getrowactive = PETSC_FALSE; 6882 6883 /* flexible pointer used in CUSPARSE classes */ 6884 b->spptr = NULL; 6885 6886 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetUseScalableIncreaseOverlap_C", MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ)); 6887 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIAIJ)); 6888 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIAIJ)); 6889 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_MPIAIJ)); 6890 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocation_C", MatMPIAIJSetPreallocation_MPIAIJ)); 6891 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetPreallocation_C", MatResetPreallocation_MPIAIJ)); 6892 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocationCSR_C", MatMPIAIJSetPreallocationCSR_MPIAIJ)); 6893 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIAIJ)); 6894 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijperm_C", 
MatConvert_MPIAIJ_MPIAIJPERM)); 6895 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijsell_C", MatConvert_MPIAIJ_MPIAIJSELL)); 6896 #if defined(PETSC_HAVE_CUDA) 6897 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcusparse_C", MatConvert_MPIAIJ_MPIAIJCUSPARSE)); 6898 #endif 6899 #if defined(PETSC_HAVE_HIP) 6900 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijhipsparse_C", MatConvert_MPIAIJ_MPIAIJHIPSPARSE)); 6901 #endif 6902 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 6903 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijkokkos_C", MatConvert_MPIAIJ_MPIAIJKokkos)); 6904 #endif 6905 #if defined(PETSC_HAVE_MKL_SPARSE) 6906 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijmkl_C", MatConvert_MPIAIJ_MPIAIJMKL)); 6907 #endif 6908 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcrl_C", MatConvert_MPIAIJ_MPIAIJCRL)); 6909 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpibaij_C", MatConvert_MPIAIJ_MPIBAIJ)); 6910 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisbaij_C", MatConvert_MPIAIJ_MPISBAIJ)); 6911 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense)); 6912 #if defined(PETSC_HAVE_ELEMENTAL) 6913 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_elemental_C", MatConvert_MPIAIJ_Elemental)); 6914 #endif 6915 #if defined(PETSC_HAVE_SCALAPACK) 6916 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_scalapack_C", MatConvert_AIJ_ScaLAPACK)); 6917 #endif 6918 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_is_C", MatConvert_XAIJ_IS)); 6919 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisell_C", MatConvert_MPIAIJ_MPISELL)); 6920 #if defined(PETSC_HAVE_HYPRE) 6921 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_hypre_C", MatConvert_AIJ_HYPRE)); 6922 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", MatProductSetFromOptions_Transpose_AIJ_AIJ)); 6923 #endif 6924 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_is_mpiaij_C", MatProductSetFromOptions_IS_XAIJ)); 6925 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpiaij_C", MatProductSetFromOptions_MPIAIJ)); 6926 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetPreallocationCOO_C", MatSetPreallocationCOO_MPIAIJ)); 6927 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetValuesCOO_C", MatSetValuesCOO_MPIAIJ)); 6928 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIAIJ)); 6929 PetscFunctionReturn(PETSC_SUCCESS); 6930 } 6931 6932 /*@C 6933 MatCreateMPIAIJWithSplitArrays - creates a `MATMPIAIJ` matrix using arrays that contain the "diagonal" 6934 and "off-diagonal" part of the matrix in CSR format. 6935 6936 Collective 6937 6938 Input Parameters: 6939 + comm - MPI communicator 6940 . m - number of local rows (Cannot be `PETSC_DECIDE`) 6941 . n - This value should be the same as the local size used in creating the 6942 x vector for the matrix-vector product $y = Ax$. (or `PETSC_DECIDE` to have 6943 calculated if `N` is given) For square matrices `n` is almost always `m`. 6944 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given) 6945 . 
N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given) 6946 . i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 6947 . j - column indices, which must be local, i.e., based off the start column of the diagonal portion 6948 . a - matrix values 6949 . oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix 6950 . oj - column indices, which must be global, representing global columns in the `MATMPIAIJ` matrix 6951 - oa - matrix values 6952 6953 Output Parameter: 6954 . mat - the matrix 6955 6956 Level: advanced 6957 6958 Notes: 6959 The `i`, `j`, and `a` arrays ARE NOT copied by this routine into the internal format used by PETSc (even in Fortran). The user 6960 must free the arrays once the matrix has been destroyed and not before. 6961 6962 The `i` and `j` indices are 0 based 6963 6964 See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix 6965 6966 This sets local rows and cannot be used to set off-processor values. 6967 6968 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 6969 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 6970 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 6971 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 6972 keep track of the underlying array. Use `MatSetOption`(A,`MAT_NO_OFF_PROC_ENTRIES`,`PETSC_TRUE`) to disable all 6973 communication if it is known that only local entries will be set. 
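
   For illustration, consider a hypothetical 4 x 4 matrix on two MPI ranks, each owning two rows and two columns,
   with global entries
     rows 0-1:   1 0 | 2 0
                 0 3 | 0 4
     rows 2-3:   5 0 | 6 0
                 0 7 | 0 8
   Rank 0 (rows 0-1, diagonal columns 0-1) would pass
     i = {0,1,2}, j = {0,1}, a = {1,3} and oi = {0,1,2}, oj = {2,3}, oa = {2,4}
   while rank 1 (rows 2-3, diagonal columns 2-3) would pass
     i = {0,1,2}, j = {0,1}, a = {6,8} and oi = {0,1,2}, oj = {0,1}, oa = {5,7}
   Note that `j` holds local column indices of the diagonal block while `oj` holds global column indices.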
6974 6975 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 6976 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()` 6977 @*/ 6978 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt i[], PetscInt j[], PetscScalar a[], PetscInt oi[], PetscInt oj[], PetscScalar oa[], Mat *mat) 6979 { 6980 Mat_MPIAIJ *maij; 6981 6982 PetscFunctionBegin; 6983 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 6984 PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 6985 PetscCheck(oi[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "oi (row indices) must start with 0"); 6986 PetscCall(MatCreate(comm, mat)); 6987 PetscCall(MatSetSizes(*mat, m, n, M, N)); 6988 PetscCall(MatSetType(*mat, MATMPIAIJ)); 6989 maij = (Mat_MPIAIJ *)(*mat)->data; 6990 6991 (*mat)->preallocated = PETSC_TRUE; 6992 6993 PetscCall(PetscLayoutSetUp((*mat)->rmap)); 6994 PetscCall(PetscLayoutSetUp((*mat)->cmap)); 6995 6996 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, i, j, a, &maij->A)); 6997 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, (*mat)->cmap->N, oi, oj, oa, &maij->B)); 6998 6999 PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 7000 PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY)); 7001 PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY)); 7002 PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE)); 7003 PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 7004 PetscFunctionReturn(PETSC_SUCCESS); 7005 } 7006 7007 typedef struct { 7008 Mat *mp; /* intermediate products */ 7009 PetscBool *mptmp; /* is the intermediate product temporary ? */ 7010 PetscInt cp; /* number of intermediate products */ 7011 7012 /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */ 7013 PetscInt *startsj_s, *startsj_r; 7014 PetscScalar *bufa; 7015 Mat P_oth; 7016 7017 /* may take advantage of merging product->B */ 7018 Mat Bloc; /* B-local by merging diag and off-diag */ 7019 7020 /* cusparse does not have support to split between symbolic and numeric phases. 7021 When api_user is true, we don't need to update the numerical values 7022 of the temporary storage */ 7023 PetscBool reusesym; 7024 7025 /* support for COO values insertion */ 7026 PetscScalar *coo_v, *coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */ 7027 PetscInt **own; /* own[i] points to address of on-process COO indices for Mat mp[i] */ 7028 PetscInt **off; /* off[i] points to address of off-process COO indices for Mat mp[i] */ 7029 PetscBool hasoffproc; /* if true, have off-process values insertion (i.e. 
AtB or PtAP) */ 7030 PetscSF sf; /* used for non-local values insertion and memory malloc */ 7031 PetscMemType mtype; 7032 7033 /* customization */ 7034 PetscBool abmerge; 7035 PetscBool P_oth_bind; 7036 } MatMatMPIAIJBACKEND; 7037 7038 static PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data) 7039 { 7040 MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND *)data; 7041 PetscInt i; 7042 7043 PetscFunctionBegin; 7044 PetscCall(PetscFree2(mmdata->startsj_s, mmdata->startsj_r)); 7045 PetscCall(PetscFree(mmdata->bufa)); 7046 PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_v)); 7047 PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_w)); 7048 PetscCall(MatDestroy(&mmdata->P_oth)); 7049 PetscCall(MatDestroy(&mmdata->Bloc)); 7050 PetscCall(PetscSFDestroy(&mmdata->sf)); 7051 for (i = 0; i < mmdata->cp; i++) PetscCall(MatDestroy(&mmdata->mp[i])); 7052 PetscCall(PetscFree2(mmdata->mp, mmdata->mptmp)); 7053 PetscCall(PetscFree(mmdata->own[0])); 7054 PetscCall(PetscFree(mmdata->own)); 7055 PetscCall(PetscFree(mmdata->off[0])); 7056 PetscCall(PetscFree(mmdata->off)); 7057 PetscCall(PetscFree(mmdata)); 7058 PetscFunctionReturn(PETSC_SUCCESS); 7059 } 7060 7061 /* Copy selected n entries with indices in idx[] of A to v[]. 7062 If idx is NULL, copy the whole data array of A to v[] 7063 */ 7064 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[]) 7065 { 7066 PetscErrorCode (*f)(Mat, PetscInt, const PetscInt[], PetscScalar[]); 7067 7068 PetscFunctionBegin; 7069 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJCopySubArray_C", &f)); 7070 if (f) { 7071 PetscCall((*f)(A, n, idx, v)); 7072 } else { 7073 const PetscScalar *vv; 7074 7075 PetscCall(MatSeqAIJGetArrayRead(A, &vv)); 7076 if (n && idx) { 7077 PetscScalar *w = v; 7078 const PetscInt *oi = idx; 7079 PetscInt j; 7080 7081 for (j = 0; j < n; j++) *w++ = vv[*oi++]; 7082 } else { 7083 PetscCall(PetscArraycpy(v, vv, n)); 7084 } 7085 PetscCall(MatSeqAIJRestoreArrayRead(A, &vv)); 7086 } 7087 PetscFunctionReturn(PETSC_SUCCESS); 7088 } 7089 7090 static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C) 7091 { 7092 MatMatMPIAIJBACKEND *mmdata; 7093 PetscInt i, n_d, n_o; 7094 7095 PetscFunctionBegin; 7096 MatCheckProduct(C, 1); 7097 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 7098 mmdata = (MatMatMPIAIJBACKEND *)C->product->data; 7099 if (!mmdata->reusesym) { /* update temporary matrices */ 7100 if (mmdata->P_oth) PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A, C->product->B, MAT_REUSE_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7101 if (mmdata->Bloc) PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B, MAT_REUSE_MATRIX, NULL, &mmdata->Bloc)); 7102 } 7103 mmdata->reusesym = PETSC_FALSE; 7104 7105 for (i = 0; i < mmdata->cp; i++) { 7106 PetscCheck(mmdata->mp[i]->ops->productnumeric, PetscObjectComm((PetscObject)mmdata->mp[i]), PETSC_ERR_PLIB, "Missing numeric op for %s", MatProductTypes[mmdata->mp[i]->product->type]); 7107 PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i])); 7108 } 7109 for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) { 7110 PetscInt noff = mmdata->off[i + 1] - mmdata->off[i]; 7111 7112 if (mmdata->mptmp[i]) continue; 7113 if (noff) { 7114 PetscInt nown = mmdata->own[i + 1] - mmdata->own[i]; 7115 7116 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], noff, mmdata->off[i], mmdata->coo_w + n_o)); 7117 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], nown, 
mmdata->own[i], mmdata->coo_v + n_d)); 7118 n_o += noff; 7119 n_d += nown; 7120 } else { 7121 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mmdata->mp[i]->data; 7122 7123 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], mm->nz, NULL, mmdata->coo_v + n_d)); 7124 n_d += mm->nz; 7125 } 7126 } 7127 if (mmdata->hasoffproc) { /* offprocess insertion */ 7128 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d)); 7129 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d)); 7130 } 7131 PetscCall(MatSetValuesCOO(C, mmdata->coo_v, INSERT_VALUES)); 7132 PetscFunctionReturn(PETSC_SUCCESS); 7133 } 7134 7135 /* Support for Pt * A, A * P, or Pt * A * P */ 7136 #define MAX_NUMBER_INTERMEDIATE 4 7137 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C) 7138 { 7139 Mat_Product *product = C->product; 7140 Mat A, P, mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */ 7141 Mat_MPIAIJ *a, *p; 7142 MatMatMPIAIJBACKEND *mmdata; 7143 ISLocalToGlobalMapping P_oth_l2g = NULL; 7144 IS glob = NULL; 7145 const char *prefix; 7146 char pprefix[256]; 7147 const PetscInt *globidx, *P_oth_idx; 7148 PetscInt i, j, cp, m, n, M, N, *coo_i, *coo_j; 7149 PetscCount ncoo, ncoo_d, ncoo_o, ncoo_oown; 7150 PetscInt cmapt[MAX_NUMBER_INTERMEDIATE], rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. */ 7151 /* type-0: consecutive, start from 0; type-1: consecutive with */ 7152 /* a base offset; type-2: sparse with a local to global map table */ 7153 const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE], *rmapa[MAX_NUMBER_INTERMEDIATE]; /* col/row local to global map array (table) for type-2 map type */ 7154 7155 MatProductType ptype; 7156 PetscBool mptmp[MAX_NUMBER_INTERMEDIATE], hasoffproc = PETSC_FALSE, iscuda, iship, iskokk; 7157 PetscMPIInt size; 7158 7159 PetscFunctionBegin; 7160 MatCheckProduct(C, 1); 7161 PetscCheck(!product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 7162 ptype = product->type; 7163 if (product->A->symmetric == PETSC_BOOL3_TRUE && ptype == MATPRODUCT_AtB) { 7164 ptype = MATPRODUCT_AB; 7165 product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE; 7166 } 7167 switch (ptype) { 7168 case MATPRODUCT_AB: 7169 A = product->A; 7170 P = product->B; 7171 m = A->rmap->n; 7172 n = P->cmap->n; 7173 M = A->rmap->N; 7174 N = P->cmap->N; 7175 hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */ 7176 break; 7177 case MATPRODUCT_AtB: 7178 P = product->A; 7179 A = product->B; 7180 m = P->cmap->n; 7181 n = A->cmap->n; 7182 M = P->cmap->N; 7183 N = A->cmap->N; 7184 hasoffproc = PETSC_TRUE; 7185 break; 7186 case MATPRODUCT_PtAP: 7187 A = product->A; 7188 P = product->B; 7189 m = P->cmap->n; 7190 n = P->cmap->n; 7191 M = P->cmap->N; 7192 N = P->cmap->N; 7193 hasoffproc = PETSC_TRUE; 7194 break; 7195 default: 7196 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]); 7197 } 7198 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C), &size)); 7199 if (size == 1) hasoffproc = PETSC_FALSE; 7200 7201 /* defaults */ 7202 for (i = 0; i < MAX_NUMBER_INTERMEDIATE; i++) { 7203 mp[i] = NULL; 7204 mptmp[i] = PETSC_FALSE; 7205 rmapt[i] = -1; 7206 cmapt[i] = -1; 7207 rmapa[i] = NULL; 7208 cmapa[i] = NULL; 7209 } 7210 7211 /* customization */ 7212 PetscCall(PetscNew(&mmdata)); 7213 mmdata->reusesym = product->api_user; 7214 if (ptype == MATPRODUCT_AB) { 7215 if (product->api_user) { 7216 
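/* Option names follow the calling API: -matmatmult_* when the user called MatMatMult() directly (api_user), -mat_product_algorithm_backend_* when the generic MatProduct interface is used (else branch below) */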
PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatMult", "Mat"); 7217 PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL)); 7218 PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7219 PetscOptionsEnd(); 7220 } else { 7221 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat"); 7222 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL)); 7223 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7224 PetscOptionsEnd(); 7225 } 7226 } else if (ptype == MATPRODUCT_PtAP) { 7227 if (product->api_user) { 7228 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatPtAP", "Mat"); 7229 PetscCall(PetscOptionsBool("-matptap_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7230 PetscOptionsEnd(); 7231 } else { 7232 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat"); 7233 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7234 PetscOptionsEnd(); 7235 } 7236 } 7237 a = (Mat_MPIAIJ *)A->data; 7238 p = (Mat_MPIAIJ *)P->data; 7239 PetscCall(MatSetSizes(C, m, n, M, N)); 7240 PetscCall(PetscLayoutSetUp(C->rmap)); 7241 PetscCall(PetscLayoutSetUp(C->cmap)); 7242 PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 7243 PetscCall(MatGetOptionsPrefix(C, &prefix)); 7244 7245 cp = 0; 7246 switch (ptype) { 7247 case MATPRODUCT_AB: /* A * P */ 7248 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7249 7250 /* A_diag * P_local (merged or not) */ 7251 if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */ 7252 /* P is product->B */ 7253 PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7254 PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp])); 7255 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7256 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7257 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7258 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7259 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7260 mp[cp]->product->api_user = product->api_user; 7261 PetscCall(MatProductSetFromOptions(mp[cp])); 7262 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7263 PetscCall(ISGetIndices(glob, &globidx)); 7264 rmapt[cp] = 1; 7265 cmapt[cp] = 2; 7266 cmapa[cp] = globidx; 7267 mptmp[cp] = PETSC_FALSE; 7268 cp++; 7269 } else { /* A_diag * P_diag and A_diag * P_off */ 7270 PetscCall(MatProductCreate(a->A, p->A, NULL, &mp[cp])); 7271 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7272 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7273 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7274 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7275 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7276 mp[cp]->product->api_user = 
product->api_user; 7277 PetscCall(MatProductSetFromOptions(mp[cp])); 7278 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7279 rmapt[cp] = 1; 7280 cmapt[cp] = 1; 7281 mptmp[cp] = PETSC_FALSE; 7282 cp++; 7283 PetscCall(MatProductCreate(a->A, p->B, NULL, &mp[cp])); 7284 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7285 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7286 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7287 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7288 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7289 mp[cp]->product->api_user = product->api_user; 7290 PetscCall(MatProductSetFromOptions(mp[cp])); 7291 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7292 rmapt[cp] = 1; 7293 cmapt[cp] = 2; 7294 cmapa[cp] = p->garray; 7295 mptmp[cp] = PETSC_FALSE; 7296 cp++; 7297 } 7298 7299 /* A_off * P_other */ 7300 if (mmdata->P_oth) { 7301 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); /* make P_oth use local col ids */ 7302 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx)); 7303 PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)a->B)->type_name)); 7304 PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind)); 7305 PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp])); 7306 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7307 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7308 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7309 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7310 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7311 mp[cp]->product->api_user = product->api_user; 7312 PetscCall(MatProductSetFromOptions(mp[cp])); 7313 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7314 rmapt[cp] = 1; 7315 cmapt[cp] = 2; 7316 cmapa[cp] = P_oth_idx; 7317 mptmp[cp] = PETSC_FALSE; 7318 cp++; 7319 } 7320 break; 7321 7322 case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */ 7323 /* A is product->B */ 7324 PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7325 if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */ 7326 PetscCall(MatProductCreate(mmdata->Bloc, mmdata->Bloc, NULL, &mp[cp])); 7327 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7328 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7329 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7330 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7331 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7332 mp[cp]->product->api_user = product->api_user; 7333 PetscCall(MatProductSetFromOptions(mp[cp])); 7334 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7335 PetscCall(ISGetIndices(glob, &globidx)); 7336 rmapt[cp] = 2; 7337 rmapa[cp] = globidx; 7338 cmapt[cp] = 2; 7339 cmapa[cp] = globidx; 7340 mptmp[cp] = PETSC_FALSE; 7341 cp++; 7342 } else { 7343 PetscCall(MatProductCreate(p->A, mmdata->Bloc, NULL, &mp[cp])); 7344 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7345 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7346 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7347 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7348 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7349 mp[cp]->product->api_user = product->api_user; 7350 PetscCall(MatProductSetFromOptions(mp[cp])); 7351 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7352 PetscCall(ISGetIndices(glob, &globidx)); 
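/* P_diag^T * A_loc: its rows are the locally owned rows of C (consecutive, type-1 row map); its columns are A's merged local columns, translated to global indices through glob (type-2 col map) */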
7353 rmapt[cp] = 1; 7354 cmapt[cp] = 2; 7355 cmapa[cp] = globidx; 7356 mptmp[cp] = PETSC_FALSE; 7357 cp++; 7358 PetscCall(MatProductCreate(p->B, mmdata->Bloc, NULL, &mp[cp])); 7359 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7360 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7361 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7362 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7363 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7364 mp[cp]->product->api_user = product->api_user; 7365 PetscCall(MatProductSetFromOptions(mp[cp])); 7366 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7367 rmapt[cp] = 2; 7368 rmapa[cp] = p->garray; 7369 cmapt[cp] = 2; 7370 cmapa[cp] = globidx; 7371 mptmp[cp] = PETSC_FALSE; 7372 cp++; 7373 } 7374 break; 7375 case MATPRODUCT_PtAP: 7376 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7377 /* P is product->B */ 7378 PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7379 PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp])); 7380 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_PtAP)); 7381 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7382 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7383 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7384 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7385 mp[cp]->product->api_user = product->api_user; 7386 PetscCall(MatProductSetFromOptions(mp[cp])); 7387 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7388 PetscCall(ISGetIndices(glob, &globidx)); 7389 rmapt[cp] = 2; 7390 rmapa[cp] = globidx; 7391 cmapt[cp] = 2; 7392 cmapa[cp] = globidx; 7393 mptmp[cp] = PETSC_FALSE; 7394 cp++; 7395 if (mmdata->P_oth) { 7396 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); 7397 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx)); 7398 PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)a->B)->type_name)); 7399 PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind)); 7400 PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp])); 7401 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7402 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7403 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7404 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7405 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7406 mp[cp]->product->api_user = product->api_user; 7407 PetscCall(MatProductSetFromOptions(mp[cp])); 7408 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7409 mptmp[cp] = PETSC_TRUE; 7410 cp++; 7411 PetscCall(MatProductCreate(mmdata->Bloc, mp[1], NULL, &mp[cp])); 7412 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7413 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7414 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7415 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7416 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7417 mp[cp]->product->api_user = product->api_user; 7418 PetscCall(MatProductSetFromOptions(mp[cp])); 7419 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7420 rmapt[cp] = 2; 7421 rmapa[cp] = globidx; 7422 cmapt[cp] = 2; 7423 cmapa[cp] = P_oth_idx; 7424 mptmp[cp] = PETSC_FALSE; 7425 cp++; 7426 } 7427 break; 7428 default: 7429 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", 
MatProductTypes[ptype]); 7430 } 7431 /* sanity check */ 7432 if (size > 1) 7433 for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected offproc map type for product %" PetscInt_FMT, i); 7434 7435 PetscCall(PetscMalloc2(cp, &mmdata->mp, cp, &mmdata->mptmp)); 7436 for (i = 0; i < cp; i++) { 7437 mmdata->mp[i] = mp[i]; 7438 mmdata->mptmp[i] = mptmp[i]; 7439 } 7440 mmdata->cp = cp; 7441 C->product->data = mmdata; 7442 C->product->destroy = MatDestroy_MatMatMPIAIJBACKEND; 7443 C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND; 7444 7445 /* memory type */ 7446 mmdata->mtype = PETSC_MEMTYPE_HOST; 7447 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iscuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, "")); 7448 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iship, MATSEQAIJHIPSPARSE, MATMPIAIJHIPSPARSE, "")); 7449 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iskokk, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, "")); 7450 if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA; 7451 else if (iship) mmdata->mtype = PETSC_MEMTYPE_HIP; 7452 else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS; 7453 7454 /* prepare coo coordinates for values insertion */ 7455 7456 /* count total nonzeros of those intermediate seqaij Mats 7457 ncoo_d: # of nonzeros of matrices that do not have offproc entries 7458 ncoo_o: # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs 7459 ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally 7460 */ 7461 for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) { 7462 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7463 if (mptmp[cp]) continue; 7464 if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scattered to all processes (might include self) */ 7465 const PetscInt *rmap = rmapa[cp]; 7466 const PetscInt mr = mp[cp]->rmap->n; 7467 const PetscInt rs = C->rmap->rstart; 7468 const PetscInt re = C->rmap->rend; 7469 const PetscInt *ii = mm->i; 7470 for (i = 0; i < mr; i++) { 7471 const PetscInt gr = rmap[i]; 7472 const PetscInt nz = ii[i + 1] - ii[i]; 7473 if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */ 7474 else ncoo_oown += nz; /* this row is local */ 7475 } 7476 } else ncoo_d += mm->nz; 7477 } 7478 7479 /* 7480 ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc 7481 7482 ncoo = ncoo_d + ncoo_oown + ncoo2, where ncoo2 is the number of nonzeros inserted on this process by other processes. 7483 7484 off[0] points to a big index array, which is shared by off[1,2,...]. Similarly, for own[0]. 7485 7486 off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others 7487 own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally 7488 so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others. 7489 7490 coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc. 7491 E.g., coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaining part stores i of nonzeros that will be received from other processes. (A usage sketch of the MatSetPreallocationCOO()/MatSetValuesCOO() pair assembled here appears in a comment at the end of this file.)
7492 */ 7493 PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->off)); /* +1 to make a csr-like data structure */ 7494 PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->own)); 7495 7496 /* gather (i,j) of nonzeros inserted by remote procs */ 7497 if (hasoffproc) { 7498 PetscSF msf; 7499 PetscInt ncoo2, *coo_i2, *coo_j2; 7500 7501 PetscCall(PetscMalloc1(ncoo_o, &mmdata->off[0])); 7502 PetscCall(PetscMalloc1(ncoo_oown, &mmdata->own[0])); 7503 PetscCall(PetscMalloc2(ncoo_o, &coo_i, ncoo_o, &coo_j)); /* to collect (i,j) of entries to be sent to others */ 7504 7505 for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) { 7506 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7507 PetscInt *idxoff = mmdata->off[cp]; 7508 PetscInt *idxown = mmdata->own[cp]; 7509 if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */ 7510 const PetscInt *rmap = rmapa[cp]; 7511 const PetscInt *cmap = cmapa[cp]; 7512 const PetscInt *ii = mm->i; 7513 PetscInt *coi = coo_i + ncoo_o; 7514 PetscInt *coj = coo_j + ncoo_o; 7515 const PetscInt mr = mp[cp]->rmap->n; 7516 const PetscInt rs = C->rmap->rstart; 7517 const PetscInt re = C->rmap->rend; 7518 const PetscInt cs = C->cmap->rstart; 7519 for (i = 0; i < mr; i++) { 7520 const PetscInt *jj = mm->j + ii[i]; 7521 const PetscInt gr = rmap[i]; 7522 const PetscInt nz = ii[i + 1] - ii[i]; 7523 if (gr < rs || gr >= re) { /* this is an offproc row */ 7524 for (j = ii[i]; j < ii[i + 1]; j++) { 7525 *coi++ = gr; 7526 *idxoff++ = j; 7527 } 7528 if (!cmapt[cp]) { /* already global */ 7529 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7530 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7531 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7532 } else { /* offdiag */ 7533 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7534 } 7535 ncoo_o += nz; 7536 } else { /* this is a local row */ 7537 for (j = ii[i]; j < ii[i + 1]; j++) *idxown++ = j; 7538 } 7539 } 7540 } 7541 mmdata->off[cp + 1] = idxoff; 7542 mmdata->own[cp + 1] = idxown; 7543 } 7544 7545 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf)); 7546 PetscCall(PetscSFSetGraphLayout(mmdata->sf, C->rmap, ncoo_o /*nleaves*/, NULL /*ilocal*/, PETSC_OWN_POINTER, coo_i)); 7547 PetscCall(PetscSFGetMultiSF(mmdata->sf, &msf)); 7548 PetscCall(PetscSFGetGraph(msf, &ncoo2 /*nroots*/, NULL, NULL, NULL)); 7549 ncoo = ncoo_d + ncoo_oown + ncoo2; 7550 PetscCall(PetscMalloc2(ncoo, &coo_i2, ncoo, &coo_j2)); 7551 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */ 7552 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); 7553 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown)); 7554 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown)); 7555 PetscCall(PetscFree2(coo_i, coo_j)); 7556 /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */ 7557 PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo_o * sizeof(PetscScalar), (void **)&mmdata->coo_w)); 7558 coo_i = coo_i2; 7559 coo_j = coo_j2; 7560 } else { /* no offproc values insertion */ 7561 ncoo = ncoo_d; 7562 PetscCall(PetscMalloc2(ncoo, &coo_i, ncoo, &coo_j)); 7563 7564 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf)); 7565 PetscCall(PetscSFSetGraph(mmdata->sf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER)); 7566 PetscCall(PetscSFSetUp(mmdata->sf)); 7567 } 7568 mmdata->hasoffproc = hasoffproc; 7569 7570 /* gather (i,j) of nonzeros 
inserted locally */ 7571 for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) { 7572 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7573 PetscInt *coi = coo_i + ncoo_d; 7574 PetscInt *coj = coo_j + ncoo_d; 7575 const PetscInt *jj = mm->j; 7576 const PetscInt *ii = mm->i; 7577 const PetscInt *cmap = cmapa[cp]; 7578 const PetscInt *rmap = rmapa[cp]; 7579 const PetscInt mr = mp[cp]->rmap->n; 7580 const PetscInt rs = C->rmap->rstart; 7581 const PetscInt re = C->rmap->rend; 7582 const PetscInt cs = C->cmap->rstart; 7583 7584 if (mptmp[cp]) continue; 7585 if (rmapt[cp] == 1) { /* consecutive rows */ 7586 /* fill coo_i */ 7587 for (i = 0; i < mr; i++) { 7588 const PetscInt gr = i + rs; 7589 for (j = ii[i]; j < ii[i + 1]; j++) coi[j] = gr; 7590 } 7591 /* fill coo_j */ 7592 if (!cmapt[cp]) { /* type-0, already global */ 7593 PetscCall(PetscArraycpy(coj, jj, mm->nz)); 7594 } else if (cmapt[cp] == 1) { /* type-1, local to global for consecutive columns of C */ 7595 for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */ 7596 } else { /* type-2, local to global for sparse columns */ 7597 for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]]; 7598 } 7599 ncoo_d += mm->nz; 7600 } else if (rmapt[cp] == 2) { /* sparse rows */ 7601 for (i = 0; i < mr; i++) { 7602 const PetscInt *jj = mm->j + ii[i]; 7603 const PetscInt gr = rmap[i]; 7604 const PetscInt nz = ii[i + 1] - ii[i]; 7605 if (gr >= rs && gr < re) { /* local rows */ 7606 for (j = ii[i]; j < ii[i + 1]; j++) *coi++ = gr; 7607 if (!cmapt[cp]) { /* type-0, already global */ 7608 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7609 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7610 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7611 } else { /* type-2, local to global for sparse columns */ 7612 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7613 } 7614 ncoo_d += nz; 7615 } 7616 } 7617 } 7618 } 7619 if (glob) PetscCall(ISRestoreIndices(glob, &globidx)); 7620 PetscCall(ISDestroy(&glob)); 7621 if (P_oth_l2g) PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g, &P_oth_idx)); 7622 PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g)); 7623 /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */ 7624 PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo * sizeof(PetscScalar), (void **)&mmdata->coo_v)); 7625 7626 /* preallocate with COO data */ 7627 PetscCall(MatSetPreallocationCOO(C, ncoo, coo_i, coo_j)); 7628 PetscCall(PetscFree2(coo_i, coo_j)); 7629 PetscFunctionReturn(PETSC_SUCCESS); 7630 } 7631 7632 PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat) 7633 { 7634 Mat_Product *product = mat->product; 7635 #if defined(PETSC_HAVE_DEVICE) 7636 PetscBool match = PETSC_FALSE; 7637 PetscBool usecpu = PETSC_FALSE; 7638 #else 7639 PetscBool match = PETSC_TRUE; 7640 #endif 7641 7642 PetscFunctionBegin; 7643 MatCheckProduct(mat, 1); 7644 #if defined(PETSC_HAVE_DEVICE) 7645 if (!product->A->boundtocpu && !product->B->boundtocpu) PetscCall(PetscObjectTypeCompare((PetscObject)product->B, ((PetscObject)product->A)->type_name, &match)); 7646 if (match) { /* we can always fallback to the CPU if requested */ 7647 switch (product->type) { 7648 case MATPRODUCT_AB: 7649 if (product->api_user) { 7650 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatMatMult", "Mat"); 7651 PetscCall(PetscOptionsBool("-matmatmult_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL)); 7652 PetscOptionsEnd(); 7653 } else { 7654 
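/* same CPU-fallback switch, exposed under the MatProduct API option name */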
PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AB", "Mat"); 7655 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL)); 7656 PetscOptionsEnd(); 7657 } 7658 break; 7659 case MATPRODUCT_AtB: 7660 if (product->api_user) { 7661 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatTransposeMatMult", "Mat"); 7662 PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL)); 7663 PetscOptionsEnd(); 7664 } else { 7665 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AtB", "Mat"); 7666 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL)); 7667 PetscOptionsEnd(); 7668 } 7669 break; 7670 case MATPRODUCT_PtAP: 7671 if (product->api_user) { 7672 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatPtAP", "Mat"); 7673 PetscCall(PetscOptionsBool("-matptap_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL)); 7674 PetscOptionsEnd(); 7675 } else { 7676 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_PtAP", "Mat"); 7677 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL)); 7678 PetscOptionsEnd(); 7679 } 7680 break; 7681 default: 7682 break; 7683 } 7684 match = (PetscBool)!usecpu; 7685 } 7686 #endif 7687 if (match) { 7688 switch (product->type) { 7689 case MATPRODUCT_AB: 7690 case MATPRODUCT_AtB: 7691 case MATPRODUCT_PtAP: 7692 mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND; 7693 break; 7694 default: 7695 break; 7696 } 7697 } 7698 /* fallback to MPIAIJ ops */ 7699 if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat)); 7700 PetscFunctionReturn(PETSC_SUCCESS); 7701 } 7702 7703 /* 7704 Produces a set of block column indices of the matrix row, one for each block represented in the original row 7705 7706 n - the number of block indices in cc[] 7707 cc - the block indices (must be large enough to contain the indices) 7708 */ 7709 static inline PetscErrorCode MatCollapseRow(Mat Amat, PetscInt row, PetscInt bs, PetscInt *n, PetscInt *cc) 7710 { 7711 PetscInt cnt = -1, nidx, j; 7712 const PetscInt *idx; 7713 7714 PetscFunctionBegin; 7715 PetscCall(MatGetRow(Amat, row, &nidx, &idx, NULL)); 7716 if (nidx) { 7717 cnt = 0; 7718 cc[cnt] = idx[0] / bs; 7719 for (j = 1; j < nidx; j++) { 7720 if (cc[cnt] < idx[j] / bs) cc[++cnt] = idx[j] / bs; 7721 } 7722 } 7723 PetscCall(MatRestoreRow(Amat, row, &nidx, &idx, NULL)); 7724 *n = cnt + 1; 7725 PetscFunctionReturn(PETSC_SUCCESS); 7726 } 7727 7728 /* 7729 Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows 7730 7731 ncollapsed - the number of block indices 7732 collapsed - the block indices (must be large enough to contain the indices) 7733 */ 7734 static inline PetscErrorCode MatCollapseRows(Mat Amat, PetscInt start, PetscInt bs, PetscInt *w0, PetscInt *w1, PetscInt *w2, PetscInt *ncollapsed, PetscInt **collapsed) 7735 { 7736 PetscInt i, nprev, *cprev = w0, ncur = 0, *ccur = w1, *merged = w2, *cprevtmp; 7737 7738 PetscFunctionBegin; 7739 PetscCall(MatCollapseRow(Amat, start, bs, &nprev, cprev)); 7740 for (i = start + 1; i < start + bs; i++) { 7741 PetscCall(MatCollapseRow(Amat, i, bs, &ncur, ccur)); 
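/* merge the block columns of row i into the running set; the cprev/merged buffers are swapped after each merge so cprev always holds the current result */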
7742 PetscCall(PetscMergeIntArray(nprev, cprev, ncur, ccur, &nprev, &merged)); 7743 cprevtmp = cprev; 7744 cprev = merged; 7745 merged = cprevtmp; 7746 } 7747 *ncollapsed = nprev; 7748 if (collapsed) *collapsed = cprev; 7749 PetscFunctionReturn(PETSC_SUCCESS); 7750 } 7751 7752 /* 7753 MatCreateGraph_Simple_AIJ - create simple scalar matrix (graph) from potentially blocked matrix 7754 7755 Input Parameter: 7756 . Amat - matrix 7757 - symmetrize - make the result symmetric 7758 + scale - scale with diagonal 7759 7760 Output Parameter: 7761 . a_Gmat - output scalar graph >= 0 7762 7763 */ 7764 PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, PetscReal filter, PetscInt index_size, PetscInt index[], Mat *a_Gmat) 7765 { 7766 PetscInt Istart, Iend, Ii, jj, kk, ncols, nloc, NN, MM, bs; 7767 MPI_Comm comm; 7768 Mat Gmat; 7769 PetscBool ismpiaij, isseqaij; 7770 Mat a, b, c; 7771 MatType jtype; 7772 7773 PetscFunctionBegin; 7774 PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm)); 7775 PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend)); 7776 PetscCall(MatGetSize(Amat, &MM, &NN)); 7777 PetscCall(MatGetBlockSize(Amat, &bs)); 7778 nloc = (Iend - Istart) / bs; 7779 7780 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATSEQAIJ, &isseqaij)); 7781 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATMPIAIJ, &ismpiaij)); 7782 PetscCheck(isseqaij || ismpiaij, comm, PETSC_ERR_USER, "Require (MPI)AIJ matrix type"); 7783 7784 /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */ 7785 /* A solution consists in providing a new API, MatAIJGetCollapsedAIJ, and each class can provide a fast 7786 implementation */ 7787 if (bs > 1) { 7788 PetscCall(MatGetType(Amat, &jtype)); 7789 PetscCall(MatCreate(comm, &Gmat)); 7790 PetscCall(MatSetType(Gmat, jtype)); 7791 PetscCall(MatSetSizes(Gmat, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE)); 7792 PetscCall(MatSetBlockSizes(Gmat, 1, 1)); 7793 if (isseqaij || ((Mat_MPIAIJ *)Amat->data)->garray) { 7794 PetscInt *d_nnz, *o_nnz; 7795 MatScalar *aa, val, *AA; 7796 PetscInt *aj, *ai, *AJ, nc, nmax = 0; 7797 if (isseqaij) { 7798 a = Amat; 7799 b = NULL; 7800 } else { 7801 Mat_MPIAIJ *d = (Mat_MPIAIJ *)Amat->data; 7802 a = d->A; 7803 b = d->B; 7804 } 7805 PetscCall(PetscInfo(Amat, "New bs>1 Graph. nloc=%" PetscInt_FMT "\n", nloc)); 7806 PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 0 : nloc, &o_nnz)); 7807 for (c = a, kk = 0; c && kk < 2; c = b, kk++) { 7808 PetscInt *nnz = (c == a) ? 
d_nnz : o_nnz; 7809 const PetscInt *cols1, *cols2; 7810 for (PetscInt brow = 0, nc1, nc2, ok = 1; brow < nloc * bs; brow += bs) { // block rows 7811 PetscCall(MatGetRow(c, brow, &nc2, &cols2, NULL)); 7812 nnz[brow / bs] = nc2 / bs; 7813 if (nc2 % bs) ok = 0; 7814 if (nnz[brow / bs] > nmax) nmax = nnz[brow / bs]; 7815 for (PetscInt ii = 1; ii < bs; ii++) { // check for non-dense blocks 7816 PetscCall(MatGetRow(c, brow + ii, &nc1, &cols1, NULL)); 7817 if (nc1 != nc2) ok = 0; 7818 else { 7819 for (PetscInt jj = 0; jj < nc1 && ok == 1; jj++) { 7820 if (cols1[jj] != cols2[jj]) ok = 0; 7821 if (cols1[jj] % bs != jj % bs) ok = 0; 7822 } 7823 } 7824 PetscCall(MatRestoreRow(c, brow + ii, &nc1, &cols1, NULL)); 7825 } 7826 PetscCall(MatRestoreRow(c, brow, &nc2, &cols2, NULL)); 7827 if (!ok) { 7828 PetscCall(PetscFree2(d_nnz, o_nnz)); 7829 PetscCall(PetscInfo(Amat, "Found sparse blocks - revert to slow method\n")); 7830 goto old_bs; 7831 } 7832 } 7833 } 7834 PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz)); 7835 PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz)); 7836 PetscCall(PetscFree2(d_nnz, o_nnz)); 7837 PetscCall(PetscMalloc2(nmax, &AA, nmax, &AJ)); 7838 // diag 7839 for (PetscInt brow = 0, n, grow; brow < nloc * bs; brow += bs) { // block rows 7840 Mat_SeqAIJ *aseq = (Mat_SeqAIJ *)a->data; 7841 ai = aseq->i; 7842 n = ai[brow + 1] - ai[brow]; 7843 aj = aseq->j + ai[brow]; 7844 for (int k = 0; k < n; k += bs) { // block columns 7845 AJ[k / bs] = aj[k] / bs + Istart / bs; // diag starts at (Istart,Istart) 7846 val = 0; 7847 if (index_size == 0) { 7848 for (int ii = 0; ii < bs; ii++) { // rows in block 7849 aa = aseq->a + ai[brow + ii] + k; 7850 for (int jj = 0; jj < bs; jj++) { // columns in block 7851 val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm 7852 } 7853 } 7854 } else { // use (index,index) value if provided 7855 for (int iii = 0; iii < index_size; iii++) { // rows in block 7856 int ii = index[iii]; 7857 aa = aseq->a + ai[brow + ii] + k; 7858 for (int jjj = 0; jjj < index_size; jjj++) { // columns in block 7859 int jj = index[jjj]; 7860 val += PetscAbs(PetscRealPart(aa[jj])); 7861 } 7862 } 7863 } 7864 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%d) >= nmax (%d)", (int)(k / bs), (int)nmax); 7865 AA[k / bs] = val; 7866 } 7867 grow = Istart / bs + brow / bs; 7868 PetscCall(MatSetValues(Gmat, 1, &grow, n / bs, AJ, AA, ADD_VALUES)); 7869 } 7870 // off-diag 7871 if (ismpiaij) { 7872 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Amat->data; 7873 const PetscScalar *vals; 7874 const PetscInt *cols, *garray = aij->garray; 7875 PetscCheck(garray, PETSC_COMM_SELF, PETSC_ERR_USER, "No garray ?"); 7876 for (PetscInt brow = 0, grow; brow < nloc * bs; brow += bs) { // block rows 7877 PetscCall(MatGetRow(b, brow, &ncols, &cols, NULL)); 7878 for (int k = 0, cidx = 0; k < ncols; k += bs, cidx++) { 7879 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs >= nmax"); 7880 AA[k / bs] = 0; 7881 AJ[cidx] = garray[cols[k]] / bs; 7882 } 7883 nc = ncols / bs; 7884 PetscCall(MatRestoreRow(b, brow, &ncols, &cols, NULL)); 7885 if (index_size == 0) { 7886 for (int ii = 0; ii < bs; ii++) { // rows in block 7887 PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals)); 7888 for (int k = 0; k < ncols; k += bs) { 7889 for (int jj = 0; jj < bs; jj++) { // cols in block 7890 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%d) >= nmax (%d)", (int)(k / bs), (int)nmax); 7891 AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj])); 7892 } 7893 } 7894 PetscCall(MatRestoreRow(b, brow + ii, &ncols, 
&cols, &vals)); 7895 } 7896 } else { // use (index,index) value if provided 7897 for (int iii = 0; iii < index_size; iii++) { // rows in block 7898 int ii = index[iii]; 7899 PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals)); 7900 for (int k = 0; k < ncols; k += bs) { 7901 for (int jjj = 0; jjj < index_size; jjj++) { // cols in block 7902 int jj = index[jjj]; 7903 AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj])); 7904 } 7905 } 7906 PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals)); 7907 } 7908 } 7909 grow = Istart / bs + brow / bs; 7910 PetscCall(MatSetValues(Gmat, 1, &grow, nc, AJ, AA, ADD_VALUES)); 7911 } 7912 } 7913 PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY)); 7914 PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY)); 7915 PetscCall(PetscFree2(AA, AJ)); 7916 } else { 7917 const PetscScalar *vals; 7918 const PetscInt *idx; 7919 PetscInt *d_nnz, *o_nnz, *w0, *w1, *w2; 7920 old_bs: 7921 /* 7922 Determine the preallocation needed for the scalar matrix derived from the vector matrix. 7923 */ 7924 PetscCall(PetscInfo(Amat, "OLD bs>1 CreateGraph\n")); 7925 PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 0 : nloc, &o_nnz)); 7926 if (isseqaij) { 7927 PetscInt max_d_nnz; 7928 /* 7929 Determine exact preallocation count for (sequential) scalar matrix 7930 */ 7931 PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat, &max_d_nnz)); 7932 max_d_nnz = PetscMin(nloc, bs * max_d_nnz); 7933 PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2)); 7934 for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) PetscCall(MatCollapseRows(Amat, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL)); 7935 PetscCall(PetscFree3(w0, w1, w2)); 7936 } else if (ismpiaij) { 7937 Mat Daij, Oaij; 7938 const PetscInt *garray; 7939 PetscInt max_d_nnz; 7940 PetscCall(MatMPIAIJGetSeqAIJ(Amat, &Daij, &Oaij, &garray)); 7941 /* 7942 Determine exact preallocation count for diagonal block portion of scalar matrix 7943 */ 7944 PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij, &max_d_nnz)); 7945 max_d_nnz = PetscMin(nloc, bs * max_d_nnz); 7946 PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2)); 7947 for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) PetscCall(MatCollapseRows(Daij, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL)); 7948 PetscCall(PetscFree3(w0, w1, w2)); 7949 /* 7950 Over estimate (usually grossly over), preallocation count for off-diagonal portion of scalar matrix 7951 */ 7952 for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) { 7953 o_nnz[jj] = 0; 7954 for (kk = 0; kk < bs; kk++) { /* rows that get collapsed to a single row */ 7955 PetscCall(MatGetRow(Oaij, Ii + kk, &ncols, NULL, NULL)); 7956 o_nnz[jj] += ncols; 7957 PetscCall(MatRestoreRow(Oaij, Ii + kk, &ncols, NULL, NULL)); 7958 } 7959 if (o_nnz[jj] > (NN / bs - nloc)) o_nnz[jj] = NN / bs - nloc; 7960 } 7961 } else SETERRQ(comm, PETSC_ERR_USER, "Require AIJ matrix type"); 7962 /* get scalar copy (norms) of matrix */ 7963 PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz)); 7964 PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz)); 7965 PetscCall(PetscFree2(d_nnz, o_nnz)); 7966 for (Ii = Istart; Ii < Iend; Ii++) { 7967 PetscInt dest_row = Ii / bs; 7968 PetscCall(MatGetRow(Amat, Ii, &ncols, &idx, &vals)); 7969 for (jj = 0; jj < ncols; jj++) { 7970 PetscInt dest_col = idx[jj] / bs; 7971 PetscScalar sv = PetscAbs(PetscRealPart(vals[jj])); 7972 PetscCall(MatSetValues(Gmat, 1, &dest_row, 1, &dest_col, &sv, ADD_VALUES)); 7973 } 7974 PetscCall(MatRestoreRow(Amat, Ii, &ncols, &idx, &vals)); 7975 } 7976 PetscCall(MatAssemblyBegin(Gmat, 
MAT_FINAL_ASSEMBLY)); 7977 PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY)); 7978 } 7979 } else { 7980 if (symmetrize || filter >= 0 || scale) PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat)); 7981 else { 7982 Gmat = Amat; 7983 PetscCall(PetscObjectReference((PetscObject)Gmat)); 7984 } 7985 if (isseqaij) { 7986 a = Gmat; 7987 b = NULL; 7988 } else { 7989 Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data; 7990 a = d->A; 7991 b = d->B; 7992 } 7993 if (filter >= 0 || scale) { 7994 /* take absolute value of each entry */ 7995 for (c = a, kk = 0; c && kk < 2; c = b, kk++) { 7996 MatInfo info; 7997 PetscScalar *avals; 7998 PetscCall(MatGetInfo(c, MAT_LOCAL, &info)); 7999 PetscCall(MatSeqAIJGetArray(c, &avals)); 8000 for (int jj = 0; jj < info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]); 8001 PetscCall(MatSeqAIJRestoreArray(c, &avals)); 8002 } 8003 } 8004 } 8005 if (symmetrize) { 8006 PetscBool isset, issym; 8007 PetscCall(MatIsSymmetricKnown(Amat, &isset, &issym)); 8008 if (!isset || !issym) { 8009 Mat matTrans; 8010 PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans)); 8011 PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric == PETSC_BOOL3_TRUE ? SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN)); 8012 PetscCall(MatDestroy(&matTrans)); 8013 } 8014 PetscCall(MatSetOption(Gmat, MAT_SYMMETRIC, PETSC_TRUE)); 8015 } else if (Amat != Gmat) PetscCall(MatPropagateSymmetryOptions(Amat, Gmat)); 8016 if (scale) { 8017 /* scale c for all diagonal values = 1 or -1 */ 8018 Vec diag; 8019 PetscCall(MatCreateVecs(Gmat, &diag, NULL)); 8020 PetscCall(MatGetDiagonal(Gmat, diag)); 8021 PetscCall(VecReciprocal(diag)); 8022 PetscCall(VecSqrtAbs(diag)); 8023 PetscCall(MatDiagonalScale(Gmat, diag, diag)); 8024 PetscCall(VecDestroy(&diag)); 8025 } 8026 PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view")); 8027 8028 if (filter >= 0) { 8029 PetscCall(MatFilter(Gmat, filter, PETSC_TRUE, PETSC_TRUE)); 8030 PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_filter_graph_view")); 8031 } 8032 *a_Gmat = Gmat; 8033 PetscFunctionReturn(PETSC_SUCCESS); 8034 } 8035 8036 /* 8037 Special version for direct calls from Fortran 8038 */ 8039 8040 /* Change these macros so can be used in void function */ 8041 /* Identical to PetscCallVoid, except it assigns to *_ierr */ 8042 #undef PetscCall 8043 #define PetscCall(...) \ 8044 do { \ 8045 PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \ 8046 if (PetscUnlikely(ierr_msv_mpiaij)) { \ 8047 *_ierr = PetscError(PETSC_COMM_SELF, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr_msv_mpiaij, PETSC_ERROR_REPEAT, " "); \ 8048 return; \ 8049 } \ 8050 } while (0) 8051 8052 #undef SETERRQ 8053 #define SETERRQ(comm, ierr, ...) 
\ 8054 do { \ 8055 *_ierr = PetscError(comm, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr, PETSC_ERROR_INITIAL, __VA_ARGS__); \ 8056 return; \ 8057 } while (0) 8058 8059 #if defined(PETSC_HAVE_FORTRAN_CAPS) 8060 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 8061 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 8062 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 8063 #else 8064 #endif 8065 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat, PetscInt *mm, const PetscInt im[], PetscInt *mn, const PetscInt in[], const PetscScalar v[], InsertMode *maddv, PetscErrorCode *_ierr) 8066 { 8067 Mat mat = *mmat; 8068 PetscInt m = *mm, n = *mn; 8069 InsertMode addv = *maddv; 8070 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 8071 PetscScalar value; 8072 8073 MatCheckPreallocated(mat, 1); 8074 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 8075 else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values"); 8076 { 8077 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend; 8078 PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col; 8079 PetscBool roworiented = aij->roworiented; 8080 8081 /* Some Variables required in the macro */ 8082 Mat A = aij->A; 8083 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 8084 PetscInt *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j; 8085 MatScalar *aa; 8086 PetscBool ignorezeroentries = ((a->ignorezeroentries && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 8087 Mat B = aij->B; 8088 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 8089 PetscInt *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n; 8090 MatScalar *ba; 8091 /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we 8092 * cannot use "#if defined" inside a macro. 
*/ 8093 PETSC_UNUSED PetscBool inserted = PETSC_FALSE; 8094 8095 PetscInt *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2; 8096 PetscInt nonew = a->nonew; 8097 MatScalar *ap1, *ap2; 8098 8099 PetscFunctionBegin; 8100 PetscCall(MatSeqAIJGetArray(A, &aa)); 8101 PetscCall(MatSeqAIJGetArray(B, &ba)); 8102 for (i = 0; i < m; i++) { 8103 if (im[i] < 0) continue; 8104 PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1); 8105 if (im[i] >= rstart && im[i] < rend) { 8106 row = im[i] - rstart; 8107 lastcol1 = -1; 8108 rp1 = aj + ai[row]; 8109 ap1 = aa + ai[row]; 8110 rmax1 = aimax[row]; 8111 nrow1 = ailen[row]; 8112 low1 = 0; 8113 high1 = nrow1; 8114 lastcol2 = -1; 8115 rp2 = bj + bi[row]; 8116 ap2 = ba + bi[row]; 8117 rmax2 = bimax[row]; 8118 nrow2 = bilen[row]; 8119 low2 = 0; 8120 high2 = nrow2; 8121 8122 for (j = 0; j < n; j++) { 8123 if (roworiented) value = v[i * n + j]; 8124 else value = v[i + j * m]; 8125 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 8126 if (in[j] >= cstart && in[j] < cend) { 8127 col = in[j] - cstart; 8128 MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]); 8129 } else if (in[j] < 0) continue; 8130 else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) { 8131 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1); 8132 } else { 8133 if (mat->was_assembled) { 8134 if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 8135 #if defined(PETSC_USE_CTABLE) 8136 PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); 8137 col--; 8138 #else 8139 col = aij->colmap[in[j]] - 1; 8140 #endif 8141 if (col < 0 && !((Mat_SeqAIJ *)aij->A->data)->nonew) { 8142 PetscCall(MatDisAssemble_MPIAIJ(mat)); 8143 col = in[j]; 8144 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 8145 B = aij->B; 8146 b = (Mat_SeqAIJ *)B->data; 8147 bimax = b->imax; 8148 bi = b->i; 8149 bilen = b->ilen; 8150 bj = b->j; 8151 rp2 = bj + bi[row]; 8152 ap2 = ba + bi[row]; 8153 rmax2 = bimax[row]; 8154 nrow2 = bilen[row]; 8155 low2 = 0; 8156 high2 = nrow2; 8157 bm = aij->B->rmap->n; 8158 ba = b->a; 8159 inserted = PETSC_FALSE; 8160 } 8161 } else col = in[j]; 8162 MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]); 8163 } 8164 } 8165 } else if (!aij->donotstash) { 8166 if (roworiented) { 8167 PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 8168 } else { 8169 PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 8170 } 8171 } 8172 } 8173 PetscCall(MatSeqAIJRestoreArray(A, &aa)); 8174 PetscCall(MatSeqAIJRestoreArray(B, &ba)); 8175 } 8176 PetscFunctionReturnVoid(); 8177 } 8178 8179 /* Undefining these here since they were redefined from their original definition above! No 8180 * other PETSc functions should be defined past this point, as it is impossible to recover the 8181 * original definitions */ 8182 #undef PetscCall 8183 #undef SETERRQ 8184
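/*
   The COO assembly path driven by MatProductSymbolic_MPIAIJBACKEND()/MatProductNumeric_MPIAIJBACKEND() above is the
   public MatSetPreallocationCOO()/MatSetValuesCOO() pair: the (i,j) pattern is fixed once (symbolic phase), after
   which values may be (re)inserted any number of times (numeric phase). A minimal usage sketch follows; it is a
   comment only (not compiled), the sizes and values are made up, and for simplicity it assumes a single MPI process.

     Mat         C;
     PetscInt    coo_i[] = {0, 0, 1}, coo_j[] = {0, 1, 1};    // global (row, col) indices of the nonzeros
     PetscScalar v[]     = {1.0, 2.0, 3.0};

     PetscCall(MatCreate(PETSC_COMM_WORLD, &C));
     PetscCall(MatSetSizes(C, PETSC_DECIDE, PETSC_DECIDE, 2, 2));
     PetscCall(MatSetType(C, MATAIJ));
     PetscCall(MatSetPreallocationCOO(C, 3, coo_i, coo_j));   // symbolic: fix the nonzero pattern
     PetscCall(MatSetValuesCOO(C, v, INSERT_VALUES));         // numeric: may be repeated with new values
     PetscCall(MatDestroy(&C));
*/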