#include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
#include <petsc/private/vecimpl.h>
#include <petsc/private/sfimpl.h>
#include <petsc/private/isimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>
#include <petsc/private/hashmapi.h>

/*
   MatDestroy_MPIAIJ - Destructor for MATMPIAIJ.

   Frees the split storage (diagonal block A, off-diagonal block B), the column map,
   the communication objects (lvec, Mvctx), cached row-access buffers, and the COO
   assembly state, then detaches every composed method and conversion function that
   was registered when the matrix was created.
*/
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
#endif
  PetscCall(MatStashDestroy_Private(&mat->stash));
  PetscCall(VecDestroy(&aij->diag));
  PetscCall(MatDestroy(&aij->A));
  PetscCall(MatDestroy(&aij->B));
  /* colmap is either a hash table (scalable) or a dense global-length array, see MatCreateColmap_MPIAIJ_Private() */
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&aij->colmap));
#else
  PetscCall(PetscFree(aij->colmap));
#endif
  PetscCall(PetscFree(aij->garray));
  PetscCall(VecDestroy(&aij->lvec));
  PetscCall(VecScatterDestroy(&aij->Mvctx));
  PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));
  PetscCall(PetscFree(aij->ld));

  /* Free COO */
  PetscCall(MatResetPreallocationCOO_MPIAIJ(mat));

  PetscCall(PetscFree(mat->data));

  /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
  PetscCall(PetscObjectCompose((PetscObject)mat, "MatMergeSeqsToMPI", NULL));

  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatIsTranspose_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocationCSR_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpibaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisbaij_C", NULL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcusparse_C", NULL));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijhipsparse_C", NULL));
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijkokkos_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_elemental_C", NULL));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_scalapack_C", NULL));
#endif
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_hypre_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_is_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetUseScalableIncreaseOverlap_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijperm_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijsell_C", NULL));
#if defined(PETSC_HAVE_MKL_SPARSE)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijmkl_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcrl_C", NULL));
  /* NOTE(review): "MatConvert_mpiaij_is_C" is also cleared above; this second clear is redundant but harmless */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisell_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetPreallocationCOO_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetValuesCOO_C", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and MatAssemblyEnd_MPI_Hash() */
#define TYPE AIJ
#define TYPE_AIJ
#include "../src/mat/impls/aij/mpi/mpihashmat.h"
#undef TYPE
#undef TYPE_AIJ

/*
   MatGetRowIJ_MPIAIJ - Returns compressed row storage index arrays for the local rows.

   Builds a sequential matrix B holding this rank's rows (via MatMPIAIJGetLocalMat) and
   hands out B's ia/ja arrays. B is composed on A under the key "MatGetRowIJ_MPIAIJ" so
   MatRestoreRowIJ_MPIAIJ() can find it; the MatDestroy() here only drops the local
   reference, the composition keeps B alive until the restore.
*/
PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
{
  Mat B;

  PetscFunctionBegin;
  PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &B));
  PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject)B));
  PetscCall(MatGetRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
  PetscCall(MatDestroy(&B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   MatRestoreRowIJ_MPIAIJ - Releases the arrays obtained with MatGetRowIJ_MPIAIJ().

   Retrieves the composed sequential matrix, restores its ia/ja arrays, and removes
   the composition (which destroys the last reference to the temporary matrix).
*/
PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
{
  Mat B;

  PetscFunctionBegin;
  PetscCall(PetscObjectQuery((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject *)&B));
  PetscCall(MatRestoreRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
  PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to `MATSEQAIJ` when constructed with a single process communicator,
   and `MATMPIAIJ` otherwise. As a result, for single process communicators,
   `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to `MATAIJ` during a call to `MatSetFromOptions()`

   Developer Note:
   Subclasses include `MATAIJCUSPARSE`, `MATAIJPERM`, `MATAIJSELL`, `MATAIJMKL`, `MATAIJCRL`, `MATAIJKOKKOS`, and also automatically switches over to use inodes when
   enough exist.

   Level: beginner

.seealso: `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateAIJ()`, `MatCreateSeqAIJ()`, `MATSEQAIJ`, `MATMPIAIJ`
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to `MATSEQAIJCRL` when constructed with a single process communicator,
   and `MATMPIAIJCRL` otherwise. As a result, for single process communicators,
   `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to `MATMPIAIJCRL` during a call to `MatSetFromOptions()`

   Level: beginner

.seealso: `MatCreateMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL`
M*/

/*
   MatBindToCPU_MPIAIJ - Forces (or releases) CPU-only execution for the matrix.

   Propagates the flag to the diagonal and off-diagonal blocks and to the work vectors.
   The matrix-level boundtocpu flag is only recorded when a device backend is configured.
*/
static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A, PetscBool flg)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) || defined(PETSC_HAVE_VIENNACL)
  A->boundtocpu = flg;
#endif
  if (a->A) PetscCall(MatBindToCPU(a->A, flg));
  if (a->B) PetscCall(MatBindToCPU(a->B, flg));

  /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
   * This maybe seems a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
   * to differ from the parent matrix. */
  if (a->lvec) PetscCall(VecBindToCPU(a->lvec, flg));
  if (a->diag) PetscCall(VecBindToCPU(a->diag, flg));

  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   MatSetBlockSizes_MPIAIJ - Sets the row/column block sizes on the split blocks.

   The off-diagonal block B always keeps column block size 1 (its columns are the
   compressed ghost columns, which do not preserve the global column blocking).
*/
PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data;

  PetscFunctionBegin;
  if (mat->A) {
    PetscCall(MatSetBlockSizes(mat->A, rbs, cbs));
    PetscCall(MatSetBlockSizes(mat->B, rbs, 1));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   MatFindNonzeroRows_MPIAIJ - Builds an IS of the locally owned rows that contain at
   least one nonzero value (in either the diagonal or off-diagonal block).

   Output keptrows is NULL when every row (globally) has a nonzero, so callers can use
   NULL as "keep everything". A row whose stored entries are all numerically zero
   counts as a zero row, the same as a structurally empty row.
*/
PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M, IS *keptrows)
{
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ *)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ *)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ *)mat->B->data;
  const PetscInt  *ia, *ib;
  const MatScalar *aa, *bb, *aav, *bav;
  PetscInt         na, nb, i, j, *rows, cnt = 0, n0rows;
  PetscInt         m = M->rmap->n, rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = NULL;

  ia = a->i;
  ib = b->i;
  PetscCall(MatSeqAIJGetArrayRead(mat->A, &aav));
  PetscCall(MatSeqAIJGetArrayRead(mat->B, &bav));
  /* first pass: cnt = number of local rows with no nonzero value */
  for (i = 0; i < m; i++) {
    na = ia[i + 1] - ia[i];
    nb = ib[i + 1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = aav + ia[i];
    for (j = 0; j < na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = bav + ib[i];
    for (j = 0; j < nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
  ok1:;
  }
  PetscCall(MPIU_Allreduce(&cnt, &n0rows, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)M)));
  if (!n0rows) {
    PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
    PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  /* m - cnt rows survive after dropping the cnt all-zero rows */
  PetscCall(PetscMalloc1(M->rmap->n - cnt, &rows));
  cnt = 0;
  /* second pass: record the global index of every row with a nonzero value */
  for (i = 0; i < m; i++) {
    na = ia[i + 1] - ia[i];
    nb = ib[i + 1] - ib[i];
    if (!na && !nb) continue;
    aa = aav + ia[i];
    for (j = 0; j < na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = bav + ib[i];
    for (j = 0; j < nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
  ok2:;
  }
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), cnt, rows, PETSC_OWN_POINTER, keptrows));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   MatDiagonalSet_MPIAIJ - Inserts/adds the vector D on the diagonal of Y.

   Fast path: when the matrix is assembled and row/column layouts are congruent the
   whole diagonal lives in the local diagonal block A, so delegate directly to it;
   otherwise fall back to the generic implementation.
*/
PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y, Vec D, InsertMode is)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Y->data;
  PetscBool   cong;

  PetscFunctionBegin;
  PetscCall(MatHasCongruentLayouts(Y, &cong));
  if (Y->assembled && cong) {
    PetscCall(MatDiagonalSet(aij->A, D, is));
  } else {
    PetscCall(MatDiagonalSet_Default(Y, D, is));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   MatFindZeroDiagonals_MPIAIJ - Builds an IS (in global numbering) of the locally
   owned rows whose diagonal entry is missing or zero. Only the diagonal block A
   needs to be examined since the diagonal never lives in B.
*/
PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M, IS *zrows)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)M->data;
  PetscInt    i, rstart, nrows, *rows;

  PetscFunctionBegin;
  *zrows = NULL;
  PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A, &nrows, &rows));
  PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
  /* shift local row indices to global numbering */
  for (i = 0; i < nrows; i++) rows[i] += rstart;
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), nrows, rows, PETSC_OWN_POINTER, zrows));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   MatGetColumnReductions_MPIAIJ - Computes a per-column reduction (norm, sum of real
   or imaginary parts, or mean) over all rows of the parallel matrix.

   Accumulates local contributions into a dense work array of global column length
   (garray maps B's compressed column indices back to global columns), then combines
   across ranks with MPI_MAX (for the infinity norm) or MPI_SUM (everything else).
*/
PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A, PetscInt type, PetscReal *reductions)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)A->data;
  PetscInt           i, m, n, *garray = aij->garray;
  Mat_SeqAIJ        *a_aij = (Mat_SeqAIJ *)aij->A->data;
  Mat_SeqAIJ        *b_aij = (Mat_SeqAIJ *)aij->B->data;
  PetscReal         *work;
  const PetscScalar *dummy;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A, &m, &n));
  PetscCall(PetscCalloc1(n, &work));
  /* get/restore pair with no use in between: presumably forces any device-side values
     to be synced to the host before a_aij->a / b_aij->a are read directly below —
     NOTE(review): confirm this is the intent */
  PetscCall(MatSeqAIJGetArrayRead(aij->A, &dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &dummy));
  PetscCall(MatSeqAIJGetArrayRead(aij->B, &dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &dummy));
  if (type == NORM_2) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i] * a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i] * b_aij->a[i]);
  } else if (type == NORM_1) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
  } else if (type == NORM_INFINITY) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]), work[garray[b_aij->j[i]]]);
  } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]);
  } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]);
  } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
  if (type == NORM_INFINITY) {
    PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
  } else {
    PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscCall(PetscFree(work));
  /* post-process: square root for the 2-norm, divide by global row count for means */
  if (type == NORM_2) {
    for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < n; i++) reductions[i] /= m;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   MatFindOffBlockDiagonalEntries_MPIAIJ - Builds an IS (global numbering) of the
   locally owned rows that have an entry outside the (block-)diagonal: the union of
   A's off-block-diagonal rows and every row with an entry in B.
*/
PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A, IS *is)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
  IS              sis, gis;
  const PetscInt *isis, *igis;
  PetscInt        n, *iis, nsis, ngis, rstart, i;

  PetscFunctionBegin;
  PetscCall(MatFindOffBlockDiagonalEntries(a->A, &sis));
  PetscCall(MatFindNonzeroRows(a->B, &gis));
  PetscCall(ISGetSize(gis, &ngis));
  PetscCall(ISGetSize(sis, &nsis));
  PetscCall(ISGetIndices(sis, &isis));
  PetscCall(ISGetIndices(gis, &igis));

  /* concatenate the two (local-numbered) index lists, then sort and deduplicate */
  PetscCall(PetscMalloc1(ngis + nsis, &iis));
  PetscCall(PetscArraycpy(iis, igis, ngis));
  PetscCall(PetscArraycpy(iis + ngis, isis, nsis));
  n = ngis + nsis;
  PetscCall(PetscSortRemoveDupsInt(&n, iis));
  PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
  for (i = 0; i < n; i++) iis[i] += rstart;
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A), n, iis, PETSC_OWN_POINTER, is));

  PetscCall(ISRestoreIndices(sis, &isis));
  PetscCall(ISRestoreIndices(gis, &igis));
  PetscCall(ISDestroy(&sis));
  PetscCall(ISDestroy(&gis));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
   a slightly higher hash table cost; without it it is not scalable (each processor
   has an order N integer array but is fast to access).
   Entries are stored shifted by +1 so that 0 means "column not present in B".
*/
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt    n = aij->B->cmap->n, i;

  PetscFunctionBegin;
  PetscCheck(!n || aij->garray, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapICreateWithSize(n, &aij->colmap));
  for (i = 0; i < n; i++) PetscCall(PetscHMapISet(aij->colmap, aij->garray[i] + 1, i + 1));
#else
  PetscCall(PetscCalloc1(mat->cmap->N + 1, &aij->colmap));
  for (i = 0; i < n; i++) aij->colmap[aij->garray[i]] = i + 1;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   Inserts one value into the diagonal block A. Relies on many locals of the calling
   function by name (rp1/ap1/low1/high1/nrow1/rmax1/lastcol1/aa/ai/aj/aimax/ailen/
   nonew/_i/t/N/am/ignorezeroentries). Binary-searches the sorted column indices of
   the row, then either updates in place or (when allowed) reallocates and shifts to
   insert a new nonzero. lastcol1 caches the previous column so that monotonically
   increasing insertions keep the cheap search window.
*/
#define MatSetValues_SeqAIJ_A_Private(row, col, value, addv, orow, ocol) \
  { \
    if (col <= lastcol1) low1 = 0; \
    else high1 = nrow1; \
    lastcol1 = col; \
    while (high1 - low1 > 5) { \
      t = (low1 + high1) / 2; \
      if (rp1[t] > col) high1 = t; \
      else low1 = t; \
    } \
    for (_i = low1; _i < high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap1[_i] += value; \
          /* Not sure LogFlops will slow down the code or not */ \
          (void)PetscLogFlops(1.0); \
        } else ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries && row != col) { \
      low1  = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    if (nonew == 1) { \
      low1  = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A, am, 1, nrow1, row, col, rmax1, aa, ai, aj, rp1, ap1, aimax, nonew, MatScalar); \
    N = nrow1++ - 1; \
    a->nz++; \
    high1++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp1 + _i + 1, rp1 + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap1 + _i + 1, ap1 + _i, N - _i + 1)); \
    rp1[_i] = col; \
    ap1[_i] = value; \
    A->nonzerostate++; \
  a_noinsert:; \
    ailen[row] = nrow1; \
  }

/*
   Same insertion logic as MatSetValues_SeqAIJ_A_Private() but for the off-diagonal
   block B (rp2/ap2/low2/... locals). Note the zero-entry skip does not test
   row != col: entries of B are never on the global diagonal.
*/
#define MatSetValues_SeqAIJ_B_Private(row, col, value, addv, orow, ocol) \
  { \
    if (col <= lastcol2) low2 = 0; \
    else high2 = nrow2; \
    lastcol2 = col; \
    while (high2 - low2 > 5) { \
      t = (low2 + high2) / 2; \
      if (rp2[t] > col) high2 = t; \
      else low2 = t; \
    } \
    for (_i = low2; _i < high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap2[_i] += value; \
          (void)PetscLogFlops(1.0); \
        } else ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) { \
      low2  = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    if (nonew == 1) { \
      low2  = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B, bm, 1, nrow2, row, col, rmax2, ba, bi, bj, rp2, ap2, bimax, nonew, MatScalar); \
    N = nrow2++ - 1; \
    b->nz++; \
    high2++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp2 + _i + 1, rp2 + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap2 + _i + 1, ap2 + _i, N - _i + 1)); \
    rp2[_i] = col; \
    ap2[_i] = value; \
    B->nonzerostate++; \
  b_noinsert:; \
    bilen[row] = nrow2; \
  }

/*
   MatSetValuesRow_MPIAIJ - Overwrites one locally owned row with the values v[],
   which must be ordered by global column: first the part of B left of the diagonal
   block, then the A part, then the remaining B part. Structure must already exist;
   only values are copied.
*/
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A, PetscInt row, const PetscScalar v[])
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ  *a = (Mat_SeqAIJ *)mat->A->data, *b = (Mat_SeqAIJ *)mat->B->data;
  PetscInt     l, *garray = mat->garray, diag;
  PetscScalar *aa, *ba;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  PetscCall(MatGetOwnershipRange(A, &diag, NULL));
  row = row - diag;
  for (l = 0; l < b->i[row + 1] - b->i[row]; l++) {
    if (garray[b->j[b->i[row] + l]] > diag) break;
  }
  if (l) {
    PetscCall(MatSeqAIJGetArray(mat->B, &ba));
    PetscCall(PetscArraycpy(ba + b->i[row], v, l));
    PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
  }

  /* diagonal part */
  if (a->i[row + 1] - a->i[row]) {
    PetscCall(MatSeqAIJGetArray(mat->A, &aa));
    PetscCall(PetscArraycpy(aa + a->i[row], v + l, (a->i[row + 1] - a->i[row])));
    PetscCall(MatSeqAIJRestoreArray(mat->A, &aa));
  }

  /* right of diagonal part */
  if (b->i[row + 1] - b->i[row] - l) {
    PetscCall(MatSeqAIJGetArray(mat->B, &ba));
    PetscCall(PetscArraycpy(ba + b->i[row] + l, v + l + a->i[row + 1] - a->i[row], b->i[row + 1] - b->i[row] - l));
    PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   MatSetValues_MPIAIJ - Inserts/adds a logically dense m-by-n block of values.

   Locally owned rows are routed directly into A (owned columns) or B (ghost
   columns) via the insertion macros above; off-process rows are buffered in the
   stash for communication during assembly. If a new ghost column appears after
   the matrix was assembled, B is "disassembled" back to global column numbering
   and the macro working set is reloaded from the fresh B. Negative row/column
   indices are silently ignored.
*/
PetscErrorCode MatSetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIAIJ *aij         = (Mat_MPIAIJ *)mat->data;
  PetscScalar value       = 0.0;
  PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
  PetscBool   roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat         A     = aij->A;
  Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
  PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
  PetscBool   ignorezeroentries = a->ignorezeroentries;
  Mat         B     = aij->B;
  Mat_SeqAIJ *b     = (Mat_SeqAIJ *)B->data;
  PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
  MatScalar  *aa, *ba;
  PetscInt   *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
  PetscInt    nonew;
  MatScalar  *ap1, *ap2;

  PetscFunctionBegin;
  PetscCall(MatSeqAIJGetArray(A, &aa));
  PetscCall(MatSeqAIJGetArray(B, &ba));
  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue;
    PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    if (im[i] >= rstart && im[i] < rend) {
      /* locally owned row: load the macro working set for this row of A and B */
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j = 0; j < n; j++) {
        if (v) value = roworiented ? v[i * n + j] : v[i + j * m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
        if (in[j] >= cstart && in[j] < cend) {
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
        } else if (in[j] < 0) {
          continue;
        } else {
          PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          if (mat->was_assembled) {
            if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); /* map global col ids to local ones */
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ *)(aij->B->data))->nonew) { /* col < 0 means in[j] is a new col for B */
              PetscCall(MatDisAssemble_MPIAIJ(mat));                 /* Change aij->B from reduced/local format to expanded/global format */
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ *)B->data;
              bimax = b->imax;
              bi    = b->i;
              bilen = b->ilen;
              bj    = b->j;
              ba    = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a; /* NOTE(review): redundant, ba was already assigned above */
            } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
              if (1 == ((Mat_SeqAIJ *)(aij->B->data))->nonew) {
                PetscCall(PetscInfo(mat, "Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n", (double)PetscRealPart(value), im[i], in[j]));
              } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
            }
          } else col = in[j];
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
        }
      }
    } else {
      /* off-process row: buffer in the stash unless the user promised not to do this */
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
  }
  PetscCall(MatSeqAIJRestoreArray(A, &aa)); /* aa, bb might have been free'd due to reallocation above. But we don't access them here */
  PetscCall(MatSeqAIJRestoreArray(B, &ba));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
   The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
   No off-processor parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  Mat         A   = aij->A; /* diagonal part of the matrix */
  Mat         B   = aij->B; /* offdiagonal part of the matrix */
  Mat_SeqAIJ *a   = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJ *b   = (Mat_SeqAIJ *)B->data;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, col;
  PetscInt   *ailen = a->ilen, *aj = a->j;
  PetscInt   *bilen = b->ilen, *bj = b->j;
  PetscInt    am = aij->A->rmap->n, j;
  PetscInt    diag_so_far = 0, dnz;
  PetscInt    offd_so_far = 0, onz;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j = 0; j < am; j++) {
    dnz = onz = 0;
    /* Iterate over all non-zero columns of the current row */
    for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[diag_so_far++] = mat_j[col] - cstart;
        dnz++;
      } else { /* off-diagonal entries */
        bj[offd_so_far++] = mat_j[col];
        onz++;
      }
    }
    ailen[j] = dnz;
    bilen[j] = onz;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
   The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
   No off-processor parts of the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ.
   Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
   would not be true and the more complex MatSetValues_MPIAIJ has to be used.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[], const PetscScalar mat_a[])
{
  Mat_MPIAIJ  *aij  = (Mat_MPIAIJ *)mat->data;
  Mat          A    = aij->A; /* diagonal part of the matrix */
  Mat          B    = aij->B; /* offdiagonal part of the matrix */
  Mat_SeqAIJ  *aijd = (Mat_SeqAIJ *)(aij->A)->data, *aijo = (Mat_SeqAIJ *)(aij->B)->data;
  Mat_SeqAIJ  *a    = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJ  *b    = (Mat_SeqAIJ *)B->data;
  PetscInt     cstart = mat->cmap->rstart, cend = mat->cmap->rend;
  PetscInt    *ailen = a->ilen, *aj = a->j;
  PetscInt    *bilen = b->ilen, *bj = b->j;
  PetscInt     am = aij->A->rmap->n, j;
  PetscInt    *full_diag_i = aijd->i, *full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
  PetscInt     col, dnz_row, onz_row, rowstart_diag, rowstart_offd;
  PetscScalar *aa = a->a, *ba = b->a;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j = 0; j < am; j++) {
    dnz_row = onz_row = 0;
    rowstart_offd = full_offd_i[j];
    rowstart_diag = full_diag_i[j];
    /* Iterate over all non-zero columns of the current row */
    for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[rowstart_diag + dnz_row] = mat_j[col] - cstart;
        aa[rowstart_diag + dnz_row] = mat_a[col];
        dnz_row++;
      } else { /* off-diagonal entries */
        bj[rowstart_offd + onz_row] = mat_j[col];
        ba[rowstart_offd + onz_row] = mat_a[col];
        onz_row++;
      }
    }
    ailen[j] = dnz_row;
    bilen[j] = onz_row;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   MatGetValues_MPIAIJ - Retrieves an m-by-n block of values into v (row-major).

   Only locally owned rows are supported. Owned columns are read from A; ghost
   columns are mapped through colmap/garray and read from B, with 0.0 returned
   for positions that are not stored in B. Negative indices are skipped (the
   corresponding v entries are left untouched).
*/
PetscErrorCode MatGetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j = 0; j < n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          col = idxn[j] - cstart;
          PetscCall(MatGetValues(aij->A, 1, &row, 1, &col, v + i * n + j));
        } else {
          if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
          PetscCall(PetscHMapIGetWithDefault(aij->colmap, idxn[j] + 1, 0, &col));
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          /* the garray check guards against stale colmap entries after disassembly — a
             missing column reads as an explicit zero */
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v + i * n + j) = 0.0;
          else PetscCall(MatGetValues(aij->B, 1, &row, 1, &col, v + i * n + j));
        }
      }
    } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   MatAssemblyBegin_MPIAIJ - Starts communication of stashed off-process entries.
   A no-op when the user guaranteed there are none (donotstash / nooffprocentries).
*/
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt    nstash, reallocs;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
  PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
  PetscCall(PetscInfo(aij->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatAssemblyEnd_MPIAIJ - Completes assembly: drains the stash of off-process entries,
  assembles the diagonal and off-diagonal blocks, handles collective disassembly, and
  updates the global nonzero state. Collective on the matrix communicator.
*/
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *)mat->data;
  PetscMPIInt  n;
  PetscInt     i, j, rstart, ncols, flg;
  PetscInt    *row, *col;
  PetscBool    other_disassembled;
  PetscScalar *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    /* receive and insert entries that other ranks stashed for our rows */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
  /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
  if (mat->boundtocpu) {
    PetscCall(MatBindToCPU(aij->A, PETSC_TRUE));
    PetscCall(MatBindToCPU(aij->B, PETSC_TRUE));
  }
#endif
  PetscCall(MatAssemblyBegin(aij->A, mode));
  PetscCall(MatAssemblyEnd(aij->A, mode));

  /* determine if any processor has disassembled, if so we must
     also disassemble ourself, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ *)aij->B->data)->nonew) {
    PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globally it does not */
      PetscCall(MatDisAssemble_MPIAIJ(mat));
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIAIJ(mat));
  PetscCall(MatSetOption(aij->B, MAT_USE_INODES, PETSC_FALSE));
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscCall(MatAssemblyBegin(aij->B, mode));
  PetscCall(MatAssemblyEnd(aij->B, mode));

  /* invalidate cached row-access buffers and the cached diagonal */
  PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));

  aij->rowvalues = NULL;

  PetscCall(VecDestroy(&aij->diag));

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ *)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
  }
#if defined(PETSC_HAVE_DEVICE)
  mat->offloadmask = PETSC_OFFLOAD_BOTH;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatZeroEntries_MPIAIJ - Zeros all stored entries of both blocks; the pattern is kept. */
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatZeroEntries(l->A));
  PetscCall(MatZeroEntries(l->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatZeroRows_MPIAIJ - Zeros the given (global) rows, optionally placing 'diag' on the
  diagonal and fixing the right-hand side b so that x keeps its prescribed values.
  Collective; rows may be owned by other ranks.
*/
PetscErrorCode MatZeroRows_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ *)A->data;
  PetscObjectState sA, sB;
  PetscInt        *lrows;
  PetscInt         r, len;
  PetscBool        cong, lch, gch;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows));
  PetscCall(MatHasCongruentLayouts(A, &cong));
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArray(b, &bb));
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArray(b, &bb));
  }

  /* remember the nonzero states so we can detect a pattern change below */
  sA = mat->A->nonzerostate;
  sB = mat->B->nonzerostate;

  if (diag != 0.0 && cong) {
    /* congruent layouts: the diagonal entry lives in the A block */
    PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
    Mat_SeqAIJ *aijA = (Mat_SeqAIJ *)mat->A->data;
    Mat_SeqAIJ *aijB = (Mat_SeqAIJ *)mat->B->data;
    PetscInt    nnwA, nnwB;
    PetscBool   nnzA, nnzB;

    nnwA = aijA->nonew;
    nnwB = aijB->nonew;
    nnzA = aijA->keepnonzeropattern;
    nnzB = aijB->keepnonzeropattern;
    if (!nnzA) {
      PetscCall(PetscInfo(mat->A, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n"));
      aijA->nonew = 0;
    }
    if (!nnzB) {
      PetscCall(PetscInfo(mat->B, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n"));
      aijB->nonew = 0;
    }
    /* Must zero here before the next loop */
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      if (row >= A->cmap->N) continue; /* no diagonal entry exists for rows beyond the column range */
      PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
    }
    aijA->nonew = nnwA;
    aijB->nonew = nnwB;
  } else {
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  }
  PetscCall(PetscFree(lrows));
  PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));

  /* reduce nonzerostate */
  lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
  PetscCall(MPIU_Allreduce(&lch, &gch, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)A)));
  if (gch) A->nonzerostate++;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatZeroRowsColumns_MPIAIJ - Zeros the given rows AND the matching columns, optionally
  placing 'diag' on the diagonal and updating b from the prescribed x values.
  The row set is made known to every rank via a PetscSF reduction.
*/
PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ *)A->data;
  PetscMPIInt        n = A->rmap->n;
  PetscInt           i, j, r, m, len = 0;
  PetscInt          *lrows, *owners = A->rmap->range;
  PetscMPIInt        p = 0;
  PetscSFNode       *rrows;
  PetscSF            sf;
  const PetscScalar *xx;
  PetscScalar       *bb, *mask, *aij_a;
  Vec                xmask, lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ *)l->B->data;
  const PetscInt    *aj, *ii, *ridx;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  PetscCall(PetscMalloc1(n, &lrows));
  for (r = 0; r < n; ++r) lrows[r] = -1;
  PetscCall(PetscMalloc1(N, &rrows));
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N);
    if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */
      PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p));
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
  /* Collect flags for rows to be zeroed */
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFDestroy(&sf));
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r)
    if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b));
  /* handle off diagonal part of matrix */
  PetscCall(MatCreateVecs(A, &xmask, NULL));
  PetscCall(VecDuplicate(l->lvec, &lmask));
  /* build a 0/1 mask of zeroed columns and scatter it to the ghost layout */
  PetscCall(VecGetArray(xmask, &bb));
  for (i = 0; i < len; i++) bb[lrows[i]] = 1;
  PetscCall(VecRestoreArray(xmask, &bb));
  PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecDestroy(&xmask));
  if (x && b) { /* this code is buggy when the row and column layout don't match */
    PetscBool cong;

    PetscCall(MatHasCongruentLayouts(A, &cong));
    PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
    PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecGetArrayRead(l->lvec, &xx));
    PetscCall(VecGetArray(b, &bb));
  }
  PetscCall(VecGetArray(lmask, &mask));
  /* remove zeroed rows of off diagonal matrix */
  PetscCall(MatSeqAIJGetArray(l->B, &aij_a));
  ii = aij->i;
  for (i = 0; i < len; i++) PetscCall(PetscArrayzero(aij_a + ii[lrows[i]], ii[lrows[i] + 1] - ii[lrows[i]]));
  /* loop over all elements of off process part of matrix zeroing removed columns*/
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i = 0; i < m; i++) {
      n  = ii[i + 1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];

      for (j = 0; j < n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          /* column was zeroed somewhere: move its contribution to the rhs, then drop it */
          if (b) bb[*ridx] -= *aa * xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i = 0; i < m; i++) {
      n  = ii[i + 1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];
      for (j = 0; j < n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa * xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x && b) {
    PetscCall(VecRestoreArray(b, &bb));
    PetscCall(VecRestoreArrayRead(l->lvec, &xx));
  }
  PetscCall(MatSeqAIJRestoreArray(l->B, &aij_a));
  PetscCall(VecRestoreArray(lmask, &mask));
  PetscCall(VecDestroy(&lmask));
  PetscCall(PetscFree(lrows));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ *)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatMult_MPIAIJ - y = A*x. Overlaps the ghost-value scatter with the diagonal-block
  product, then adds the off-diagonal contribution.
*/
PetscErrorCode MatMult_MPIAIJ(Mat A, Vec xx, Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  PetscInt    nt;
  VecScatter  Mvctx = a->Mvctx;

  PetscFunctionBegin;
  PetscCall(VecGetLocalSize(xx, &nt));
  PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")", A->cmap->n, nt);
  /* start the ghost scatter, do the local product while it is in flight, then finish */
  PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscUseTypeMethod(a->A, mult, xx, yy);
  PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscUseTypeMethod(a->B, multadd, a->lvec, yy, yy);
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatMultDiagonalBlock_MPIAIJ - Applies only the on-process diagonal block: xx = diag(A)*bb. */
PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A, Vec bb, Vec xx)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatMultDiagonalBlock(a->A, bb, xx));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatMultAdd_MPIAIJ - zz = yy + A*xx, with the same scatter/compute overlap as MatMult. */
PetscErrorCode MatMultAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIAIJ *a     = (Mat_MPIAIJ *)A->data;
  VecScatter  Mvctx = a->Mvctx;

  PetscFunctionBegin;
  PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
  PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatMultTranspose_MPIAIJ - yy = A^T*xx; partial products are combined with a reverse scatter. */
PetscErrorCode MatMultTranspose_MPIAIJ(Mat A, Vec xx, Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  /* do nondiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
  /* do local part */
  PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
  /* add partial results together */
  PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
  PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatIsTranspose_MPIAIJ - Tests whether Bmat equals Amat^T within tol. First compares
  the diagonal blocks everywhere (cheap, collective); only when they all pass does it
  extract and compare the off-process parts via MatCreateSubMatrices.
*/
PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat, Mat Bmat, PetscReal tol, PetscBool *f)
{
  MPI_Comm    comm;
  Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)Amat->data, *Bij = (Mat_MPIAIJ *)Bmat->data;
  Mat         Adia = Aij->A, Bdia = Bij->A, Aoff, Boff, *Aoffs, *Boffs;
  IS          Me, Notme;
  PetscInt    M, N, first, last, *notme, i;
  PetscBool   lf;
  PetscMPIInt size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  PetscCall(MatIsTranspose(Adia, Bdia, tol, &lf));
  PetscCall(MPIU_Allreduce(&lf, f, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)Amat)));
  if (!*f) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (size == 1) PetscFunctionReturn(PETSC_SUCCESS);

  /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
  PetscCall(MatGetSize(Amat, &M, &N));
  PetscCall(MatGetOwnershipRange(Amat, &first, &last));
  /* 'notme' lists every global column NOT owned by this rank */
  PetscCall(PetscMalloc1(N - last + first, &notme));
  for (i = 0; i < first; i++) notme[i] = i;
  for (i = last; i < M; i++) notme[i - last + first] = i;
  PetscCall(ISCreateGeneral(MPI_COMM_SELF, N - last + first, notme, PETSC_COPY_VALUES, &Notme));
  PetscCall(ISCreateStride(MPI_COMM_SELF, last - first, first, 1, &Me));
  PetscCall(MatCreateSubMatrices(Amat, 1, &Me, &Notme, MAT_INITIAL_MATRIX, &Aoffs));
  Aoff = Aoffs[0];
  PetscCall(MatCreateSubMatrices(Bmat, 1, &Notme, &Me, MAT_INITIAL_MATRIX, &Boffs));
  Boff = Boffs[0];
  PetscCall(MatIsTranspose(Aoff, Boff, tol, f));
  PetscCall(MatDestroyMatrices(1, &Aoffs));
  PetscCall(MatDestroyMatrices(1, &Boffs));
  PetscCall(ISDestroy(&Me));
  PetscCall(ISDestroy(&Notme));
  PetscCall(PetscFree(notme));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatIsSymmetric_MPIAIJ - A is symmetric iff A equals its own transpose within tol. */
PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A, PetscReal tol, PetscBool *f)
{
  PetscFunctionBegin;
  PetscCall(MatIsTranspose_MPIAIJ(A, A, tol, f));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatMultTransposeAdd_MPIAIJ - zz = yy + A^T*xx, same reverse-scatter pattern as MatMultTranspose. */
PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  /* do nondiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B,
                                        xx, a->lvec));
  /* do local part */
  PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
  /* add partial results together */
  PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
  PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A, Vec v)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCheck(A->rmap->N == A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
  PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "row partition must equal col partition");
  PetscCall(MatGetDiagonal(a->A, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatScale_MPIAIJ - A *= aa, applied to both the diagonal and off-diagonal blocks. */
PetscErrorCode MatScale_MPIAIJ(Mat A, PetscScalar aa)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatScale(a->A, aa));
  PetscCall(MatScale(a->B, aa));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Free COO stuff; must match allocation methods in MatSetPreallocationCOO_MPIAIJ() */
PETSC_INTERN PetscErrorCode MatResetPreallocationCOO_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(PetscSFDestroy(&aij->coo_sf));
  /* local (pass-1) COO maps */
  PetscCall(PetscFree(aij->Aperm1));
  PetscCall(PetscFree(aij->Bperm1));
  PetscCall(PetscFree(aij->Ajmap1));
  PetscCall(PetscFree(aij->Bjmap1));

  /* remote (pass-2) COO maps */
  PetscCall(PetscFree(aij->Aimap2));
  PetscCall(PetscFree(aij->Bimap2));
  PetscCall(PetscFree(aij->Aperm2));
  PetscCall(PetscFree(aij->Bperm2));
  PetscCall(PetscFree(aij->Ajmap2));
  PetscCall(PetscFree(aij->Bjmap2));

  PetscCall(PetscFree2(aij->sendbuf, aij->recvbuf));
  PetscCall(PetscFree(aij->Cperm1));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatView_MPIAIJ_Binary - Writes the parallel matrix to a binary viewer in the PETSc
  binary matrix format: header, per-row lengths, global column indices, values.
  Within each row, columns are emitted in globally ascending order by interleaving the
  off-diagonal entries that precede the owned column range, the diagonal block, and the
  trailing off-diagonal entries.
*/
PetscErrorCode MatView_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
{
  Mat_MPIAIJ        *aij    = (Mat_MPIAIJ *)mat->data;
  Mat_SeqAIJ        *A      = (Mat_SeqAIJ *)aij->A->data;
  Mat_SeqAIJ        *B      = (Mat_SeqAIJ *)aij->B->data;
  const PetscInt    *garray = aij->garray;
  const PetscScalar *aa, *ba;
  PetscInt           header[4], M, N, m, rs, cs, nz, cnt, i, ja, jb;
  PetscInt          *rowlens;
  PetscInt          *colidxs;
  PetscScalar       *matvals;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  M  = mat->rmap->N;
  N  = mat->cmap->N;
  m  = mat->rmap->n;
  rs = mat->rmap->rstart;
  cs = mat->cmap->rstart;
  nz = A->nz + B->nz; /* local nonzero count */

  /* write matrix header */
  header[0] = MAT_FILE_CLASSID;
  header[1] = M;
  header[2] = N;
  header[3] = nz;
  /* rank 0 receives the global nonzero count; other ranks keep their local nz in header[3] (header written only by rank 0) */
  PetscCallMPI(MPI_Reduce(&nz, &header[3], 1, MPIU_INT, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
  PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));

  /* fill in and store row lengths */
  PetscCall(PetscMalloc1(m, &rowlens));
  for (i = 0; i < m; i++) rowlens[i] = A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i];
  PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
  PetscCall(PetscFree(rowlens));

  /* fill in and store column indices */
  PetscCall(PetscMalloc1(nz, &colidxs));
  for (cnt = 0, i = 0; i < m; i++) {
    /* off-diagonal columns left of the owned range come first (garray is sorted ascending) */
    for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
      if (garray[B->j[jb]] > cs) break;
      colidxs[cnt++] = garray[B->j[jb]];
    }
    for (ja = A->i[i]; ja < A->i[i + 1]; ja++) colidxs[cnt++] = A->j[ja] + cs;
    for (; jb < B->i[i + 1]; jb++) colidxs[cnt++] = garray[B->j[jb]];
  }
  PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz);
PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 1254 PetscCall(PetscFree(colidxs)); 1255 1256 /* fill in and store nonzero values */ 1257 PetscCall(MatSeqAIJGetArrayRead(aij->A, &aa)); 1258 PetscCall(MatSeqAIJGetArrayRead(aij->B, &ba)); 1259 PetscCall(PetscMalloc1(nz, &matvals)); 1260 for (cnt = 0, i = 0; i < m; i++) { 1261 for (jb = B->i[i]; jb < B->i[i + 1]; jb++) { 1262 if (garray[B->j[jb]] > cs) break; 1263 matvals[cnt++] = ba[jb]; 1264 } 1265 for (ja = A->i[i]; ja < A->i[i + 1]; ja++) matvals[cnt++] = aa[ja]; 1266 for (; jb < B->i[i + 1]; jb++) matvals[cnt++] = ba[jb]; 1267 } 1268 PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &aa)); 1269 PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &ba)); 1270 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz); 1271 PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 1272 PetscCall(PetscFree(matvals)); 1273 1274 /* write block size option to the viewer's .info file */ 1275 PetscCall(MatView_Binary_BlockSizes(mat, viewer)); 1276 PetscFunctionReturn(PETSC_SUCCESS); 1277 } 1278 1279 #include <petscdraw.h> 1280 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer) 1281 { 1282 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1283 PetscMPIInt rank = aij->rank, size = aij->size; 1284 PetscBool isdraw, iascii, isbinary; 1285 PetscViewer sviewer; 1286 PetscViewerFormat format; 1287 1288 PetscFunctionBegin; 1289 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1290 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 1291 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1292 if (iascii) { 1293 PetscCall(PetscViewerGetFormat(viewer, &format)); 1294 if (format == PETSC_VIEWER_LOAD_BALANCE) { 1295 PetscInt i, nmax = 0, nmin = 
PETSC_MAX_INT, navg = 0, *nz, nzlocal = ((Mat_SeqAIJ *)(aij->A->data))->nz + ((Mat_SeqAIJ *)(aij->B->data))->nz; 1296 PetscCall(PetscMalloc1(size, &nz)); 1297 PetscCallMPI(MPI_Allgather(&nzlocal, 1, MPIU_INT, nz, 1, MPIU_INT, PetscObjectComm((PetscObject)mat))); 1298 for (i = 0; i < (PetscInt)size; i++) { 1299 nmax = PetscMax(nmax, nz[i]); 1300 nmin = PetscMin(nmin, nz[i]); 1301 navg += nz[i]; 1302 } 1303 PetscCall(PetscFree(nz)); 1304 navg = navg / size; 1305 PetscCall(PetscViewerASCIIPrintf(viewer, "Load Balance - Nonzeros: Min %" PetscInt_FMT " avg %" PetscInt_FMT " max %" PetscInt_FMT "\n", nmin, navg, nmax)); 1306 PetscFunctionReturn(PETSC_SUCCESS); 1307 } 1308 PetscCall(PetscViewerGetFormat(viewer, &format)); 1309 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1310 MatInfo info; 1311 PetscInt *inodes = NULL; 1312 1313 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 1314 PetscCall(MatGetInfo(mat, MAT_LOCAL, &info)); 1315 PetscCall(MatInodeGetInodeSizes(aij->A, NULL, &inodes, NULL)); 1316 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 1317 if (!inodes) { 1318 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, 1319 (double)info.memory)); 1320 } else { 1321 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, 1322 (double)info.memory)); 1323 } 1324 PetscCall(MatGetInfo(aij->A, MAT_LOCAL, &info)); 1325 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used)); 1326 PetscCall(MatGetInfo(aij->B, MAT_LOCAL, &info)); 1327 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] 
off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used)); 1328 PetscCall(PetscViewerFlush(viewer)); 1329 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 1330 PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n")); 1331 PetscCall(VecScatterView(aij->Mvctx, viewer)); 1332 PetscFunctionReturn(PETSC_SUCCESS); 1333 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1334 PetscInt inodecount, inodelimit, *inodes; 1335 PetscCall(MatInodeGetInodeSizes(aij->A, &inodecount, &inodes, &inodelimit)); 1336 if (inodes) { 1337 PetscCall(PetscViewerASCIIPrintf(viewer, "using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n", inodecount, inodelimit)); 1338 } else { 1339 PetscCall(PetscViewerASCIIPrintf(viewer, "not using I-node (on process 0) routines\n")); 1340 } 1341 PetscFunctionReturn(PETSC_SUCCESS); 1342 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1343 PetscFunctionReturn(PETSC_SUCCESS); 1344 } 1345 } else if (isbinary) { 1346 if (size == 1) { 1347 PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name)); 1348 PetscCall(MatView(aij->A, viewer)); 1349 } else { 1350 PetscCall(MatView_MPIAIJ_Binary(mat, viewer)); 1351 } 1352 PetscFunctionReturn(PETSC_SUCCESS); 1353 } else if (iascii && size == 1) { 1354 PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name)); 1355 PetscCall(MatView(aij->A, viewer)); 1356 PetscFunctionReturn(PETSC_SUCCESS); 1357 } else if (isdraw) { 1358 PetscDraw draw; 1359 PetscBool isnull; 1360 PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); 1361 PetscCall(PetscDrawIsNull(draw, &isnull)); 1362 if (isnull) PetscFunctionReturn(PETSC_SUCCESS); 1363 } 1364 1365 { /* assemble the entire matrix onto first processor */ 1366 Mat A = NULL, Av; 1367 IS isrow, iscol; 1368 1369 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? 
mat->rmap->N : 0, 0, 1, &isrow)); 1370 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->cmap->N : 0, 0, 1, &iscol)); 1371 PetscCall(MatCreateSubMatrix(mat, isrow, iscol, MAT_INITIAL_MATRIX, &A)); 1372 PetscCall(MatMPIAIJGetSeqAIJ(A, &Av, NULL, NULL)); 1373 /* The commented code uses MatCreateSubMatrices instead */ 1374 /* 1375 Mat *AA, A = NULL, Av; 1376 IS isrow,iscol; 1377 1378 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow)); 1379 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol)); 1380 PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA)); 1381 if (rank == 0) { 1382 PetscCall(PetscObjectReference((PetscObject)AA[0])); 1383 A = AA[0]; 1384 Av = AA[0]; 1385 } 1386 PetscCall(MatDestroySubMatrices(1,&AA)); 1387 */ 1388 PetscCall(ISDestroy(&iscol)); 1389 PetscCall(ISDestroy(&isrow)); 1390 /* 1391 Everyone has to call to draw the matrix since the graphics waits are 1392 synchronized across all processors that share the PetscDraw object 1393 */ 1394 PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 1395 if (rank == 0) { 1396 if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)Av, ((PetscObject)mat)->name)); 1397 PetscCall(MatView_SeqAIJ(Av, sviewer)); 1398 } 1399 PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 1400 PetscCall(PetscViewerFlush(viewer)); 1401 PetscCall(MatDestroy(&A)); 1402 } 1403 PetscFunctionReturn(PETSC_SUCCESS); 1404 } 1405 1406 PetscErrorCode MatView_MPIAIJ(Mat mat, PetscViewer viewer) 1407 { 1408 PetscBool iascii, isdraw, issocket, isbinary; 1409 1410 PetscFunctionBegin; 1411 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 1412 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1413 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
  if (iascii || isdraw || isbinary || issocket) PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat, viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatSOR_MPIAIJ - Local (block Jacobi style) SOR/Gauss-Seidel relaxation. Each outer
  iteration scatters the current x to ghost form, folds the off-diagonal contribution
  into the right-hand side (bb1 = bb - B*x), and applies the sequential SOR kernel on
  the diagonal block. Truly parallel (non-local) SOR is not supported.
*/
PetscErrorCode MatSOR_MPIAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data;
  Vec         bb1 = NULL;
  PetscBool   hasop;

  PetscFunctionBegin;
  if (flag == SOR_APPLY_UPPER) {
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  /* a work vector is needed whenever more than one outer sweep will run */
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) PetscCall(VecDuplicate(bb, &bb1));

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }

    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_EISENSTAT) {
    Vec xx1;

    PetscCall(VecDuplicate(bb, &xx1));
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx));

    PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
    if (!mat->diag) {
      /* cache the diagonal for the Eisenstat trick */
      PetscCall(MatCreateVecs(matin, &mat->diag, NULL));
      PetscCall(MatGetDiagonal(matin, mat->diag));
    }
    PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop));
    if (hasop) {
      PetscCall(MatMultDiagonalBlock(matin, xx, bb1));
    } else {
      PetscCall(VecPointwiseMult(bb1, mat->diag, xx));
    }
    PetscCall(VecAYPX(bb1, (omega - 2.0) / omega, bb));

    PetscCall(MatMultAdd(mat->B, mat->lvec, bb1, bb1));

    /* local sweep */
    PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1));
    PetscCall(VecAXPY(xx, 1.0, xx1));
    PetscCall(VecDestroy(&xx1));
  } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel SOR not supported");

  PetscCall(VecDestroy(&bb1));

  matin->factorerrortype = mat->A->factorerrortype;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatPermute_MPIAIJ - Builds B = P_r * A * P_c for row/column permutations rowp/colp.
  Inverse permutations and new nonzero counts are communicated with PetscSFs; the
  permuted matrix is then filled with MatSetValues row by row.
*/
PetscErrorCode MatPermute_MPIAIJ(Mat A, IS rowp, IS colp, Mat *B)
{
  Mat             aA, aB, Aperm;
  const PetscInt *rwant, *cwant, *gcols, *ai, *bi, *aj, *bj;
  PetscScalar    *aa, *ba;
  PetscInt        i, j, m, n, ng, anz, bnz, *dnnz, *onnz, *tdnnz, *tonnz, *rdest, *cdest, *work, *gcdest;
  PetscSF         rowsf, sf;
  IS              parcolp = NULL;
  PetscBool       done;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(A, &m, &n));
  PetscCall(ISGetIndices(rowp, &rwant));
  PetscCall(ISGetIndices(colp, &cwant));
  PetscCall(PetscMalloc3(PetscMax(m, n), &work, m, &rdest, n, &cdest));

  /* Invert row permutation to find out where my rows should go */
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &rowsf));
  PetscCall(PetscSFSetGraphLayout(rowsf, A->rmap, A->rmap->n, NULL, PETSC_OWN_POINTER, rwant));
  PetscCall(PetscSFSetFromOptions(rowsf));
  for (i = 0; i < m; i++) work[i] = A->rmap->rstart + i;
  PetscCall(PetscSFReduceBegin(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));

  /* Invert column permutation to find out where my columns should go */
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraphLayout(sf, A->cmap, A->cmap->n, NULL, PETSC_OWN_POINTER, cwant));
  PetscCall(PetscSFSetFromOptions(sf));
  for (i = 0; i < n; i++) work[i] = A->cmap->rstart + i;
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, work, cdest, MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, work, cdest, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));

  PetscCall(ISRestoreIndices(rowp, &rwant));
  PetscCall(ISRestoreIndices(colp, &cwant));
  PetscCall(MatMPIAIJGetSeqAIJ(A, &aA, &aB, &gcols));

  /* Find out where my gcols should go */
  PetscCall(MatGetSize(aB, NULL, &ng));
  PetscCall(PetscMalloc1(ng, &gcdest));
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraphLayout(sf, A->cmap, ng, NULL, PETSC_OWN_POINTER, gcols));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFBcastBegin(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));

  /* count diagonal/off-diagonal nonzeros of each permuted row, then route the counts to the row's new owner */
  PetscCall(PetscCalloc4(m, &dnnz, m, &onnz, m, &tdnnz, m, &tonnz));
  PetscCall(MatGetRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
  PetscCall(MatGetRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
  for (i = 0; i < m; i++) {
    PetscInt    row = rdest[i];
    PetscMPIInt rowner;
    PetscCall(PetscLayoutFindOwner(A->rmap, row, &rowner));
    for (j = ai[i]; j < ai[i + 1]; j++) {
      PetscInt    col = cdest[aj[j]];
      PetscMPIInt cowner;
      PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); /* Could build an index for the columns to eliminate this search */
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
    for (j = bi[i]; j < bi[i + 1]; j++) {
      PetscInt    col = gcdest[bj[j]];
      PetscMPIInt cowner;
      PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner));
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
  }
  PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&rowsf));

  PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A), A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N, 0, tdnnz, 0, tonnz, &Aperm));
  PetscCall(MatSeqAIJGetArray(aA, &aa));
  PetscCall(MatSeqAIJGetArray(aB, &ba));
  for (i = 0; i < m; i++) {
    PetscInt *acols = dnnz, *bcols = onnz; /* Repurpose now-unneeded arrays */
    PetscInt  j0, rowlen;
    rowlen = ai[i + 1] - ai[i];
    for (j0 = j = 0; j < rowlen; j0 = j) { /* rowlen could be larger than number of rows m, so sum in batches */
      for (; j < PetscMin(rowlen, j0 + m); j++) acols[j - j0] = cdest[aj[ai[i] + j]];
      PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, acols, aa + ai[i] + j0, INSERT_VALUES));
    }
    rowlen = bi[i + 1] - bi[i];
    for (j0 = j = 0; j < rowlen; j0 = j) {
      for (; j < PetscMin(rowlen, j0 + m); j++) bcols[j - j0] = gcdest[bj[bi[i] + j]];
      PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, bcols, ba + bi[i] + j0, INSERT_VALUES));
    }
  }
  PetscCall(MatAssemblyBegin(Aperm, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(Aperm, MAT_FINAL_ASSEMBLY));
  PetscCall(MatRestoreRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
  PetscCall(MatRestoreRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
  PetscCall(MatSeqAIJRestoreArray(aA, &aa));
  PetscCall(MatSeqAIJRestoreArray(aB, &ba));
  PetscCall(PetscFree4(dnnz, onnz, tdnnz, tonnz));
  PetscCall(PetscFree3(work, rdest, cdest));
  PetscCall(PetscFree(gcdest));
  if (parcolp) PetscCall(ISDestroy(&colp)); /* NOTE(review): parcolp is never set in this routine — dead guard, presumably left from an earlier version */
  *B = Aperm;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatGetGhosts_MPIAIJ - Returns the number of ghost columns and (optionally) their global indices. */
PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatGetSize(aij->B, NULL, nghosts));
  if (ghosts) *ghosts = aij->garray;
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatGetInfo_MPIAIJ(Mat matin,
MatInfoType flag, MatInfo *info)
{
  Mat_MPIAIJ    *mat = (Mat_MPIAIJ *)matin->data;
  Mat            A = mat->A, B = mat->B;
  PetscLogDouble isend[5], irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;
  PetscCall(MatGetInfo(A, MAT_LOCAL, info));

  /* accumulate the stats from both local blocks into isend[] */
  isend[0] = info->nz_used;
  isend[1] = info->nz_allocated;
  isend[2] = info->nz_unneeded;
  isend[3] = info->memory;
  isend[4] = info->mallocs;

  PetscCall(MatGetInfo(B, MAT_LOCAL, info));

  isend[0] += info->nz_used;
  isend[1] += info->nz_allocated;
  isend[2] += info->nz_unneeded;
  isend[3] += info->memory;
  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatSetOption_MPIAIJ - dispatches an option to the diagonal (A) and off-diagonal (B)
   sequential blocks where applicable, or stores it in the parallel wrapper */
PetscErrorCode MatSetOption_MPIAIJ(Mat A, MatOption op, PetscBool flg)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
  case MAT_FORM_EXPLICIT_TRANSPOSE:
    /* forwarded verbatim to both sequential blocks */
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A, 1);
    a->roworiented = flg;

    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_SORTED_FULL:
    PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
  case MAT_SPD:
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
  case MAT_SPD_ETERNAL:
    /* if the diagonal matrix is square it inherits some of the properties above */
    break;
  case MAT_SUBMAT_SINGLEIS:
    A->submat_singleis = flg;
    break;
  case MAT_STRUCTURE_ONLY:
    /* The option is handled directly by MatSetOption() */
    break;
  default:
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatGetRow_MPIAIJ - returns one locally-owned row by merging the diagonal (A) and
   off-diagonal (B) block rows into globally-increasing column order.
   Results are borrowed (stored in mat->rowvalues/rowindices); callers must pair with
   MatRestoreRow(). Only one row may be active at a time (getrowactive guard). */
PetscErrorCode MatGetRow_MPIAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)matin->data;
  PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
  PetscInt     i, *cworkA, *cworkB, **pcA, **pcB, cstart = matin->cmap->rstart;
  PetscInt     nztot, nzA, nzB, lrow, rstart = matin->rmap->rstart, rend = matin->rmap->rend;
  PetscInt    *cmap, *idx_p;

  PetscFunctionBegin;
  PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ *)mat->A->data, *Ba = (Mat_SeqAIJ *)mat->B->data;
    PetscInt    max = 1, tmp;
    for (i = 0; i < matin->rmap->n; i++) {
      tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    PetscCall(PetscMalloc2(max, &mat->rowvalues, max, &mat->rowindices));
  }

  PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows");
  lrow = row - rstart;

  /* request only the pieces (values/indices) the caller asked for */
  pvA = &vworkA;
  pcA = &cworkA;
  pvB = &vworkB;
  pcB = &cworkB;
  if (!v) {
    pvA = NULL;
    pvB = NULL;
  }
  if (!idx) {
    pcA = NULL;
    if (!v) pcB = NULL;
  }
  PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        /* imark = number of B entries with global column < cstart (they precede A's entries) */
        for (i = 0; i < nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
        for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i]];
        } else {
          /* imark was not computed in the value pass above; compute it here */
          for (i = 0; i < nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart + cworkA[i];
        for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = NULL;
      if (v) *v = NULL;
    }
  }
  *nz = nztot;
  PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatRestoreRow_MPIAIJ - releases the row obtained with MatGetRow_MPIAIJ (clears the guard) */
PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCheck(aij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first");
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatNorm_MPIAIJ - parallel Frobenius, 1- and infinity-norms computed from the two
   sequential blocks followed by a reduction; NORM_2 is unsupported */
PetscErrorCode MatNorm_MPIAIJ(Mat mat, NormType type, PetscReal *norm)
{
  Mat_MPIAIJ      *aij = (Mat_MPIAIJ *)mat->data;
  Mat_SeqAIJ      *amat = (Mat_SeqAIJ *)aij->A->data, *bmat = (Mat_SeqAIJ *)aij->B->data;
  PetscInt         i, j, cstart = mat->cmap->rstart;
  PetscReal        sum = 0.0;
  const MatScalar *v, *amata, *bmata;

  PetscFunctionBegin;
  if (aij->size == 1) {
    /* single process: delegate to the sequential norm */
    PetscCall(MatNorm(aij->A, type, norm));
  } else {
    PetscCall(MatSeqAIJGetArrayRead(aij->A, &amata));
    PetscCall(MatSeqAIJGetArrayRead(aij->B, &bmata));
    if (type == NORM_FROBENIUS) {
      v = amata;
      for (i = 0; i < amat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      v = bmata;
      for (i = 0; i < bmat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      PetscCall(MPIU_Allreduce(&sum, norm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      *norm = PetscSqrtReal(*norm);
      PetscCall(PetscLogFlops(2.0 * amat->nz + 2.0 * bmat->nz));
    } else if (type == NORM_1) { /* max column norm */
      PetscReal *tmp, *tmp2;
      PetscInt  *jj, *garray = aij->garray;
      /* tmp accumulates per-global-column absolute sums (dense over N columns) */
      PetscCall(PetscCalloc1(mat->cmap->N + 1, &tmp));
      PetscCall(PetscMalloc1(mat->cmap->N + 1, &tmp2));
      *norm = 0.0;
      v     = amata;
      jj    = amat->j;
      for (j = 0; j < amat->nz;
j++) {
        tmp[cstart + *jj++] += PetscAbsScalar(*v);
        v++;
      }
      v  = bmata;
      jj = bmat->j;
      for (j = 0; j < bmat->nz; j++) {
        /* off-diagonal block columns are compressed; garray maps them back to global */
        tmp[garray[*jj++]] += PetscAbsScalar(*v);
        v++;
      }
      PetscCall(MPIU_Allreduce(tmp, tmp2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      for (j = 0; j < mat->cmap->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      PetscCall(PetscFree(tmp));
      PetscCall(PetscFree(tmp2));
      PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      for (j = 0; j < aij->A->rmap->n; j++) {
        v   = amata + amat->i[j];
        sum = 0.0;
        for (i = 0; i < amat->i[j + 1] - amat->i[j]; i++) {
          sum += PetscAbsScalar(*v);
          v++;
        }
        v = bmata + bmat->i[j];
        for (i = 0; i < bmat->i[j + 1] - bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v);
          v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      PetscCall(MPIU_Allreduce(&ntemp, norm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat)));
      PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
    } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for two norm");
    PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &amata));
    PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &bmata));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatTranspose_MPIAIJ - forms B = A^T. The diagonal block is transposed locally and
   fast (MatTranspose on a->A); the off-diagonal block is inserted entry-by-entry with
   MatSetValues since its transposed entries land on other processes. */
PetscErrorCode MatTranspose_MPIAIJ(Mat A, MatReuse reuse, Mat *matout)
{
  Mat_MPIAIJ      *a = (Mat_MPIAIJ *)A->data, *b;
  Mat_SeqAIJ      *Aloc = (Mat_SeqAIJ *)a->A->data, *Bloc = (Mat_SeqAIJ *)a->B->data, *sub_B_diag;
  PetscInt         M = A->rmap->N, N = A->cmap->N, ma, na, mb, nb, row, *cols, *cols_tmp, *B_diag_ilen, i, ncol, A_diag_ncol;
  const PetscInt  *ai, *aj, *bi, *bj, *B_diag_i;
  Mat              B, A_diag, *B_diag;
  const MatScalar *pbv, *bv;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
  ma = A->rmap->n;
  na = A->cmap->n;
  mb = a->B->rmap->n;
  nb = a->B->cmap->n;
  ai = Aloc->i;
  aj = Aloc->j;
  bi = Bloc->i;
  bj = Bloc->j;
  if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
    PetscInt            *d_nnz, *g_nnz, *o_nnz;
    PetscSFNode         *oloc;
    PETSC_UNUSED PetscSF sf;

    PetscCall(PetscMalloc4(na, &d_nnz, na, &o_nnz, nb, &g_nnz, nb, &oloc));
    /* compute d_nnz for preallocation */
    PetscCall(PetscArrayzero(d_nnz, na));
    for (i = 0; i < ai[ma]; i++) d_nnz[aj[i]]++;
    /* compute local off-diagonal contributions */
    PetscCall(PetscArrayzero(g_nnz, nb));
    for (i = 0; i < bi[ma]; i++) g_nnz[bj[i]]++;
    /* map those to global */
    PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
    PetscCall(PetscSFSetGraphLayout(sf, A->cmap, nb, NULL, PETSC_USE_POINTER, a->garray));
    PetscCall(PetscSFSetFromOptions(sf));
    PetscCall(PetscArrayzero(o_nnz, na));
    PetscCall(PetscSFReduceBegin(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
    PetscCall(PetscSFReduceEnd(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
    PetscCall(PetscSFDestroy(&sf));

    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
    PetscCall(MatSetBlockSizes(B, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs)));
    PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
    PetscCall(PetscFree4(d_nnz, o_nnz, g_nnz, oloc));
  } else {
    B = *matout;
    PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
  }

  b           = (Mat_MPIAIJ *)B->data;
  A_diag      = a->A;
  B_diag      = &b->A;
  sub_B_diag  = (Mat_SeqAIJ *)(*B_diag)->data;
  A_diag_ncol = A_diag->cmap->N;
  B_diag_ilen = sub_B_diag->ilen;
  B_diag_i    = sub_B_diag->i;

  /* Set ilen for diagonal of B */
  for (i
= 0; i < A_diag_ncol; i++) B_diag_ilen[i] = B_diag_i[i + 1] - B_diag_i[i];

  /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done
     very quickly (=without using MatSetValues), because all writes are local. */
  PetscCall(MatTransposeSetPrecursor(A_diag, *B_diag));
  PetscCall(MatTranspose(A_diag, MAT_REUSE_MATRIX, B_diag));

  /* copy over the B part: each off-diagonal row of A becomes a column of B, inserted globally */
  PetscCall(PetscMalloc1(bi[mb], &cols));
  PetscCall(MatSeqAIJGetArrayRead(a->B, &bv));
  pbv = bv;
  row = A->rmap->rstart;
  for (i = 0; i < bi[mb]; i++) cols[i] = a->garray[bj[i]];
  cols_tmp = cols;
  for (i = 0; i < mb; i++) {
    ncol = bi[i + 1] - bi[i];
    /* note transposed insertion: ncol rows, one column */
    PetscCall(MatSetValues(B, ncol, cols_tmp, 1, &row, pbv, INSERT_VALUES));
    row++;
    pbv += ncol;
    cols_tmp += ncol;
  }
  PetscCall(PetscFree(cols));
  PetscCall(MatSeqAIJRestoreArrayRead(a->B, &bv));

  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    /* in-place transpose: replace A's guts with B's */
    PetscCall(MatHeaderMerge(A, &B));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatDiagonalScale_MPIAIJ - computes mat = diag(ll) * mat * diag(rr); the scatter of rr
   into the ghost vector is overlapped with the left scaling and diagonal-block scaling */
PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat, Vec ll, Vec rr)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  Mat         a = aij->A, b = aij->B;
  PetscInt    s1, s2, s3;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(mat, &s2, &s3));
  if (rr) {
    PetscCall(VecGetLocalSize(rr, &s1));
    PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size");
    /* Overlap communication with computation. */
    PetscCall(VecScatterBegin(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
  }
  if (ll) {
    PetscCall(VecGetLocalSize(ll, &s1));
    PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size");
    PetscUseTypeMethod(b, diagonalscale, ll, NULL);
  }
  /* scale the diagonal block */
  PetscUseTypeMethod(a, diagonalscale, ll, rr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    PetscCall(VecScatterEnd(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscUseTypeMethod(b, diagonalscale, NULL, aij->lvec);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatSetUnfactored_MPIAIJ - clears the factored state of the diagonal block */
PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatSetUnfactored(a->A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatEqual_MPIAIJ - collective equality test: both local blocks must match on every rank */
PetscErrorCode MatEqual_MPIAIJ(Mat A, Mat B, PetscBool *flag)
{
  Mat_MPIAIJ *matB = (Mat_MPIAIJ *)B->data, *matA = (Mat_MPIAIJ *)A->data;
  Mat         a, b, c, d;
  PetscBool   flg;

  PetscFunctionBegin;
  a = matA->A;
  b = matA->B;
  c = matB->A;
  d = matB->B;

  PetscCall(MatEqual(a, c, &flg));
  if (flg) PetscCall(MatEqual(b, d, &flg));
  /* logical AND over all processes so every rank returns the same answer */
  PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatCopy_MPIAIJ - copies A into B, block-by-block when the nonzero patterns and copy
   implementations match, otherwise via the generic MatCopy_Basic path */
PetscErrorCode MatCopy_MPIAIJ(Mat A, Mat B, MatStructure str)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;

  PetscFunctionBegin;
  /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy.
*/
  if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
    /* because of the column compression in the off-processor part of the matrix a->B,
       the number of columns in a->B and b->B may be different, hence we cannot call
       the MatCopy() directly on the two parts. If need be, we can provide a more
       efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
       then copying the submatrices */
    PetscCall(MatCopy_Basic(A, B, str));
  } else {
    PetscCall(MatCopy(a->A, b->A, str));
    PetscCall(MatCopy(a->B, b->B, str));
  }
  PetscCall(PetscObjectStateIncrease((PetscObject)B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   Computes the number of nonzeros per row needed for preallocation when X and Y
   have different nonzero structure. Performs a sorted merge of each row of X and Y
   (columns compared in global numbering via xltog/yltog), counting the union.
*/
PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m, const PetscInt *xi, const PetscInt *xj, const PetscInt *xltog, const PetscInt *yi, const PetscInt *yj, const PetscInt *yltog, PetscInt *nnz)
{
  PetscInt i, j, k, nzx, nzy;

  PetscFunctionBegin;
  /* Set the number of nonzeros in the new matrix */
  for (i = 0; i < m; i++) {
    const PetscInt *xjj = xj + xi[i], *yjj = yj + yi[i];
    nzx    = xi[i + 1] - xi[i];
    nzy    = yi[i + 1] - yi[i];
    nnz[i] = 0;
    for (j = 0, k = 0; j < nzx; j++) {                                /* Point in X */
      for (; k < nzy && yltog[yjj[k]] < xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
      if (k < nzy && yltog[yjj[k]] == xltog[xjj[j]]) k++;             /* Skip duplicate */
      nnz[i]++;
    }
    for (; k < nzy; k++) nnz[i]++; /* remaining Y-only columns */
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz)
{
  PetscInt    m = Y->rmap->N; /* Y is a sequential block here, so rmap->N is its (local) row count */
  Mat_SeqAIJ *x = (Mat_SeqAIJ *)X->data;
  Mat_SeqAIJ *y = (Mat_SeqAIJ *)Y->data;

  PetscFunctionBegin;
  PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatAXPY_MPIAIJ - computes Y = a*X + Y; fast path per-block for SAME_NONZERO_PATTERN,
   otherwise builds a fresh matrix with the merged pattern and swaps it into Y */
PetscErrorCode MatAXPY_MPIAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
{
  Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data, *yy = (Mat_MPIAIJ *)Y->data;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscCall(MatAXPY(yy->A, a, xx->A, str));
    PetscCall(MatAXPY(yy->B, a, xx->B, str));
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    PetscCall(MatAXPY_Basic(Y, a, X, str));
  } else {
    Mat       B;
    PetscInt *nnz_d, *nnz_o;

    PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
    PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
    PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
    PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
    PetscCall(MatSetLayouts(B, Y->rmap, Y->cmap));
    PetscCall(MatSetType(B, ((PetscObject)Y)->type_name));
    PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A, xx->A, nnz_d));
    PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
    PetscCall(MatMPIAIJSetPreallocation(B, 0, nnz_d, 0, nnz_o));
    PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
    PetscCall(MatHeaderMerge(Y, &B)); /* B replaces Y's internals; Y's header is preserved */
    PetscCall(PetscFree(nnz_d));
    PetscCall(PetscFree(nnz_o));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat);

/* MatConjugate_MPIAIJ - complex-conjugates all entries; a no-op for real scalars */
PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
{
  PetscFunctionBegin;
  if (PetscDefined(USE_COMPLEX)) {
    Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

    PetscCall(MatConjugate_SeqAIJ(aij->A));
    PetscCall(MatConjugate_SeqAIJ(aij->B));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode
MatRealPart_MPIAIJ(Mat A) 2166 { 2167 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2168 2169 PetscFunctionBegin; 2170 PetscCall(MatRealPart(a->A)); 2171 PetscCall(MatRealPart(a->B)); 2172 PetscFunctionReturn(PETSC_SUCCESS); 2173 } 2174 2175 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2176 { 2177 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2178 2179 PetscFunctionBegin; 2180 PetscCall(MatImaginaryPart(a->A)); 2181 PetscCall(MatImaginaryPart(a->B)); 2182 PetscFunctionReturn(PETSC_SUCCESS); 2183 } 2184 2185 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2186 { 2187 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2188 PetscInt i, *idxb = NULL, m = A->rmap->n; 2189 PetscScalar *va, *vv; 2190 Vec vB, vA; 2191 const PetscScalar *vb; 2192 2193 PetscFunctionBegin; 2194 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vA)); 2195 PetscCall(MatGetRowMaxAbs(a->A, vA, idx)); 2196 2197 PetscCall(VecGetArrayWrite(vA, &va)); 2198 if (idx) { 2199 for (i = 0; i < m; i++) { 2200 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2201 } 2202 } 2203 2204 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vB)); 2205 PetscCall(PetscMalloc1(m, &idxb)); 2206 PetscCall(MatGetRowMaxAbs(a->B, vB, idxb)); 2207 2208 PetscCall(VecGetArrayWrite(v, &vv)); 2209 PetscCall(VecGetArrayRead(vB, &vb)); 2210 for (i = 0; i < m; i++) { 2211 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2212 vv[i] = vb[i]; 2213 if (idx) idx[i] = a->garray[idxb[i]]; 2214 } else { 2215 vv[i] = va[i]; 2216 if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) idx[i] = a->garray[idxb[i]]; 2217 } 2218 } 2219 PetscCall(VecRestoreArrayWrite(vA, &vv)); 2220 PetscCall(VecRestoreArrayWrite(vA, &va)); 2221 PetscCall(VecRestoreArrayRead(vB, &vb)); 2222 PetscCall(PetscFree(idxb)); 2223 PetscCall(VecDestroy(&vA)); 2224 PetscCall(VecDestroy(&vB)); 2225 PetscFunctionReturn(PETSC_SUCCESS); 2226 } 2227 2228 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2229 { 2230 
Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
  PetscInt           m = A->rmap->n, n = A->cmap->n;
  PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray;
  PetscInt          *diagIdx, *offdiagIdx;
  Vec                diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt           r, j, col, ncols, *bi, *bj;
  Mat                B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMinAbs(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (n == 0) {
    /* no local columns: every row is all-implicit-zero */
    if (m) {
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = 0.0;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r]   = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; /* skip over this process's own column range */
        }
      }
    }

    /* scan explicit B entries of this row for a smaller-magnitude value */
    for (j = 0; j < ncols; j++) {
      if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {
        offdiagA[r]   = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  /* merge diagonal-block and off-diagonal-block minima; ties go to the smaller global column */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatGetRowMin_MPIAIJ - for each local row, v[i] = minimum entry (compared by real part)
   including implicit zeros of the off-diagonal block; idx[i] = its global column.
   Same block-merge structure as MatGetRowMinAbs_MPIAIJ. */
PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
  PetscInt           m = A->rmap->n, n = A->cmap->n;
  PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray;
  PetscInt          *diagIdx, *offdiagIdx;
  Vec                diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt           r, j, col, ncols, *bi, *bj;
  Mat                B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMin(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (n == 0) {
    if (m) {
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = PETSC_MAX_REAL; /* identity element for min */
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscCalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMin(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r]   = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    for (j = 0; j < ncols; j++) {
      if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {
        offdiagA[r]   = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatGetRowMax_MPIAIJ - per-row maximum (by real part), same structure as MatGetRowMin_MPIAIJ */
PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data;
  PetscInt    m = A->rmap->n, n = A->cmap->n;
  PetscInt    cstart = A->cmap->rstart, cend = A->cmap->rend;
  PetscInt   *cmap = mat->garray;
2466 PetscInt *diagIdx, *offdiagIdx; 2467 Vec diagV, offdiagV; 2468 PetscScalar *a, *diagA, *offdiagA; 2469 const PetscScalar *ba, *bav; 2470 PetscInt r, j, col, ncols, *bi, *bj; 2471 Mat B = mat->B; 2472 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 2473 2474 PetscFunctionBegin; 2475 /* When a process holds entire A and other processes have no entry */ 2476 if (A->cmap->N == n) { 2477 PetscCall(VecGetArrayWrite(v, &diagA)); 2478 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV)); 2479 PetscCall(MatGetRowMax(mat->A, diagV, idx)); 2480 PetscCall(VecDestroy(&diagV)); 2481 PetscCall(VecRestoreArrayWrite(v, &diagA)); 2482 PetscFunctionReturn(PETSC_SUCCESS); 2483 } else if (n == 0) { 2484 if (m) { 2485 PetscCall(VecGetArrayWrite(v, &a)); 2486 for (r = 0; r < m; r++) { 2487 a[r] = PETSC_MIN_REAL; 2488 if (idx) idx[r] = -1; 2489 } 2490 PetscCall(VecRestoreArrayWrite(v, &a)); 2491 } 2492 PetscFunctionReturn(PETSC_SUCCESS); 2493 } 2494 2495 PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx)); 2496 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2497 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2498 PetscCall(MatGetRowMax(mat->A, diagV, diagIdx)); 2499 2500 /* Get offdiagIdx[] for implicit 0.0 */ 2501 PetscCall(MatSeqAIJGetArrayRead(B, &bav)); 2502 ba = bav; 2503 bi = b->i; 2504 bj = b->j; 2505 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2506 for (r = 0; r < m; r++) { 2507 ncols = bi[r + 1] - bi[r]; 2508 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2509 offdiagA[r] = *ba; 2510 offdiagIdx[r] = cmap[0]; 2511 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2512 offdiagA[r] = 0.0; 2513 2514 /* Find first hole in the cmap */ 2515 for (j = 0; j < ncols; j++) { 2516 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2517 if (col > j && j < cstart) { 2518 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2519 break; 2520 } else if (col > j + n && j >= cstart) { 2521 offdiagIdx[r] = 
j + n; /* global column number of first implicit 0.0 */ 2522 break; 2523 } 2524 } 2525 if (j == ncols && ncols < A->cmap->N - n) { 2526 /* a hole is outside compressed Bcols */ 2527 if (ncols == 0) { 2528 if (cstart) { 2529 offdiagIdx[r] = 0; 2530 } else offdiagIdx[r] = cend; 2531 } else { /* ncols > 0 */ 2532 offdiagIdx[r] = cmap[ncols - 1] + 1; 2533 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2534 } 2535 } 2536 } 2537 2538 for (j = 0; j < ncols; j++) { 2539 if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) { 2540 offdiagA[r] = *ba; 2541 offdiagIdx[r] = cmap[*bj]; 2542 } 2543 ba++; 2544 bj++; 2545 } 2546 } 2547 2548 PetscCall(VecGetArrayWrite(v, &a)); 2549 PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA)); 2550 for (r = 0; r < m; ++r) { 2551 if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) { 2552 a[r] = diagA[r]; 2553 if (idx) idx[r] = cstart + diagIdx[r]; 2554 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2555 a[r] = diagA[r]; 2556 if (idx) { 2557 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2558 idx[r] = cstart + diagIdx[r]; 2559 } else idx[r] = offdiagIdx[r]; 2560 } 2561 } else { 2562 a[r] = offdiagA[r]; 2563 if (idx) idx[r] = offdiagIdx[r]; 2564 } 2565 } 2566 PetscCall(MatSeqAIJRestoreArrayRead(B, &bav)); 2567 PetscCall(VecRestoreArrayWrite(v, &a)); 2568 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA)); 2569 PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 2570 PetscCall(VecDestroy(&diagV)); 2571 PetscCall(VecDestroy(&offdiagV)); 2572 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2573 PetscFunctionReturn(PETSC_SUCCESS); 2574 } 2575 2576 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat, Mat *newmat) 2577 { 2578 Mat *dummy; 2579 2580 PetscFunctionBegin; 2581 PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat, MAT_DO_NOT_GET_VALUES, MAT_INITIAL_MATRIX, &dummy)); 2582 *newmat = *dummy; 2583 PetscCall(PetscFree(dummy)); 2584 PetscFunctionReturn(PETSC_SUCCESS); 2585 } 2586 2587 PetscErrorCode 
MatInvertBlockDiagonal_MPIAIJ(Mat A, const PetscScalar **values) 2588 { 2589 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2590 2591 PetscFunctionBegin; 2592 PetscCall(MatInvertBlockDiagonal(a->A, values)); 2593 A->factorerrortype = a->A->factorerrortype; 2594 PetscFunctionReturn(PETSC_SUCCESS); 2595 } 2596 2597 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x, PetscRandom rctx) 2598 { 2599 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)x->data; 2600 2601 PetscFunctionBegin; 2602 PetscCheck(x->assembled || x->preallocated, PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed"); 2603 PetscCall(MatSetRandom(aij->A, rctx)); 2604 if (x->assembled) { 2605 PetscCall(MatSetRandom(aij->B, rctx)); 2606 } else { 2607 PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B, x->cmap->rstart, x->cmap->rend, rctx)); 2608 } 2609 PetscCall(MatAssemblyBegin(x, MAT_FINAL_ASSEMBLY)); 2610 PetscCall(MatAssemblyEnd(x, MAT_FINAL_ASSEMBLY)); 2611 PetscFunctionReturn(PETSC_SUCCESS); 2612 } 2613 2614 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A, PetscBool sc) 2615 { 2616 PetscFunctionBegin; 2617 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2618 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2619 PetscFunctionReturn(PETSC_SUCCESS); 2620 } 2621 2622 /*@ 2623 MatMPIAIJGetNumberNonzeros - gets the number of nonzeros in the matrix on this MPI rank 2624 2625 Not collective 2626 2627 Input Parameter: 2628 . A - the matrix 2629 2630 Output Parameter: 2631 . 
nz - the number of nonzeros 2632 2633 Level: advanced 2634 2635 .seealso: `MATMPIAIJ`, `Mat` 2636 @*/ 2637 PetscErrorCode MatMPIAIJGetNumberNonzeros(Mat A, PetscCount *nz) 2638 { 2639 Mat_MPIAIJ *maij = (Mat_MPIAIJ *)A->data; 2640 Mat_SeqAIJ *aaij = (Mat_SeqAIJ *)maij->A->data, *baij = (Mat_SeqAIJ *)maij->B->data; 2641 2642 PetscFunctionBegin; 2643 *nz = aaij->i[A->rmap->n] + baij->i[A->rmap->n]; 2644 PetscFunctionReturn(PETSC_SUCCESS); 2645 } 2646 2647 /*@ 2648 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2649 2650 Collective 2651 2652 Input Parameters: 2653 + A - the matrix 2654 - sc - `PETSC_TRUE` indicates use the scalable algorithm (default is not to use the scalable algorithm) 2655 2656 Level: advanced 2657 2658 @*/ 2659 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A, PetscBool sc) 2660 { 2661 PetscFunctionBegin; 2662 PetscTryMethod(A, "MatMPIAIJSetUseScalableIncreaseOverlap_C", (Mat, PetscBool), (A, sc)); 2663 PetscFunctionReturn(PETSC_SUCCESS); 2664 } 2665 2666 PetscErrorCode MatSetFromOptions_MPIAIJ(Mat A, PetscOptionItems *PetscOptionsObject) 2667 { 2668 PetscBool sc = PETSC_FALSE, flg; 2669 2670 PetscFunctionBegin; 2671 PetscOptionsHeadBegin(PetscOptionsObject, "MPIAIJ options"); 2672 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2673 PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable", "Use a scalable algorithm to compute the overlap", "MatIncreaseOverlap", sc, &sc, &flg)); 2674 if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, sc)); 2675 PetscOptionsHeadEnd(); 2676 PetscFunctionReturn(PETSC_SUCCESS); 2677 } 2678 2679 PetscErrorCode MatShift_MPIAIJ(Mat Y, PetscScalar a) 2680 { 2681 Mat_MPIAIJ *maij = (Mat_MPIAIJ *)Y->data; 2682 Mat_SeqAIJ *aij = (Mat_SeqAIJ *)maij->A->data; 2683 2684 PetscFunctionBegin; 2685 if (!Y->preallocated) { 2686 PetscCall(MatMPIAIJSetPreallocation(Y, 1, NULL, 0, NULL)); 2687 } else if 
(!aij->nz) { /* It does not matter if diagonals of Y only partially lie in maij->A. We just need an estimated preallocation. */ 2688 PetscInt nonew = aij->nonew; 2689 PetscCall(MatSeqAIJSetPreallocation(maij->A, 1, NULL)); 2690 aij->nonew = nonew; 2691 } 2692 PetscCall(MatShift_Basic(Y, a)); 2693 PetscFunctionReturn(PETSC_SUCCESS); 2694 } 2695 2696 PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A, PetscBool *missing, PetscInt *d) 2697 { 2698 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2699 2700 PetscFunctionBegin; 2701 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices"); 2702 PetscCall(MatMissingDiagonal(a->A, missing, d)); 2703 if (d) { 2704 PetscInt rstart; 2705 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 2706 *d += rstart; 2707 } 2708 PetscFunctionReturn(PETSC_SUCCESS); 2709 } 2710 2711 PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A, PetscInt nblocks, const PetscInt *bsizes, PetscScalar *diag) 2712 { 2713 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2714 2715 PetscFunctionBegin; 2716 PetscCall(MatInvertVariableBlockDiagonal(a->A, nblocks, bsizes, diag)); 2717 PetscFunctionReturn(PETSC_SUCCESS); 2718 } 2719 2720 PetscErrorCode MatEliminateZeros_MPIAIJ(Mat A) 2721 { 2722 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2723 2724 PetscFunctionBegin; 2725 PetscCall(MatEliminateZeros(a->A)); 2726 PetscCall(MatEliminateZeros(a->B)); 2727 PetscFunctionReturn(PETSC_SUCCESS); 2728 } 2729 2730 /* -------------------------------------------------------------------*/ 2731 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2732 MatGetRow_MPIAIJ, 2733 MatRestoreRow_MPIAIJ, 2734 MatMult_MPIAIJ, 2735 /* 4*/ MatMultAdd_MPIAIJ, 2736 MatMultTranspose_MPIAIJ, 2737 MatMultTransposeAdd_MPIAIJ, 2738 NULL, 2739 NULL, 2740 NULL, 2741 /*10*/ NULL, 2742 NULL, 2743 NULL, 2744 MatSOR_MPIAIJ, 2745 MatTranspose_MPIAIJ, 2746 /*15*/ MatGetInfo_MPIAIJ, 2747 MatEqual_MPIAIJ, 2748 MatGetDiagonal_MPIAIJ, 2749 MatDiagonalScale_MPIAIJ, 2750 
MatNorm_MPIAIJ, 2751 /*20*/ MatAssemblyBegin_MPIAIJ, 2752 MatAssemblyEnd_MPIAIJ, 2753 MatSetOption_MPIAIJ, 2754 MatZeroEntries_MPIAIJ, 2755 /*24*/ MatZeroRows_MPIAIJ, 2756 NULL, 2757 NULL, 2758 NULL, 2759 NULL, 2760 /*29*/ MatSetUp_MPI_Hash, 2761 NULL, 2762 NULL, 2763 MatGetDiagonalBlock_MPIAIJ, 2764 NULL, 2765 /*34*/ MatDuplicate_MPIAIJ, 2766 NULL, 2767 NULL, 2768 NULL, 2769 NULL, 2770 /*39*/ MatAXPY_MPIAIJ, 2771 MatCreateSubMatrices_MPIAIJ, 2772 MatIncreaseOverlap_MPIAIJ, 2773 MatGetValues_MPIAIJ, 2774 MatCopy_MPIAIJ, 2775 /*44*/ MatGetRowMax_MPIAIJ, 2776 MatScale_MPIAIJ, 2777 MatShift_MPIAIJ, 2778 MatDiagonalSet_MPIAIJ, 2779 MatZeroRowsColumns_MPIAIJ, 2780 /*49*/ MatSetRandom_MPIAIJ, 2781 MatGetRowIJ_MPIAIJ, 2782 MatRestoreRowIJ_MPIAIJ, 2783 NULL, 2784 NULL, 2785 /*54*/ MatFDColoringCreate_MPIXAIJ, 2786 NULL, 2787 MatSetUnfactored_MPIAIJ, 2788 MatPermute_MPIAIJ, 2789 NULL, 2790 /*59*/ MatCreateSubMatrix_MPIAIJ, 2791 MatDestroy_MPIAIJ, 2792 MatView_MPIAIJ, 2793 NULL, 2794 NULL, 2795 /*64*/ NULL, 2796 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2797 NULL, 2798 NULL, 2799 NULL, 2800 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2801 MatGetRowMinAbs_MPIAIJ, 2802 NULL, 2803 NULL, 2804 NULL, 2805 NULL, 2806 /*75*/ MatFDColoringApply_AIJ, 2807 MatSetFromOptions_MPIAIJ, 2808 NULL, 2809 NULL, 2810 MatFindZeroDiagonals_MPIAIJ, 2811 /*80*/ NULL, 2812 NULL, 2813 NULL, 2814 /*83*/ MatLoad_MPIAIJ, 2815 MatIsSymmetric_MPIAIJ, 2816 NULL, 2817 NULL, 2818 NULL, 2819 NULL, 2820 /*89*/ NULL, 2821 NULL, 2822 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2823 NULL, 2824 NULL, 2825 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2826 NULL, 2827 NULL, 2828 NULL, 2829 MatBindToCPU_MPIAIJ, 2830 /*99*/ MatProductSetFromOptions_MPIAIJ, 2831 NULL, 2832 NULL, 2833 MatConjugate_MPIAIJ, 2834 NULL, 2835 /*104*/ MatSetValuesRow_MPIAIJ, 2836 MatRealPart_MPIAIJ, 2837 MatImaginaryPart_MPIAIJ, 2838 NULL, 2839 NULL, 2840 /*109*/ NULL, 2841 NULL, 2842 MatGetRowMin_MPIAIJ, 2843 NULL, 2844 MatMissingDiagonal_MPIAIJ, 2845 /*114*/ 
MatGetSeqNonzeroStructure_MPIAIJ, 2846 NULL, 2847 MatGetGhosts_MPIAIJ, 2848 NULL, 2849 NULL, 2850 /*119*/ MatMultDiagonalBlock_MPIAIJ, 2851 NULL, 2852 NULL, 2853 NULL, 2854 MatGetMultiProcBlock_MPIAIJ, 2855 /*124*/ MatFindNonzeroRows_MPIAIJ, 2856 MatGetColumnReductions_MPIAIJ, 2857 MatInvertBlockDiagonal_MPIAIJ, 2858 MatInvertVariableBlockDiagonal_MPIAIJ, 2859 MatCreateSubMatricesMPI_MPIAIJ, 2860 /*129*/ NULL, 2861 NULL, 2862 NULL, 2863 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2864 NULL, 2865 /*134*/ NULL, 2866 NULL, 2867 NULL, 2868 NULL, 2869 NULL, 2870 /*139*/ MatSetBlockSizes_MPIAIJ, 2871 NULL, 2872 NULL, 2873 MatFDColoringSetUp_MPIXAIJ, 2874 MatFindOffBlockDiagonalEntries_MPIAIJ, 2875 MatCreateMPIMatConcatenateSeqMat_MPIAIJ, 2876 /*145*/ NULL, 2877 NULL, 2878 NULL, 2879 MatCreateGraph_Simple_AIJ, 2880 NULL, 2881 /*150*/ NULL, 2882 MatEliminateZeros_MPIAIJ}; 2883 2884 /* ----------------------------------------------------------------------------------------*/ 2885 2886 PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2887 { 2888 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2889 2890 PetscFunctionBegin; 2891 PetscCall(MatStoreValues(aij->A)); 2892 PetscCall(MatStoreValues(aij->B)); 2893 PetscFunctionReturn(PETSC_SUCCESS); 2894 } 2895 2896 PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2897 { 2898 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2899 2900 PetscFunctionBegin; 2901 PetscCall(MatRetrieveValues(aij->A)); 2902 PetscCall(MatRetrieveValues(aij->B)); 2903 PetscFunctionReturn(PETSC_SUCCESS); 2904 } 2905 2906 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[]) 2907 { 2908 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2909 PetscMPIInt size; 2910 2911 PetscFunctionBegin; 2912 if (B->hash_active) { 2913 PetscCall(PetscMemcpy(&B->ops, &b->cops, sizeof(*(B->ops)))); 2914 B->hash_active = PETSC_FALSE; 2915 } 2916 PetscCall(PetscLayoutSetUp(B->rmap)); 2917 PetscCall(PetscLayoutSetUp(B->cmap)); 2918 
2919 #if defined(PETSC_USE_CTABLE) 2920 PetscCall(PetscHMapIDestroy(&b->colmap)); 2921 #else 2922 PetscCall(PetscFree(b->colmap)); 2923 #endif 2924 PetscCall(PetscFree(b->garray)); 2925 PetscCall(VecDestroy(&b->lvec)); 2926 PetscCall(VecScatterDestroy(&b->Mvctx)); 2927 2928 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 2929 PetscCall(MatDestroy(&b->B)); 2930 PetscCall(MatCreate(PETSC_COMM_SELF, &b->B)); 2931 PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0)); 2932 PetscCall(MatSetBlockSizesFromMats(b->B, B, B)); 2933 PetscCall(MatSetType(b->B, MATSEQAIJ)); 2934 2935 PetscCall(MatDestroy(&b->A)); 2936 PetscCall(MatCreate(PETSC_COMM_SELF, &b->A)); 2937 PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n)); 2938 PetscCall(MatSetBlockSizesFromMats(b->A, B, B)); 2939 PetscCall(MatSetType(b->A, MATSEQAIJ)); 2940 2941 PetscCall(MatSeqAIJSetPreallocation(b->A, d_nz, d_nnz)); 2942 PetscCall(MatSeqAIJSetPreallocation(b->B, o_nz, o_nnz)); 2943 B->preallocated = PETSC_TRUE; 2944 B->was_assembled = PETSC_FALSE; 2945 B->assembled = PETSC_FALSE; 2946 PetscFunctionReturn(PETSC_SUCCESS); 2947 } 2948 2949 PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) 2950 { 2951 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2952 2953 PetscFunctionBegin; 2954 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 2955 PetscCall(PetscLayoutSetUp(B->rmap)); 2956 PetscCall(PetscLayoutSetUp(B->cmap)); 2957 2958 #if defined(PETSC_USE_CTABLE) 2959 PetscCall(PetscHMapIDestroy(&b->colmap)); 2960 #else 2961 PetscCall(PetscFree(b->colmap)); 2962 #endif 2963 PetscCall(PetscFree(b->garray)); 2964 PetscCall(VecDestroy(&b->lvec)); 2965 PetscCall(VecScatterDestroy(&b->Mvctx)); 2966 2967 PetscCall(MatResetPreallocation(b->A)); 2968 PetscCall(MatResetPreallocation(b->B)); 2969 B->preallocated = PETSC_TRUE; 2970 B->was_assembled = PETSC_FALSE; 2971 B->assembled = PETSC_FALSE; 2972 PetscFunctionReturn(PETSC_SUCCESS); 2973 } 2974 
2975 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat) 2976 { 2977 Mat mat; 2978 Mat_MPIAIJ *a, *oldmat = (Mat_MPIAIJ *)matin->data; 2979 2980 PetscFunctionBegin; 2981 *newmat = NULL; 2982 PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat)); 2983 PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N)); 2984 PetscCall(MatSetBlockSizesFromMats(mat, matin, matin)); 2985 PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name)); 2986 a = (Mat_MPIAIJ *)mat->data; 2987 2988 mat->factortype = matin->factortype; 2989 mat->assembled = matin->assembled; 2990 mat->insertmode = NOT_SET_VALUES; 2991 mat->preallocated = matin->preallocated; 2992 2993 a->size = oldmat->size; 2994 a->rank = oldmat->rank; 2995 a->donotstash = oldmat->donotstash; 2996 a->roworiented = oldmat->roworiented; 2997 a->rowindices = NULL; 2998 a->rowvalues = NULL; 2999 a->getrowactive = PETSC_FALSE; 3000 3001 PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap)); 3002 PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap)); 3003 3004 if (oldmat->colmap) { 3005 #if defined(PETSC_USE_CTABLE) 3006 PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap)); 3007 #else 3008 PetscCall(PetscMalloc1(mat->cmap->N, &a->colmap)); 3009 PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, mat->cmap->N)); 3010 #endif 3011 } else a->colmap = NULL; 3012 if (oldmat->garray) { 3013 PetscInt len; 3014 len = oldmat->B->cmap->n; 3015 PetscCall(PetscMalloc1(len + 1, &a->garray)); 3016 if (len) PetscCall(PetscArraycpy(a->garray, oldmat->garray, len)); 3017 } else a->garray = NULL; 3018 3019 /* It may happen MatDuplicate is called with a non-assembled matrix 3020 In fact, MatDuplicate only requires the matrix to be preallocated 3021 This may happen inside a DMCreateMatrix_Shell */ 3022 if (oldmat->lvec) { PetscCall(VecDuplicate(oldmat->lvec, &a->lvec)); } 3023 if (oldmat->Mvctx) { PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx)); } 3024 
PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A)); 3025 PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B)); 3026 PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist)); 3027 *newmat = mat; 3028 PetscFunctionReturn(PETSC_SUCCESS); 3029 } 3030 3031 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3032 { 3033 PetscBool isbinary, ishdf5; 3034 3035 PetscFunctionBegin; 3036 PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1); 3037 PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2); 3038 /* force binary viewer to load .info file if it has not yet done so */ 3039 PetscCall(PetscViewerSetUp(viewer)); 3040 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 3041 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5)); 3042 if (isbinary) { 3043 PetscCall(MatLoad_MPIAIJ_Binary(newMat, viewer)); 3044 } else if (ishdf5) { 3045 #if defined(PETSC_HAVE_HDF5) 3046 PetscCall(MatLoad_AIJ_HDF5(newMat, viewer)); 3047 #else 3048 SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5"); 3049 #endif 3050 } else { 3051 SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name); 3052 } 3053 PetscFunctionReturn(PETSC_SUCCESS); 3054 } 3055 3056 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer) 3057 { 3058 PetscInt header[4], M, N, m, nz, rows, cols, sum, i; 3059 PetscInt *rowidxs, *colidxs; 3060 PetscScalar *matvals; 3061 3062 PetscFunctionBegin; 3063 PetscCall(PetscViewerSetUp(viewer)); 3064 3065 /* read in matrix header */ 3066 PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT)); 3067 PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file"); 3068 M = 
header[1]; 3069 N = header[2]; 3070 nz = header[3]; 3071 PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M); 3072 PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N); 3073 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIAIJ"); 3074 3075 /* set block sizes from the viewer's .info file */ 3076 PetscCall(MatLoad_Binary_BlockSizes(mat, viewer)); 3077 /* set global sizes if not set already */ 3078 if (mat->rmap->N < 0) mat->rmap->N = M; 3079 if (mat->cmap->N < 0) mat->cmap->N = N; 3080 PetscCall(PetscLayoutSetUp(mat->rmap)); 3081 PetscCall(PetscLayoutSetUp(mat->cmap)); 3082 3083 /* check if the matrix sizes are correct */ 3084 PetscCall(MatGetSize(mat, &rows, &cols)); 3085 PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols); 3086 3087 /* read in row lengths and build row indices */ 3088 PetscCall(MatGetLocalSize(mat, &m, NULL)); 3089 PetscCall(PetscMalloc1(m + 1, &rowidxs)); 3090 PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT)); 3091 rowidxs[0] = 0; 3092 for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i]; 3093 PetscCall(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer))); 3094 PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum); 3095 /* read in column indices and matrix values */ 3096 PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals)); 3097 PetscCall(PetscViewerBinaryReadAll(viewer, 
colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 3098 PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 3099 /* store matrix indices and values */ 3100 PetscCall(MatMPIAIJSetPreallocationCSR(mat, rowidxs, colidxs, matvals)); 3101 PetscCall(PetscFree(rowidxs)); 3102 PetscCall(PetscFree2(colidxs, matvals)); 3103 PetscFunctionReturn(PETSC_SUCCESS); 3104 } 3105 3106 /* Not scalable because of ISAllGather() unless getting all columns. */ 3107 PetscErrorCode ISGetSeqIS_Private(Mat mat, IS iscol, IS *isseq) 3108 { 3109 IS iscol_local; 3110 PetscBool isstride; 3111 PetscMPIInt lisstride = 0, gisstride; 3112 3113 PetscFunctionBegin; 3114 /* check if we are grabbing all columns*/ 3115 PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &isstride)); 3116 3117 if (isstride) { 3118 PetscInt start, len, mstart, mlen; 3119 PetscCall(ISStrideGetInfo(iscol, &start, NULL)); 3120 PetscCall(ISGetLocalSize(iscol, &len)); 3121 PetscCall(MatGetOwnershipRangeColumn(mat, &mstart, &mlen)); 3122 if (mstart == start && mlen - mstart == len) lisstride = 1; 3123 } 3124 3125 PetscCall(MPIU_Allreduce(&lisstride, &gisstride, 1, MPI_INT, MPI_MIN, PetscObjectComm((PetscObject)mat))); 3126 if (gisstride) { 3127 PetscInt N; 3128 PetscCall(MatGetSize(mat, NULL, &N)); 3129 PetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &iscol_local)); 3130 PetscCall(ISSetIdentity(iscol_local)); 3131 PetscCall(PetscInfo(mat, "Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n")); 3132 } else { 3133 PetscInt cbs; 3134 PetscCall(ISGetBlockSize(iscol, &cbs)); 3135 PetscCall(ISAllGather(iscol, &iscol_local)); 3136 PetscCall(ISSetBlockSize(iscol_local, cbs)); 3137 } 3138 3139 *isseq = iscol_local; 3140 PetscFunctionReturn(PETSC_SUCCESS); 3141 } 3142 3143 /* 3144 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local 3145 (see 
MatCreateSubMatrix_MPIAIJ_nonscalable) 3146 3147 Input Parameters: 3148 mat - matrix 3149 isrow - parallel row index set; its local indices are a subset of local columns of mat, 3150 i.e., mat->rstart <= isrow[i] < mat->rend 3151 iscol - parallel column index set; its local indices are a subset of local columns of mat, 3152 i.e., mat->cstart <= iscol[i] < mat->cend 3153 Output Parameter: 3154 isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A 3155 iscol_o - sequential column index set for retrieving mat->B 3156 garray - column map; garray[i] indicates global location of iscol_o[i] in iscol 3157 */ 3158 PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat, IS isrow, IS iscol, IS *isrow_d, IS *iscol_d, IS *iscol_o, const PetscInt *garray[]) 3159 { 3160 Vec x, cmap; 3161 const PetscInt *is_idx; 3162 PetscScalar *xarray, *cmaparray; 3163 PetscInt ncols, isstart, *idx, m, rstart, *cmap1, count; 3164 Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data; 3165 Mat B = a->B; 3166 Vec lvec = a->lvec, lcmap; 3167 PetscInt i, cstart, cend, Bn = B->cmap->N; 3168 MPI_Comm comm; 3169 VecScatter Mvctx = a->Mvctx; 3170 3171 PetscFunctionBegin; 3172 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3173 PetscCall(ISGetLocalSize(iscol, &ncols)); 3174 3175 /* (1) iscol is a sub-column vector of mat, pad it with '-1.' 
to form a full vector x */ 3176 PetscCall(MatCreateVecs(mat, &x, NULL)); 3177 PetscCall(VecSet(x, -1.0)); 3178 PetscCall(VecDuplicate(x, &cmap)); 3179 PetscCall(VecSet(cmap, -1.0)); 3180 3181 /* Get start indices */ 3182 PetscCallMPI(MPI_Scan(&ncols, &isstart, 1, MPIU_INT, MPI_SUM, comm)); 3183 isstart -= ncols; 3184 PetscCall(MatGetOwnershipRangeColumn(mat, &cstart, &cend)); 3185 3186 PetscCall(ISGetIndices(iscol, &is_idx)); 3187 PetscCall(VecGetArray(x, &xarray)); 3188 PetscCall(VecGetArray(cmap, &cmaparray)); 3189 PetscCall(PetscMalloc1(ncols, &idx)); 3190 for (i = 0; i < ncols; i++) { 3191 xarray[is_idx[i] - cstart] = (PetscScalar)is_idx[i]; 3192 cmaparray[is_idx[i] - cstart] = i + isstart; /* global index of iscol[i] */ 3193 idx[i] = is_idx[i] - cstart; /* local index of iscol[i] */ 3194 } 3195 PetscCall(VecRestoreArray(x, &xarray)); 3196 PetscCall(VecRestoreArray(cmap, &cmaparray)); 3197 PetscCall(ISRestoreIndices(iscol, &is_idx)); 3198 3199 /* Get iscol_d */ 3200 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, iscol_d)); 3201 PetscCall(ISGetBlockSize(iscol, &i)); 3202 PetscCall(ISSetBlockSize(*iscol_d, i)); 3203 3204 /* Get isrow_d */ 3205 PetscCall(ISGetLocalSize(isrow, &m)); 3206 rstart = mat->rmap->rstart; 3207 PetscCall(PetscMalloc1(m, &idx)); 3208 PetscCall(ISGetIndices(isrow, &is_idx)); 3209 for (i = 0; i < m; i++) idx[i] = is_idx[i] - rstart; 3210 PetscCall(ISRestoreIndices(isrow, &is_idx)); 3211 3212 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idx, PETSC_OWN_POINTER, isrow_d)); 3213 PetscCall(ISGetBlockSize(isrow, &i)); 3214 PetscCall(ISSetBlockSize(*isrow_d, i)); 3215 3216 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3217 PetscCall(VecScatterBegin(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD)); 3218 PetscCall(VecScatterEnd(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD)); 3219 3220 PetscCall(VecDuplicate(lvec, &lcmap)); 3221 3222 PetscCall(VecScatterBegin(Mvctx, 
cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD)); 3223 PetscCall(VecScatterEnd(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD)); 3224 3225 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3226 /* off-process column indices */ 3227 count = 0; 3228 PetscCall(PetscMalloc1(Bn, &idx)); 3229 PetscCall(PetscMalloc1(Bn, &cmap1)); 3230 3231 PetscCall(VecGetArray(lvec, &xarray)); 3232 PetscCall(VecGetArray(lcmap, &cmaparray)); 3233 for (i = 0; i < Bn; i++) { 3234 if (PetscRealPart(xarray[i]) > -1.0) { 3235 idx[count] = i; /* local column index in off-diagonal part B */ 3236 cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3237 count++; 3238 } 3239 } 3240 PetscCall(VecRestoreArray(lvec, &xarray)); 3241 PetscCall(VecRestoreArray(lcmap, &cmaparray)); 3242 3243 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_COPY_VALUES, iscol_o)); 3244 /* cannot ensure iscol_o has same blocksize as iscol! */ 3245 3246 PetscCall(PetscFree(idx)); 3247 *garray = cmap1; 3248 3249 PetscCall(VecDestroy(&x)); 3250 PetscCall(VecDestroy(&cmap)); 3251 PetscCall(VecDestroy(&lcmap)); 3252 PetscFunctionReturn(PETSC_SUCCESS); 3253 } 3254 3255 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3256 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *submat) 3257 { 3258 Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data, *asub; 3259 Mat M = NULL; 3260 MPI_Comm comm; 3261 IS iscol_d, isrow_d, iscol_o; 3262 Mat Asub = NULL, Bsub = NULL; 3263 PetscInt n; 3264 3265 PetscFunctionBegin; 3266 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3267 3268 if (call == MAT_REUSE_MATRIX) { 3269 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3270 PetscCall(PetscObjectQuery((PetscObject)*submat, "isrow_d", (PetscObject *)&isrow_d)); 3271 PetscCheck(isrow_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "isrow_d passed in was not used before, cannot 
reuse"); 3272 3273 PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_d", (PetscObject *)&iscol_d)); 3274 PetscCheck(iscol_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_d passed in was not used before, cannot reuse"); 3275 3276 PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_o", (PetscObject *)&iscol_o)); 3277 PetscCheck(iscol_o, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_o passed in was not used before, cannot reuse"); 3278 3279 /* Update diagonal and off-diagonal portions of submat */ 3280 asub = (Mat_MPIAIJ *)(*submat)->data; 3281 PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->A)); 3282 PetscCall(ISGetLocalSize(iscol_o, &n)); 3283 if (n) PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->B)); 3284 PetscCall(MatAssemblyBegin(*submat, MAT_FINAL_ASSEMBLY)); 3285 PetscCall(MatAssemblyEnd(*submat, MAT_FINAL_ASSEMBLY)); 3286 3287 } else { /* call == MAT_INITIAL_MATRIX) */ 3288 const PetscInt *garray; 3289 PetscInt BsubN; 3290 3291 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) 
*/ 3292 PetscCall(ISGetSeqIS_SameColDist_Private(mat, isrow, iscol, &isrow_d, &iscol_d, &iscol_o, &garray)); 3293 3294 /* Create local submatrices Asub and Bsub */ 3295 PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Asub)); 3296 PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Bsub)); 3297 3298 /* Create submatrix M */ 3299 PetscCall(MatCreateMPIAIJWithSeqAIJ(comm, Asub, Bsub, garray, &M)); 3300 3301 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3302 asub = (Mat_MPIAIJ *)M->data; 3303 3304 PetscCall(ISGetLocalSize(iscol_o, &BsubN)); 3305 n = asub->B->cmap->N; 3306 if (BsubN > n) { 3307 /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */ 3308 const PetscInt *idx; 3309 PetscInt i, j, *idx_new, *subgarray = asub->garray; 3310 PetscCall(PetscInfo(M, "submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n", n, BsubN)); 3311 3312 PetscCall(PetscMalloc1(n, &idx_new)); 3313 j = 0; 3314 PetscCall(ISGetIndices(iscol_o, &idx)); 3315 for (i = 0; i < n; i++) { 3316 if (j >= BsubN) break; 3317 while (subgarray[i] > garray[j]) j++; 3318 3319 if (subgarray[i] == garray[j]) { 3320 idx_new[i] = idx[j++]; 3321 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT, i, subgarray[i], j, garray[j]); 3322 } 3323 PetscCall(ISRestoreIndices(iscol_o, &idx)); 3324 3325 PetscCall(ISDestroy(&iscol_o)); 3326 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx_new, PETSC_OWN_POINTER, &iscol_o)); 3327 3328 } else if (BsubN < n) { 3329 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")", BsubN, asub->B->cmap->N); 3330 } 3331 3332 PetscCall(PetscFree(garray)); 3333 *submat = M; 3334 3335 /* Save isrow_d, 
   iscol_d and iscol_o used in processor for next request */
    PetscCall(PetscObjectCompose((PetscObject)M, "isrow_d", (PetscObject)isrow_d));
    PetscCall(ISDestroy(&isrow_d));

    PetscCall(PetscObjectCompose((PetscObject)M, "iscol_d", (PetscObject)iscol_d));
    PetscCall(ISDestroy(&iscol_d));

    PetscCall(PetscObjectCompose((PetscObject)M, "iscol_o", (PetscObject)iscol_o));
    PetscCall(ISDestroy(&iscol_o));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatCreateSubMatrix_MPIAIJ - dispatcher for extracting a parallel submatrix.

  Picks the cheapest algorithm available:
  - if both isrow and iscol match mat's row/column ownership ranges (checked locally, then
    agreed on with an MPI_LAND allreduce), call MatCreateSubMatrix_MPIAIJ_SameRowColDist();
  - if only the row distribution matches (and the gathered iscol is sorted), call
    MatCreateSubMatrix_MPIAIJ_SameRowDist();
  - otherwise fall back to MatCreateSubMatrix_MPIAIJ_nonscalable(), which gathers iscol to
    every process (global size), hence "nonscalable".

  On MAT_REUSE_MATRIX the choice made at MAT_INITIAL_MATRIX time is recovered from the
  IS objects ("isrow_d", "SubIScol", "ISAllGather") composed onto *newmat.
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
{
  IS        iscol_local = NULL, isrow_d;
  PetscInt  csize;
  PetscInt  n, i, j, start, end;
  PetscBool sameRowDist = PETSC_FALSE, sameDist[2], tsameDist[2];
  MPI_Comm  comm;

  PetscFunctionBegin;
  /* If isrow has same processor distribution as mat,
     call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    /* Recover which algorithm was used when the submatrix was first created */
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "isrow_d", (PetscObject *)&isrow_d));
    if (isrow_d) {
      sameRowDist  = PETSC_TRUE;
      tsameDist[1] = PETSC_TRUE; /* sameColDist */
    } else {
      PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_local));
      if (iscol_local) {
        sameRowDist  = PETSC_TRUE;
        tsameDist[1] = PETSC_FALSE; /* !sameColDist */
      }
    }
  } else {
    /* Check if isrow has same processor distribution as mat; an empty local IS trivially matches */
    sameDist[0] = PETSC_FALSE;
    PetscCall(ISGetLocalSize(isrow, &n));
    if (!n) {
      sameDist[0] = PETSC_TRUE;
    } else {
      PetscCall(ISGetMinMax(isrow, &i, &j));
      PetscCall(MatGetOwnershipRange(mat, &start, &end));
      if (i >= start && j < end) sameDist[0] = PETSC_TRUE;
    }

    /* Check if iscol has same processor distribution as mat */
    sameDist[1] = PETSC_FALSE;
    PetscCall(ISGetLocalSize(iscol, &n));
    if (!n) {
      sameDist[1] = PETSC_TRUE;
    } else {
      PetscCall(ISGetMinMax(iscol, &i, &j));
      PetscCall(MatGetOwnershipRangeColumn(mat, &start, &end));
      if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
    }

    /* All processes must agree on the distribution checks */
    PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
    PetscCall(MPIU_Allreduce(&sameDist, &tsameDist, 2, MPIU_BOOL, MPI_LAND, comm));
    sameRowDist = tsameDist[0];
  }

  if (sameRowDist) {
    if (tsameDist[1]) { /* sameRowDist & sameColDist */
      /* isrow and iscol have same processor distribution as mat */
      PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat, isrow, iscol, call, newmat));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else { /* sameRowDist */
      /* isrow has same processor distribution as mat */
      if (call == MAT_INITIAL_MATRIX) {
        PetscBool sorted;
        PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
        PetscCall(ISGetLocalSize(iscol_local, &n)); /* local size of iscol_local = global columns of newmat */
        PetscCall(ISGetSize(iscol, &i));
        PetscCheck(n == i, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT, n, i);

        PetscCall(ISSorted(iscol_local, &sorted));
        if (sorted) {
          /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
          PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, iscol_local, MAT_INITIAL_MATRIX, newmat));
          PetscFunctionReturn(PETSC_SUCCESS);
        }
        /* not sorted: fall through to the general (nonscalable) path below, reusing iscol_local */
      } else { /* call == MAT_REUSE_MATRIX */
        IS iscol_sub;
        PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
        if (iscol_sub) {
          PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, NULL, call, newmat));
          PetscFunctionReturn(PETSC_SUCCESS);
        }
      }
    }
  }

  /* General case: iscol -> iscol_local which has global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
    PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
  } else {
    if (!iscol_local) PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
  }

  PetscCall(ISGetLocalSize(iscol, &csize));
  PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat, isrow, iscol_local, csize, call, newmat));

  if (call == MAT_INITIAL_MATRIX) {
    /* save the gathered column IS on the submatrix so MAT_REUSE_MATRIX can find it */
    PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
    PetscCall(ISDestroy(&iscol_local));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatCreateMPIAIJWithSeqAIJ - creates a `MATMPIAIJ` matrix using `MATSEQAIJ` matrices that contain the "diagonal"
  and "off-diagonal" part of the matrix in CSR format.

  Collective

  Input Parameters:
+ comm   - MPI communicator
. A      - "diagonal" portion of matrix
. B      - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
- garray - global index of B columns

  Output Parameter:
. mat - the matrix, with input A as its local diagonal matrix

  Level: advanced

  Notes:
  See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix.

  A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore.

.seealso: `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateMPIAIJWithSplitArrays()`
@*/
PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm, Mat A, Mat B, const PetscInt garray[], Mat *mat)
{
  Mat_MPIAIJ        *maij;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data, *bnew;
  PetscInt          *oi = b->i, *oj = b->j, i, nz, col;
  const PetscScalar *oa;
  Mat                Bnew;
  PetscInt           m, n, N;
  MatType            mpi_mat_type;

  PetscFunctionBegin;
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatGetSize(A, &m, &n));
  PetscCheck(m == B->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Am %" PetscInt_FMT " != Bm %" PetscInt_FMT, m, B->rmap->N);
  PetscCheck(A->rmap->bs == B->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT, A->rmap->bs, B->rmap->bs);
  /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
  /* PetscCheck(A->cmap->bs == B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */

  /* Get global columns of mat */
  PetscCall(MPIU_Allreduce(&n, &N, 1, MPIU_INT, MPI_SUM, comm));

  PetscCall(MatSetSizes(*mat, m, n, PETSC_DECIDE, N));
  /* Determine the type of MPI matrix that should be created from the type of matrix A, which holds the "diagonal" portion. */
  PetscCall(MatGetMPIMatType_Private(A, &mpi_mat_type));
  PetscCall(MatSetType(*mat, mpi_mat_type));

  PetscCall(MatSetBlockSizes(*mat, A->rmap->bs, A->cmap->bs));
  maij = (Mat_MPIAIJ *)(*mat)->data;

  (*mat)->preallocated = PETSC_TRUE;

  PetscCall(PetscLayoutSetUp((*mat)->rmap));
  PetscCall(PetscLayoutSetUp((*mat)->cmap));

  /* Set A as diagonal portion of *mat; ownership of A transfers to *mat */
  maij->A = A;

  /* Map B's (compacted) column indices to global column indices, in place */
  nz = oi[m];
  for (i = 0; i < nz; i++) {
    col   = oj[i];
    oj[i] = garray[col];
  }

  /* Set Bnew as off-diagonal portion of *mat; Bnew shares B's i/j/a arrays */
  PetscCall(MatSeqAIJGetArrayRead(B, &oa));
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, N, oi, oj, (PetscScalar *)oa, &Bnew));
  PetscCall(MatSeqAIJRestoreArrayRead(B, &oa));
  bnew        = (Mat_SeqAIJ *)Bnew->data;
  bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
  maij->B     = Bnew;

  PetscCheck(B->rmap->N == Bnew->rmap->N, PETSC_COMM_SELF, PETSC_ERR_PLIB, "BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT, B->rmap->N, Bnew->rmap->N);

  /* B must not free the arrays it handed over to Bnew */
  b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
  b->free_a       = PETSC_FALSE;
  b->free_ij      = PETSC_FALSE;
  PetscCall(MatDestroy(&B));

  bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
  bnew->free_a       = PETSC_TRUE;
  bnew->free_ij      = PETSC_TRUE;

  /* condense columns of maij->B */
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
  PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat, PetscInt, const IS[], const IS[], MatReuse, PetscBool, Mat *);

PetscErrorCode
MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat, IS isrow, IS iscol, IS iscol_local, MatReuse call, Mat *newmat)
{
  /*
    Extract a parallel submatrix when isrow has the same row distribution as mat.
    iscol_local is the (sequential) gather of iscol; the caller guarantees it is
    sorted (it may contain duplicates).  On MAT_REUSE_MATRIX iscol_local may be
    NULL: the needed IS objects are recovered from *newmat ("SubIScol", "Subcmap",
    "SubMatrix").
  */
  PetscInt        i, m, n, rstart, row, rend, nz, j, bs, cbs;
  PetscInt       *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ *)mat->data;
  Mat             M, Msub, B = a->B;
  MatScalar      *aa;
  Mat_SeqAIJ     *aij;
  PetscInt       *garray = a->garray, *colsub, Ncols;
  PetscInt        count, Bn = B->cmap->N, cstart = mat->cmap->rstart, cend = mat->cmap->rend;
  IS              iscol_sub, iscmap;
  const PetscInt *is_idx, *cmap;
  PetscBool       allcolumns = PETSC_FALSE;
  MPI_Comm        comm;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  if (call == MAT_REUSE_MATRIX) {
    /* Recover the objects composed onto *newmat at MAT_INITIAL_MATRIX time */
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
    PetscCheck(iscol_sub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "SubIScol passed in was not used before, cannot reuse");
    PetscCall(ISGetLocalSize(iscol_sub, &count));

    PetscCall(PetscObjectQuery((PetscObject)*newmat, "Subcmap", (PetscObject *)&iscmap));
    PetscCheck(iscmap, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Subcmap passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Msub));
    PetscCheck(Msub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");

    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_REUSE_MATRIX, PETSC_FALSE, &Msub));

  } else { /* call == MAT_INITIAL_MATRIX) */
    PetscBool flg;

    PetscCall(ISGetLocalSize(iscol, &n));
    PetscCall(ISGetSize(iscol, &Ncols));

    /* (1) iscol -> nonscalable iscol_local */
    /* Check for special case: each processor gets entire matrix columns */
    PetscCall(ISIdentity(iscol_local, &flg));
    if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
    PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (allcolumns) {
      iscol_sub = iscol_local;
      PetscCall(PetscObjectReference((PetscObject)iscol_local));
      PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &iscmap));

    } else {
      /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
      PetscInt *idx, *cmap1, k;
      PetscCall(PetscMalloc1(Ncols, &idx));
      PetscCall(PetscMalloc1(Ncols, &cmap1));
      PetscCall(ISGetIndices(iscol_local, &is_idx));
      count = 0;
      k     = 0;
      /* keep only the requested columns this process actually stores:
         diagonal-block columns [cstart,cend) plus off-diagonal columns found in garray
         (garray is scanned monotonically with k, relying on iscol_local being sorted) */
      for (i = 0; i < Ncols; i++) {
        j = is_idx[i];
        if (j >= cstart && j < cend) {
          /* diagonal part of mat */
          idx[count]     = j;
          cmap1[count++] = i; /* column index in submat */
        } else if (Bn) {
          /* off-diagonal part of mat */
          if (j == garray[k]) {
            idx[count]     = j;
            cmap1[count++] = i; /* column index in submat */
          } else if (j > garray[k]) {
            while (j > garray[k] && k < Bn - 1) k++;
            if (j == garray[k]) {
              idx[count]     = j;
              cmap1[count++] = i; /* column index in submat */
            }
          }
        }
      }
      PetscCall(ISRestoreIndices(iscol_local, &is_idx));

      PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_OWN_POINTER, &iscol_sub));
      PetscCall(ISGetBlockSize(iscol, &cbs));
      PetscCall(ISSetBlockSize(iscol_sub, cbs));

      PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local), count, cmap1, PETSC_OWN_POINTER, &iscmap));
    }

    /* (3) Create sequential Msub */
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_INITIAL_MATRIX, allcolumns, &Msub));
  }

  PetscCall(ISGetLocalSize(iscol_sub, &count));
  aij = (Mat_SeqAIJ *)(Msub)->data;
  ii  = aij->i;
  PetscCall(ISGetIndices(iscmap, &cmap));

  /*
      m      - number of local rows
      Ncols  - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetSize(Msub, &m, NULL));

  if (call == MAT_INITIAL_MATRIX) {
    /* (4) Create parallel newmat */
    PetscMPIInt rank, size;
    PetscInt    csize;

    PetscCallMPI(MPI_Comm_size(comm, &size));
    PetscCallMPI(MPI_Comm_rank(comm, &rank));

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    PetscCall(ISGetLocalSize(iscol, &csize));
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow, &mglobal));
      if (mglobal == Ncols) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = Ncols / size + ((Ncols % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
    rstart = rend - nlocal;
    PetscCheck(rank != size - 1 || rend == Ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, Ncols);

    /* next, compute all the lengths */
    jj = aij->j;
    PetscCall(PetscMalloc1(2 * m + 1, &dlens));
    olens = dlens + m; /* dlens and olens share one allocation */
    for (i = 0; i < m; i++) {
      jend = ii[i + 1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j = 0; j < jend; j++) {
        if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }

    PetscCall(ISGetBlockSize(isrow, &bs));
    PetscCall(ISGetBlockSize(iscol, &cbs));

    PetscCall(MatCreate(comm, &M));
    PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, Ncols));
    PetscCall(MatSetBlockSizes(M, bs, cbs));
    PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
    PetscCall(PetscFree(dlens));

  } else { /* call == MAT_REUSE_MATRIX */
    M = *newmat;
    PetscCall(MatGetLocalSize(M, &i, NULL));
    PetscCheck(i == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
        The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
      rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }

  /* (5) Set values of Msub to *newmat */
  PetscCall(PetscMalloc1(count, &colsub));
  PetscCall(MatGetOwnershipRange(M, &rstart, NULL));

  jj = aij->j;
  PetscCall(MatSeqAIJGetArrayRead(Msub, (const PetscScalar **)&aa));
  for (i = 0; i < m; i++) {
    row = rstart + i;
    nz  = ii[i + 1] - ii[i];
    /* translate Msub's local column indices back to newmat's global columns */
    for (j = 0; j < nz; j++) colsub[j] = cmap[jj[j]];
    PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, colsub, aa, INSERT_VALUES));
    jj += nz;
    aa += nz;
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Msub, (const PetscScalar **)&aa));
  PetscCall(ISRestoreIndices(iscmap, &cmap));

  PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));

  PetscCall(PetscFree(colsub));

  /* save Msub, iscol_sub and iscmap used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    *newmat = M;
    PetscCall(PetscObjectCompose((PetscObject)(*newmat), "SubMatrix", (PetscObject)Msub));
    PetscCall(MatDestroy(&Msub));

    PetscCall(PetscObjectCompose((PetscObject)(*newmat), "SubIScol", (PetscObject)iscol_sub));
    PetscCall(ISDestroy(&iscol_sub));

    PetscCall(PetscObjectCompose((PetscObject)(*newmat), "Subcmap", (PetscObject)iscmap));
    PetscCall(ISDestroy(&iscmap));

    if (iscol_local) {
      PetscCall(PetscObjectCompose((PetscObject)(*newmat), "ISAllGather", (PetscObject)iscol_local));
      PetscCall(ISDestroy(&iscol_local));
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
    Not great since it makes two copies of the submatrix, first an SeqAIJ
  in local and then by concatenating the local matrices the end result.
  Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()

    This requires a sequential iscol with all indices.
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat)
{
  PetscMPIInt rank, size;
  PetscInt    i, m, n, rstart, row, rend, nz, *cwork, j, bs, cbs;
  PetscInt   *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
  Mat         M, Mreuse;
  MatScalar  *aa, *vwork;
  MPI_Comm    comm;
  Mat_SeqAIJ *aij;
  PetscBool   colflag, allcolumns = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));

  /* Check for special case: each processor gets entire matrix columns */
  PetscCall(ISIdentity(iscol, &colflag));
  PetscCall(ISGetLocalSize(iscol, &n));
  if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
  PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));

  if (call == MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse));
    PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_REUSE_MATRIX, allcolumns, &Mreuse));
  } else {
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_INITIAL_MATRIX, allcolumns, &Mreuse));
  }

  /*
      m      - number of local rows
      n      - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetSize(Mreuse, &m, &n));
  PetscCall(MatGetBlockSizes(Mreuse, &bs, &cbs));
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ *)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow, &mglobal));
      if (mglobal == n) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = n / size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
    rstart = rend - nlocal;
    PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n);

    /* next, compute all the lengths */
    PetscCall(PetscMalloc1(2 * m + 1, &dlens));
    olens = dlens + m; /* dlens and olens share one allocation */
    for (i = 0; i < m; i++) {
      jend = ii[i + 1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j = 0; j < jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    PetscCall(MatCreate(comm, &M));
    PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, n));
    PetscCall(MatSetBlockSizes(M, bs, cbs));
    PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
    PetscCall(PetscFree(dlens));
  } else {
    PetscInt ml, nl;

    M = *newmat;
    PetscCall(MatGetLocalSize(M, &ml, &nl));
    PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
        The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
      rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  PetscCall(MatGetOwnershipRange(M, &rstart, &rend));
  aij = (Mat_SeqAIJ *)(Mreuse)->data;
  ii  = aij->i;
  jj  = aij->j;

  /* trigger copy to CPU if needed */
  PetscCall(MatSeqAIJGetArrayRead(Mreuse, (const PetscScalar **)&aa));
  for (i = 0; i < m; i++) {
    row   = rstart + i;
    nz    = ii[i + 1] - ii[i];
    cwork = jj;
    jj += nz;
    vwork = aa;
    aa += nz;
    PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES));
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Mreuse, (const PetscScalar **)&aa));

  PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse));
    PetscCall(MatDestroy(&Mreuse));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatMPIAIJSetPreallocationCSR_MPIAIJ - implementation behind MatMPIAIJSetPreallocationCSR():
  counts per-row diagonal/off-diagonal nonzeros from the caller's CSR arrays, preallocates,
  then inserts the values row by row.
*/
PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[])
{
  PetscInt        m, cstart, cend, j, nnz, i, d, *ld;
  PetscInt       *d_nnz, *o_nnz, nnz_max = 0, rstart, ii;
  const PetscInt *JJ;
  PetscBool       nooffprocentries;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)B->data;

  PetscFunctionBegin;
  PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Ii[0] must be 0 it is %" PetscInt_FMT, Ii[0]);

  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  m      = B->rmap->n;
  cstart = B->cmap->rstart;
  cend   = B->cmap->rend;
  rstart = B->rmap->rstart;

  PetscCall(PetscCalloc2(m, &d_nnz, m, &o_nnz));

  if (PetscDefined(USE_DEBUG)) {
    /* debug-only sanity checks on the CSR input; the last check assumes the largest
       column index in a row is at JJ[nnz-1] -- NOTE(review): this presumes sorted rows, confirm */
    for (i = 0; i < m; i++) {
      nnz = Ii[i + 1] - Ii[i];
      JJ  = J + Ii[i];
      PetscCheck(nnz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns", i, nnz);
      PetscCheck(!nnz || !(JJ[0] < 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT, i, JJ[0]);
      PetscCheck(!nnz || !(JJ[nnz - 1] >= B->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")", i, JJ[nnz - 1], B->cmap->N);
    }
  }

  /* Count, for each local row, how many entries land in the diagonal block [cstart,cend) */
  for (i = 0; i < m; i++) {
    nnz     = Ii[i + 1] - Ii[i];
    JJ      = J + Ii[i];
    nnz_max = PetscMax(nnz_max, nnz);
    d       = 0;
    for (j = 0; j < nnz; j++) {
      if (cstart <= JJ[j] && JJ[j] < cend) d++;
    }
    d_nnz[i] = d;
    o_nnz[i] = nnz - d;
  }
  PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
  PetscCall(PetscFree2(d_nnz, o_nnz));

  /* Insert the values one local row at a time; v may be NULL (structure only) */
  for (i = 0; i < m; i++) {
    ii = i + rstart;
    PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, Ii[i + 1] - Ii[i], J + Ii[i], v ? v + Ii[i] : NULL, INSERT_VALUES));
  }
  /* all entries are local by construction, so suppress off-process communication during assembly */
  nooffprocentries    = B->nooffprocentries;
  B->nooffprocentries = PETSC_TRUE;
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  B->nooffprocentries = nooffprocentries;

  /* count number of entries below block diagonal */
  PetscCall(PetscFree(Aij->ld));
  PetscCall(PetscCalloc1(m, &ld));
  Aij->ld = ld;
  for (i = 0; i < m; i++) {
    /* note: J is advanced destructively here (it is a local copy of the pointer) */
    nnz = Ii[i + 1] - Ii[i];
    j   = 0;
    while (j < nnz && J[j] < cstart) j++;
    ld[i] = j;
    J += nnz;
  }

  PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in `MATAIJ` format
  (the default parallel PETSc format).

  Collective

  Input Parameters:
+ B - the matrix
. i - the indices into j for the start of each local row (starts with zero)
. j - the column indices for each local row (starts with zero)
- v - optional values in the matrix

  Level: developer

  Notes:
  The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
  thus you CANNOT change the matrix entries by changing the values of v[] after you have
  called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.

  The i and j indices are 0 based, and i indices are indices corresponding to the local j array.

  The format which is used for the sparse matrix input, is equivalent to a
  row-major ordering..
  i.e for the following matrix, the input data expected is
  as shown

$        1 0 0
$        2 0 3     P0
$       -------
$        4 5 6     P1
$
$     Process0 [P0]: rows_owned=[0,1]
$        i =  {0,1,3}  [size = nrow+1  = 2+1]
$        j =  {0,0,2}  [size = 3]
$        v =  {1,2,3}  [size = 3]
$
$     Process1 [P1]: rows_owned=[2]
$        i =  {0,3}    [size = nrow+1  = 1+1]
$        j =  {0,1,2}  [size = 3]
$        v =  {4,5,6}  [size = 3]

.seealso: `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`, `MATMPIAIJ`,
          `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()`
@*/
PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
{
  PetscFunctionBegin;
  /* dispatch to the implementation registered for B's type (no-op if none) */
  PetscTryMethod(B, "MatMPIAIJSetPreallocationCSR_C", (Mat, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, i, j, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in `MATMPIAIJ` format
  (the default parallel PETSc format).  For good matrix assembly performance
  the user should preallocate the matrix storage by setting the parameters
  d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
  performance can be increased by more than a factor of 50.

  Collective

  Input Parameters:
+ B - the matrix
. d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
  (same value is used for all local rows)
. d_nnz - array containing the number of nonzeros in the various rows of the
  DIAGONAL portion of the local submatrix (possibly different for each row)
  or NULL (`PETSC_NULL_INTEGER` in Fortran), if d_nz is used to specify the nonzero structure.
  The size of this array is equal to the number of local rows, i.e 'm'.
  For matrices that will be factored, you must leave room for (and set)
  the diagonal entry even if it is zero.
. o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
  submatrix (same value is used for all local rows).
- o_nnz - array containing the number of nonzeros in the various rows of the
  OFF-DIAGONAL portion of the local submatrix (possibly different for
  each row) or NULL (`PETSC_NULL_INTEGER` in Fortran), if o_nz is used to specify the nonzero
  structure. The size of this array is equal to the number
  of local rows, i.e 'm'.

  Notes:
  If the *_nnz parameter is given then the *_nz parameter is ignored

  The `MATAIJ` format, also called compressed row storage (CSR), is fully compatible with standard Fortran 77
  storage.  The stored row and column indices begin with zero.
  See [Sparse Matrices](sec_matsparse) for details.

  The parallel matrix is partitioned such that the first m0 rows belong to
  process 0, the next m1 rows belong to process 1, the next m2 rows belong
  to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.

  The DIAGONAL portion of the local submatrix of a processor can be defined
  as the submatrix which is obtained by extraction the part corresponding to
  the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
  first row that belongs to the processor, r2 is the last row belonging to
  the this processor, and c1-c2 is range of indices of the local part of a
  vector suitable for applying the matrix to. This is an mxn matrix. In the
  common case of a square matrix, the row and column ranges are the same and
  the DIAGONAL part is also square. The remaining portion of the local
  submatrix (mxN) constitute the OFF-DIAGONAL portion.

  If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.

  You can call MatGetInfo() to get information on how effective the preallocation was;
  for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
  You can also run with the option -info and look for messages with the string
  malloc in them to see if additional memory allocation was needed.

  Example usage:

  Consider the following 8x8 matrix with 34 non-zero values, that is
  assembled across 3 processors. Lets assume that proc0 owns 3 rows,
  proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
  as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

  This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

  Where the submatrices A,B,C are owned by proc0, D,E,F are
  owned by proc1, G,H,I are owned by proc2.

  The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
  The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
  The 'M','N' parameters are 8,8, and have the same values on all procs.

  The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
  submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
  corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
  Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
  part as `MATSEQAIJ` matrices. for eg: proc1 will store [E] as a SeqAIJ
  matrix, and [DF] as another `MATSEQAIJ` matrix.

  When d_nz, o_nz parameters are specified, d_nz storage elements are
  allocated for every row of the local diagonal submatrix, and o_nz
  storage locations are allocated for every row of the OFF-DIAGONAL submat.
  One way to choose d_nz and o_nz is to use the max nonzeros per local
  rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
  In this case, the values of d_nz,o_nz are:
.vb
     proc0 : dnz = 2, o_nz = 2
     proc1 : dnz = 3, o_nz = 2
     proc2 : dnz = 1, o_nz = 4
.ve
  We are allocating m*(d_nz+o_nz) storage locations for every proc. This
  translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
  for proc2. i.e we are using 12+15+10=37 storage locations to store
  34 values.

  When d_nnz, o_nnz parameters are specified, the storage is specified
  for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
  In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1] and o_nnz = [4,4]
.ve
  Here the space allocated is sum of all the above values i.e 34, and
  hence pre-allocation is perfect.

  Level: intermediate

.seealso: [Sparse Matrices](sec_matsparse), `MATMPIAIJ`, `MATAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`,
          `MATMPIAIJ`, `MatGetInfo()`, `PetscSplitOwnership()`
@*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidType(B, 1);
  /* dispatch to the implementation registered for B's type (no-op if none) */
  PetscTryMethod(B, "MatMPIAIJSetPreallocation_C", (Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, d_nz, d_nnz, o_nz, o_nnz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatCreateMPIAIJWithArrays - creates a `MATMPIAIJ` matrix using arrays that contain in standard
  CSR format for the local rows.

  Collective

  Input Parameters:
+ comm - MPI communicator
. m - number of local rows (Cannot be `PETSC_DECIDE`)
. n - This value should be the same as the local size used in creating the
      x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
      calculated if N is given) For square matrices n is almost always m.
. M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
. N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
. i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
. j - column indices
- a - optional matrix values

  Output Parameter:
. mat - the matrix

  Level: intermediate

  Notes:
  The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
  thus you CANNOT change the matrix entries by changing the values of a[] after you have
  called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

  The i and j indices are 0 based, and i indices are indices corresponding to the local j array.

  The format which is used for the sparse matrix input, is equivalent to a
  row-major ordering, i.e for the following matrix, the input data expected is
  as shown

  Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays

$        1 0 0
$        2 0 3     P0
$       -------
$        4 5 6     P1
$
$     Process0 [P0]: rows_owned=[0,1]
$        i =  {0,1,3}  [size = nrow+1  = 2+1]
$        j =  {0,0,2}  [size = 3]
$        v =  {1,2,3}  [size = 3]
$
$     Process1 [P1]: rows_owned=[2]
$        i =  {0,3}    [size = nrow+1  = 1+1]
$        j =  {0,1,2}  [size = 3]
$        v =  {4,5,6}  [size = 3]

.seealso: `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
          `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`
@*/
PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
{
  PetscFunctionBegin;
  PetscCheck(!i || !i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatSetSizes(*mat, m, n, M, N));
  /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */
  PetscCall(MatSetType(*mat, MATMPIAIJ));
  PetscCall(MatMPIAIJSetPreallocationCSR(*mat, i, j, a));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatUpdateMPIAIJWithArrays - updates a `MATMPIAIJ` matrix using arrays that contain in standard
  CSR format for the local rows.
Only the numerical values are updated the other arrays must be identical to what was passed from `MatCreateMPIAIJWithArrays()` 4215 4216 Deprecated: Use `MatUpdateMPIAIJWithArray()` 4217 4218 Collective 4219 4220 Input Parameters: 4221 + mat - the matrix 4222 . m - number of local rows (Cannot be `PETSC_DECIDE`) 4223 . n - This value should be the same as the local size used in creating the 4224 x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have 4225 calculated if N is given) For square matrices n is almost always m. 4226 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 4227 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 4228 . Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix 4229 . J - column indices 4230 - v - matrix values 4231 4232 Level: intermediate 4233 4234 .seealso: `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4235 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatUpdateMPIAIJWithArray()` 4236 @*/ 4237 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[]) 4238 { 4239 PetscInt nnz, i; 4240 PetscBool nooffprocentries; 4241 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)mat->data; 4242 Mat_SeqAIJ *Ad = (Mat_SeqAIJ *)Aij->A->data; 4243 PetscScalar *ad, *ao; 4244 PetscInt ldi, Iii, md; 4245 const PetscInt *Adi = Ad->i; 4246 PetscInt *ld = Aij->ld; 4247 4248 PetscFunctionBegin; 4249 PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 4250 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4251 PetscCheck(m == mat->rmap->n, PETSC_COMM_SELF, 
PETSC_ERR_ARG_WRONG, "Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()"); 4252 PetscCheck(n == mat->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()"); 4253 4254 PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad)); 4255 PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao)); 4256 4257 for (i = 0; i < m; i++) { 4258 nnz = Ii[i + 1] - Ii[i]; 4259 Iii = Ii[i]; 4260 ldi = ld[i]; 4261 md = Adi[i + 1] - Adi[i]; 4262 PetscCall(PetscArraycpy(ao, v + Iii, ldi)); 4263 PetscCall(PetscArraycpy(ad, v + Iii + ldi, md)); 4264 PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md)); 4265 ad += md; 4266 ao += nnz - md; 4267 } 4268 nooffprocentries = mat->nooffprocentries; 4269 mat->nooffprocentries = PETSC_TRUE; 4270 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad)); 4271 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao)); 4272 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A)); 4273 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B)); 4274 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 4275 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 4276 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 4277 mat->nooffprocentries = nooffprocentries; 4278 PetscFunctionReturn(PETSC_SUCCESS); 4279 } 4280 4281 /*@ 4282 MatUpdateMPIAIJWithArray - updates an `MATMPIAIJ` matrix using an array that contains the nonzero values 4283 4284 Collective 4285 4286 Input Parameters: 4287 + mat - the matrix 4288 - v - matrix values, stored by row 4289 4290 Level: intermediate 4291 4292 Note: 4293 The matrix must have been obtained with `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` 4294 4295 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4296 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatUpdateMPIAIJWithArrays()` 4297 @*/ 
4298 PetscErrorCode MatUpdateMPIAIJWithArray(Mat mat, const PetscScalar v[]) 4299 { 4300 PetscInt nnz, i, m; 4301 PetscBool nooffprocentries; 4302 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)mat->data; 4303 Mat_SeqAIJ *Ad = (Mat_SeqAIJ *)Aij->A->data; 4304 Mat_SeqAIJ *Ao = (Mat_SeqAIJ *)Aij->B->data; 4305 PetscScalar *ad, *ao; 4306 const PetscInt *Adi = Ad->i, *Adj = Ao->i; 4307 PetscInt ldi, Iii, md; 4308 PetscInt *ld = Aij->ld; 4309 4310 PetscFunctionBegin; 4311 m = mat->rmap->n; 4312 4313 PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad)); 4314 PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao)); 4315 Iii = 0; 4316 for (i = 0; i < m; i++) { 4317 nnz = Adi[i + 1] - Adi[i] + Adj[i + 1] - Adj[i]; 4318 ldi = ld[i]; 4319 md = Adi[i + 1] - Adi[i]; 4320 PetscCall(PetscArraycpy(ao, v + Iii, ldi)); 4321 PetscCall(PetscArraycpy(ad, v + Iii + ldi, md)); 4322 PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md)); 4323 ad += md; 4324 ao += nnz - md; 4325 Iii += nnz; 4326 } 4327 nooffprocentries = mat->nooffprocentries; 4328 mat->nooffprocentries = PETSC_TRUE; 4329 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad)); 4330 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao)); 4331 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A)); 4332 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B)); 4333 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 4334 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 4335 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 4336 mat->nooffprocentries = nooffprocentries; 4337 PetscFunctionReturn(PETSC_SUCCESS); 4338 } 4339 4340 /*@C 4341 MatCreateAIJ - Creates a sparse parallel matrix in `MATAIJ` format 4342 (the default parallel PETSc format). For good matrix assembly performance 4343 the user should preallocate the matrix storage by setting the parameters 4344 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4345 performance can be increased by more than a factor of 50. 
4346 4347 Collective 4348 4349 Input Parameters: 4350 + comm - MPI communicator 4351 . m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given) 4352 This value should be the same as the local size used in creating the 4353 y vector for the matrix-vector product y = Ax. 4354 . n - This value should be the same as the local size used in creating the 4355 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4356 calculated if N is given) For square matrices n is almost always m. 4357 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 4358 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 4359 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4360 (same value is used for all local rows) 4361 . d_nnz - array containing the number of nonzeros in the various rows of the 4362 DIAGONAL portion of the local submatrix (possibly different for each row) 4363 or NULL, if d_nz is used to specify the nonzero structure. 4364 The size of this array is equal to the number of local rows, i.e 'm'. 4365 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4366 submatrix (same value is used for all local rows). 4367 - o_nnz - array containing the number of nonzeros in the various rows of the 4368 OFF-DIAGONAL portion of the local submatrix (possibly different for 4369 each row) or NULL, if o_nz is used to specify the nonzero 4370 structure. The size of this array is equal to the number 4371 of local rows, i.e 'm'. 4372 4373 Output Parameter: 4374 . A - the matrix 4375 4376 It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, 4377 MatXXXXSetPreallocation() paradigm instead of this routine directly. 
4378 [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`] 4379 4380 Notes: 4381 If the *_nnz parameter is given then the *_nz parameter is ignored 4382 4383 m,n,M,N parameters specify the size of the matrix, and its partitioning across 4384 processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate 4385 storage requirements for this matrix. 4386 4387 If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one 4388 processor than it must be used on all processors that share the object for 4389 that argument. 4390 4391 The user MUST specify either the local or global matrix dimensions 4392 (possibly both). 4393 4394 The parallel matrix is partitioned across processors such that the 4395 first m0 rows belong to process 0, the next m1 rows belong to 4396 process 1, the next m2 rows belong to process 2 etc.. where 4397 m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores 4398 values corresponding to [m x N] submatrix. 4399 4400 The columns are logically partitioned with the n0 columns belonging 4401 to 0th partition, the next n1 columns belonging to the next 4402 partition etc.. where n0,n1,n2... are the input parameter 'n'. 4403 4404 The DIAGONAL portion of the local submatrix on any given processor 4405 is the submatrix corresponding to the rows and columns m,n 4406 corresponding to the given processor. i.e diagonal matrix on 4407 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4408 etc. The remaining portion of the local submatrix [m x (N-n)] 4409 constitute the OFF-DIAGONAL portion. The example below better 4410 illustrates this concept. 4411 4412 For a square global matrix we define each processor's diagonal portion 4413 to be its local rows and the corresponding columns (a square submatrix); 4414 each processor's off-diagonal portion encompasses the remainder of the 4415 local matrix (a rectangular submatrix). 
4416 4417 If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored. 4418 4419 When calling this routine with a single process communicator, a matrix of 4420 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4421 type of communicator, use the construction mechanism 4422 .vb 4423 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4424 .ve 4425 4426 $ MatCreate(...,&A); 4427 $ MatSetType(A,MATMPIAIJ); 4428 $ MatSetSizes(A, m,n,M,N); 4429 $ MatMPIAIJSetPreallocation(A,...); 4430 4431 By default, this format uses inodes (identical nodes) when possible. 4432 We search for consecutive rows with the same nonzero structure, thereby 4433 reusing matrix information to achieve increased efficiency. 4434 4435 Options Database Keys: 4436 + -mat_no_inode - Do not use inodes 4437 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4438 - -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in `MatMult()` of sparse parallel matrices. 4439 See viewer types in manual of `MatView()`. Of them, ascii_matlab, draw or binary cause the vecscatter be viewed as a matrix. 4440 Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one `MatMult()` call. 4441 4442 Example usage: 4443 4444 Consider the following 8x8 matrix with 34 non-zero values, that is 4445 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4446 proc1 owns 3 rows, proc2 owns 2 rows. 
This division can be shown
   as follows

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are
.vb
     proc0 : dnz = 2, o_nz = 2
     proc1 : dnz = 3, o_nz = 2
     proc2 : dnz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2.
i.e we are using 12+15+10=37 storage locations to store 4498 34 values. 4499 4500 When d_nnz, o_nnz parameters are specified, the storage is specified 4501 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4502 In the above case the values for d_nnz,o_nnz are 4503 .vb 4504 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4505 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4506 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4507 .ve 4508 Here the space allocated is sum of all the above values i.e 34, and 4509 hence pre-allocation is perfect. 4510 4511 Level: intermediate 4512 4513 .seealso: [Sparse Matrix Creation](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4514 `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()` 4515 @*/ 4516 PetscErrorCode MatCreateAIJ(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A) 4517 { 4518 PetscMPIInt size; 4519 4520 PetscFunctionBegin; 4521 PetscCall(MatCreate(comm, A)); 4522 PetscCall(MatSetSizes(*A, m, n, M, N)); 4523 PetscCallMPI(MPI_Comm_size(comm, &size)); 4524 if (size > 1) { 4525 PetscCall(MatSetType(*A, MATMPIAIJ)); 4526 PetscCall(MatMPIAIJSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz)); 4527 } else { 4528 PetscCall(MatSetType(*A, MATSEQAIJ)); 4529 PetscCall(MatSeqAIJSetPreallocation(*A, d_nz, d_nnz)); 4530 } 4531 PetscFunctionReturn(PETSC_SUCCESS); 4532 } 4533 4534 /*MC 4535 MatMPIAIJGetSeqAIJF90 - Returns the local pieces of this distributed matrix 4536 4537 Synopsis: 4538 MatMPIAIJGetSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr) 4539 4540 Not Collective 4541 4542 Input Parameter: 4543 . A - the `MATMPIAIJ` matrix 4544 4545 Output Parameters: 4546 + Ad - the diagonal portion of the matrix 4547 . Ao - the off diagonal portion of the matrix 4548 . 
colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix 4549 - ierr - error code 4550 4551 Level: advanced 4552 4553 Note: 4554 Use `MatMPIAIJRestoreSeqAIJF90()` when you no longer need access to the matrices and `colmap` 4555 4556 .seealso: [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJRestoreSeqAIJF90()` 4557 M*/ 4558 4559 /*MC 4560 MatMPIAIJRestoreSeqAIJF90 - call after `MatMPIAIJGetSeqAIJF90()` when you no longer need access to the matrices and `colmap` 4561 4562 Synopsis: 4563 MatMPIAIJRestoreSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr) 4564 4565 Not Collective 4566 4567 Input Parameters: 4568 + A - the `MATMPIAIJ` matrix 4569 . Ad - the diagonal portion of the matrix 4570 . Ao - the off diagonal portion of the matrix 4571 . colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix 4572 - ierr - error code 4573 4574 Level: advanced 4575 4576 .seealso: [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJGetSeqAIJF90()` 4577 M*/ 4578 4579 /*@C 4580 MatMPIAIJGetSeqAIJ - Returns the local pieces of this distributed matrix 4581 4582 Not collective 4583 4584 Input Parameter: 4585 . A - The `MATMPIAIJ` matrix 4586 4587 Output Parameters: 4588 + Ad - The local diagonal block as a `MATSEQAIJ` matrix 4589 . Ao - The local off-diagonal block as a `MATSEQAIJ` matrix 4590 - colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix 4591 4592 Level: intermediate 4593 4594 Note: 4595 The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. The columns 4596 in Ad are in [0, Nc) where Nc is the number of local columns. The columns are Ao are in [0, Nco), where Nco is 4597 the number of nonzero columns in the local off-diagonal piece of the matrix A. 
The array colmap maps these 4598 local column numbers to global column numbers in the original matrix. 4599 4600 Fortran Note: 4601 `MatMPIAIJGetSeqAIJ()` Fortran binding is deprecated (since PETSc 3.19), use `MatMPIAIJGetSeqAIJF90()` 4602 4603 .seealso: `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJF90()`, `MatMPIAIJRestoreSeqAIJF90()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATMPIAIJ`, `MATSEQAIJ` 4604 @*/ 4605 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[]) 4606 { 4607 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 4608 PetscBool flg; 4609 4610 PetscFunctionBegin; 4611 PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &flg)); 4612 PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIAIJ matrix as input"); 4613 if (Ad) *Ad = a->A; 4614 if (Ao) *Ao = a->B; 4615 if (colmap) *colmap = a->garray; 4616 PetscFunctionReturn(PETSC_SUCCESS); 4617 } 4618 4619 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 4620 { 4621 PetscInt m, N, i, rstart, nnz, Ii; 4622 PetscInt *indx; 4623 PetscScalar *values; 4624 MatType rootType; 4625 4626 PetscFunctionBegin; 4627 PetscCall(MatGetSize(inmat, &m, &N)); 4628 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4629 PetscInt *dnz, *onz, sum, bs, cbs; 4630 4631 if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N)); 4632 /* Check sum(n) = N */ 4633 PetscCall(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm)); 4634 PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N); 4635 4636 PetscCallMPI(MPI_Scan(&m, &rstart, 1, MPIU_INT, MPI_SUM, comm)); 4637 rstart -= m; 4638 4639 MatPreallocateBegin(comm, m, n, dnz, onz); 4640 for (i = 0; i < m; i++) { 4641 PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, NULL)); 4642 
PetscCall(MatPreallocateSet(i + rstart, nnz, indx, dnz, onz)); 4643 PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, NULL)); 4644 } 4645 4646 PetscCall(MatCreate(comm, outmat)); 4647 PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 4648 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 4649 PetscCall(MatSetBlockSizes(*outmat, bs, cbs)); 4650 PetscCall(MatGetRootType_Private(inmat, &rootType)); 4651 PetscCall(MatSetType(*outmat, rootType)); 4652 PetscCall(MatSeqAIJSetPreallocation(*outmat, 0, dnz)); 4653 PetscCall(MatMPIAIJSetPreallocation(*outmat, 0, dnz, 0, onz)); 4654 MatPreallocateEnd(dnz, onz); 4655 PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 4656 } 4657 4658 /* numeric phase */ 4659 PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL)); 4660 for (i = 0; i < m; i++) { 4661 PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, &values)); 4662 Ii = i + rstart; 4663 PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES)); 4664 PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, &values)); 4665 } 4666 PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY)); 4667 PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY)); 4668 PetscFunctionReturn(PETSC_SUCCESS); 4669 } 4670 4671 PetscErrorCode MatFileSplit(Mat A, char *outfile) 4672 { 4673 PetscMPIInt rank; 4674 PetscInt m, N, i, rstart, nnz; 4675 size_t len; 4676 const PetscInt *indx; 4677 PetscViewer out; 4678 char *name; 4679 Mat B; 4680 const PetscScalar *values; 4681 4682 PetscFunctionBegin; 4683 PetscCall(MatGetLocalSize(A, &m, NULL)); 4684 PetscCall(MatGetSize(A, NULL, &N)); 4685 /* Should this be the type of the diagonal block of A? 
*/ 4686 PetscCall(MatCreate(PETSC_COMM_SELF, &B)); 4687 PetscCall(MatSetSizes(B, m, N, m, N)); 4688 PetscCall(MatSetBlockSizesFromMats(B, A, A)); 4689 PetscCall(MatSetType(B, MATSEQAIJ)); 4690 PetscCall(MatSeqAIJSetPreallocation(B, 0, NULL)); 4691 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 4692 for (i = 0; i < m; i++) { 4693 PetscCall(MatGetRow(A, i + rstart, &nnz, &indx, &values)); 4694 PetscCall(MatSetValues(B, 1, &i, nnz, indx, values, INSERT_VALUES)); 4695 PetscCall(MatRestoreRow(A, i + rstart, &nnz, &indx, &values)); 4696 } 4697 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 4698 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 4699 4700 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank)); 4701 PetscCall(PetscStrlen(outfile, &len)); 4702 PetscCall(PetscMalloc1(len + 6, &name)); 4703 PetscCall(PetscSNPrintf(name, len + 6, "%s.%d", outfile, rank)); 4704 PetscCall(PetscViewerBinaryOpen(PETSC_COMM_SELF, name, FILE_MODE_APPEND, &out)); 4705 PetscCall(PetscFree(name)); 4706 PetscCall(MatView(B, out)); 4707 PetscCall(PetscViewerDestroy(&out)); 4708 PetscCall(MatDestroy(&B)); 4709 PetscFunctionReturn(PETSC_SUCCESS); 4710 } 4711 4712 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data) 4713 { 4714 Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data; 4715 4716 PetscFunctionBegin; 4717 if (!merge) PetscFunctionReturn(PETSC_SUCCESS); 4718 PetscCall(PetscFree(merge->id_r)); 4719 PetscCall(PetscFree(merge->len_s)); 4720 PetscCall(PetscFree(merge->len_r)); 4721 PetscCall(PetscFree(merge->bi)); 4722 PetscCall(PetscFree(merge->bj)); 4723 PetscCall(PetscFree(merge->buf_ri[0])); 4724 PetscCall(PetscFree(merge->buf_ri)); 4725 PetscCall(PetscFree(merge->buf_rj[0])); 4726 PetscCall(PetscFree(merge->buf_rj)); 4727 PetscCall(PetscFree(merge->coi)); 4728 PetscCall(PetscFree(merge->coj)); 4729 PetscCall(PetscFree(merge->owners_co)); 4730 PetscCall(PetscLayoutDestroy(&merge->rowmap)); 4731 PetscCall(PetscFree(merge)); 4732 
PetscFunctionReturn(PETSC_SUCCESS); 4733 } 4734 4735 #include <../src/mat/utils/freespace.h> 4736 #include <petscbt.h> 4737 4738 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat, Mat mpimat) 4739 { 4740 MPI_Comm comm; 4741 Mat_SeqAIJ *a = (Mat_SeqAIJ *)seqmat->data; 4742 PetscMPIInt size, rank, taga, *len_s; 4743 PetscInt N = mpimat->cmap->N, i, j, *owners, *ai = a->i, *aj; 4744 PetscInt proc, m; 4745 PetscInt **buf_ri, **buf_rj; 4746 PetscInt k, anzi, *bj_i, *bi, *bj, arow, bnzi, nextaj; 4747 PetscInt nrows, **buf_ri_k, **nextrow, **nextai; 4748 MPI_Request *s_waits, *r_waits; 4749 MPI_Status *status; 4750 const MatScalar *aa, *a_a; 4751 MatScalar **abuf_r, *ba_i; 4752 Mat_Merge_SeqsToMPI *merge; 4753 PetscContainer container; 4754 4755 PetscFunctionBegin; 4756 PetscCall(PetscObjectGetComm((PetscObject)mpimat, &comm)); 4757 PetscCall(PetscLogEventBegin(MAT_Seqstompinum, seqmat, 0, 0, 0)); 4758 4759 PetscCallMPI(MPI_Comm_size(comm, &size)); 4760 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 4761 4762 PetscCall(PetscObjectQuery((PetscObject)mpimat, "MatMergeSeqsToMPI", (PetscObject *)&container)); 4763 PetscCheck(container, PetscObjectComm((PetscObject)mpimat), PETSC_ERR_PLIB, "Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic"); 4764 PetscCall(PetscContainerGetPointer(container, (void **)&merge)); 4765 PetscCall(MatSeqAIJGetArrayRead(seqmat, &a_a)); 4766 aa = a_a; 4767 4768 bi = merge->bi; 4769 bj = merge->bj; 4770 buf_ri = merge->buf_ri; 4771 buf_rj = merge->buf_rj; 4772 4773 PetscCall(PetscMalloc1(size, &status)); 4774 owners = merge->rowmap->range; 4775 len_s = merge->len_s; 4776 4777 /* send and recv matrix values */ 4778 /*-----------------------------*/ 4779 PetscCall(PetscObjectGetNewTag((PetscObject)mpimat, &taga)); 4780 PetscCall(PetscPostIrecvScalar(comm, taga, merge->nrecv, merge->id_r, merge->len_r, &abuf_r, &r_waits)); 4781 4782 PetscCall(PetscMalloc1(merge->nsend + 1, &s_waits)); 4783 for (proc = 0, k = 0; proc < size; proc++) { 4784 if 
(!len_s[proc]) continue; 4785 i = owners[proc]; 4786 PetscCallMPI(MPI_Isend(aa + ai[i], len_s[proc], MPIU_MATSCALAR, proc, taga, comm, s_waits + k)); 4787 k++; 4788 } 4789 4790 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, r_waits, status)); 4791 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, s_waits, status)); 4792 PetscCall(PetscFree(status)); 4793 4794 PetscCall(PetscFree(s_waits)); 4795 PetscCall(PetscFree(r_waits)); 4796 4797 /* insert mat values of mpimat */ 4798 /*----------------------------*/ 4799 PetscCall(PetscMalloc1(N, &ba_i)); 4800 PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai)); 4801 4802 for (k = 0; k < merge->nrecv; k++) { 4803 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4804 nrows = *(buf_ri_k[k]); 4805 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4806 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4807 } 4808 4809 /* set values of ba */ 4810 m = merge->rowmap->n; 4811 for (i = 0; i < m; i++) { 4812 arow = owners[rank] + i; 4813 bj_i = bj + bi[i]; /* col indices of the i-th row of mpimat */ 4814 bnzi = bi[i + 1] - bi[i]; 4815 PetscCall(PetscArrayzero(ba_i, bnzi)); 4816 4817 /* add local non-zero vals of this proc's seqmat into ba */ 4818 anzi = ai[arow + 1] - ai[arow]; 4819 aj = a->j + ai[arow]; 4820 aa = a_a + ai[arow]; 4821 nextaj = 0; 4822 for (j = 0; nextaj < anzi; j++) { 4823 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4824 ba_i[j] += aa[nextaj++]; 4825 } 4826 } 4827 4828 /* add received vals into ba */ 4829 for (k = 0; k < merge->nrecv; k++) { /* k-th received message */ 4830 /* i-th row */ 4831 if (i == *nextrow[k]) { 4832 anzi = *(nextai[k] + 1) - *nextai[k]; 4833 aj = buf_rj[k] + *(nextai[k]); 4834 aa = abuf_r[k] + *(nextai[k]); 4835 nextaj = 0; 4836 for (j = 0; nextaj < anzi; j++) { 4837 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4838 
ba_i[j] += aa[nextaj++]; 4839 } 4840 } 4841 nextrow[k]++; 4842 nextai[k]++; 4843 } 4844 } 4845 PetscCall(MatSetValues(mpimat, 1, &arow, bnzi, bj_i, ba_i, INSERT_VALUES)); 4846 } 4847 PetscCall(MatSeqAIJRestoreArrayRead(seqmat, &a_a)); 4848 PetscCall(MatAssemblyBegin(mpimat, MAT_FINAL_ASSEMBLY)); 4849 PetscCall(MatAssemblyEnd(mpimat, MAT_FINAL_ASSEMBLY)); 4850 4851 PetscCall(PetscFree(abuf_r[0])); 4852 PetscCall(PetscFree(abuf_r)); 4853 PetscCall(PetscFree(ba_i)); 4854 PetscCall(PetscFree3(buf_ri_k, nextrow, nextai)); 4855 PetscCall(PetscLogEventEnd(MAT_Seqstompinum, seqmat, 0, 0, 0)); 4856 PetscFunctionReturn(PETSC_SUCCESS); 4857 } 4858 4859 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, Mat *mpimat) 4860 { 4861 Mat B_mpi; 4862 Mat_SeqAIJ *a = (Mat_SeqAIJ *)seqmat->data; 4863 PetscMPIInt size, rank, tagi, tagj, *len_s, *len_si, *len_ri; 4864 PetscInt **buf_rj, **buf_ri, **buf_ri_k; 4865 PetscInt M = seqmat->rmap->n, N = seqmat->cmap->n, i, *owners, *ai = a->i, *aj = a->j; 4866 PetscInt len, proc, *dnz, *onz, bs, cbs; 4867 PetscInt k, anzi, *bi, *bj, *lnk, nlnk, arow, bnzi; 4868 PetscInt nrows, *buf_s, *buf_si, *buf_si_i, **nextrow, **nextai; 4869 MPI_Request *si_waits, *sj_waits, *ri_waits, *rj_waits; 4870 MPI_Status *status; 4871 PetscFreeSpaceList free_space = NULL, current_space = NULL; 4872 PetscBT lnkbt; 4873 Mat_Merge_SeqsToMPI *merge; 4874 PetscContainer container; 4875 4876 PetscFunctionBegin; 4877 PetscCall(PetscLogEventBegin(MAT_Seqstompisym, seqmat, 0, 0, 0)); 4878 4879 /* make sure it is a PETSc comm */ 4880 PetscCall(PetscCommDuplicate(comm, &comm, NULL)); 4881 PetscCallMPI(MPI_Comm_size(comm, &size)); 4882 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 4883 4884 PetscCall(PetscNew(&merge)); 4885 PetscCall(PetscMalloc1(size, &status)); 4886 4887 /* determine row ownership */ 4888 /*---------------------------------------------------------*/ 4889 PetscCall(PetscLayoutCreate(comm, &merge->rowmap)); 4890 
PetscCall(PetscLayoutSetLocalSize(merge->rowmap, m)); 4891 PetscCall(PetscLayoutSetSize(merge->rowmap, M)); 4892 PetscCall(PetscLayoutSetBlockSize(merge->rowmap, 1)); 4893 PetscCall(PetscLayoutSetUp(merge->rowmap)); 4894 PetscCall(PetscMalloc1(size, &len_si)); 4895 PetscCall(PetscMalloc1(size, &merge->len_s)); 4896 4897 m = merge->rowmap->n; 4898 owners = merge->rowmap->range; 4899 4900 /* determine the number of messages to send, their lengths */ 4901 /*---------------------------------------------------------*/ 4902 len_s = merge->len_s; 4903 4904 len = 0; /* length of buf_si[] */ 4905 merge->nsend = 0; 4906 for (proc = 0; proc < size; proc++) { 4907 len_si[proc] = 0; 4908 if (proc == rank) { 4909 len_s[proc] = 0; 4910 } else { 4911 len_si[proc] = owners[proc + 1] - owners[proc] + 1; 4912 len_s[proc] = ai[owners[proc + 1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4913 } 4914 if (len_s[proc]) { 4915 merge->nsend++; 4916 nrows = 0; 4917 for (i = owners[proc]; i < owners[proc + 1]; i++) { 4918 if (ai[i + 1] > ai[i]) nrows++; 4919 } 4920 len_si[proc] = 2 * (nrows + 1); 4921 len += len_si[proc]; 4922 } 4923 } 4924 4925 /* determine the number and length of messages to receive for ij-structure */ 4926 /*-------------------------------------------------------------------------*/ 4927 PetscCall(PetscGatherNumberOfMessages(comm, NULL, len_s, &merge->nrecv)); 4928 PetscCall(PetscGatherMessageLengths2(comm, merge->nsend, merge->nrecv, len_s, len_si, &merge->id_r, &merge->len_r, &len_ri)); 4929 4930 /* post the Irecv of j-structure */ 4931 /*-------------------------------*/ 4932 PetscCall(PetscCommGetNewTag(comm, &tagj)); 4933 PetscCall(PetscPostIrecvInt(comm, tagj, merge->nrecv, merge->id_r, merge->len_r, &buf_rj, &rj_waits)); 4934 4935 /* post the Isend of j-structure */ 4936 /*--------------------------------*/ 4937 PetscCall(PetscMalloc2(merge->nsend, &si_waits, merge->nsend, &sj_waits)); 4938 4939 for (proc = 0, k = 0; proc < size; proc++) { 4940 if 
(!len_s[proc]) continue; 4941 i = owners[proc]; 4942 PetscCallMPI(MPI_Isend(aj + ai[i], len_s[proc], MPIU_INT, proc, tagj, comm, sj_waits + k)); 4943 k++; 4944 } 4945 4946 /* receives and sends of j-structure are complete */ 4947 /*------------------------------------------------*/ 4948 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, rj_waits, status)); 4949 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, sj_waits, status)); 4950 4951 /* send and recv i-structure */ 4952 /*---------------------------*/ 4953 PetscCall(PetscCommGetNewTag(comm, &tagi)); 4954 PetscCall(PetscPostIrecvInt(comm, tagi, merge->nrecv, merge->id_r, len_ri, &buf_ri, &ri_waits)); 4955 4956 PetscCall(PetscMalloc1(len + 1, &buf_s)); 4957 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4958 for (proc = 0, k = 0; proc < size; proc++) { 4959 if (!len_s[proc]) continue; 4960 /* form outgoing message for i-structure: 4961 buf_si[0]: nrows to be sent 4962 [1:nrows]: row index (global) 4963 [nrows+1:2*nrows+1]: i-structure index 4964 */ 4965 /*-------------------------------------------*/ 4966 nrows = len_si[proc] / 2 - 1; 4967 buf_si_i = buf_si + nrows + 1; 4968 buf_si[0] = nrows; 4969 buf_si_i[0] = 0; 4970 nrows = 0; 4971 for (i = owners[proc]; i < owners[proc + 1]; i++) { 4972 anzi = ai[i + 1] - ai[i]; 4973 if (anzi) { 4974 buf_si_i[nrows + 1] = buf_si_i[nrows] + anzi; /* i-structure */ 4975 buf_si[nrows + 1] = i - owners[proc]; /* local row index */ 4976 nrows++; 4977 } 4978 } 4979 PetscCallMPI(MPI_Isend(buf_si, len_si[proc], MPIU_INT, proc, tagi, comm, si_waits + k)); 4980 k++; 4981 buf_si += len_si[proc]; 4982 } 4983 4984 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, ri_waits, status)); 4985 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, si_waits, status)); 4986 4987 PetscCall(PetscInfo(seqmat, "nsend: %d, nrecv: %d\n", merge->nsend, merge->nrecv)); 4988 for (i = 0; i < merge->nrecv; i++) PetscCall(PetscInfo(seqmat, "recv len_ri=%d, 
len_rj=%d from [%d]\n", len_ri[i], merge->len_r[i], merge->id_r[i])); 4989 4990 PetscCall(PetscFree(len_si)); 4991 PetscCall(PetscFree(len_ri)); 4992 PetscCall(PetscFree(rj_waits)); 4993 PetscCall(PetscFree2(si_waits, sj_waits)); 4994 PetscCall(PetscFree(ri_waits)); 4995 PetscCall(PetscFree(buf_s)); 4996 PetscCall(PetscFree(status)); 4997 4998 /* compute a local seq matrix in each processor */ 4999 /*----------------------------------------------*/ 5000 /* allocate bi array and free space for accumulating nonzero column info */ 5001 PetscCall(PetscMalloc1(m + 1, &bi)); 5002 bi[0] = 0; 5003 5004 /* create and initialize a linked list */ 5005 nlnk = N + 1; 5006 PetscCall(PetscLLCreate(N, N, nlnk, lnk, lnkbt)); 5007 5008 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 5009 len = ai[owners[rank + 1]] - ai[owners[rank]]; 5010 PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2, len) + 1, &free_space)); 5011 5012 current_space = free_space; 5013 5014 /* determine symbolic info for each local row */ 5015 PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai)); 5016 5017 for (k = 0; k < merge->nrecv; k++) { 5018 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 5019 nrows = *buf_ri_k[k]; 5020 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 5021 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 5022 } 5023 5024 MatPreallocateBegin(comm, m, n, dnz, onz); 5025 len = 0; 5026 for (i = 0; i < m; i++) { 5027 bnzi = 0; 5028 /* add local non-zero cols of this proc's seqmat into lnk */ 5029 arow = owners[rank] + i; 5030 anzi = ai[arow + 1] - ai[arow]; 5031 aj = a->j + ai[arow]; 5032 PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt)); 5033 bnzi += nlnk; 5034 /* add received col data into lnk */ 5035 for (k = 0; k < merge->nrecv; k++) { /* k-th received message */ 5036 if (i == *nextrow[k]) { /* i-th row */ 5037 anzi = 
*(nextai[k] + 1) - *nextai[k]; 5038 aj = buf_rj[k] + *nextai[k]; 5039 PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt)); 5040 bnzi += nlnk; 5041 nextrow[k]++; 5042 nextai[k]++; 5043 } 5044 } 5045 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 5046 5047 /* if free space is not available, make more free space */ 5048 if (current_space->local_remaining < bnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi, current_space->total_array_size), ¤t_space)); 5049 /* copy data into free space, then initialize lnk */ 5050 PetscCall(PetscLLClean(N, N, bnzi, lnk, current_space->array, lnkbt)); 5051 PetscCall(MatPreallocateSet(i + owners[rank], bnzi, current_space->array, dnz, onz)); 5052 5053 current_space->array += bnzi; 5054 current_space->local_used += bnzi; 5055 current_space->local_remaining -= bnzi; 5056 5057 bi[i + 1] = bi[i] + bnzi; 5058 } 5059 5060 PetscCall(PetscFree3(buf_ri_k, nextrow, nextai)); 5061 5062 PetscCall(PetscMalloc1(bi[m] + 1, &bj)); 5063 PetscCall(PetscFreeSpaceContiguous(&free_space, bj)); 5064 PetscCall(PetscLLDestroy(lnk, lnkbt)); 5065 5066 /* create symbolic parallel matrix B_mpi */ 5067 /*---------------------------------------*/ 5068 PetscCall(MatGetBlockSizes(seqmat, &bs, &cbs)); 5069 PetscCall(MatCreate(comm, &B_mpi)); 5070 if (n == PETSC_DECIDE) { 5071 PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, N)); 5072 } else { 5073 PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 5074 } 5075 PetscCall(MatSetBlockSizes(B_mpi, bs, cbs)); 5076 PetscCall(MatSetType(B_mpi, MATMPIAIJ)); 5077 PetscCall(MatMPIAIJSetPreallocation(B_mpi, 0, dnz, 0, onz)); 5078 MatPreallocateEnd(dnz, onz); 5079 PetscCall(MatSetOption(B_mpi, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE)); 5080 5081 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 5082 B_mpi->assembled = PETSC_FALSE; 5083 merge->bi = bi; 5084 merge->bj = bj; 5085 merge->buf_ri = buf_ri; 5086 merge->buf_rj = buf_rj; 5087 merge->coi = 
NULL; 5088 merge->coj = NULL; 5089 merge->owners_co = NULL; 5090 5091 PetscCall(PetscCommDestroy(&comm)); 5092 5093 /* attach the supporting struct to B_mpi for reuse */ 5094 PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container)); 5095 PetscCall(PetscContainerSetPointer(container, merge)); 5096 PetscCall(PetscContainerSetUserDestroy(container, MatDestroy_MPIAIJ_SeqsToMPI)); 5097 PetscCall(PetscObjectCompose((PetscObject)B_mpi, "MatMergeSeqsToMPI", (PetscObject)container)); 5098 PetscCall(PetscContainerDestroy(&container)); 5099 *mpimat = B_mpi; 5100 5101 PetscCall(PetscLogEventEnd(MAT_Seqstompisym, seqmat, 0, 0, 0)); 5102 PetscFunctionReturn(PETSC_SUCCESS); 5103 } 5104 5105 /*@C 5106 MatCreateMPIAIJSumSeqAIJ - Creates a `MATMPIAIJ` matrix by adding sequential 5107 matrices from each processor 5108 5109 Collective 5110 5111 Input Parameters: 5112 + comm - the communicators the parallel matrix will live on 5113 . seqmat - the input sequential matrices 5114 . m - number of local rows (or `PETSC_DECIDE`) 5115 . n - number of local columns (or `PETSC_DECIDE`) 5116 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5117 5118 Output Parameter: 5119 . mpimat - the parallel matrix generated 5120 5121 Level: advanced 5122 5123 Note: 5124 The dimensions of the sequential matrix in each processor MUST be the same. 5125 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 5126 destroyed when mpimat is destroyed. Call `PetscObjectQuery()` to access seqmat. 
5127 @*/ 5128 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, MatReuse scall, Mat *mpimat) 5129 { 5130 PetscMPIInt size; 5131 5132 PetscFunctionBegin; 5133 PetscCallMPI(MPI_Comm_size(comm, &size)); 5134 if (size == 1) { 5135 PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0)); 5136 if (scall == MAT_INITIAL_MATRIX) { 5137 PetscCall(MatDuplicate(seqmat, MAT_COPY_VALUES, mpimat)); 5138 } else { 5139 PetscCall(MatCopy(seqmat, *mpimat, SAME_NONZERO_PATTERN)); 5140 } 5141 PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0)); 5142 PetscFunctionReturn(PETSC_SUCCESS); 5143 } 5144 PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0)); 5145 if (scall == MAT_INITIAL_MATRIX) PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm, seqmat, m, n, mpimat)); 5146 PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat, *mpimat)); 5147 PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0)); 5148 PetscFunctionReturn(PETSC_SUCCESS); 5149 } 5150 5151 /*@ 5152 MatAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATAIJ` matrix by taking all its local rows and putting them into a sequential matrix with 5153 mlocal rows and n columns. Where mlocal is the row count obtained with `MatGetLocalSize()` and n is the global column count obtained 5154 with `MatGetSize()` 5155 5156 Not Collective 5157 5158 Input Parameters: 5159 + A - the matrix 5160 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5161 5162 Output Parameter: 5163 . A_loc - the local sequential matrix generated 5164 5165 Level: developer 5166 5167 Notes: 5168 In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix. 
5169 5170 Destroy the matrix with `MatDestroy()` 5171 5172 .seealso: `MatMPIAIJGetLocalMat()` 5173 @*/ 5174 PetscErrorCode MatAIJGetLocalMat(Mat A, Mat *A_loc) 5175 { 5176 PetscBool mpi; 5177 5178 PetscFunctionBegin; 5179 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &mpi)); 5180 if (mpi) { 5181 PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, A_loc)); 5182 } else { 5183 *A_loc = A; 5184 PetscCall(PetscObjectReference((PetscObject)*A_loc)); 5185 } 5186 PetscFunctionReturn(PETSC_SUCCESS); 5187 } 5188 5189 /*@ 5190 MatMPIAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into a sequential matrix with 5191 mlocal rows and n columns. Where mlocal is the row count obtained with `MatGetLocalSize()` and n is the global column count obtained 5192 with `MatGetSize()` 5193 5194 Not Collective 5195 5196 Input Parameters: 5197 + A - the matrix 5198 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5199 5200 Output Parameter: 5201 . A_loc - the local sequential matrix generated 5202 5203 Level: developer 5204 5205 Notes: 5206 In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix. 5207 5208 When the communicator associated with A has size 1 and `MAT_INITIAL_MATRIX` is requested, the matrix returned is the diagonal part of A. 5209 If `MAT_REUSE_MATRIX` is requested with comm size 1, `MatCopy`(Adiag,*A_loc,`SAME_NONZERO_PATTERN`) is called. 5210 This means that one can preallocate the proper sequential matrix first and then call this routine with `MAT_REUSE_MATRIX` to safely 5211 modify the values of the returned A_loc. 

.seealso: `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()`
@*/
PetscErrorCode MatMPIAIJGetLocalMat(Mat A, MatReuse scall, Mat *A_loc)
{
  Mat_MPIAIJ        *mpimat = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ        *mat, *a, *b;
  PetscInt          *ai, *aj, *bi, *bj, *cmap = mpimat->garray; /* cmap: local off-diag column -> global column */
  const PetscScalar *aa, *ba, *aav, *bav;
  PetscScalar       *ca, *cam;
  PetscMPIInt        size;
  PetscInt           am = A->rmap->n, i, j, k, cstart = A->cmap->rstart;
  PetscInt          *ci, *cj, col, ncols_d, ncols_o, jo;
  PetscBool          match;

  PetscFunctionBegin;
  /* accept any type whose name begins with "mpiaij" (e.g. GPU subclasses share the prefix) */
  PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &match));
  PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    /* uniprocessor: the diagonal block is the whole local matrix */
    if (scall == MAT_INITIAL_MATRIX) {
      PetscCall(PetscObjectReference((PetscObject)mpimat->A));
      *A_loc = mpimat->A;
    } else if (scall == MAT_REUSE_MATRIX) {
      PetscCall(MatCopy(mpimat->A, *A_loc, SAME_NONZERO_PATTERN));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
  a  = (Mat_SeqAIJ *)(mpimat->A)->data;
  b  = (Mat_SeqAIJ *)(mpimat->B)->data;
  ai = a->i;
  aj = a->j;
  bi = b->i;
  bj = b->j;
  PetscCall(MatSeqAIJGetArrayRead(mpimat->A, &aav));
  PetscCall(MatSeqAIJGetArrayRead(mpimat->B, &bav));
  aa = aav;
  ba = bav;
  if (scall == MAT_INITIAL_MATRIX) {
    /* row i of the result has all diag-block and off-diag-block entries of row i */
    PetscCall(PetscMalloc1(1 + am, &ci));
    ci[0] = 0;
    for (i = 0; i < am; i++) ci[i + 1] = ci[i] + (ai[i + 1] - ai[i]) + (bi[i + 1] - bi[i]);
    PetscCall(PetscMalloc1(1 + ci[am], &cj));
    PetscCall(PetscMalloc1(1 + ci[am], &ca));
    k = 0;
    /* merge so that each output row is in ascending global column order;
       NOTE(review): this relies on garray mapping to ascending global columns — the leading
       off-diag entries (global col < cstart) come first, the rest after the diag block */
    for (i = 0; i < am; i++) {
      ncols_o = bi[i + 1] - bi[i];
      ncols_d = ai[i + 1] - ai[i];
      /* off-diagonal portion of A (columns left of the diagonal block) */
      for (jo = 0; jo < ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        cj[k] = col;
        bj++;
        ca[k++] = *ba++;
      }
      /* diagonal portion of A (shift local columns by cstart to make them global) */
      for (j = 0; j < ncols_d; j++) {
        cj[k]   = cstart + *aj++;
        ca[k++] = *aa++;
      }
      /* off-diagonal portion of A (columns right of the diagonal block) */
      for (j = jo; j < ncols_o; j++) {
        cj[k]   = cmap[*bj++];
        ca[k++] = *ba++;
      }
    }
    /* put together the new matrix */
    PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, A->cmap->N, ci, cj, ca, A_loc));
    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    mat          = (Mat_SeqAIJ *)(*A_loc)->data;
    mat->free_a  = PETSC_TRUE;
    mat->free_ij = PETSC_TRUE;
    mat->nonew   = 0;
  } else if (scall == MAT_REUSE_MATRIX) {
    /* structure already exists; only the values are refilled, in the same merge order as above */
    mat = (Mat_SeqAIJ *)(*A_loc)->data;
    ci  = mat->i;
    cj  = mat->j;
    PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &cam));
    for (i = 0; i < am; i++) {
      /* off-diagonal portion of A */
      ncols_o = bi[i + 1] - bi[i];
      for (jo = 0; jo < ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        *cam++ = *ba++;
        bj++;
      }
      /* diagonal portion of A */
      ncols_d = ai[i + 1] - ai[i];
      for (j = 0; j < ncols_d; j++) *cam++ = *aa++;
      /* off-diagonal portion of A */
      for (j = jo; j < ncols_o; j++) {
        *cam++ = *ba++;
        bj++;
      }
    }
    PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &cam));
  } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
  PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A, &aav));
  PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B, &bav));
  PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMPIAIJGetLocalMatMerge - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local
rows and putting them into a sequential matrix with 5322 mlocal rows and n columns. Where n is the sum of the number of columns of the diagonal and offdiagonal part 5323 5324 Not Collective 5325 5326 Input Parameters: 5327 + A - the matrix 5328 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5329 5330 Output Parameters: 5331 + glob - sequential `IS` with global indices associated with the columns of the local sequential matrix generated (can be NULL) 5332 - A_loc - the local sequential matrix generated 5333 5334 Level: developer 5335 5336 Note: 5337 This is different from `MatMPIAIJGetLocalMat()` since the first columns in the returning matrix are those associated with the diagonal part, then those associated with the off diagonal part (in its local ordering) 5338 5339 .seealso: `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()` 5340 @*/ 5341 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A, MatReuse scall, IS *glob, Mat *A_loc) 5342 { 5343 Mat Ao, Ad; 5344 const PetscInt *cmap; 5345 PetscMPIInt size; 5346 PetscErrorCode (*f)(Mat, MatReuse, IS *, Mat *); 5347 5348 PetscFunctionBegin; 5349 PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &cmap)); 5350 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 5351 if (size == 1) { 5352 if (scall == MAT_INITIAL_MATRIX) { 5353 PetscCall(PetscObjectReference((PetscObject)Ad)); 5354 *A_loc = Ad; 5355 } else if (scall == MAT_REUSE_MATRIX) { 5356 PetscCall(MatCopy(Ad, *A_loc, SAME_NONZERO_PATTERN)); 5357 } 5358 if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad), Ad->cmap->n, Ad->cmap->rstart, 1, glob)); 5359 PetscFunctionReturn(PETSC_SUCCESS); 5360 } 5361 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJGetLocalMatMerge_C", &f)); 5362 PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0)); 5363 if (f) { 5364 PetscCall((*f)(A, scall, glob, A_loc)); 5365 } else { 5366 Mat_SeqAIJ *a = (Mat_SeqAIJ *)Ad->data; 5367 Mat_SeqAIJ *b = 
(Mat_SeqAIJ *)Ao->data; 5368 Mat_SeqAIJ *c; 5369 PetscInt *ai = a->i, *aj = a->j; 5370 PetscInt *bi = b->i, *bj = b->j; 5371 PetscInt *ci, *cj; 5372 const PetscScalar *aa, *ba; 5373 PetscScalar *ca; 5374 PetscInt i, j, am, dn, on; 5375 5376 PetscCall(MatGetLocalSize(Ad, &am, &dn)); 5377 PetscCall(MatGetLocalSize(Ao, NULL, &on)); 5378 PetscCall(MatSeqAIJGetArrayRead(Ad, &aa)); 5379 PetscCall(MatSeqAIJGetArrayRead(Ao, &ba)); 5380 if (scall == MAT_INITIAL_MATRIX) { 5381 PetscInt k; 5382 PetscCall(PetscMalloc1(1 + am, &ci)); 5383 PetscCall(PetscMalloc1(ai[am] + bi[am], &cj)); 5384 PetscCall(PetscMalloc1(ai[am] + bi[am], &ca)); 5385 ci[0] = 0; 5386 for (i = 0, k = 0; i < am; i++) { 5387 const PetscInt ncols_o = bi[i + 1] - bi[i]; 5388 const PetscInt ncols_d = ai[i + 1] - ai[i]; 5389 ci[i + 1] = ci[i] + ncols_o + ncols_d; 5390 /* diagonal portion of A */ 5391 for (j = 0; j < ncols_d; j++, k++) { 5392 cj[k] = *aj++; 5393 ca[k] = *aa++; 5394 } 5395 /* off-diagonal portion of A */ 5396 for (j = 0; j < ncols_o; j++, k++) { 5397 cj[k] = dn + *bj++; 5398 ca[k] = *ba++; 5399 } 5400 } 5401 /* put together the new matrix */ 5402 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, dn + on, ci, cj, ca, A_loc)); 5403 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5404 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5405 c = (Mat_SeqAIJ *)(*A_loc)->data; 5406 c->free_a = PETSC_TRUE; 5407 c->free_ij = PETSC_TRUE; 5408 c->nonew = 0; 5409 PetscCall(MatSetType(*A_loc, ((PetscObject)Ad)->type_name)); 5410 } else if (scall == MAT_REUSE_MATRIX) { 5411 PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &ca)); 5412 for (i = 0; i < am; i++) { 5413 const PetscInt ncols_d = ai[i + 1] - ai[i]; 5414 const PetscInt ncols_o = bi[i + 1] - bi[i]; 5415 /* diagonal portion of A */ 5416 for (j = 0; j < ncols_d; j++) *ca++ = *aa++; 5417 /* off-diagonal portion of A */ 5418 for (j = 0; j < ncols_o; j++) *ca++ = *ba++; 5419 } 5420 PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &ca)); 5421 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall); 5422 PetscCall(MatSeqAIJRestoreArrayRead(Ad, &aa)); 5423 PetscCall(MatSeqAIJRestoreArrayRead(Ao, &aa)); 5424 if (glob) { 5425 PetscInt cst, *gidx; 5426 5427 PetscCall(MatGetOwnershipRangeColumn(A, &cst, NULL)); 5428 PetscCall(PetscMalloc1(dn + on, &gidx)); 5429 for (i = 0; i < dn; i++) gidx[i] = cst + i; 5430 for (i = 0; i < on; i++) gidx[i + dn] = cmap[i]; 5431 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad), dn + on, gidx, PETSC_OWN_POINTER, glob)); 5432 } 5433 } 5434 PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0)); 5435 PetscFunctionReturn(PETSC_SUCCESS); 5436 } 5437 5438 /*@C 5439 MatMPIAIJGetLocalMatCondensed - Creates a `MATSEQAIJ` matrix from an `MATMPIAIJ` matrix by taking all its local rows and NON-ZERO columns 5440 5441 Not Collective 5442 5443 Input Parameters: 5444 + A - the matrix 5445 . scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5446 - row, col - index sets of rows and columns to extract (or NULL) 5447 5448 Output Parameter: 5449 . 
A_loc - the local sequential matrix generated

  Level: developer

.seealso: `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`
@*/
PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A, MatReuse scall, IS *row, IS *col, Mat *A_loc)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  PetscInt    i, start, end, ncols, nzA, nzB, *cmap, imark, *idx;
  IS          isrowa, iscola;
  Mat        *aloc;
  PetscBool   match;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &match));
  PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
  PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed, A, 0, 0, 0));
  if (!row) {
    /* default row set: this process's owned rows */
    start = A->rmap->rstart;
    end   = A->rmap->rend;
    PetscCall(ISCreateStride(PETSC_COMM_SELF, end - start, start, 1, &isrowa));
  } else {
    isrowa = *row;
  }
  if (!col) {
    /* default column set: owned columns plus the off-diagonal (garray) columns,
       merged in ascending global order */
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    PetscCall(PetscMalloc1(nzA + nzB, &idx));
    ncols = 0;
    for (i = 0; i < nzB; i++) {
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i = 0; i < nzA; i++) idx[ncols++] = start + i;
    for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i];
    PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &iscola));
  } else {
    iscola = *col;
  }
  if (scall != MAT_INITIAL_MATRIX) {
    /* MatCreateSubMatrices() expects an array of matrices on reuse */
    PetscCall(PetscMalloc1(1, &aloc));
    aloc[0] = *A_loc;
  }
  PetscCall(MatCreateSubMatrices(A, 1, &isrowa, &iscola, scall, &aloc));
  if (!col) { /* attach global id of condensed columns */
    PetscCall(PetscObjectCompose((PetscObject)aloc[0], "_petsc_GetLocalMatCondensed_iscol", (PetscObject)iscola));
  }
  *A_loc = aloc[0];
  PetscCall(PetscFree(aloc));
  if (!row) PetscCall(ISDestroy(&isrowa));
  if (!col) PetscCall(ISDestroy(&iscola));
  PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed, A, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
 * Create a sequential AIJ matrix based on row indices: a whole row is extracted once it is matched.
 * Rows could be local or remote. The routine is designed to be scalable in memory so that nothing is based
 * on a global size.
 * */
PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P, IS rows, Mat *P_oth)
{
  Mat_MPIAIJ            *p = (Mat_MPIAIJ *)P->data;
  Mat_SeqAIJ            *pd = (Mat_SeqAIJ *)(p->A)->data, *po = (Mat_SeqAIJ *)(p->B)->data, *p_oth;
  PetscInt               plocalsize, nrows, *ilocal, *oilocal, i, lidx, *nrcols, *nlcols, ncol;
  PetscMPIInt            owner;
  PetscSFNode           *iremote, *oiremote;
  const PetscInt        *lrowindices;
  PetscSF                sf, osf;
  PetscInt               pcstart, *roffsets, *loffsets, *pnnz, j;
  PetscInt               ontotalcols, dntotalcols, ntotalcols, nout;
  MPI_Comm               comm;
  ISLocalToGlobalMapping mapping;
  const PetscScalar     *pd_a, *po_a;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)P, &comm));
  /* plocalsize is the number of roots
   * nrows is the number of leaves
   * */
  PetscCall(MatGetLocalSize(P, &plocalsize, NULL));
  PetscCall(ISGetLocalSize(rows, &nrows));
  PetscCall(PetscCalloc1(nrows, &iremote));
  PetscCall(ISGetIndices(rows, &lrowindices));
  for (i = 0; i < nrows; i++) {
    /* Find a remote index and an owner for a row
     * The row could be local or remote
     * */
    owner = 0;
    lidx  = 0;
    PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, &lidx));
    iremote[i].index = lidx;
    iremote[i].rank  = owner;
  }
  /* Create SF to communicate how many nonzero columns for each row */
  PetscCall(PetscSFCreate(comm, &sf));
  /* SF will figure out the number of nonzero columns for each row, and their
   * offsets
   * */
  PetscCall(PetscSFSetGraph(sf, plocalsize, nrows, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFSetUp(sf));

  PetscCall(PetscCalloc1(2 * (plocalsize + 1), &roffsets));
  PetscCall(PetscCalloc1(2 * plocalsize, &nrcols));
  PetscCall(PetscCalloc1(nrows, &pnnz));
  roffsets[0] = 0;
  roffsets[1] = 0;
  for (i = 0; i < plocalsize; i++) {
    /* diag */
    nrcols[i * 2 + 0] = pd->i[i + 1] - pd->i[i];
    /* off diag */
    nrcols[i * 2 + 1] = po->i[i + 1] - po->i[i];
    /* compute offsets so that we know the relative location of each row's data */
    roffsets[(i + 1) * 2 + 0] = roffsets[i * 2 + 0] + nrcols[i * 2 + 0];
    roffsets[(i + 1) * 2 + 1] = roffsets[i * 2 + 1] + nrcols[i * 2 + 1];
  }
  PetscCall(PetscCalloc1(2 * nrows, &nlcols));
  PetscCall(PetscCalloc1(2 * nrows, &loffsets));
  /* 'r' means root, and 'l' means leaf */
  PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));
  PetscCall(PetscFree(roffsets));
  PetscCall(PetscFree(nrcols));
  dntotalcols = 0;
  ontotalcols = 0;
  ncol        = 0;
  for (i = 0; i < nrows; i++) {
    pnnz[i] = nlcols[i * 2 + 0] + nlcols[i * 2 + 1];
    ncol    = PetscMax(pnnz[i], ncol);
    /* diag */
    dntotalcols += nlcols[i * 2 + 0];
    /* off diag */
    ontotalcols += nlcols[i * 2 + 1];
  }
  /* We do not need to figure the right number of columns
   * since all the calculations will be done by going through the raw data
   * */
  PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, nrows, ncol, 0, pnnz, P_oth));
  PetscCall(MatSetUp(*P_oth));
  PetscCall(PetscFree(pnnz));
  p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
  /* diag */
  PetscCall(PetscCalloc1(dntotalcols, &iremote));
  /* off diag */
  PetscCall(PetscCalloc1(ontotalcols, &oiremote));
  /* diag */
  PetscCall(PetscCalloc1(dntotalcols, &ilocal));
  /* off diag */
  PetscCall(PetscCalloc1(ontotalcols, &oilocal));
  dntotalcols = 0;
  ontotalcols = 0;
  ntotalcols  = 0;
  for (i = 0; i < nrows; i++) {
    owner = 0;
    PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, NULL));
    /* Set iremote for diag matrix */
    for (j = 0; j < nlcols[i * 2 + 0]; j++) {
      iremote[dntotalcols].index = loffsets[i * 2 + 0] + j;
      iremote[dntotalcols].rank  = owner;
      /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */
      ilocal[dntotalcols++] = ntotalcols++;
    }
    /* off diag */
    for (j = 0; j < nlcols[i * 2 + 1]; j++) {
      oiremote[ontotalcols].index = loffsets[i * 2 + 1] + j;
      oiremote[ontotalcols].rank  = owner;
      oilocal[ontotalcols++]      = ntotalcols++;
    }
  }
  PetscCall(ISRestoreIndices(rows, &lrowindices));
  PetscCall(PetscFree(loffsets));
  PetscCall(PetscFree(nlcols));
  PetscCall(PetscSFCreate(comm, &sf));
  /* P serves as roots and P_oth is leaves
   * Diag matrix
   * */
  PetscCall(PetscSFSetGraph(sf, pd->i[plocalsize], dntotalcols, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFSetUp(sf));

  PetscCall(PetscSFCreate(comm, &osf));
  /* Off diag */
  PetscCall(PetscSFSetGraph(osf, po->i[plocalsize], ontotalcols, oilocal, PETSC_OWN_POINTER, oiremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(osf));
  PetscCall(PetscSFSetUp(osf));
  PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
  PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
  /* We operate on the matrix internal data for saving memory */
  PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
  PetscCall(MatGetOwnershipRangeColumn(P, &pcstart, NULL));
  /* Convert to global indices for diag matrix; pd->j is temporarily mutated in place
     and restored below after the broadcast has been posted */
  for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] += pcstart;
  PetscCall(PetscSFBcastBegin(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
  /* We want P_oth store global indices */
  PetscCall(ISLocalToGlobalMappingCreate(comm, 1, p->B->cmap->n, p->garray, PETSC_COPY_VALUES, &mapping));
  /* Use memory scalable approach */
  PetscCall(ISLocalToGlobalMappingSetType(mapping, ISLOCALTOGLOBALMAPPINGHASH));
  /* po->j is likewise converted to global in place and mapped back to local further below */
  PetscCall(ISLocalToGlobalMappingApply(mapping, po->i[plocalsize], po->j, po->j));
  PetscCall(PetscSFBcastBegin(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
  /* Convert back to local indices */
  for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] -= pcstart;
  PetscCall(PetscSFBcastEnd(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
  nout = 0;
  PetscCall(ISGlobalToLocalMappingApply(mapping, IS_GTOLM_DROP, po->i[plocalsize], po->j, &nout, po->j));
  PetscCheck(nout == po->i[plocalsize], comm, PETSC_ERR_ARG_INCOMP, "n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ", po->i[plocalsize], nout);
  PetscCall(ISLocalToGlobalMappingDestroy(&mapping));
  /* Exchange values */
  PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
  PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
  PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
  /* Stop PETSc from shrinking memory */
  for (i = 0; i < nrows; i++) p_oth->ilen[i] = p_oth->imax[i];
  PetscCall(MatAssemblyBegin(*P_oth, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*P_oth, MAT_FINAL_ASSEMBLY));
  /* Attach PetscSF objects to P_oth so that we can reuse it later */
  PetscCall(PetscObjectCompose((PetscObject)*P_oth, "diagsf", (PetscObject)sf));
  PetscCall(PetscObjectCompose((PetscObject)*P_oth, "offdiagsf", (PetscObject)osf));
  PetscCall(PetscSFDestroy(&sf));
  PetscCall(PetscSFDestroy(&osf));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
 * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
 * This supports MPIAIJ and MAIJ
 * */
PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A, Mat P, PetscInt dof, MatReuse reuse, Mat *P_oth)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *p = (Mat_MPIAIJ *)P->data;
  Mat_SeqAIJ *p_oth;
  IS          rows, map;
  PetscHMapI  hamp;
  PetscInt    i, htsize, *rowindices, off, *mapping, key, count;
  MPI_Comm    comm;
  PetscSF     sf, osf;
  PetscBool   has;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, P, 0, 0));
  /* If it is the first time, create an index set of off-diag nonzero columns of A,
   * and then create a submatrix (that often is an overlapping matrix)
   * */
  if (reuse == MAT_INITIAL_MATRIX) {
    /* Use a hash table to figure out unique keys */
    PetscCall(PetscHMapICreateWithSize(a->B->cmap->n, &hamp));
    PetscCall(PetscCalloc1(a->B->cmap->n, &mapping));
    count = 0;
    /* Assume that a->garray is sorted, otherwise the following does not make sense */
    for (i = 0; i < a->B->cmap->n; i++) {
      key = a->garray[i] / dof;
      PetscCall(PetscHMapIHas(hamp, key, &has));
      if (!has) {
        mapping[i] = count;
        PetscCall(PetscHMapISet(hamp, key, count++));
      } else {
        /* Current 'i' has the same value the previous step */
        mapping[i] = count - 1;
      }
    }
    PetscCall(ISCreateGeneral(comm, a->B->cmap->n, mapping, PETSC_OWN_POINTER, &map));
    PetscCall(PetscHMapIGetSize(hamp, &htsize));
    PetscCheck(htsize == count, comm,
               PETSC_ERR_ARG_INCOMP, " Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT, htsize, count);
    PetscCall(PetscCalloc1(htsize, &rowindices));
    off = 0;
    PetscCall(PetscHMapIGetKeys(hamp, &off, rowindices));
    PetscCall(PetscHMapIDestroy(&hamp));
    PetscCall(PetscSortInt(htsize, rowindices));
    PetscCall(ISCreateGeneral(comm, htsize, rowindices, PETSC_OWN_POINTER, &rows));
    /* In case, the matrix was already created but users want to recreate the matrix */
    PetscCall(MatDestroy(P_oth));
    PetscCall(MatCreateSeqSubMatrixWithRows_Private(P, rows, P_oth));
    PetscCall(PetscObjectCompose((PetscObject)*P_oth, "aoffdiagtopothmapping", (PetscObject)map));
    PetscCall(ISDestroy(&map));
    PetscCall(ISDestroy(&rows));
  } else if (reuse == MAT_REUSE_MATRIX) {
    /* If matrix was already created, we simply update values using SF objects
     * that were attached to the matrix earlier.
     */
    const PetscScalar *pd_a, *po_a;

    PetscCall(PetscObjectQuery((PetscObject)*P_oth, "diagsf", (PetscObject *)&sf));
    PetscCall(PetscObjectQuery((PetscObject)*P_oth, "offdiagsf", (PetscObject *)&osf));
    PetscCheck(sf && osf, comm, PETSC_ERR_ARG_NULL, "Matrix is not initialized yet");
    p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
    /* Update values in place */
    PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
    PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
    PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
    PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
    PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
    PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
  } else SETERRQ(comm, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown reuse type");
  PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, P, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatGetBrowsOfAcols - Returns `IS` that contain rows of B that equal to nonzero columns of local A

  Collective

  Input Parameters:
+ A - the first matrix in `MATMPIAIJ` format
. B - the second matrix in `MATMPIAIJ` format
- scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`

  Output Parameters:
+ rowb - On input index sets of rows of B to extract (or NULL), modified on output
. colb - On input index sets of columns of B to extract (or NULL), modified on output
- B_seq - the sequential matrix generated

  Level: developer

@*/
PetscErrorCode MatGetBrowsOfAcols(Mat A, Mat B, MatReuse scall, IS *rowb, IS *colb, Mat *B_seq)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  PetscInt   *idx, i, start, ncols, nzA, nzB, *cmap, imark;
  IS          isrowb, iscolb;
  Mat        *bseq = NULL;

  PetscFunctionBegin;
  /* B's row layout must line up with A's column layout for A*B to make sense */
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
  }
  PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols, A, B, 0, 0));

  if (scall == MAT_INITIAL_MATRIX) {
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    PetscCall(PetscMalloc1(nzA + nzB, &idx));
    ncols = 0;
    for (i = 0; i < nzB; i++) { /* row < local row index */
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i = 0; i < nzA; i++) idx[ncols++] = start + i;   /* local rows */
    for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
    PetscCall(ISCreateGeneral(PETSC_COMM_SELF,
ncols, idx, PETSC_OWN_POINTER, &isrowb)); 5805 PetscCall(ISCreateStride(PETSC_COMM_SELF, B->cmap->N, 0, 1, &iscolb)); 5806 } else { 5807 PetscCheck(rowb && colb, PETSC_COMM_SELF, PETSC_ERR_SUP, "IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5808 isrowb = *rowb; 5809 iscolb = *colb; 5810 PetscCall(PetscMalloc1(1, &bseq)); 5811 bseq[0] = *B_seq; 5812 } 5813 PetscCall(MatCreateSubMatrices(B, 1, &isrowb, &iscolb, scall, &bseq)); 5814 *B_seq = bseq[0]; 5815 PetscCall(PetscFree(bseq)); 5816 if (!rowb) { 5817 PetscCall(ISDestroy(&isrowb)); 5818 } else { 5819 *rowb = isrowb; 5820 } 5821 if (!colb) { 5822 PetscCall(ISDestroy(&iscolb)); 5823 } else { 5824 *colb = iscolb; 5825 } 5826 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols, A, B, 0, 0)); 5827 PetscFunctionReturn(PETSC_SUCCESS); 5828 } 5829 5830 /* 5831 MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns 5832 of the OFF-DIAGONAL portion of local A 5833 5834 Collective 5835 5836 Input Parameters: 5837 + A,B - the matrices in mpiaij format 5838 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5839 5840 Output Parameter: 5841 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5842 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5843 . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5844 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5845 5846 Developer Note: 5847 This directly accesses information inside the VecScatter associated with the matrix-vector product 5848 for this matrix. This is not desirable.. 
5849 5850 Level: developer 5851 5852 */ 5853 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A, Mat B, MatReuse scall, PetscInt **startsj_s, PetscInt **startsj_r, MatScalar **bufa_ptr, Mat *B_oth) 5854 { 5855 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5856 Mat_SeqAIJ *b_oth; 5857 VecScatter ctx; 5858 MPI_Comm comm; 5859 const PetscMPIInt *rprocs, *sprocs; 5860 const PetscInt *srow, *rstarts, *sstarts; 5861 PetscInt *rowlen, *bufj, *bufJ, ncols = 0, aBn = a->B->cmap->n, row, *b_othi, *b_othj, *rvalues = NULL, *svalues = NULL, *cols, sbs, rbs; 5862 PetscInt i, j, k = 0, l, ll, nrecvs, nsends, nrows, *rstartsj = NULL, *sstartsj, len; 5863 PetscScalar *b_otha, *bufa, *bufA, *vals = NULL; 5864 MPI_Request *reqs = NULL, *rwaits = NULL, *swaits = NULL; 5865 PetscMPIInt size, tag, rank, nreqs; 5866 5867 PetscFunctionBegin; 5868 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 5869 PetscCallMPI(MPI_Comm_size(comm, &size)); 5870 5871 if (PetscUnlikely(A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)) { 5872 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 5873 } 5874 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, B, 0, 0)); 5875 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 5876 5877 if (size == 1) { 5878 startsj_s = NULL; 5879 bufa_ptr = NULL; 5880 *B_oth = NULL; 5881 PetscFunctionReturn(PETSC_SUCCESS); 5882 } 5883 5884 ctx = a->Mvctx; 5885 tag = ((PetscObject)ctx)->tag; 5886 5887 PetscCall(VecScatterGetRemote_Private(ctx, PETSC_TRUE /*send*/, &nsends, &sstarts, &srow, &sprocs, &sbs)); 5888 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5889 PetscCall(VecScatterGetRemoteOrdered_Private(ctx, PETSC_FALSE /*recv*/, &nrecvs, &rstarts, NULL /*indices not needed*/, &rprocs, &rbs)); 
5890 PetscCall(PetscMPIIntCast(nsends + nrecvs, &nreqs)); 5891 PetscCall(PetscMalloc1(nreqs, &reqs)); 5892 rwaits = reqs; 5893 swaits = reqs + nrecvs; 5894 5895 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5896 if (scall == MAT_INITIAL_MATRIX) { 5897 /* i-array */ 5898 /*---------*/ 5899 /* post receives */ 5900 if (nrecvs) PetscCall(PetscMalloc1(rbs * (rstarts[nrecvs] - rstarts[0]), &rvalues)); /* rstarts can be NULL when nrecvs=0 */ 5901 for (i = 0; i < nrecvs; i++) { 5902 rowlen = rvalues + rstarts[i] * rbs; 5903 nrows = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of indices to be received */ 5904 PetscCallMPI(MPI_Irecv(rowlen, nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i)); 5905 } 5906 5907 /* pack the outgoing message */ 5908 PetscCall(PetscMalloc2(nsends + 1, &sstartsj, nrecvs + 1, &rstartsj)); 5909 5910 sstartsj[0] = 0; 5911 rstartsj[0] = 0; 5912 len = 0; /* total length of j or a array to be sent */ 5913 if (nsends) { 5914 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5915 PetscCall(PetscMalloc1(sbs * (sstarts[nsends] - sstarts[0]), &svalues)); 5916 } 5917 for (i = 0; i < nsends; i++) { 5918 rowlen = svalues + (sstarts[i] - sstarts[0]) * sbs; 5919 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5920 for (j = 0; j < nrows; j++) { 5921 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5922 for (l = 0; l < sbs; l++) { 5923 PetscCall(MatGetRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); /* rowlength */ 5924 5925 rowlen[j * sbs + l] = ncols; 5926 5927 len += ncols; 5928 PetscCall(MatRestoreRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); 5929 } 5930 k++; 5931 } 5932 PetscCallMPI(MPI_Isend(rowlen, nrows * sbs, MPIU_INT, sprocs[i], tag, comm, swaits + i)); 5933 5934 sstartsj[i + 1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5935 } 5936 /* recvs and sends of i-array are completed */ 5937 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5938 
PetscCall(PetscFree(svalues)); 5939 5940 /* allocate buffers for sending j and a arrays */ 5941 PetscCall(PetscMalloc1(len + 1, &bufj)); 5942 PetscCall(PetscMalloc1(len + 1, &bufa)); 5943 5944 /* create i-array of B_oth */ 5945 PetscCall(PetscMalloc1(aBn + 2, &b_othi)); 5946 5947 b_othi[0] = 0; 5948 len = 0; /* total length of j or a array to be received */ 5949 k = 0; 5950 for (i = 0; i < nrecvs; i++) { 5951 rowlen = rvalues + (rstarts[i] - rstarts[0]) * rbs; 5952 nrows = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of rows to be received */ 5953 for (j = 0; j < nrows; j++) { 5954 b_othi[k + 1] = b_othi[k] + rowlen[j]; 5955 PetscCall(PetscIntSumError(rowlen[j], len, &len)); 5956 k++; 5957 } 5958 rstartsj[i + 1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5959 } 5960 PetscCall(PetscFree(rvalues)); 5961 5962 /* allocate space for j and a arrays of B_oth */ 5963 PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_othj)); 5964 PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_otha)); 5965 5966 /* j-array */ 5967 /*---------*/ 5968 /* post receives of j-array */ 5969 for (i = 0; i < nrecvs; i++) { 5970 nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */ 5971 PetscCallMPI(MPI_Irecv(b_othj + rstartsj[i], nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i)); 5972 } 5973 5974 /* pack the outgoing message j-array */ 5975 if (nsends) k = sstarts[0]; 5976 for (i = 0; i < nsends; i++) { 5977 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5978 bufJ = bufj + sstartsj[i]; 5979 for (j = 0; j < nrows; j++) { 5980 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5981 for (ll = 0; ll < sbs; ll++) { 5982 PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL)); 5983 for (l = 0; l < ncols; l++) *bufJ++ = cols[l]; 5984 PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL)); 5985 } 5986 } 5987 PetscCallMPI(MPI_Isend(bufj + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_INT, sprocs[i], tag, comm, swaits + i)); 5988 } 
5989 5990 /* recvs and sends of j-array are completed */ 5991 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5992 } else if (scall == MAT_REUSE_MATRIX) { 5993 sstartsj = *startsj_s; 5994 rstartsj = *startsj_r; 5995 bufa = *bufa_ptr; 5996 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 5997 PetscCall(MatSeqAIJGetArrayWrite(*B_oth, &b_otha)); 5998 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container"); 5999 6000 /* a-array */ 6001 /*---------*/ 6002 /* post receives of a-array */ 6003 for (i = 0; i < nrecvs; i++) { 6004 nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */ 6005 PetscCallMPI(MPI_Irecv(b_otha + rstartsj[i], nrows, MPIU_SCALAR, rprocs[i], tag, comm, rwaits + i)); 6006 } 6007 6008 /* pack the outgoing message a-array */ 6009 if (nsends) k = sstarts[0]; 6010 for (i = 0; i < nsends; i++) { 6011 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 6012 bufA = bufa + sstartsj[i]; 6013 for (j = 0; j < nrows; j++) { 6014 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 6015 for (ll = 0; ll < sbs; ll++) { 6016 PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals)); 6017 for (l = 0; l < ncols; l++) *bufA++ = vals[l]; 6018 PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals)); 6019 } 6020 } 6021 PetscCallMPI(MPI_Isend(bufa + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_SCALAR, sprocs[i], tag, comm, swaits + i)); 6022 } 6023 /* recvs and sends of a-array are completed */ 6024 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 6025 PetscCall(PetscFree(reqs)); 6026 6027 if (scall == MAT_INITIAL_MATRIX) { 6028 /* put together the new matrix */ 6029 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, aBn, B->cmap->N, b_othi, b_othj, b_otha, B_oth)); 6030 6031 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 6032 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 6033 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 6034 b_oth->free_a = PETSC_TRUE; 6035 b_oth->free_ij = PETSC_TRUE; 6036 b_oth->nonew = 0; 6037 6038 PetscCall(PetscFree(bufj)); 6039 if (!startsj_s || !bufa_ptr) { 6040 PetscCall(PetscFree2(sstartsj, rstartsj)); 6041 PetscCall(PetscFree(bufa_ptr)); 6042 } else { 6043 *startsj_s = sstartsj; 6044 *startsj_r = rstartsj; 6045 *bufa_ptr = bufa; 6046 } 6047 } else if (scall == MAT_REUSE_MATRIX) { 6048 PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth, &b_otha)); 6049 } 6050 6051 PetscCall(VecScatterRestoreRemote_Private(ctx, PETSC_TRUE, &nsends, &sstarts, &srow, &sprocs, &sbs)); 6052 PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx, PETSC_FALSE, &nrecvs, &rstarts, NULL, &rprocs, &rbs)); 6053 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, B, 0, 0)); 6054 PetscFunctionReturn(PETSC_SUCCESS); 6055 } 6056 6057 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat, MatType, MatReuse, Mat *); 6058 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat, MatType, MatReuse, Mat *); 6059 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat, MatType, MatReuse, Mat *); 6060 #if defined(PETSC_HAVE_MKL_SPARSE) 6061 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat, MatType, MatReuse, Mat *); 6062 #endif 6063 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat, MatType, MatReuse, Mat *); 6064 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *); 6065 #if defined(PETSC_HAVE_ELEMENTAL) 6066 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat, MatType, MatReuse, Mat *); 6067 #endif 6068 #if defined(PETSC_HAVE_SCALAPACK) 6069 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *); 6070 #endif 6071 #if defined(PETSC_HAVE_HYPRE) 6072 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *); 6073 #endif 6074 #if defined(PETSC_HAVE_CUDA) 6075 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat, MatType, 
MatReuse, Mat *); 6076 #endif 6077 #if defined(PETSC_HAVE_HIP) 6078 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJHIPSPARSE(Mat, MatType, MatReuse, Mat *); 6079 #endif 6080 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 6081 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat, MatType, MatReuse, Mat *); 6082 #endif 6083 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat, MatType, MatReuse, Mat *); 6084 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *); 6085 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat); 6086 6087 /* 6088 Computes (B'*A')' since computing B*A directly is untenable 6089 6090 n p p 6091 [ ] [ ] [ ] 6092 m [ A ] * n [ B ] = m [ C ] 6093 [ ] [ ] [ ] 6094 6095 */ 6096 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A, Mat B, Mat C) 6097 { 6098 Mat At, Bt, Ct; 6099 6100 PetscFunctionBegin; 6101 PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &At)); 6102 PetscCall(MatTranspose(B, MAT_INITIAL_MATRIX, &Bt)); 6103 PetscCall(MatMatMult(Bt, At, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &Ct)); 6104 PetscCall(MatDestroy(&At)); 6105 PetscCall(MatDestroy(&Bt)); 6106 PetscCall(MatTransposeSetPrecursor(Ct, C)); 6107 PetscCall(MatTranspose(Ct, MAT_REUSE_MATRIX, &C)); 6108 PetscCall(MatDestroy(&Ct)); 6109 PetscFunctionReturn(PETSC_SUCCESS); 6110 } 6111 6112 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A, Mat B, PetscReal fill, Mat C) 6113 { 6114 PetscBool cisdense; 6115 6116 PetscFunctionBegin; 6117 PetscCheck(A->cmap->n == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT, A->cmap->n, B->rmap->n); 6118 PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N)); 6119 PetscCall(MatSetBlockSizesFromMats(C, A, B)); 6120 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, MATMPIDENSEHIP, "")); 6121 if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 6122 
PetscCall(MatSetUp(C)); 6123 6124 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 6125 PetscFunctionReturn(PETSC_SUCCESS); 6126 } 6127 6128 /* ----------------------------------------------------------------*/ 6129 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C) 6130 { 6131 Mat_Product *product = C->product; 6132 Mat A = product->A, B = product->B; 6133 6134 PetscFunctionBegin; 6135 if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) 6136 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 6137 6138 C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ; 6139 C->ops->productsymbolic = MatProductSymbolic_AB; 6140 PetscFunctionReturn(PETSC_SUCCESS); 6141 } 6142 6143 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C) 6144 { 6145 Mat_Product *product = C->product; 6146 6147 PetscFunctionBegin; 6148 if (product->type == MATPRODUCT_AB) PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C)); 6149 PetscFunctionReturn(PETSC_SUCCESS); 6150 } 6151 6152 /* Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix 6153 6154 Input Parameters: 6155 6156 j1,rowBegin1,rowEnd1,perm1,jmap1: describe the first set of nonzeros (Set1) 6157 j2,rowBegin2,rowEnd2,perm2,jmap2: describe the second set of nonzeros (Set2) 6158 6159 mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat 6160 6161 For Set1, j1[] contains column indices of the nonzeros. 6162 For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k 6163 respectively (note rowEnd1[k] is not necessarily equal to rwoBegin1[k+1]). Indices in this range of j1[] are sorted, 6164 but might have repeats. 
    jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1.

  Similar for Set2.

  This routine merges the two sets of nonzeros row by row and removes repeats.

  Output Parameters: (memory is allocated by the caller)

    i[],j[]: the CSR of the merged matrix, which has m rows.
    imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix.
    imap2[]: similar to imap1[], but for Set2.
    Note we order nonzeros row-by-row and from left to right.
*/
static PetscErrorCode MatMergeEntries_Internal(Mat mat, const PetscInt j1[], const PetscInt j2[], const PetscCount rowBegin1[], const PetscCount rowEnd1[], const PetscCount rowBegin2[], const PetscCount rowEnd2[], const PetscCount jmap1[], const PetscCount jmap2[], PetscCount imap1[], PetscCount imap2[], PetscInt i[], PetscInt j[])
{
  PetscInt   r, m; /* Row index of mat */
  PetscCount t, t1, t2, b1, e1, b2, e2;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(mat, &m, NULL));
  t1 = t2 = t = 0; /* Count unique nonzeros in Set1, Set2 and the merged set respectively */
  i[0] = 0;
  for (r = 0; r < m; r++) { /* Do row by row merging (classic two-pointer merge over sorted ranges) */
    b1 = rowBegin1[r];
    e1 = rowEnd1[r];
    b2 = rowBegin2[r];
    e2 = rowEnd2[r];
    while (b1 < e1 && b2 < e2) {
      if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */
        j[t]      = j1[b1];
        imap1[t1] = t;
        imap2[t2] = t;
        b1 += jmap1[t1 + 1] - jmap1[t1]; /* Jump to next unique local nonzero (skip its repeats) */
        b2 += jmap2[t2 + 1] - jmap2[t2]; /* Jump to next unique remote nonzero (skip its repeats) */
        t1++;
        t2++;
        t++;
      } else if (j1[b1] < j2[b2]) { /* Nonzero present only in Set1 */
        j[t]      = j1[b1];
        imap1[t1] = t;
        b1 += jmap1[t1 + 1] - jmap1[t1];
        t1++;
        t++;
      } else { /* Nonzero present only in Set2 */
        j[t]      = j2[b2];
        imap2[t2] = t;
        b2 += jmap2[t2 + 1] - jmap2[t2];
        t2++;
        t++;
      }
    }
    /* Merge the remaining in either j1[] or j2[] (at most one of these loops runs) */
    while (b1 < e1) {
      j[t]      = j1[b1];
      imap1[t1] = t;
      b1 += jmap1[t1 + 1] - jmap1[t1];
      t1++;
      t++;
    }
    while (b2 < e2) {
      j[t]      = j2[b2];
      imap2[t2] = t;
      b2 += jmap2[t2 + 1] - jmap2[t2];
      t2++;
      t++;
    }
    i[r + 1] = t; /* CSR row pointer: row r of the merged matrix ends at t */
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block

  Input Parameters:
    mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m.
    n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[]
      respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n.

    i[] is already sorted, but within a row, j[] is not sorted and might have repeats.
    i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting.

  Output Parameters:
    j[],perm[]: the routine needs to sort j[] within each row along with perm[].
    rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller.
      They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block,
      and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block.

    Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine.
      Atot: number of entries belonging to the diagonal block.
      Annz: number of unique nonzeros belonging to the diagonal block.
      Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count
        repeats (i.e., same 'i,j' pair).
      Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. More precisely, Ajmap[t+1] - Ajmap[t]
        is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0.

    Atot: number of entries belonging to the diagonal block
    Annz: number of unique nonzeros belonging to the diagonal block.

    Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block.

    Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1().
*/
static PetscErrorCode MatSplitEntries_Internal(Mat mat, PetscCount n, const PetscInt i[], PetscInt j[], PetscCount perm[], PetscCount rowBegin[], PetscCount rowMid[], PetscCount rowEnd[], PetscCount *Atot_, PetscCount **Aperm_, PetscCount *Annz_, PetscCount **Ajmap_, PetscCount *Btot_, PetscCount **Bperm_, PetscCount *Bnnz_, PetscCount **Bjmap_)
{
  PetscInt    cstart, cend, rstart, rend, row, col;
  PetscCount  Atot = 0, Btot = 0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */
  PetscCount  Annz = 0, Bnnz = 0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */
  PetscCount  k, m, p, q, r, s, mid;
  PetscCount *Aperm, *Bperm, *Ajmap, *Bjmap;

  PetscFunctionBegin;
  PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
  PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
  m = rend - rstart;

  /* Skip negative rows (entries flagged to be ignored; they sort first since i[] is sorted) */
  for (k = 0; k < n; k++) {
    if (i[k] >= 0) break;
  }

  /* Process [k,n): sort and partition each local row into diag and offdiag portions,
     fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz.
  */
  while (k < n) {
    row = i[k];
    /* Entries in [k,s) are in one row. Shift diagonal block col indices so that diag
       entries sort ahead of offdiag entries within the row */
    for (s = k; s < n; s++)
      if (i[s] != row) break;
    for (p = k; p < s; p++) {
      if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_MAX_INT; /* Shift diag columns to range of [-PETSC_MAX_INT, -1] */
      else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index %" PetscInt_FMT " is out of range", j[p]);
    }
    PetscCall(PetscSortIntWithCountArray(s - k, j + k, perm + k));
    PetscCall(PetscSortedIntUpperBound(j, k, s, -1, &mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */
    rowBegin[row - rstart] = k;
    rowMid[row - rstart]   = mid;
    rowEnd[row - rstart]   = s;

    /* Count nonzeros of this diag/offdiag row, which might have repeats */
    Atot += mid - k;
    Btot += s - mid;

    /* Count unique nonzeros of this diag/offdiag row; the diag loop also reverts
       the PETSC_MAX_INT shift applied above while walking over each run of repeats */
    for (p = k; p < mid;) {
      col = j[p];
      do {
        j[p] += PETSC_MAX_INT; /* Revert the modified diagonal indices */
        p++;
      } while (p < mid && j[p] == col);
      Annz++;
    }

    for (p = mid; p < s;) {
      col = j[p];
      do {
        p++;
      } while (p < s && j[p] == col);
      Bnnz++;
    }
    k = s; /* Advance to the next row */
  }

  /* Allocation according to Atot, Btot, Annz, Bnnz */
  PetscCall(PetscMalloc1(Atot, &Aperm));
  PetscCall(PetscMalloc1(Btot, &Bperm));
  PetscCall(PetscMalloc1(Annz + 1, &Ajmap));
  PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap));

  /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap.
     The counters are reset and recomputed; empty rows contribute zero-length copies since
     rowBegin/rowMid/rowEnd were zeroed by the caller. */
  Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0;
  for (r = 0; r < m; r++) {
    k   = rowBegin[r];
    mid = rowMid[r];
    s   = rowEnd[r];
    PetscCall(PetscArraycpy(Aperm + Atot, perm + k, mid - k));
    PetscCall(PetscArraycpy(Bperm + Btot, perm + mid, s - mid));
    Atot += mid - k;
    Btot += s - mid;

    /* Scan column indices in this row and find out how many repeats each unique nonzero has */
    for (p = k; p < mid;) {
      col = j[p];
      q   = p;
      do {
        p++;
      } while (p < mid && j[p] == col);
      Ajmap[Annz + 1] = Ajmap[Annz] + (p - q);
      Annz++;
    }

    for (p = mid; p < s;) {
      col = j[p];
      q   = p;
      do {
        p++;
      } while (p < s && j[p] == col);
      Bjmap[Bnnz + 1] = Bjmap[Bnnz] + (p - q);
      Bnnz++;
    }
  }
  /* Output */
  *Aperm_ = Aperm;
  *Annz_  = Annz;
  *Atot_  = Atot;
  *Ajmap_ = Ajmap;
  *Bperm_ = Bperm;
  *Bnnz_  = Bnnz;
  *Btot_  = Btot;
  *Bjmap_ = Bjmap;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix

  Input Parameters:
    nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[]
    nnz: number of unique nonzeros in the merged matrix
    imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix
    jmap[nnz1+1]: i-th nonzero in the set has jmap[i+1] - jmap[i] repeats in the set

  Output Parameter: (memory is allocated by the caller)
    jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set

  Example:
    nnz1 = 4
    nnz  = 6
    imap = [1,3,4,5]
    jmap = [0,3,5,6,7]
  then,
    jmap_new = [0,0,3,3,5,6,7]
*/
static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1, PetscCount nnz, const PetscCount imap[], const PetscCount jmap[], PetscCount jmap_new[])
{
  PetscCount k, p;

  PetscFunctionBegin;
  jmap_new[0] = 0;
  p = nnz; /* p loops over jmap_new[] backwards */
  for (k = nnz1 - 1; k >= 0; k--) { /* k loops over imap[]; PetscCount is signed so k >= 0 terminates */
    /* Every merged-matrix position strictly above imap[k] gets the cumulative count after set entry k */
    for (; p > imap[k]; p--) jmap_new[p] = jmap[k + 1];
  }
  /* Positions at or below the first mapped entry have no repeats from this set */
  for (; p >= 0; p--) jmap_new[p] = jmap[0];
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode
MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, PetscInt coo_i[], PetscInt coo_j[]) 6409 { 6410 MPI_Comm comm; 6411 PetscMPIInt rank, size; 6412 PetscInt m, n, M, N, rstart, rend, cstart, cend; /* Sizes, indices of row/col, therefore with type PetscInt */ 6413 PetscCount k, p, q, rem; /* Loop variables over coo arrays */ 6414 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)mat->data; 6415 6416 PetscFunctionBegin; 6417 PetscCall(PetscFree(mpiaij->garray)); 6418 PetscCall(VecDestroy(&mpiaij->lvec)); 6419 #if defined(PETSC_USE_CTABLE) 6420 PetscCall(PetscHMapIDestroy(&mpiaij->colmap)); 6421 #else 6422 PetscCall(PetscFree(mpiaij->colmap)); 6423 #endif 6424 PetscCall(VecScatterDestroy(&mpiaij->Mvctx)); 6425 mat->assembled = PETSC_FALSE; 6426 mat->was_assembled = PETSC_FALSE; 6427 PetscCall(MatResetPreallocationCOO_MPIAIJ(mat)); 6428 6429 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 6430 PetscCallMPI(MPI_Comm_size(comm, &size)); 6431 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 6432 PetscCall(PetscLayoutSetUp(mat->rmap)); 6433 PetscCall(PetscLayoutSetUp(mat->cmap)); 6434 PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend)); 6435 PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend)); 6436 PetscCall(MatGetLocalSize(mat, &m, &n)); 6437 PetscCall(MatGetSize(mat, &M, &N)); 6438 6439 /* ---------------------------------------------------------------------------*/ 6440 /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */ 6441 /* entries come first, then local rows, then remote rows. 
*/ 6442 /* ---------------------------------------------------------------------------*/ 6443 PetscCount n1 = coo_n, *perm1; 6444 PetscInt *i1 = coo_i, *j1 = coo_j; 6445 6446 PetscCall(PetscMalloc1(n1, &perm1)); 6447 for (k = 0; k < n1; k++) perm1[k] = k; 6448 6449 /* Manipulate indices so that entries with negative row or col indices will have smallest 6450 row indices, local entries will have greater but negative row indices, and remote entries 6451 will have positive row indices. 6452 */ 6453 for (k = 0; k < n1; k++) { 6454 if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_MIN_INT; /* e.g., -2^31, minimal to move them ahead */ 6455 else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_MAX_INT; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_MAX_INT, -1] */ 6456 else { 6457 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_USER_INPUT, "MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows"); 6458 if (mpiaij->donotstash) i1[k] = PETSC_MIN_INT; /* Ignore offproc entries as if they had negative indices */ 6459 } 6460 } 6461 6462 /* Sort by row; after that, [0,k) have ignored entries, [k,rem) have local rows and [rem,n1) have remote rows */ 6463 PetscCall(PetscSortIntWithIntCountArrayPair(n1, i1, j1, perm1)); 6464 for (k = 0; k < n1; k++) { 6465 if (i1[k] > PETSC_MIN_INT) break; 6466 } /* Advance k to the first entry we need to take care of */ 6467 PetscCall(PetscSortedIntUpperBound(i1, k, n1, rend - 1 - PETSC_MAX_INT, &rem)); /* rem is upper bound of the last local row */ 6468 for (; k < rem; k++) i1[k] += PETSC_MAX_INT; /* Revert row indices of local rows*/ 6469 6470 /* ---------------------------------------------------------------------------*/ 6471 /* Split local rows into diag/offdiag portions */ 6472 /* ---------------------------------------------------------------------------*/ 6473 PetscCount *rowBegin1, *rowMid1, *rowEnd1; 6474 PetscCount *Ajmap1, *Aperm1, *Bjmap1, *Bperm1, *Cperm1; 6475 PetscCount Annz1, Bnnz1, Atot1, Btot1; 6476 
6477 PetscCall(PetscCalloc3(m, &rowBegin1, m, &rowMid1, m, &rowEnd1)); 6478 PetscCall(PetscMalloc1(n1 - rem, &Cperm1)); 6479 PetscCall(MatSplitEntries_Internal(mat, rem, i1, j1, perm1, rowBegin1, rowMid1, rowEnd1, &Atot1, &Aperm1, &Annz1, &Ajmap1, &Btot1, &Bperm1, &Bnnz1, &Bjmap1)); 6480 6481 /* ---------------------------------------------------------------------------*/ 6482 /* Send remote rows to their owner */ 6483 /* ---------------------------------------------------------------------------*/ 6484 /* Find which rows should be sent to which remote ranks*/ 6485 PetscInt nsend = 0; /* Number of MPI ranks to send data to */ 6486 PetscMPIInt *sendto; /* [nsend], storing remote ranks */ 6487 PetscInt *nentries; /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */ 6488 const PetscInt *ranges; 6489 PetscInt maxNsend = size >= 128 ? 128 : size; /* Assume max 128 neighbors; realloc when needed */ 6490 6491 PetscCall(PetscLayoutGetRanges(mat->rmap, &ranges)); 6492 PetscCall(PetscMalloc2(maxNsend, &sendto, maxNsend, &nentries)); 6493 for (k = rem; k < n1;) { 6494 PetscMPIInt owner; 6495 PetscInt firstRow, lastRow; 6496 6497 /* Locate a row range */ 6498 firstRow = i1[k]; /* first row of this owner */ 6499 PetscCall(PetscLayoutFindOwner(mat->rmap, firstRow, &owner)); 6500 lastRow = ranges[owner + 1] - 1; /* last row of this owner */ 6501 6502 /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */ 6503 PetscCall(PetscSortedIntUpperBound(i1, k, n1, lastRow, &p)); 6504 6505 /* All entries in [k,p) belong to this remote owner */ 6506 if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */ 6507 PetscMPIInt *sendto2; 6508 PetscInt *nentries2; 6509 PetscInt maxNsend2 = (maxNsend <= size / 2) ? 
maxNsend * 2 : size; 6510 6511 PetscCall(PetscMalloc2(maxNsend2, &sendto2, maxNsend2, &nentries2)); 6512 PetscCall(PetscArraycpy(sendto2, sendto, maxNsend)); 6513 PetscCall(PetscArraycpy(nentries2, nentries2, maxNsend + 1)); 6514 PetscCall(PetscFree2(sendto, nentries2)); 6515 sendto = sendto2; 6516 nentries = nentries2; 6517 maxNsend = maxNsend2; 6518 } 6519 sendto[nsend] = owner; 6520 nentries[nsend] = p - k; 6521 PetscCall(PetscCountCast(p - k, &nentries[nsend])); 6522 nsend++; 6523 k = p; 6524 } 6525 6526 /* Build 1st SF to know offsets on remote to send data */ 6527 PetscSF sf1; 6528 PetscInt nroots = 1, nroots2 = 0; 6529 PetscInt nleaves = nsend, nleaves2 = 0; 6530 PetscInt *offsets; 6531 PetscSFNode *iremote; 6532 6533 PetscCall(PetscSFCreate(comm, &sf1)); 6534 PetscCall(PetscMalloc1(nsend, &iremote)); 6535 PetscCall(PetscMalloc1(nsend, &offsets)); 6536 for (k = 0; k < nsend; k++) { 6537 iremote[k].rank = sendto[k]; 6538 iremote[k].index = 0; 6539 nleaves2 += nentries[k]; 6540 PetscCheck(nleaves2 >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF leaves is too large for PetscInt"); 6541 } 6542 PetscCall(PetscSFSetGraph(sf1, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 6543 PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1, MPIU_INT, PETSC_MEMTYPE_HOST, &nroots2 /*rootdata*/, PETSC_MEMTYPE_HOST, nentries /*leafdata*/, PETSC_MEMTYPE_HOST, offsets /*leafupdate*/, MPI_SUM)); 6544 PetscCall(PetscSFFetchAndOpEnd(sf1, MPIU_INT, &nroots2, nentries, offsets, MPI_SUM)); /* Would nroots2 overflow, we check offsets[] below */ 6545 PetscCall(PetscSFDestroy(&sf1)); 6546 PetscAssert(nleaves2 == n1 - rem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT "", nleaves2, n1 - rem); 6547 6548 /* Build 2nd SF to send remote COOs to their owner */ 6549 PetscSF sf2; 6550 nroots = nroots2; 6551 nleaves = nleaves2; 6552 PetscCall(PetscSFCreate(comm, &sf2)); 6553 
PetscCall(PetscSFSetFromOptions(sf2)); 6554 PetscCall(PetscMalloc1(nleaves, &iremote)); 6555 p = 0; 6556 for (k = 0; k < nsend; k++) { 6557 PetscCheck(offsets[k] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF roots is too large for PetscInt"); 6558 for (q = 0; q < nentries[k]; q++, p++) { 6559 iremote[p].rank = sendto[k]; 6560 iremote[p].index = offsets[k] + q; 6561 } 6562 } 6563 PetscCall(PetscSFSetGraph(sf2, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 6564 6565 /* sf2 only sends contiguous leafdata to contiguous rootdata. We record the permutation which will be used to fill leafdata */ 6566 PetscCall(PetscArraycpy(Cperm1, perm1 + rem, n1 - rem)); 6567 6568 /* Send the remote COOs to their owner */ 6569 PetscInt n2 = nroots, *i2, *j2; /* Buffers for received COOs from other ranks, along with a permutation array */ 6570 PetscCount *perm2; /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */ 6571 PetscCall(PetscMalloc3(n2, &i2, n2, &j2, n2, &perm2)); 6572 PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, i1 + rem, PETSC_MEMTYPE_HOST, i2, MPI_REPLACE)); 6573 PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, i1 + rem, i2, MPI_REPLACE)); 6574 PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, j1 + rem, PETSC_MEMTYPE_HOST, j2, MPI_REPLACE)); 6575 PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, j1 + rem, j2, MPI_REPLACE)); 6576 6577 PetscCall(PetscFree(offsets)); 6578 PetscCall(PetscFree2(sendto, nentries)); 6579 6580 /* ---------------------------------------------------------------*/ 6581 /* Sort received COOs by row along with the permutation array */ 6582 /* ---------------------------------------------------------------*/ 6583 for (k = 0; k < n2; k++) perm2[k] = k; 6584 PetscCall(PetscSortIntWithIntCountArrayPair(n2, i2, j2, perm2)); 6585 6586 /* ---------------------------------------------------------------*/ 6587 /* 
Split received COOs into diag/offdiag portions */ 6588 /* ---------------------------------------------------------------*/ 6589 PetscCount *rowBegin2, *rowMid2, *rowEnd2; 6590 PetscCount *Ajmap2, *Aperm2, *Bjmap2, *Bperm2; 6591 PetscCount Annz2, Bnnz2, Atot2, Btot2; 6592 6593 PetscCall(PetscCalloc3(m, &rowBegin2, m, &rowMid2, m, &rowEnd2)); 6594 PetscCall(MatSplitEntries_Internal(mat, n2, i2, j2, perm2, rowBegin2, rowMid2, rowEnd2, &Atot2, &Aperm2, &Annz2, &Ajmap2, &Btot2, &Bperm2, &Bnnz2, &Bjmap2)); 6595 6596 /* --------------------------------------------------------------------------*/ 6597 /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */ 6598 /* --------------------------------------------------------------------------*/ 6599 PetscInt *Ai, *Bi; 6600 PetscInt *Aj, *Bj; 6601 6602 PetscCall(PetscMalloc1(m + 1, &Ai)); 6603 PetscCall(PetscMalloc1(m + 1, &Bi)); 6604 PetscCall(PetscMalloc1(Annz1 + Annz2, &Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */ 6605 PetscCall(PetscMalloc1(Bnnz1 + Bnnz2, &Bj)); 6606 6607 PetscCount *Aimap1, *Bimap1, *Aimap2, *Bimap2; 6608 PetscCall(PetscMalloc1(Annz1, &Aimap1)); 6609 PetscCall(PetscMalloc1(Bnnz1, &Bimap1)); 6610 PetscCall(PetscMalloc1(Annz2, &Aimap2)); 6611 PetscCall(PetscMalloc1(Bnnz2, &Bimap2)); 6612 6613 PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowBegin1, rowMid1, rowBegin2, rowMid2, Ajmap1, Ajmap2, Aimap1, Aimap2, Ai, Aj)); 6614 PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowMid1, rowEnd1, rowMid2, rowEnd2, Bjmap1, Bjmap2, Bimap1, Bimap2, Bi, Bj)); 6615 6616 /* --------------------------------------------------------------------------*/ 6617 /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we */ 6618 /* expect nonzeros in A/B most likely have local contributing entries */ 6619 /* --------------------------------------------------------------------------*/ 6620 PetscInt Annz = Ai[m]; 6621 PetscInt Bnnz = Bi[m]; 6622 
PetscCount *Ajmap1_new, *Bjmap1_new; 6623 6624 PetscCall(PetscMalloc1(Annz + 1, &Ajmap1_new)); 6625 PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap1_new)); 6626 6627 PetscCall(ExpandJmap_Internal(Annz1, Annz, Aimap1, Ajmap1, Ajmap1_new)); 6628 PetscCall(ExpandJmap_Internal(Bnnz1, Bnnz, Bimap1, Bjmap1, Bjmap1_new)); 6629 6630 PetscCall(PetscFree(Aimap1)); 6631 PetscCall(PetscFree(Ajmap1)); 6632 PetscCall(PetscFree(Bimap1)); 6633 PetscCall(PetscFree(Bjmap1)); 6634 PetscCall(PetscFree3(rowBegin1, rowMid1, rowEnd1)); 6635 PetscCall(PetscFree3(rowBegin2, rowMid2, rowEnd2)); 6636 PetscCall(PetscFree(perm1)); 6637 PetscCall(PetscFree3(i2, j2, perm2)); 6638 6639 Ajmap1 = Ajmap1_new; 6640 Bjmap1 = Bjmap1_new; 6641 6642 /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */ 6643 if (Annz < Annz1 + Annz2) { 6644 PetscInt *Aj_new; 6645 PetscCall(PetscMalloc1(Annz, &Aj_new)); 6646 PetscCall(PetscArraycpy(Aj_new, Aj, Annz)); 6647 PetscCall(PetscFree(Aj)); 6648 Aj = Aj_new; 6649 } 6650 6651 if (Bnnz < Bnnz1 + Bnnz2) { 6652 PetscInt *Bj_new; 6653 PetscCall(PetscMalloc1(Bnnz, &Bj_new)); 6654 PetscCall(PetscArraycpy(Bj_new, Bj, Bnnz)); 6655 PetscCall(PetscFree(Bj)); 6656 Bj = Bj_new; 6657 } 6658 6659 /* --------------------------------------------------------------------------------*/ 6660 /* Create new submatrices for on-process and off-process coupling */ 6661 /* --------------------------------------------------------------------------------*/ 6662 PetscScalar *Aa, *Ba; 6663 MatType rtype; 6664 Mat_SeqAIJ *a, *b; 6665 PetscCall(PetscCalloc1(Annz, &Aa)); /* Zero matrix on device */ 6666 PetscCall(PetscCalloc1(Bnnz, &Ba)); 6667 /* make Aj[] local, i.e, based off the start column of the diagonal portion */ 6668 if (cstart) { 6669 for (k = 0; k < Annz; k++) Aj[k] -= cstart; 6670 } 6671 PetscCall(MatDestroy(&mpiaij->A)); 6672 PetscCall(MatDestroy(&mpiaij->B)); 6673 PetscCall(MatGetRootType_Private(mat, &rtype)); 6674 
PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, Ai, Aj, Aa, &mpiaij->A)); 6675 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, mat->cmap->N, Bi, Bj, Ba, &mpiaij->B)); 6676 PetscCall(MatSetUpMultiply_MPIAIJ(mat)); 6677 6678 a = (Mat_SeqAIJ *)mpiaij->A->data; 6679 b = (Mat_SeqAIJ *)mpiaij->B->data; 6680 a->singlemalloc = b->singlemalloc = PETSC_FALSE; /* Let newmat own Ai,Aj,Aa,Bi,Bj,Ba */ 6681 a->free_a = b->free_a = PETSC_TRUE; 6682 a->free_ij = b->free_ij = PETSC_TRUE; 6683 6684 /* conversion must happen AFTER multiply setup */ 6685 PetscCall(MatConvert(mpiaij->A, rtype, MAT_INPLACE_MATRIX, &mpiaij->A)); 6686 PetscCall(MatConvert(mpiaij->B, rtype, MAT_INPLACE_MATRIX, &mpiaij->B)); 6687 PetscCall(VecDestroy(&mpiaij->lvec)); 6688 PetscCall(MatCreateVecs(mpiaij->B, &mpiaij->lvec, NULL)); 6689 6690 mpiaij->coo_n = coo_n; 6691 mpiaij->coo_sf = sf2; 6692 mpiaij->sendlen = nleaves; 6693 mpiaij->recvlen = nroots; 6694 6695 mpiaij->Annz = Annz; 6696 mpiaij->Bnnz = Bnnz; 6697 6698 mpiaij->Annz2 = Annz2; 6699 mpiaij->Bnnz2 = Bnnz2; 6700 6701 mpiaij->Atot1 = Atot1; 6702 mpiaij->Atot2 = Atot2; 6703 mpiaij->Btot1 = Btot1; 6704 mpiaij->Btot2 = Btot2; 6705 6706 mpiaij->Ajmap1 = Ajmap1; 6707 mpiaij->Aperm1 = Aperm1; 6708 6709 mpiaij->Bjmap1 = Bjmap1; 6710 mpiaij->Bperm1 = Bperm1; 6711 6712 mpiaij->Aimap2 = Aimap2; 6713 mpiaij->Ajmap2 = Ajmap2; 6714 mpiaij->Aperm2 = Aperm2; 6715 6716 mpiaij->Bimap2 = Bimap2; 6717 mpiaij->Bjmap2 = Bjmap2; 6718 mpiaij->Bperm2 = Bperm2; 6719 6720 mpiaij->Cperm1 = Cperm1; 6721 6722 /* Allocate in preallocation. 
If not used, it has zero cost on host */
  PetscCall(PetscMalloc2(mpiaij->sendlen, &mpiaij->sendbuf, mpiaij->recvlen, &mpiaij->recvbuf));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Insert (or add) values v[], ordered as the COO indices given to MatSetPreallocationCOO(),
   into the diagonal (A) and off-diagonal (B) blocks of the MPIAIJ matrix. Local contributions
   are summed in place; off-process contributions are shipped to their owning ranks through
   mpiaij->coo_sf, with the communication overlapped with the local summation. */
static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat, const PetscScalar v[], InsertMode imode)
{
  Mat_MPIAIJ       *mpiaij = (Mat_MPIAIJ *)mat->data;
  Mat               A = mpiaij->A, B = mpiaij->B;
  PetscCount        Annz = mpiaij->Annz, Annz2 = mpiaij->Annz2, Bnnz = mpiaij->Bnnz, Bnnz2 = mpiaij->Bnnz2;
  PetscScalar      *Aa, *Ba;
  PetscScalar      *sendbuf = mpiaij->sendbuf;
  PetscScalar      *recvbuf = mpiaij->recvbuf;
  /* jmap arrays delimit, per nonzero, the ranges of contributing COO entries; perm arrays map
     those ranges back into positions of v[] (local) or recvbuf[] (remote) */
  const PetscCount *Ajmap1 = mpiaij->Ajmap1, *Ajmap2 = mpiaij->Ajmap2, *Aimap2 = mpiaij->Aimap2;
  const PetscCount *Bjmap1 = mpiaij->Bjmap1, *Bjmap2 = mpiaij->Bjmap2, *Bimap2 = mpiaij->Bimap2;
  const PetscCount *Aperm1 = mpiaij->Aperm1, *Aperm2 = mpiaij->Aperm2, *Bperm1 = mpiaij->Bperm1, *Bperm2 = mpiaij->Bperm2;
  const PetscCount *Cperm1 = mpiaij->Cperm1; /* selects the v[] entries destined for other ranks */

  PetscFunctionBegin;
  PetscCall(MatSeqAIJGetArray(A, &Aa)); /* Might read and write matrix values */
  PetscCall(MatSeqAIJGetArray(B, &Ba));

  /* Pack entries to be sent to remote */
  for (PetscCount i = 0; i < mpiaij->sendlen; i++) sendbuf[i] = v[Cperm1[i]];

  /* Send remote entries to their owner and overlap the communication with local computation */
  PetscCall(PetscSFReduceWithMemTypeBegin(mpiaij->coo_sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, sendbuf, PETSC_MEMTYPE_HOST, recvbuf, MPI_REPLACE));
  /* Add local entries to A and B */
  for (PetscCount i = 0; i < Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */
    PetscScalar sum = 0.0;                /* Do partial summation first to improve numerical stability */
    for (PetscCount k = Ajmap1[i]; k < Ajmap1[i + 1]; k++) sum += v[Aperm1[k]];
    Aa[i] = (imode == INSERT_VALUES ? 0.0 : Aa[i]) + sum; /* INSERT discards the old value; ADD accumulates onto it */
  }
  for (PetscCount i = 0; i < Bnnz; i++) {
    PetscScalar sum = 0.0;
    for (PetscCount k = Bjmap1[i]; k < Bjmap1[i + 1]; k++) sum += v[Bperm1[k]];
    Ba[i] = (imode == INSERT_VALUES ? 0.0 : Ba[i]) + sum;
  }
  PetscCall(PetscSFReduceEnd(mpiaij->coo_sf, MPIU_SCALAR, sendbuf, recvbuf, MPI_REPLACE));

  /* Add received remote entries to A and B; remote contributions are always added (never
     inserted) because the owning rank already applied imode to its local sums above */
  for (PetscCount i = 0; i < Annz2; i++) {
    for (PetscCount k = Ajmap2[i]; k < Ajmap2[i + 1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]];
  }
  for (PetscCount i = 0; i < Bnnz2; i++) {
    for (PetscCount k = Bjmap2[i]; k < Bjmap2[i + 1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]];
  }
  PetscCall(MatSeqAIJRestoreArray(A, &Aa));
  PetscCall(MatSeqAIJRestoreArray(B, &Ba));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
   MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.

   Options Database Keys:
. -mat_type mpiaij - sets the matrix type to `MATMPIAIJ` during a call to `MatSetFromOptions()`

   Level: beginner

   Notes:
   `MatSetValues()` may be called for this matrix type with a NULL argument for the numerical values,
   in this case the values associated with the rows and columns one passes in are set to zero
   in the matrix

   `MatSetOptions`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type.
In this case no
   space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored

.seealso: `MATSEQAIJ`, `MATAIJ`, `MatCreateAIJ()`
M*/

/* Type constructor for MATMPIAIJ: allocates the Mat_MPIAIJ data, installs the function table,
   and registers the conversion/product/COO hooks that MatDestroy_MPIAIJ later removes. */
PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ *b;
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));

  PetscCall(PetscNew(&b));
  B->data = (void *)b;
  PetscCall(PetscMemcpy(B->ops, &MatOps_Values, sizeof(struct _MatOps)));
  B->assembled  = PETSC_FALSE;
  B->insertmode = NOT_SET_VALUES;
  b->size       = size;

  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));

  /* build cache for off array entries formed */
  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL;
  b->garray      = NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = NULL;
  b->rowvalues    = NULL;
  b->getrowactive = PETSC_FALSE;

  /* flexible pointer used in CUSPARSE classes */
  b->spptr = NULL;

  /* Register hooks; the "_C" names are queried via PetscObjectQueryFunction() by generic Mat code */
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetUseScalableIncreaseOverlap_C", MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocation_C", MatMPIAIJSetPreallocation_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetPreallocation_C", MatResetPreallocation_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocationCSR_C", MatMPIAIJSetPreallocationCSR_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijperm_C", MatConvert_MPIAIJ_MPIAIJPERM));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijsell_C", MatConvert_MPIAIJ_MPIAIJSELL));
  /* device/back-end conversions are only registered when the corresponding package was configured */
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcusparse_C", MatConvert_MPIAIJ_MPIAIJCUSPARSE));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijhipsparse_C", MatConvert_MPIAIJ_MPIAIJHIPSPARSE));
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijkokkos_C", MatConvert_MPIAIJ_MPIAIJKokkos));
#endif
#if defined(PETSC_HAVE_MKL_SPARSE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijmkl_C", MatConvert_MPIAIJ_MPIAIJMKL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcrl_C", MatConvert_MPIAIJ_MPIAIJCRL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpibaij_C", MatConvert_MPIAIJ_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisbaij_C", MatConvert_MPIAIJ_MPISBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_elemental_C", MatConvert_MPIAIJ_Elemental));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_scalapack_C", MatConvert_AIJ_ScaLAPACK));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_is_C", MatConvert_XAIJ_IS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisell_C", MatConvert_MPIAIJ_MPISELL));
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_hypre_C", MatConvert_AIJ_HYPRE));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", MatProductSetFromOptions_Transpose_AIJ_AIJ));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_is_mpiaij_C", MatProductSetFromOptions_IS_XAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpiaij_C", MatProductSetFromOptions_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetPreallocationCOO_C", MatSetPreallocationCOO_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetValuesCOO_C", MatSetValuesCOO_MPIAIJ));
  PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIAIJ));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
     MatCreateMPIAIJWithSplitArrays - creates a `MATMPIAIJ` matrix using arrays that contain the "diagonal"
         and "off-diagonal" part of the matrix in CSR format.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be `PETSC_DECIDE`)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
.  N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
.
i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 6891 . j - column indices, which must be local, i.e., based off the start column of the diagonal portion 6892 . a - matrix values 6893 . oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix 6894 . oj - column indices, which must be global, representing global columns in the MPIAIJ matrix 6895 - oa - matrix values 6896 6897 Output Parameter: 6898 . mat - the matrix 6899 6900 Level: advanced 6901 6902 Notes: 6903 The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user 6904 must free the arrays once the matrix has been destroyed and not before. 6905 6906 The i and j indices are 0 based 6907 6908 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix 6909 6910 This sets local rows and cannot be used to set off-processor values. 6911 6912 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 6913 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 6914 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 6915 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 6916 keep track of the underlying array. Use `MatSetOption`(A,`MAT_NO_OFF_PROC_ENTRIES`,`PETSC_TRUE`) to disable all 6917 communication if it is known that only local entries will be set. 

.seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
          `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()`
@*/
PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt i[], PetscInt j[], PetscScalar a[], PetscInt oi[], PetscInt oj[], PetscScalar oa[], Mat *mat)
{
  Mat_MPIAIJ *maij;

  PetscFunctionBegin;
  /* Basic validation of the user-supplied CSR structure; full validation is not attempted */
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
  PetscCheck(oi[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "oi (row indices) must start with 0");
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatSetSizes(*mat, m, n, M, N));
  PetscCall(MatSetType(*mat, MATMPIAIJ));
  maij = (Mat_MPIAIJ *)(*mat)->data;

  /* Mark as preallocated so assembly below does not try to allocate storage */
  (*mat)->preallocated = PETSC_TRUE;

  PetscCall(PetscLayoutSetUp((*mat)->rmap));
  PetscCall(PetscLayoutSetUp((*mat)->cmap));

  /* Wrap the user arrays directly (no copy) as the diag (A) and offdiag (B) blocks;
     the caller keeps ownership of i,j,a,oi,oj,oa and must free them after MatDestroy() */
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, i, j, a, &maij->A));
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, (*mat)->cmap->N, oi, oj, oa, &maij->B));

  /* Assembly is purely local here, so suppress off-process stashing for its duration */
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
  PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Private state for the backend (device-capable) MatProduct implementation below */
typedef struct {
  Mat       *mp;    /* intermediate products */
  PetscBool *mptmp; /* is the intermediate product temporary ?
*/
  PetscInt cp; /* number of intermediate products */

  /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */
  PetscInt    *startsj_s, *startsj_r;
  PetscScalar *bufa;
  Mat          P_oth;

  /* may take advantage of merging product->B */
  Mat Bloc; /* B-local by merging diag and off-diag */

  /* cusparse does not have support to split between symbolic and numeric phases.
     When api_user is true, we don't need to update the numerical values
     of the temporary storage */
  PetscBool reusesym;

  /* support for COO values insertion */
  PetscScalar *coo_v, *coo_w;     /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */
  PetscInt   **own;               /* own[i] points to address of on-process COO indices for Mat mp[i] */
  PetscInt   **off;               /* off[i] points to address of off-process COO indices for Mat mp[i] */
  PetscBool    hasoffproc;        /* if true, have off-process values insertion (i.e. AtB or PtAP) */
  PetscSF      sf;                /* used for non-local values insertion and memory malloc */
  PetscMemType mtype;

  /* customization */
  PetscBool abmerge;
  PetscBool P_oth_bind;
} MatMatMPIAIJBACKEND;

/* Destructor attached to C->product->data: releases every buffer, intermediate matrix and SF
   owned by the backend product. Note own[0]/off[0] hold the single allocation backing all
   per-product index arrays, so only the first entry of each is freed. */
PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data)
{
  MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND *)data;
  PetscInt             i;

  PetscFunctionBegin;
  PetscCall(PetscFree2(mmdata->startsj_s, mmdata->startsj_r));
  PetscCall(PetscFree(mmdata->bufa));
  /* coo_v/coo_w were allocated through the SF with memtype mtype, so free them the same way */
  PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_v));
  PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_w));
  PetscCall(MatDestroy(&mmdata->P_oth));
  PetscCall(MatDestroy(&mmdata->Bloc));
  PetscCall(PetscSFDestroy(&mmdata->sf));
  for (i = 0; i < mmdata->cp; i++) PetscCall(MatDestroy(&mmdata->mp[i]));
  PetscCall(PetscFree2(mmdata->mp, mmdata->mptmp));
  PetscCall(PetscFree(mmdata->own[0]));
  PetscCall(PetscFree(mmdata->own));
  PetscCall(PetscFree(mmdata->off[0]));
  PetscCall(PetscFree(mmdata->off));
  PetscCall(PetscFree(mmdata));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Copy selected n entries with indices in idx[] of A to v[].
   If idx is NULL, copy the whole data array of A to v[].
   Dispatches to a type-specific implementation (e.g. a device one) when the
   "MatSeqAIJCopySubArray_C" hook is composed on A; otherwise gathers on the host. */
static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[])
{
  PetscErrorCode (*f)(Mat, PetscInt, const PetscInt[], PetscScalar[]);

  PetscFunctionBegin;
  PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJCopySubArray_C", &f));
  if (f) {
    PetscCall((*f)(A, n, idx, v));
  } else {
    const PetscScalar *vv;

    PetscCall(MatSeqAIJGetArrayRead(A, &vv));
    if (n && idx) {
      PetscScalar    *w  = v;
      const PetscInt *oi = idx;
      PetscInt        j;

      for (j = 0; j < n; j++) *w++ = vv[*oi++];
    } else {
      /* NULL idx (or n == 0): bulk copy of the first n values */
      PetscCall(PetscArraycpy(v, vv, n));
    }
    PetscCall(MatSeqAIJRestoreArrayRead(A, &vv));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Numeric phase of the backend MatProduct: re-run the numeric phase of every intermediate
   product, gather their values into the COO buffers (coo_v for on-process, coo_w for
   off-process destinations), then assemble C with a single MatSetValuesCOO(). */
static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C)
{
  MatMatMPIAIJBACKEND *mmdata;
  PetscInt             i, n_d, n_o;

  PetscFunctionBegin;
  MatCheckProduct(C, 1);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  mmdata = (MatMatMPIAIJBACKEND *)C->product->data;
  if (!mmdata->reusesym) { /* update temporary matrices */
    if (mmdata->P_oth) PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A, C->product->B, MAT_REUSE_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
    if (mmdata->Bloc) PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B, MAT_REUSE_MATRIX, NULL, &mmdata->Bloc));
  }
  mmdata->reusesym = PETSC_FALSE; /* first numeric call after symbolic may reuse; later calls must refresh */

  for (i = 0; i < mmdata->cp; i++) {
    PetscCheck(mmdata->mp[i]->ops->productnumeric, PetscObjectComm((PetscObject)mmdata->mp[i]), PETSC_ERR_PLIB, "Missing numeric op for %s", MatProductTypes[mmdata->mp[i]->product->type]);
    PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]));
  }
  /* n_d/n_o track write offsets into coo_v (on-process) and coo_w (off-process) */
  for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) {
    PetscInt noff = mmdata->off[i + 1] - mmdata->off[i];

    if (mmdata->mptmp[i]) continue; /* temporaries feed later products; their values are not inserted */
    if (noff) {
      PetscInt nown = mmdata->own[i + 1] - mmdata->own[i];

      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], noff, mmdata->off[i], mmdata->coo_w + n_o));
      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], nown, mmdata->own[i], mmdata->coo_v + n_d));
      n_o += noff;
      n_d += nown;
    } else {
      Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mmdata->mp[i]->data;

      /* no off-process part: every nonzero of mp[i] is inserted locally */
      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], mm->nz, NULL, mmdata->coo_v + n_d));
      n_d += mm->nz;
    }
  }
  if (mmdata->hasoffproc) { /* offprocess insertion */
    PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
    PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
  }
  PetscCall(MatSetValuesCOO(C, mmdata->coo_v, INSERT_VALUES));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Support for Pt * A, A * P, or Pt * A * P */
#define MAX_NUMBER_INTERMEDIATE 4
PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C)
{
  Mat_Product            *product = C->product;
  Mat                     A, P, mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */
  Mat_MPIAIJ             *a, *p;
  MatMatMPIAIJBACKEND    *mmdata;
  ISLocalToGlobalMapping  P_oth_l2g = NULL;
  IS                      glob      = NULL;
  const char             *prefix;
  char                    pprefix[256];
  const PetscInt         *globidx, *P_oth_idx;
  PetscInt                i, j, cp, m, n, M, N, *coo_i, *coo_j;
  PetscCount              ncoo, ncoo_d, ncoo_o, ncoo_oown;
  PetscInt                cmapt[MAX_NUMBER_INTERMEDIATE], rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[].
*/ 7095 /* type-0: consecutive, start from 0; type-1: consecutive with */ 7096 /* a base offset; type-2: sparse with a local to global map table */ 7097 const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE], *rmapa[MAX_NUMBER_INTERMEDIATE]; /* col/row local to global map array (table) for type-2 map type */ 7098 7099 MatProductType ptype; 7100 PetscBool mptmp[MAX_NUMBER_INTERMEDIATE], hasoffproc = PETSC_FALSE, iscuda, iship, iskokk; 7101 PetscMPIInt size; 7102 7103 PetscFunctionBegin; 7104 MatCheckProduct(C, 1); 7105 PetscCheck(!product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 7106 ptype = product->type; 7107 if (product->A->symmetric == PETSC_BOOL3_TRUE && ptype == MATPRODUCT_AtB) { 7108 ptype = MATPRODUCT_AB; 7109 product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE; 7110 } 7111 switch (ptype) { 7112 case MATPRODUCT_AB: 7113 A = product->A; 7114 P = product->B; 7115 m = A->rmap->n; 7116 n = P->cmap->n; 7117 M = A->rmap->N; 7118 N = P->cmap->N; 7119 hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */ 7120 break; 7121 case MATPRODUCT_AtB: 7122 P = product->A; 7123 A = product->B; 7124 m = P->cmap->n; 7125 n = A->cmap->n; 7126 M = P->cmap->N; 7127 N = A->cmap->N; 7128 hasoffproc = PETSC_TRUE; 7129 break; 7130 case MATPRODUCT_PtAP: 7131 A = product->A; 7132 P = product->B; 7133 m = P->cmap->n; 7134 n = P->cmap->n; 7135 M = P->cmap->N; 7136 N = P->cmap->N; 7137 hasoffproc = PETSC_TRUE; 7138 break; 7139 default: 7140 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]); 7141 } 7142 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C), &size)); 7143 if (size == 1) hasoffproc = PETSC_FALSE; 7144 7145 /* defaults */ 7146 for (i = 0; i < MAX_NUMBER_INTERMEDIATE; i++) { 7147 mp[i] = NULL; 7148 mptmp[i] = PETSC_FALSE; 7149 rmapt[i] = -1; 7150 cmapt[i] = -1; 7151 rmapa[i] = NULL; 7152 cmapa[i] = NULL; 7153 } 7154 7155 /* customization */ 
7156 PetscCall(PetscNew(&mmdata)); 7157 mmdata->reusesym = product->api_user; 7158 if (ptype == MATPRODUCT_AB) { 7159 if (product->api_user) { 7160 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatMult", "Mat"); 7161 PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL)); 7162 PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7163 PetscOptionsEnd(); 7164 } else { 7165 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat"); 7166 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL)); 7167 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7168 PetscOptionsEnd(); 7169 } 7170 } else if (ptype == MATPRODUCT_PtAP) { 7171 if (product->api_user) { 7172 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatPtAP", "Mat"); 7173 PetscCall(PetscOptionsBool("-matptap_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7174 PetscOptionsEnd(); 7175 } else { 7176 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat"); 7177 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7178 PetscOptionsEnd(); 7179 } 7180 } 7181 a = (Mat_MPIAIJ *)A->data; 7182 p = (Mat_MPIAIJ *)P->data; 7183 PetscCall(MatSetSizes(C, m, n, M, N)); 7184 PetscCall(PetscLayoutSetUp(C->rmap)); 7185 PetscCall(PetscLayoutSetUp(C->cmap)); 7186 PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 7187 
PetscCall(MatGetOptionsPrefix(C, &prefix)); 7188 7189 cp = 0; 7190 switch (ptype) { 7191 case MATPRODUCT_AB: /* A * P */ 7192 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7193 7194 /* A_diag * P_local (merged or not) */ 7195 if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */ 7196 /* P is product->B */ 7197 PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7198 PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp])); 7199 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7200 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7201 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7202 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7203 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7204 mp[cp]->product->api_user = product->api_user; 7205 PetscCall(MatProductSetFromOptions(mp[cp])); 7206 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7207 PetscCall(ISGetIndices(glob, &globidx)); 7208 rmapt[cp] = 1; 7209 cmapt[cp] = 2; 7210 cmapa[cp] = globidx; 7211 mptmp[cp] = PETSC_FALSE; 7212 cp++; 7213 } else { /* A_diag * P_diag and A_diag * P_off */ 7214 PetscCall(MatProductCreate(a->A, p->A, NULL, &mp[cp])); 7215 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7216 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7217 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7218 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7219 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7220 mp[cp]->product->api_user = product->api_user; 7221 PetscCall(MatProductSetFromOptions(mp[cp])); 7222 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7223 rmapt[cp] = 1; 7224 cmapt[cp] = 1; 7225 mptmp[cp] = PETSC_FALSE; 7226 cp++; 7227 PetscCall(MatProductCreate(a->A, p->B, NULL, &mp[cp])); 7228 
PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7229 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7230 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7231 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7232 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7233 mp[cp]->product->api_user = product->api_user; 7234 PetscCall(MatProductSetFromOptions(mp[cp])); 7235 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7236 rmapt[cp] = 1; 7237 cmapt[cp] = 2; 7238 cmapa[cp] = p->garray; 7239 mptmp[cp] = PETSC_FALSE; 7240 cp++; 7241 } 7242 7243 /* A_off * P_other */ 7244 if (mmdata->P_oth) { 7245 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); /* make P_oth use local col ids */ 7246 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx)); 7247 PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)(a->B))->type_name)); 7248 PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind)); 7249 PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp])); 7250 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7251 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7252 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7253 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7254 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7255 mp[cp]->product->api_user = product->api_user; 7256 PetscCall(MatProductSetFromOptions(mp[cp])); 7257 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7258 rmapt[cp] = 1; 7259 cmapt[cp] = 2; 7260 cmapa[cp] = P_oth_idx; 7261 mptmp[cp] = PETSC_FALSE; 7262 cp++; 7263 } 7264 break; 7265 7266 case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */ 7267 /* A is product->B */ 7268 PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7269 if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */ 7270 PetscCall(MatProductCreate(mmdata->Bloc, 
mmdata->Bloc, NULL, &mp[cp])); 7271 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7272 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7273 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7274 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7275 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7276 mp[cp]->product->api_user = product->api_user; 7277 PetscCall(MatProductSetFromOptions(mp[cp])); 7278 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7279 PetscCall(ISGetIndices(glob, &globidx)); 7280 rmapt[cp] = 2; 7281 rmapa[cp] = globidx; 7282 cmapt[cp] = 2; 7283 cmapa[cp] = globidx; 7284 mptmp[cp] = PETSC_FALSE; 7285 cp++; 7286 } else { 7287 PetscCall(MatProductCreate(p->A, mmdata->Bloc, NULL, &mp[cp])); 7288 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7289 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7290 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7291 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7292 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7293 mp[cp]->product->api_user = product->api_user; 7294 PetscCall(MatProductSetFromOptions(mp[cp])); 7295 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7296 PetscCall(ISGetIndices(glob, &globidx)); 7297 rmapt[cp] = 1; 7298 cmapt[cp] = 2; 7299 cmapa[cp] = globidx; 7300 mptmp[cp] = PETSC_FALSE; 7301 cp++; 7302 PetscCall(MatProductCreate(p->B, mmdata->Bloc, NULL, &mp[cp])); 7303 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7304 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7305 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7306 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7307 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7308 mp[cp]->product->api_user = product->api_user; 7309 PetscCall(MatProductSetFromOptions(mp[cp])); 7310 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7311 rmapt[cp] = 2; 7312 rmapa[cp] = p->garray; 
7313 cmapt[cp] = 2; 7314 cmapa[cp] = globidx; 7315 mptmp[cp] = PETSC_FALSE; 7316 cp++; 7317 } 7318 break; 7319 case MATPRODUCT_PtAP: 7320 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7321 /* P is product->B */ 7322 PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7323 PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp])); 7324 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_PtAP)); 7325 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7326 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7327 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7328 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7329 mp[cp]->product->api_user = product->api_user; 7330 PetscCall(MatProductSetFromOptions(mp[cp])); 7331 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7332 PetscCall(ISGetIndices(glob, &globidx)); 7333 rmapt[cp] = 2; 7334 rmapa[cp] = globidx; 7335 cmapt[cp] = 2; 7336 cmapa[cp] = globidx; 7337 mptmp[cp] = PETSC_FALSE; 7338 cp++; 7339 if (mmdata->P_oth) { 7340 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); 7341 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx)); 7342 PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)(a->B))->type_name)); 7343 PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind)); 7344 PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp])); 7345 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7346 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7347 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7348 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7349 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7350 mp[cp]->product->api_user = product->api_user; 7351 PetscCall(MatProductSetFromOptions(mp[cp])); 7352 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7353 
mptmp[cp] = PETSC_TRUE; 7354 cp++; 7355 PetscCall(MatProductCreate(mmdata->Bloc, mp[1], NULL, &mp[cp])); 7356 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7357 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7358 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7359 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7360 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7361 mp[cp]->product->api_user = product->api_user; 7362 PetscCall(MatProductSetFromOptions(mp[cp])); 7363 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7364 rmapt[cp] = 2; 7365 rmapa[cp] = globidx; 7366 cmapt[cp] = 2; 7367 cmapa[cp] = P_oth_idx; 7368 mptmp[cp] = PETSC_FALSE; 7369 cp++; 7370 } 7371 break; 7372 default: 7373 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]); 7374 } 7375 /* sanity check */ 7376 if (size > 1) 7377 for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected offproc map type for product %" PetscInt_FMT, i); 7378 7379 PetscCall(PetscMalloc2(cp, &mmdata->mp, cp, &mmdata->mptmp)); 7380 for (i = 0; i < cp; i++) { 7381 mmdata->mp[i] = mp[i]; 7382 mmdata->mptmp[i] = mptmp[i]; 7383 } 7384 mmdata->cp = cp; 7385 C->product->data = mmdata; 7386 C->product->destroy = MatDestroy_MatMatMPIAIJBACKEND; 7387 C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND; 7388 7389 /* memory type */ 7390 mmdata->mtype = PETSC_MEMTYPE_HOST; 7391 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iscuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, "")); 7392 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iship, MATSEQAIJHIPSPARSE, MATMPIAIJHIPSPARSE, "")); 7393 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iskokk, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, "")); 7394 if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA; 7395 else if (iship) mmdata->mtype = PETSC_MEMTYPE_HIP; 7396 else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS; 7397 
7398 /* prepare coo coordinates for values insertion */ 7399 7400 /* count total nonzeros of those intermediate seqaij Mats 7401 ncoo_d: # of nonzeros of matrices that do not have offproc entries 7402 ncoo_o: # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs 7403 ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally 7404 */ 7405 for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) { 7406 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7407 if (mptmp[cp]) continue; 7408 if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scatter to all processes (might include self) */ 7409 const PetscInt *rmap = rmapa[cp]; 7410 const PetscInt mr = mp[cp]->rmap->n; 7411 const PetscInt rs = C->rmap->rstart; 7412 const PetscInt re = C->rmap->rend; 7413 const PetscInt *ii = mm->i; 7414 for (i = 0; i < mr; i++) { 7415 const PetscInt gr = rmap[i]; 7416 const PetscInt nz = ii[i + 1] - ii[i]; 7417 if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */ 7418 else ncoo_oown += nz; /* this row is local */ 7419 } 7420 } else ncoo_d += mm->nz; 7421 } 7422 7423 /* 7424 ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc 7425 7426 ncoo = ncoo_d + ncoo_oown + ncoo2, which ncoo2 is number of nonzeros inserted to me by other procs. 7427 7428 off[0] points to a big index array, which is shared by off[1,2,...]. Similarly, for own[0]. 7429 7430 off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others 7431 own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally 7432 so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others. 7433 7434 coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc. 7435 Ex. 
coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaining part stores i of nonzeros I will receive. 7436 */ 7437 PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->off)); /* +1 to make a csr-like data structure */ 7438 PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->own)); 7439 7440 /* gather (i,j) of nonzeros inserted by remote procs */ 7441 if (hasoffproc) { 7442 PetscSF msf; 7443 PetscInt ncoo2, *coo_i2, *coo_j2; 7444 7445 PetscCall(PetscMalloc1(ncoo_o, &mmdata->off[0])); 7446 PetscCall(PetscMalloc1(ncoo_oown, &mmdata->own[0])); 7447 PetscCall(PetscMalloc2(ncoo_o, &coo_i, ncoo_o, &coo_j)); /* to collect (i,j) of entries to be sent to others */ 7448 7449 for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) { 7450 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7451 PetscInt *idxoff = mmdata->off[cp]; 7452 PetscInt *idxown = mmdata->own[cp]; 7453 if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */ 7454 const PetscInt *rmap = rmapa[cp]; 7455 const PetscInt *cmap = cmapa[cp]; 7456 const PetscInt *ii = mm->i; 7457 PetscInt *coi = coo_i + ncoo_o; 7458 PetscInt *coj = coo_j + ncoo_o; 7459 const PetscInt mr = mp[cp]->rmap->n; 7460 const PetscInt rs = C->rmap->rstart; 7461 const PetscInt re = C->rmap->rend; 7462 const PetscInt cs = C->cmap->rstart; 7463 for (i = 0; i < mr; i++) { 7464 const PetscInt *jj = mm->j + ii[i]; 7465 const PetscInt gr = rmap[i]; 7466 const PetscInt nz = ii[i + 1] - ii[i]; 7467 if (gr < rs || gr >= re) { /* this is an offproc row */ 7468 for (j = ii[i]; j < ii[i + 1]; j++) { 7469 *coi++ = gr; 7470 *idxoff++ = j; 7471 } 7472 if (!cmapt[cp]) { /* already global */ 7473 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7474 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7475 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7476 } else { /* offdiag */ 7477 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7478 } 7479 ncoo_o += nz; 7480 } else { /* this is a local row */ 7481 for (j = ii[i]; j < 
ii[i + 1]; j++) *idxown++ = j; 7482 } 7483 } 7484 } 7485 mmdata->off[cp + 1] = idxoff; 7486 mmdata->own[cp + 1] = idxown; 7487 } 7488 7489 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf)); 7490 PetscCall(PetscSFSetGraphLayout(mmdata->sf, C->rmap, ncoo_o /*nleaves*/, NULL /*ilocal*/, PETSC_OWN_POINTER, coo_i)); 7491 PetscCall(PetscSFGetMultiSF(mmdata->sf, &msf)); 7492 PetscCall(PetscSFGetGraph(msf, &ncoo2 /*nroots*/, NULL, NULL, NULL)); 7493 ncoo = ncoo_d + ncoo_oown + ncoo2; 7494 PetscCall(PetscMalloc2(ncoo, &coo_i2, ncoo, &coo_j2)); 7495 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */ 7496 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); 7497 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown)); 7498 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown)); 7499 PetscCall(PetscFree2(coo_i, coo_j)); 7500 /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */ 7501 PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo_o * sizeof(PetscScalar), (void **)&mmdata->coo_w)); 7502 coo_i = coo_i2; 7503 coo_j = coo_j2; 7504 } else { /* no offproc values insertion */ 7505 ncoo = ncoo_d; 7506 PetscCall(PetscMalloc2(ncoo, &coo_i, ncoo, &coo_j)); 7507 7508 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf)); 7509 PetscCall(PetscSFSetGraph(mmdata->sf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER)); 7510 PetscCall(PetscSFSetUp(mmdata->sf)); 7511 } 7512 mmdata->hasoffproc = hasoffproc; 7513 7514 /* gather (i,j) of nonzeros inserted locally */ 7515 for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) { 7516 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7517 PetscInt *coi = coo_i + ncoo_d; 7518 PetscInt *coj = coo_j + ncoo_d; 7519 const PetscInt *jj = mm->j; 7520 const PetscInt *ii = mm->i; 7521 const PetscInt *cmap = 
cmapa[cp]; 7522 const PetscInt *rmap = rmapa[cp]; 7523 const PetscInt mr = mp[cp]->rmap->n; 7524 const PetscInt rs = C->rmap->rstart; 7525 const PetscInt re = C->rmap->rend; 7526 const PetscInt cs = C->cmap->rstart; 7527 7528 if (mptmp[cp]) continue; 7529 if (rmapt[cp] == 1) { /* consecutive rows */ 7530 /* fill coo_i */ 7531 for (i = 0; i < mr; i++) { 7532 const PetscInt gr = i + rs; 7533 for (j = ii[i]; j < ii[i + 1]; j++) coi[j] = gr; 7534 } 7535 /* fill coo_j */ 7536 if (!cmapt[cp]) { /* type-0, already global */ 7537 PetscCall(PetscArraycpy(coj, jj, mm->nz)); 7538 } else if (cmapt[cp] == 1) { /* type-1, local to global for consecutive columns of C */ 7539 for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */ 7540 } else { /* type-2, local to global for sparse columns */ 7541 for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]]; 7542 } 7543 ncoo_d += mm->nz; 7544 } else if (rmapt[cp] == 2) { /* sparse rows */ 7545 for (i = 0; i < mr; i++) { 7546 const PetscInt *jj = mm->j + ii[i]; 7547 const PetscInt gr = rmap[i]; 7548 const PetscInt nz = ii[i + 1] - ii[i]; 7549 if (gr >= rs && gr < re) { /* local rows */ 7550 for (j = ii[i]; j < ii[i + 1]; j++) *coi++ = gr; 7551 if (!cmapt[cp]) { /* type-0, already global */ 7552 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7553 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7554 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7555 } else { /* type-2, local to global for sparse columns */ 7556 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7557 } 7558 ncoo_d += nz; 7559 } 7560 } 7561 } 7562 } 7563 if (glob) PetscCall(ISRestoreIndices(glob, &globidx)); 7564 PetscCall(ISDestroy(&glob)); 7565 if (P_oth_l2g) PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g, &P_oth_idx)); 7566 PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g)); 7567 /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */ 7568 PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, 
                              ncoo * sizeof(PetscScalar), (void **)&mmdata->coo_v));

  /* preallocate with COO data */
  PetscCall(MatSetPreallocationCOO(C, ncoo, coo_i, coo_j));
  PetscCall(PetscFree2(coo_i, coo_j));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatProductSetFromOptions_MPIAIJBACKEND - decide whether the backend (device-friendly, COO-based) symbolic
  product can be used for this Mat_Product, and install it if so.

  The backend path is selected when A and B have the same (device) type and neither operand is bound to the
  CPU; a per-product-type command line option lets the user force the CPU fallback. When PETSc is built
  without device support the backend symbolic is installed unconditionally for the supported product types.
  If no backend symbolic ends up installed, we fall back to the plain MPIAIJ product dispatch.
*/
PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat)
{
  Mat_Product *product = mat->product;
#if defined(PETSC_HAVE_DEVICE)
  PetscBool match  = PETSC_FALSE; /* set below: A and B types match and neither is bound to the CPU */
  PetscBool usecpu = PETSC_FALSE; /* user-requested CPU fallback, read from the options database */
#else
  PetscBool match = PETSC_TRUE; /* no device support: always eligible for the backend symbolic */
#endif

  PetscFunctionBegin;
  MatCheckProduct(mat, 1);
#if defined(PETSC_HAVE_DEVICE)
  if (!product->A->boundtocpu && !product->B->boundtocpu) PetscCall(PetscObjectTypeCompare((PetscObject)product->B, ((PetscObject)product->A)->type_name, &match));
  if (match) { /* we can always fallback to the CPU if requested */
    /* The option name depends on how the product was requested: the legacy API (MatMatMult, MatPtAP, ...)
       uses per-API option names, while the MatProduct interface uses the generic
       -mat_product_algorithm_backend_cpu name */
    switch (product->type) {
    case MATPRODUCT_AB:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatMatMult", "Mat");
        PetscCall(PetscOptionsBool("-matmatmult_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AB", "Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      }
      break;
    case MATPRODUCT_AtB:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatTransposeMatMult", "Mat");
        PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AtB", "Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      }
      break;
    case MATPRODUCT_PtAP:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatPtAP", "Mat");
        PetscCall(PetscOptionsBool("-matptap_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_PtAP", "Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      }
      break;
    default:
      break; /* unsupported product type: leave match as-is; backend handles only AB/AtB/PtAP below */
    }
    match = (PetscBool)!usecpu;
  }
#endif
  if (match) {
    switch (product->type) {
    case MATPRODUCT_AB:
    case MATPRODUCT_AtB:
    case MATPRODUCT_PtAP:
      mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND;
      break;
    default:
      break;
    }
  }
  /* fallback to MPIAIJ ops */
  if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   Produces a set of block column indices of the matrix row, one for each block represented in the original row

   n - the number of block indices in cc[]
   cc - the block indices (must be large enough to contain the indices)

   Note: the duplicate elimination below relies on the row's column indices being in increasing order,
   so that equal block indices idx[j]/bs appear consecutively.
*/
static inline PetscErrorCode MatCollapseRow(Mat Amat, PetscInt row, PetscInt bs, PetscInt *n, PetscInt *cc)
{
  PetscInt        cnt = -1, nidx, j; /* cnt = -1 so an empty row yields *n = 0 */
  const PetscInt *idx;

  PetscFunctionBegin;
  PetscCall(MatGetRow(Amat, row, &nidx, &idx, NULL));
  if (nidx) {
    cnt     = 0;
    cc[cnt] = idx[0] / bs;
    for (j = 1; j < nidx; j++) {
      if (cc[cnt] < idx[j] / bs) cc[++cnt] = idx[j] / bs; /* new block column encountered */
    }
  }
  PetscCall(MatRestoreRow(Amat, row, &nidx, &idx,
                           NULL));
  *n = cnt + 1;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows

   ncollapsed - the number of block indices
   collapsed - the block indices (must be large enough to contain the indices)

   w0/w1/w2 are caller-provided workspaces; the returned *collapsed aliases one of them,
   so it must not be freed independently and remains valid only as long as the workspaces do.
*/
static inline PetscErrorCode MatCollapseRows(Mat Amat, PetscInt start, PetscInt bs, PetscInt *w0, PetscInt *w1, PetscInt *w2, PetscInt *ncollapsed, PetscInt **collapsed)
{
  PetscInt i, nprev, *cprev = w0, ncur = 0, *ccur = w1, *merged = w2, *cprevtmp;

  PetscFunctionBegin;
  /* collapse the first row of the block, then merge in the remaining bs-1 rows one at a time */
  PetscCall(MatCollapseRow(Amat, start, bs, &nprev, cprev));
  for (i = start + 1; i < start + bs; i++) {
    PetscCall(MatCollapseRow(Amat, i, bs, &ncur, ccur));
    PetscCall(PetscMergeIntArray(nprev, cprev, ncur, ccur, &nprev, &merged));
    /* swap cprev and merged so the accumulated result is always in cprev */
    cprevtmp = cprev;
    cprev    = merged;
    merged   = cprevtmp;
  }
  *ncollapsed = nprev;
  if (collapsed) *collapsed = cprev;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatFilter_AIJ - create a copy of Gmat in *filteredG that keeps only the entries whose
  real part exceeds vfilter in magnitude (entries are inserted with their original values).

  This will eventually be folded into MatCreateGraph_AIJ() for optimal performance
*/
static PetscErrorCode MatFilter_AIJ(Mat Gmat, PetscReal vfilter, Mat *filteredG)
{
  PetscInt           Istart, Iend, ncols, nnz0, nnz1, NN, MM, nloc;
  Mat                tGmat;
  MPI_Comm           comm;
  const PetscScalar *vals;
  const PetscInt    *idx;
  PetscInt          *d_nnz, *o_nnz, kk, *garray = NULL, *AJ, maxcols = 0;
  MatScalar         *AA; // this is checked in graph
  PetscBool          isseqaij;
  Mat                a, b, c;
  MatType            jtype;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)Gmat, &comm));
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)Gmat, MATSEQAIJ, &isseqaij));
  PetscCall(MatGetType(Gmat, &jtype));
  PetscCall(MatCreate(comm, &tGmat));
  PetscCall(MatSetType(tGmat, jtype)); /* filtered matrix has the same type as the input */

  /* TODO GPU: this can be called when filter = 0 -> Probably provide MatAIJThresholdCompress that compresses the entries below a threshold?
     Also, if the matrix is symmetric, can we skip this operation? It can be very expensive on large matrices. */

  // global sizes
  PetscCall(MatGetSize(Gmat, &MM, &NN));
  PetscCall(MatGetOwnershipRange(Gmat, &Istart, &Iend));
  nloc = Iend - Istart;
  PetscCall(PetscMalloc2(nloc, &d_nnz, nloc, &o_nnz));
  if (isseqaij) {
    a = Gmat;
    b = NULL;
  } else {
    /* MPIAIJ: a = diagonal block, b = off-diagonal block, garray = local-to-global column map of b */
    Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data;
    a      = d->A;
    b      = d->B;
    garray = d->garray;
  }
  /* Determine upper bound on non-zeros needed in new filtered matrix */
  for (PetscInt row = 0; row < nloc; row++) {
    PetscCall(MatGetRow(a, row, &ncols, NULL, NULL));
    d_nnz[row] = ncols;
    if (ncols > maxcols) maxcols = ncols;
    PetscCall(MatRestoreRow(a, row, &ncols, NULL, NULL));
  }
  if (b) {
    for (PetscInt row = 0; row < nloc; row++) {
      PetscCall(MatGetRow(b, row, &ncols, NULL, NULL));
      o_nnz[row] = ncols;
      if (ncols > maxcols) maxcols = ncols;
      PetscCall(MatRestoreRow(b, row, &ncols, NULL, NULL));
    }
  }
  /* only one of the two preallocation calls takes effect, depending on tGmat's type */
  PetscCall(MatSetSizes(tGmat, nloc, nloc, MM, MM));
  PetscCall(MatSetBlockSizes(tGmat, 1, 1));
  PetscCall(MatSeqAIJSetPreallocation(tGmat, 0, d_nnz));
  PetscCall(MatMPIAIJSetPreallocation(tGmat, 0, d_nnz, 0, o_nnz));
  PetscCall(MatSetOption(tGmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); /* all insertions below are local rows */
  PetscCall(PetscFree2(d_nnz, o_nnz));
  //
  PetscCall(PetscMalloc2(maxcols, &AA, maxcols, &AJ)); /* per-row staging buffers for kept values/columns */
  nnz0 = nnz1 = 0;                                     /* nnz0 = entries scanned, nnz1 = entries kept */
  for (c = a, kk = 0; c && kk < 2; c = b, kk++) {      /* first pass over the diagonal block, second over the off-diagonal block (if any) */
    for (PetscInt row = 0, grow = Istart, ncol_row, jj; row < nloc; row++, grow++) {
      PetscCall(MatGetRow(c, row, &ncols, &idx, &vals));
      for (ncol_row = jj = 0; jj < ncols; jj++, nnz0++) {
        PetscScalar sv = PetscAbs(PetscRealPart(vals[jj]));
        if (PetscRealPart(sv) > vfilter) {
          nnz1++;
          /* diagonal block: local col + Istart gives the global column
             NOTE(review): assumes row and column ownership ranges coincide (square layout) — confirm for non-square input */
          PetscInt cid = idx[jj] + Istart; //diag
          if (c != a) cid = garray[idx[jj]]; /* off-diagonal block: map local col to global via garray */
          AA[ncol_row] = vals[jj];
          AJ[ncol_row] = cid;
          ncol_row++;
        }
      }
      PetscCall(MatRestoreRow(c, row, &ncols, &idx, &vals));
      PetscCall(MatSetValues(tGmat, 1, &grow, ncol_row, AJ, AA, INSERT_VALUES));
    }
  }
  PetscCall(PetscFree2(AA, AJ));
  PetscCall(MatAssemblyBegin(tGmat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(tGmat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatPropagateSymmetryOptions(Gmat, tGmat)); /* Normal Mat options are not relevant ? */

  PetscCall(PetscInfo(tGmat, "\t %g%% nnz after filtering, with threshold %g, %g nnz ave. (N=%" PetscInt_FMT ", max row size %d)\n", (!nnz0) ? 1. : 100. * (double)nnz1 / (double)nnz0, (double)vfilter, (!nloc) ? 1. : (double)nnz0 / (double)nloc, MM, (int)maxcols));

  *filteredG = tGmat;
  PetscCall(MatViewFromOptions(tGmat, NULL, "-mat_filter_graph_view"));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatCreateGraph_Simple_AIJ - create simple scalar matrix (graph) from potentially blocked matrix

  Input Parameters:
+ Amat - matrix
. symmetrize - make the result symmetric
. scale - scale with diagonal
- filter - drop entries below this threshold (ignored when negative)

  Output Parameter:
.
a_Gmat - output scalar graph >= 0 7801 7802 */ 7803 PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, PetscReal filter, Mat *a_Gmat) 7804 { 7805 PetscInt Istart, Iend, Ii, jj, kk, ncols, nloc, NN, MM, bs; 7806 MPI_Comm comm; 7807 Mat Gmat; 7808 PetscBool ismpiaij, isseqaij; 7809 Mat a, b, c; 7810 MatType jtype; 7811 7812 PetscFunctionBegin; 7813 PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm)); 7814 PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend)); 7815 PetscCall(MatGetSize(Amat, &MM, &NN)); 7816 PetscCall(MatGetBlockSize(Amat, &bs)); 7817 nloc = (Iend - Istart) / bs; 7818 7819 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATSEQAIJ, &isseqaij)); 7820 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATMPIAIJ, &ismpiaij)); 7821 PetscCheck(isseqaij || ismpiaij, comm, PETSC_ERR_USER, "Require (MPI)AIJ matrix type"); 7822 7823 /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */ 7824 /* A solution consists in providing a new API, MatAIJGetCollapsedAIJ, and each class can provide a fast 7825 implementation */ 7826 if (bs > 1) { 7827 PetscCall(MatGetType(Amat, &jtype)); 7828 PetscCall(MatCreate(comm, &Gmat)); 7829 PetscCall(MatSetType(Gmat, jtype)); 7830 PetscCall(MatSetSizes(Gmat, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE)); 7831 PetscCall(MatSetBlockSizes(Gmat, 1, 1)); 7832 if (isseqaij || ((Mat_MPIAIJ *)Amat->data)->garray) { 7833 PetscInt *d_nnz, *o_nnz; 7834 MatScalar *aa, val, *AA; 7835 PetscInt *aj, *ai, *AJ, nc, nmax = 0; 7836 if (isseqaij) { 7837 a = Amat; 7838 b = NULL; 7839 } else { 7840 Mat_MPIAIJ *d = (Mat_MPIAIJ *)Amat->data; 7841 a = d->A; 7842 b = d->B; 7843 } 7844 PetscCall(PetscInfo(Amat, "New bs>1 Graph. nloc=%" PetscInt_FMT "\n", nloc)); 7845 PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 0 : nloc, &o_nnz)); 7846 for (c = a, kk = 0; c && kk < 2; c = b, kk++) { 7847 PetscInt *nnz = (c == a) ? 
d_nnz : o_nnz; 7848 const PetscInt *cols; 7849 for (PetscInt brow = 0, jj, ok = 1, j0; brow < nloc * bs; brow += bs) { // block rows 7850 PetscCall(MatGetRow(c, brow, &jj, &cols, NULL)); 7851 nnz[brow / bs] = jj / bs; 7852 if (jj % bs) ok = 0; 7853 if (cols) j0 = cols[0]; 7854 else j0 = -1; 7855 PetscCall(MatRestoreRow(c, brow, &jj, &cols, NULL)); 7856 if (nnz[brow / bs] > nmax) nmax = nnz[brow / bs]; 7857 for (PetscInt ii = 1; ii < bs && nnz[brow / bs]; ii++) { // check for non-dense blocks 7858 PetscCall(MatGetRow(c, brow + ii, &jj, &cols, NULL)); 7859 if (jj % bs) ok = 0; 7860 if ((cols && j0 != cols[0]) || (!cols && j0 != -1)) ok = 0; 7861 if (nnz[brow / bs] != jj / bs) ok = 0; 7862 PetscCall(MatRestoreRow(c, brow + ii, &jj, &cols, NULL)); 7863 } 7864 if (!ok) { 7865 PetscCall(PetscFree2(d_nnz, o_nnz)); 7866 goto old_bs; 7867 } 7868 } 7869 } 7870 PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz)); 7871 PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz)); 7872 PetscCall(PetscFree2(d_nnz, o_nnz)); 7873 PetscCall(PetscMalloc2(nmax, &AA, nmax, &AJ)); 7874 // diag 7875 for (PetscInt brow = 0, n, grow; brow < nloc * bs; brow += bs) { // block rows 7876 Mat_SeqAIJ *aseq = (Mat_SeqAIJ *)a->data; 7877 ai = aseq->i; 7878 n = ai[brow + 1] - ai[brow]; 7879 aj = aseq->j + ai[brow]; 7880 for (int k = 0; k < n; k += bs) { // block columns 7881 AJ[k / bs] = aj[k] / bs + Istart / bs; // diag starts at (Istart,Istart) 7882 val = 0; 7883 for (int ii = 0; ii < bs; ii++) { // rows in block 7884 aa = aseq->a + ai[brow + ii] + k; 7885 for (int jj = 0; jj < bs; jj++) { // columns in block 7886 val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm 7887 } 7888 } 7889 AA[k / bs] = val; 7890 } 7891 grow = Istart / bs + brow / bs; 7892 PetscCall(MatSetValues(Gmat, 1, &grow, n / bs, AJ, AA, INSERT_VALUES)); 7893 } 7894 // off-diag 7895 if (ismpiaij) { 7896 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Amat->data; 7897 const PetscScalar *vals; 7898 const PetscInt *cols, *garray = 
aij->garray; 7899 PetscCheck(garray, PETSC_COMM_SELF, PETSC_ERR_USER, "No garray ?"); 7900 for (PetscInt brow = 0, grow; brow < nloc * bs; brow += bs) { // block rows 7901 PetscCall(MatGetRow(b, brow, &ncols, &cols, NULL)); 7902 for (int k = 0, cidx = 0; k < ncols; k += bs, cidx++) { 7903 AA[k / bs] = 0; 7904 AJ[cidx] = garray[cols[k]] / bs; 7905 } 7906 nc = ncols / bs; 7907 PetscCall(MatRestoreRow(b, brow, &ncols, &cols, NULL)); 7908 for (int ii = 0; ii < bs; ii++) { // rows in block 7909 PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals)); 7910 for (int k = 0; k < ncols; k += bs) { 7911 for (int jj = 0; jj < bs; jj++) { // cols in block 7912 AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj])); 7913 } 7914 } 7915 PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals)); 7916 } 7917 grow = Istart / bs + brow / bs; 7918 PetscCall(MatSetValues(Gmat, 1, &grow, nc, AJ, AA, INSERT_VALUES)); 7919 } 7920 } 7921 PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY)); 7922 PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY)); 7923 PetscCall(PetscFree2(AA, AJ)); 7924 } else { 7925 const PetscScalar *vals; 7926 const PetscInt *idx; 7927 PetscInt *d_nnz, *o_nnz, *w0, *w1, *w2; 7928 old_bs: 7929 /* 7930 Determine the preallocation needed for the scalar matrix derived from the vector matrix. 7931 */ 7932 PetscCall(PetscInfo(Amat, "OLD bs>1 CreateGraph\n")); 7933 PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 
0 : nloc, &o_nnz)); 7934 if (isseqaij) { 7935 PetscInt max_d_nnz; 7936 /* 7937 Determine exact preallocation count for (sequential) scalar matrix 7938 */ 7939 PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat, &max_d_nnz)); 7940 max_d_nnz = PetscMin(nloc, bs * max_d_nnz); 7941 PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2)); 7942 for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) PetscCall(MatCollapseRows(Amat, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL)); 7943 PetscCall(PetscFree3(w0, w1, w2)); 7944 } else if (ismpiaij) { 7945 Mat Daij, Oaij; 7946 const PetscInt *garray; 7947 PetscInt max_d_nnz; 7948 PetscCall(MatMPIAIJGetSeqAIJ(Amat, &Daij, &Oaij, &garray)); 7949 /* 7950 Determine exact preallocation count for diagonal block portion of scalar matrix 7951 */ 7952 PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij, &max_d_nnz)); 7953 max_d_nnz = PetscMin(nloc, bs * max_d_nnz); 7954 PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2)); 7955 for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) PetscCall(MatCollapseRows(Daij, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL)); 7956 PetscCall(PetscFree3(w0, w1, w2)); 7957 /* 7958 Over estimate (usually grossly over), preallocation count for off-diagonal portion of scalar matrix 7959 */ 7960 for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) { 7961 o_nnz[jj] = 0; 7962 for (kk = 0; kk < bs; kk++) { /* rows that get collapsed to a single row */ 7963 PetscCall(MatGetRow(Oaij, Ii + kk, &ncols, NULL, NULL)); 7964 o_nnz[jj] += ncols; 7965 PetscCall(MatRestoreRow(Oaij, Ii + kk, &ncols, NULL, NULL)); 7966 } 7967 if (o_nnz[jj] > (NN / bs - nloc)) o_nnz[jj] = NN / bs - nloc; 7968 } 7969 } else SETERRQ(comm, PETSC_ERR_USER, "Require AIJ matrix type"); 7970 /* get scalar copy (norms) of matrix */ 7971 PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz)); 7972 PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz)); 7973 PetscCall(PetscFree2(d_nnz, o_nnz)); 7974 for (Ii = Istart; Ii < Iend; Ii++) { 7975 
PetscInt dest_row = Ii / bs; 7976 PetscCall(MatGetRow(Amat, Ii, &ncols, &idx, &vals)); 7977 for (jj = 0; jj < ncols; jj++) { 7978 PetscInt dest_col = idx[jj] / bs; 7979 PetscScalar sv = PetscAbs(PetscRealPart(vals[jj])); 7980 PetscCall(MatSetValues(Gmat, 1, &dest_row, 1, &dest_col, &sv, ADD_VALUES)); 7981 } 7982 PetscCall(MatRestoreRow(Amat, Ii, &ncols, &idx, &vals)); 7983 } 7984 PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY)); 7985 PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY)); 7986 } 7987 } else { 7988 if (symmetrize || filter >= 0 || scale) PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat)); 7989 else { 7990 Gmat = Amat; 7991 PetscCall(PetscObjectReference((PetscObject)Gmat)); 7992 } 7993 if (isseqaij) { 7994 a = Gmat; 7995 b = NULL; 7996 } else { 7997 Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data; 7998 a = d->A; 7999 b = d->B; 8000 } 8001 if (filter >= 0 || scale) { 8002 /* take absolute value of each entry */ 8003 for (c = a, kk = 0; c && kk < 2; c = b, kk++) { 8004 MatInfo info; 8005 PetscScalar *avals; 8006 PetscCall(MatGetInfo(c, MAT_LOCAL, &info)); 8007 PetscCall(MatSeqAIJGetArray(c, &avals)); 8008 for (int jj = 0; jj < info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]); 8009 PetscCall(MatSeqAIJRestoreArray(c, &avals)); 8010 } 8011 } 8012 } 8013 if (symmetrize) { 8014 PetscBool isset, issym; 8015 PetscCall(MatIsSymmetricKnown(Amat, &isset, &issym)); 8016 if (!isset || !issym) { 8017 Mat matTrans; 8018 PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans)); 8019 PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric == PETSC_BOOL3_TRUE ? 
SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN)); 8020 PetscCall(MatDestroy(&matTrans)); 8021 } 8022 PetscCall(MatSetOption(Gmat, MAT_SYMMETRIC, PETSC_TRUE)); 8023 } else if (Amat != Gmat) PetscCall(MatPropagateSymmetryOptions(Amat, Gmat)); 8024 if (scale) { 8025 /* scale c for all diagonal values = 1 or -1 */ 8026 Vec diag; 8027 PetscCall(MatCreateVecs(Gmat, &diag, NULL)); 8028 PetscCall(MatGetDiagonal(Gmat, diag)); 8029 PetscCall(VecReciprocal(diag)); 8030 PetscCall(VecSqrtAbs(diag)); 8031 PetscCall(MatDiagonalScale(Gmat, diag, diag)); 8032 PetscCall(VecDestroy(&diag)); 8033 } 8034 PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view")); 8035 8036 if (filter >= 0) { 8037 Mat Fmat = NULL; /* some silly compiler needs this */ 8038 8039 PetscCall(MatFilter_AIJ(Gmat, filter, &Fmat)); 8040 PetscCall(MatDestroy(&Gmat)); 8041 Gmat = Fmat; 8042 } 8043 *a_Gmat = Gmat; 8044 PetscFunctionReturn(PETSC_SUCCESS); 8045 } 8046 8047 /* 8048 Special version for direct calls from Fortran 8049 */ 8050 #include <petsc/private/fortranimpl.h> 8051 8052 /* Change these macros so can be used in void function */ 8053 /* Identical to PetscCallVoid, except it assigns to *_ierr */ 8054 #undef PetscCall 8055 #define PetscCall(...) \ 8056 do { \ 8057 PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \ 8058 if (PetscUnlikely(ierr_msv_mpiaij)) { \ 8059 *_ierr = PetscError(PETSC_COMM_SELF, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr_msv_mpiaij, PETSC_ERROR_REPEAT, " "); \ 8060 return; \ 8061 } \ 8062 } while (0) 8063 8064 #undef SETERRQ 8065 #define SETERRQ(comm, ierr, ...) 
\
  do { \
    *_ierr = PetscError(comm, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr, PETSC_ERROR_INITIAL, __VA_ARGS__); \
    return; \
  } while (0)

/* Map the C symbol onto the Fortran compiler's name-mangling convention
   (all-caps, no trailing underscore, or the default trailing underscore). */
#if defined(PETSC_HAVE_FORTRAN_CAPS)
  #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
  #define matsetvaluesmpiaij_ matsetvaluesmpiaij
#else
#endif
/*
  matsetvaluesmpiaij_ - Fortran-callable fast path for MatSetValues() on a MATMPIAIJ matrix.

  All scalar arguments arrive as pointers (Fortran pass-by-reference); errors are reported
  by storing into *_ierr and returning, via the PetscCall()/SETERRQ() macros redefined
  above (so this function must NOT be called from C code expecting a PetscErrorCode).

  Input parameters:
    mmat  - the matrix (MATMPIAIJ)
    mm    - number of rows in im[]
    im    - global row indices (negative entries are skipped)
    mn    - number of columns in in[]
    in    - global column indices (negative entries are skipped)
    v     - logically two-dimensional array of values, laid out row- or
            column-oriented according to the matrix's roworiented flag
    maddv - INSERT_VALUES or ADD_VALUES
  Output parameter:
    _ierr - error code, 0 on success
*/
PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat, PetscInt *mm, const PetscInt im[], PetscInt *mn, const PetscInt in[], const PetscScalar v[], InsertMode *maddv, PetscErrorCode *_ierr)
{
  Mat         mat  = *mmat;
  PetscInt    m    = *mm, n = *mn;
  InsertMode  addv = *maddv;
  Mat_MPIAIJ *aij  = (Mat_MPIAIJ *)mat->data;
  PetscScalar value;

  MatCheckPreallocated(mat, 1);
  /* The first call fixes the insert mode; mixing INSERT_VALUES and ADD_VALUES
     before an assembly is an error. */
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
  else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values");
  {
    PetscInt  i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
    PetscInt  cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
    PetscBool roworiented = aij->roworiented;

    /* Some Variables required in the macro:
       MatSetValues_SeqAIJ_A_Private()/_B_Private() (defined earlier in this file)
       read and update these by name, so they must exist with exactly these
       identifiers.  A is the diagonal block, B the off-diagonal block. */
    Mat         A     = aij->A;
    Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
    PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
    MatScalar  *aa;
    /* zeros may be dropped only when adding, since inserting a zero is meaningful */
    PetscBool   ignorezeroentries = (((a->ignorezeroentries) && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
    Mat         B  = aij->B;
    Mat_SeqAIJ *b  = (Mat_SeqAIJ *)B->data;
    PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
    MatScalar  *ba;
    /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
     * cannot use "#if defined" inside a macro. */
    PETSC_UNUSED PetscBool inserted = PETSC_FALSE;

    /* Workspace used (and mutated) by the _Private insertion macros: current row
       pointers, row lengths, binary-search bounds, and last-column caches. */
    PetscInt   *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
    PetscInt    nonew = a->nonew;
    MatScalar  *ap1, *ap2;

    PetscFunctionBegin;
    PetscCall(MatSeqAIJGetArray(A, &aa));
    PetscCall(MatSeqAIJGetArray(B, &ba));
    for (i = 0; i < m; i++) {
      if (im[i] < 0) continue; /* negative row index: silently skipped by convention */
      PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
      if (im[i] >= rstart && im[i] < rend) {
        /* Row is owned locally: insert directly into A (diagonal) or B (off-diagonal). */
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j = 0; j < n; j++) {
          /* v[] is row- or column-major depending on the roworiented option */
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          /* never drop a diagonal entry, even if zero, so factorizations keep it */
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
          if (in[j] >= cstart && in[j] < cend) {
            /* column lies in the diagonal block */
            col = in[j] - cstart;
            MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
          } else if (in[j] < 0) continue;
          else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
            SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          } else {
            /* off-diagonal block: translate the global column to B's compressed
               local numbering via the colmap (only valid once assembled) */
            if (mat->was_assembled) {
              if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
              PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col));
              col--; /* colmap stores index+1 so that 0 means "absent" */
#else
              col = aij->colmap[in[j]] - 1;
#endif
              if (col < 0 && !((Mat_SeqAIJ *)(aij->A->data))->nonew) {
                /* new off-diagonal column: fall back to unassembled (global-column) form */
                PetscCall(MatDisAssemble_MPIAIJ(mat));
                col = in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
                B        = aij->B;
                b        = (Mat_SeqAIJ *)B->data;
                bimax    = b->imax;
                bi       = b->i;
                bilen    = b->ilen;
                bj       = b->j;
                rp2      = bj + bi[row];
                ap2      = ba + bi[row];
                rmax2    = bimax[row];
                nrow2    = bilen[row];
                low2     = 0;
                high2    = nrow2;
                bm       = aij->B->rmap->n;
                ba       = b->a;
                inserted = PETSC_FALSE;
              }
            } else col = in[j]; /* not yet assembled: B uses global column indices */
            MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
          }
        }
      } else if (!aij->donotstash) {
        /* Row owned by another rank: stash the values for communication at assembly time. */
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
    PetscCall(MatSeqAIJRestoreArray(A, &aa));
    PetscCall(MatSeqAIJRestoreArray(B, &ba));
  }
  PetscFunctionReturnVoid();
}

/* Undefining these here since they were redefined from their original definition above! No
 * other PETSc functions should be defined past this point, as it is impossible to recover the
 * original definitions */
#undef PetscCall
#undef SETERRQ