1 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 2 #include <petsc/private/vecimpl.h> 3 #include <petsc/private/sfimpl.h> 4 #include <petsc/private/isimpl.h> 5 #include <petscblaslapack.h> 6 #include <petscsf.h> 7 #include <petsc/private/hashmapi.h> 8 9 /* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and MatAssemblyEnd_MPI_Hash() */ 10 #define TYPE AIJ 11 #define TYPE_AIJ 12 #include "../src/mat/impls/aij/mpi/mpihashmat.h" 13 #undef TYPE 14 #undef TYPE_AIJ 15 16 static PetscErrorCode MatReset_MPIAIJ(Mat mat) 17 { 18 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 19 20 PetscFunctionBegin; 21 PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N)); 22 PetscCall(MatStashDestroy_Private(&mat->stash)); 23 PetscCall(VecDestroy(&aij->diag)); 24 PetscCall(MatDestroy(&aij->A)); 25 PetscCall(MatDestroy(&aij->B)); 26 #if defined(PETSC_USE_CTABLE) 27 PetscCall(PetscHMapIDestroy(&aij->colmap)); 28 #else 29 PetscCall(PetscFree(aij->colmap)); 30 #endif 31 PetscCall(PetscFree(aij->garray)); 32 PetscCall(VecDestroy(&aij->lvec)); 33 PetscCall(VecScatterDestroy(&aij->Mvctx)); 34 PetscCall(PetscFree2(aij->rowvalues, aij->rowindices)); 35 PetscCall(PetscFree(aij->ld)); 36 PetscFunctionReturn(PETSC_SUCCESS); 37 } 38 39 static PetscErrorCode MatResetHash_MPIAIJ(Mat mat) 40 { 41 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 42 /* Save the nonzero states of the component matrices because those are what are used to determine 43 the nonzero state of mat */ 44 PetscObjectState Astate = aij->A->nonzerostate, Bstate = aij->B->nonzerostate; 45 46 PetscFunctionBegin; 47 PetscCall(MatReset_MPIAIJ(mat)); 48 PetscCall(MatSetUp_MPI_Hash(mat)); 49 aij->A->nonzerostate = ++Astate, aij->B->nonzerostate = ++Bstate; 50 PetscFunctionReturn(PETSC_SUCCESS); 51 } 52 53 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 54 { 55 PetscFunctionBegin; 56 PetscCall(MatReset_MPIAIJ(mat)); 57 58 PetscCall(PetscFree(mat->data)); 59 
60 /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */ 61 PetscCall(PetscObjectCompose((PetscObject)mat, "MatMergeSeqsToMPI", NULL)); 62 63 PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL)); 64 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL)); 65 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL)); 66 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatIsTranspose_C", NULL)); 67 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocation_C", NULL)); 68 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetPreallocation_C", NULL)); 69 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetHash_C", NULL)); 70 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocationCSR_C", NULL)); 71 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL)); 72 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpibaij_C", NULL)); 73 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisbaij_C", NULL)); 74 #if defined(PETSC_HAVE_CUDA) 75 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcusparse_C", NULL)); 76 #endif 77 #if defined(PETSC_HAVE_HIP) 78 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijhipsparse_C", NULL)); 79 #endif 80 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 81 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijkokkos_C", NULL)); 82 #endif 83 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL)); 84 #if defined(PETSC_HAVE_ELEMENTAL) 85 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_elemental_C", NULL)); 86 #endif 87 #if defined(PETSC_HAVE_SCALAPACK) 88 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_scalapack_C", NULL)); 89 #endif 90 #if defined(PETSC_HAVE_HYPRE) 
91 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_hypre_C", NULL)); 92 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", NULL)); 93 #endif 94 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL)); 95 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_is_mpiaij_C", NULL)); 96 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpiaij_C", NULL)); 97 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetUseScalableIncreaseOverlap_C", NULL)); 98 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijperm_C", NULL)); 99 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijsell_C", NULL)); 100 #if defined(PETSC_HAVE_MKL_SPARSE) 101 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijmkl_C", NULL)); 102 #endif 103 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcrl_C", NULL)); 104 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL)); 105 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisell_C", NULL)); 106 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetPreallocationCOO_C", NULL)); 107 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetValuesCOO_C", NULL)); 108 PetscFunctionReturn(PETSC_SUCCESS); 109 } 110 111 static PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) 112 { 113 Mat B; 114 115 PetscFunctionBegin; 116 PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &B)); 117 PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject)B)); 118 PetscCall(MatGetRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done)); 119 
PetscCall(MatDestroy(&B)); 120 PetscFunctionReturn(PETSC_SUCCESS); 121 } 122 123 static PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) 124 { 125 Mat B; 126 127 PetscFunctionBegin; 128 PetscCall(PetscObjectQuery((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject *)&B)); 129 PetscCall(MatRestoreRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done)); 130 PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", NULL)); 131 PetscFunctionReturn(PETSC_SUCCESS); 132 } 133 134 /*MC 135 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 136 137 This matrix type is identical to` MATSEQAIJ` when constructed with a single process communicator, 138 and `MATMPIAIJ` otherwise. As a result, for single process communicators, 139 `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported 140 for communicators controlling multiple processes. It is recommended that you call both of 141 the above preallocation routines for simplicity. 142 143 Options Database Key: 144 . -mat_type aij - sets the matrix type to `MATAIJ` during a call to `MatSetFromOptions()` 145 146 Developer Note: 147 Level: beginner 148 149 Subclasses include `MATAIJCUSPARSE`, `MATAIJPERM`, `MATAIJSELL`, `MATAIJMKL`, `MATAIJCRL`, `MATAIJKOKKOS`,and also automatically switches over to use inodes when 150 enough exist. 151 152 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateAIJ()`, `MatCreateSeqAIJ()`, `MATSEQAIJ`, `MATMPIAIJ` 153 M*/ 154 155 /*MC 156 MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices. 157 158 This matrix type is identical to `MATSEQAIJCRL` when constructed with a single process communicator, 159 and `MATMPIAIJCRL` otherwise. 
As a result, for single process communicators, 160 `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported 161 for communicators controlling multiple processes. It is recommended that you call both of 162 the above preallocation routines for simplicity. 163 164 Options Database Key: 165 . -mat_type aijcrl - sets the matrix type to `MATMPIAIJCRL` during a call to `MatSetFromOptions()` 166 167 Level: beginner 168 169 .seealso: [](ch_matrices), `Mat`, `MatCreateMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL` 170 M*/ 171 172 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A, PetscBool flg) 173 { 174 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 175 176 PetscFunctionBegin; 177 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) || defined(PETSC_HAVE_VIENNACL) 178 A->boundtocpu = flg; 179 #endif 180 if (a->A) PetscCall(MatBindToCPU(a->A, flg)); 181 if (a->B) PetscCall(MatBindToCPU(a->B, flg)); 182 183 /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products. 184 * This maybe seems a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors 185 * to differ from the parent matrix. 
*/ 186 if (a->lvec) PetscCall(VecBindToCPU(a->lvec, flg)); 187 if (a->diag) PetscCall(VecBindToCPU(a->diag, flg)); 188 PetscFunctionReturn(PETSC_SUCCESS); 189 } 190 191 static PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs) 192 { 193 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data; 194 195 PetscFunctionBegin; 196 if (mat->A) { 197 PetscCall(MatSetBlockSizes(mat->A, rbs, cbs)); 198 PetscCall(MatSetBlockSizes(mat->B, rbs, 1)); 199 } 200 PetscFunctionReturn(PETSC_SUCCESS); 201 } 202 203 static PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M, IS *keptrows) 204 { 205 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data; 206 Mat_SeqAIJ *a = (Mat_SeqAIJ *)mat->A->data; 207 Mat_SeqAIJ *b = (Mat_SeqAIJ *)mat->B->data; 208 const PetscInt *ia, *ib; 209 const MatScalar *aa, *bb, *aav, *bav; 210 PetscInt na, nb, i, j, *rows, cnt = 0, n0rows; 211 PetscInt m = M->rmap->n, rstart = M->rmap->rstart; 212 213 PetscFunctionBegin; 214 *keptrows = NULL; 215 216 ia = a->i; 217 ib = b->i; 218 PetscCall(MatSeqAIJGetArrayRead(mat->A, &aav)); 219 PetscCall(MatSeqAIJGetArrayRead(mat->B, &bav)); 220 for (i = 0; i < m; i++) { 221 na = ia[i + 1] - ia[i]; 222 nb = ib[i + 1] - ib[i]; 223 if (!na && !nb) { 224 cnt++; 225 goto ok1; 226 } 227 aa = aav + ia[i]; 228 for (j = 0; j < na; j++) { 229 if (aa[j] != 0.0) goto ok1; 230 } 231 bb = PetscSafePointerPlusOffset(bav, ib[i]); 232 for (j = 0; j < nb; j++) { 233 if (bb[j] != 0.0) goto ok1; 234 } 235 cnt++; 236 ok1:; 237 } 238 PetscCallMPI(MPIU_Allreduce(&cnt, &n0rows, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)M))); 239 if (!n0rows) { 240 PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav)); 241 PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav)); 242 PetscFunctionReturn(PETSC_SUCCESS); 243 } 244 PetscCall(PetscMalloc1(M->rmap->n - cnt, &rows)); 245 cnt = 0; 246 for (i = 0; i < m; i++) { 247 na = ia[i + 1] - ia[i]; 248 nb = ib[i + 1] - ib[i]; 249 if (!na && !nb) continue; 250 aa = aav + ia[i]; 251 for (j = 0; j < na; j++) { 252 if (aa[j] != 
0.0) { 253 rows[cnt++] = rstart + i; 254 goto ok2; 255 } 256 } 257 bb = PetscSafePointerPlusOffset(bav, ib[i]); 258 for (j = 0; j < nb; j++) { 259 if (bb[j] != 0.0) { 260 rows[cnt++] = rstart + i; 261 goto ok2; 262 } 263 } 264 ok2:; 265 } 266 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), cnt, rows, PETSC_OWN_POINTER, keptrows)); 267 PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav)); 268 PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav)); 269 PetscFunctionReturn(PETSC_SUCCESS); 270 } 271 272 static PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y, Vec D, InsertMode is) 273 { 274 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Y->data; 275 PetscBool cong; 276 277 PetscFunctionBegin; 278 PetscCall(MatHasCongruentLayouts(Y, &cong)); 279 if (Y->assembled && cong) { 280 PetscCall(MatDiagonalSet(aij->A, D, is)); 281 } else { 282 PetscCall(MatDiagonalSet_Default(Y, D, is)); 283 } 284 PetscFunctionReturn(PETSC_SUCCESS); 285 } 286 287 static PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M, IS *zrows) 288 { 289 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)M->data; 290 PetscInt i, rstart, nrows, *rows; 291 292 PetscFunctionBegin; 293 *zrows = NULL; 294 PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A, &nrows, &rows)); 295 PetscCall(MatGetOwnershipRange(M, &rstart, NULL)); 296 for (i = 0; i < nrows; i++) rows[i] += rstart; 297 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), nrows, rows, PETSC_OWN_POINTER, zrows)); 298 PetscFunctionReturn(PETSC_SUCCESS); 299 } 300 301 static PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A, PetscInt type, PetscReal *reductions) 302 { 303 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)A->data; 304 PetscInt i, m, n, *garray = aij->garray; 305 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ *)aij->A->data; 306 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ *)aij->B->data; 307 PetscReal *work; 308 const PetscScalar *dummy; 309 310 PetscFunctionBegin; 311 PetscCall(MatGetSize(A, &m, &n)); 312 PetscCall(PetscCalloc1(n, &work)); 313 PetscCall(MatSeqAIJGetArrayRead(aij->A, &dummy)); 314 
PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &dummy)); 315 PetscCall(MatSeqAIJGetArrayRead(aij->B, &dummy)); 316 PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &dummy)); 317 if (type == NORM_2) { 318 for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i] * a_aij->a[i]); 319 for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i] * b_aij->a[i]); 320 } else if (type == NORM_1) { 321 for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 322 for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 323 } else if (type == NORM_INFINITY) { 324 for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 325 for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]), work[garray[b_aij->j[i]]]); 326 } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) { 327 for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]); 328 for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]); 329 } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) { 330 for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]); 331 for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]); 332 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type"); 333 if (type == NORM_INFINITY) { 334 PetscCallMPI(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A))); 335 } else { 336 
PetscCallMPI(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A))); 337 } 338 PetscCall(PetscFree(work)); 339 if (type == NORM_2) { 340 for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]); 341 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 342 for (i = 0; i < n; i++) reductions[i] /= m; 343 } 344 PetscFunctionReturn(PETSC_SUCCESS); 345 } 346 347 static PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A, IS *is) 348 { 349 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 350 IS sis, gis; 351 const PetscInt *isis, *igis; 352 PetscInt n, *iis, nsis, ngis, rstart, i; 353 354 PetscFunctionBegin; 355 PetscCall(MatFindOffBlockDiagonalEntries(a->A, &sis)); 356 PetscCall(MatFindNonzeroRows(a->B, &gis)); 357 PetscCall(ISGetSize(gis, &ngis)); 358 PetscCall(ISGetSize(sis, &nsis)); 359 PetscCall(ISGetIndices(sis, &isis)); 360 PetscCall(ISGetIndices(gis, &igis)); 361 362 PetscCall(PetscMalloc1(ngis + nsis, &iis)); 363 PetscCall(PetscArraycpy(iis, igis, ngis)); 364 PetscCall(PetscArraycpy(iis + ngis, isis, nsis)); 365 n = ngis + nsis; 366 PetscCall(PetscSortRemoveDupsInt(&n, iis)); 367 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 368 for (i = 0; i < n; i++) iis[i] += rstart; 369 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A), n, iis, PETSC_OWN_POINTER, is)); 370 371 PetscCall(ISRestoreIndices(sis, &isis)); 372 PetscCall(ISRestoreIndices(gis, &igis)); 373 PetscCall(ISDestroy(&sis)); 374 PetscCall(ISDestroy(&gis)); 375 PetscFunctionReturn(PETSC_SUCCESS); 376 } 377 378 /* 379 Local utility routine that creates a mapping from the global column 380 number to the local number in the off-diagonal part of the local 381 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 382 a slightly higher hash table cost; without it it is not scalable (each processor 383 has an order N integer array but is fast to access. 
384 */ 385 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 386 { 387 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 388 PetscInt n = aij->B->cmap->n, i; 389 390 PetscFunctionBegin; 391 PetscCheck(!n || aij->garray, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MPIAIJ Matrix was assembled but is missing garray"); 392 #if defined(PETSC_USE_CTABLE) 393 PetscCall(PetscHMapICreateWithSize(n, &aij->colmap)); 394 for (i = 0; i < n; i++) PetscCall(PetscHMapISet(aij->colmap, aij->garray[i] + 1, i + 1)); 395 #else 396 PetscCall(PetscCalloc1(mat->cmap->N + 1, &aij->colmap)); 397 for (i = 0; i < n; i++) aij->colmap[aij->garray[i]] = i + 1; 398 #endif 399 PetscFunctionReturn(PETSC_SUCCESS); 400 } 401 402 #define MatSetValues_SeqAIJ_A_Private(row, col, value, addv, orow, ocol) \ 403 do { \ 404 if (col <= lastcol1) low1 = 0; \ 405 else high1 = nrow1; \ 406 lastcol1 = col; \ 407 while (high1 - low1 > 5) { \ 408 t = (low1 + high1) / 2; \ 409 if (rp1[t] > col) high1 = t; \ 410 else low1 = t; \ 411 } \ 412 for (_i = low1; _i < high1; _i++) { \ 413 if (rp1[_i] > col) break; \ 414 if (rp1[_i] == col) { \ 415 if (addv == ADD_VALUES) { \ 416 ap1[_i] += value; \ 417 /* Not sure LogFlops will slow dow the code or not */ \ 418 (void)PetscLogFlops(1.0); \ 419 } else ap1[_i] = value; \ 420 goto a_noinsert; \ 421 } \ 422 } \ 423 if (value == 0.0 && ignorezeroentries && row != col) { \ 424 low1 = 0; \ 425 high1 = nrow1; \ 426 goto a_noinsert; \ 427 } \ 428 if (nonew == 1) { \ 429 low1 = 0; \ 430 high1 = nrow1; \ 431 goto a_noinsert; \ 432 } \ 433 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 434 MatSeqXAIJReallocateAIJ(A, am, 1, nrow1, row, col, rmax1, aa, ai, aj, rp1, ap1, aimax, nonew, MatScalar); \ 435 N = nrow1++ - 1; \ 436 a->nz++; \ 437 high1++; \ 438 /* shift up all the later entries in this row */ \ 439 PetscCall(PetscArraymove(rp1 + _i + 1, rp1 + _i, N - _i + 
1)); \ 440 PetscCall(PetscArraymove(ap1 + _i + 1, ap1 + _i, N - _i + 1)); \ 441 rp1[_i] = col; \ 442 ap1[_i] = value; \ 443 a_noinsert:; \ 444 ailen[row] = nrow1; \ 445 } while (0) 446 447 #define MatSetValues_SeqAIJ_B_Private(row, col, value, addv, orow, ocol) \ 448 do { \ 449 if (col <= lastcol2) low2 = 0; \ 450 else high2 = nrow2; \ 451 lastcol2 = col; \ 452 while (high2 - low2 > 5) { \ 453 t = (low2 + high2) / 2; \ 454 if (rp2[t] > col) high2 = t; \ 455 else low2 = t; \ 456 } \ 457 for (_i = low2; _i < high2; _i++) { \ 458 if (rp2[_i] > col) break; \ 459 if (rp2[_i] == col) { \ 460 if (addv == ADD_VALUES) { \ 461 ap2[_i] += value; \ 462 (void)PetscLogFlops(1.0); \ 463 } else ap2[_i] = value; \ 464 goto b_noinsert; \ 465 } \ 466 } \ 467 if (value == 0.0 && ignorezeroentries) { \ 468 low2 = 0; \ 469 high2 = nrow2; \ 470 goto b_noinsert; \ 471 } \ 472 if (nonew == 1) { \ 473 low2 = 0; \ 474 high2 = nrow2; \ 475 goto b_noinsert; \ 476 } \ 477 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 478 MatSeqXAIJReallocateAIJ(B, bm, 1, nrow2, row, col, rmax2, ba, bi, bj, rp2, ap2, bimax, nonew, MatScalar); \ 479 N = nrow2++ - 1; \ 480 b->nz++; \ 481 high2++; \ 482 /* shift up all the later entries in this row */ \ 483 PetscCall(PetscArraymove(rp2 + _i + 1, rp2 + _i, N - _i + 1)); \ 484 PetscCall(PetscArraymove(ap2 + _i + 1, ap2 + _i, N - _i + 1)); \ 485 rp2[_i] = col; \ 486 ap2[_i] = value; \ 487 b_noinsert:; \ 488 bilen[row] = nrow2; \ 489 } while (0) 490 491 static PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A, PetscInt row, const PetscScalar v[]) 492 { 493 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 494 Mat_SeqAIJ *a = (Mat_SeqAIJ *)mat->A->data, *b = (Mat_SeqAIJ *)mat->B->data; 495 PetscInt l, *garray = mat->garray, diag; 496 PetscScalar *aa, *ba; 497 498 PetscFunctionBegin; 499 /* code only works for square matrices A */ 500 501 /* find size 
of row to the left of the diagonal part */ 502 PetscCall(MatGetOwnershipRange(A, &diag, NULL)); 503 row = row - diag; 504 for (l = 0; l < b->i[row + 1] - b->i[row]; l++) { 505 if (garray[b->j[b->i[row] + l]] > diag) break; 506 } 507 if (l) { 508 PetscCall(MatSeqAIJGetArray(mat->B, &ba)); 509 PetscCall(PetscArraycpy(ba + b->i[row], v, l)); 510 PetscCall(MatSeqAIJRestoreArray(mat->B, &ba)); 511 } 512 513 /* diagonal part */ 514 if (a->i[row + 1] - a->i[row]) { 515 PetscCall(MatSeqAIJGetArray(mat->A, &aa)); 516 PetscCall(PetscArraycpy(aa + a->i[row], v + l, a->i[row + 1] - a->i[row])); 517 PetscCall(MatSeqAIJRestoreArray(mat->A, &aa)); 518 } 519 520 /* right of diagonal part */ 521 if (b->i[row + 1] - b->i[row] - l) { 522 PetscCall(MatSeqAIJGetArray(mat->B, &ba)); 523 PetscCall(PetscArraycpy(ba + b->i[row] + l, v + l + a->i[row + 1] - a->i[row], b->i[row + 1] - b->i[row] - l)); 524 PetscCall(MatSeqAIJRestoreArray(mat->B, &ba)); 525 } 526 PetscFunctionReturn(PETSC_SUCCESS); 527 } 528 529 PetscErrorCode MatSetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) 530 { 531 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 532 PetscScalar value = 0.0; 533 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend; 534 PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col; 535 PetscBool roworiented = aij->roworiented; 536 537 /* Some Variables required in the macro */ 538 Mat A = aij->A; 539 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 540 PetscInt *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j; 541 PetscBool ignorezeroentries = a->ignorezeroentries; 542 Mat B = aij->B; 543 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 544 PetscInt *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n; 545 MatScalar *aa, *ba; 546 PetscInt *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2; 547 PetscInt 
nonew; 548 MatScalar *ap1, *ap2; 549 550 PetscFunctionBegin; 551 PetscCall(MatSeqAIJGetArray(A, &aa)); 552 PetscCall(MatSeqAIJGetArray(B, &ba)); 553 for (i = 0; i < m; i++) { 554 if (im[i] < 0) continue; 555 PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1); 556 if (im[i] >= rstart && im[i] < rend) { 557 row = im[i] - rstart; 558 lastcol1 = -1; 559 rp1 = PetscSafePointerPlusOffset(aj, ai[row]); 560 ap1 = PetscSafePointerPlusOffset(aa, ai[row]); 561 rmax1 = aimax[row]; 562 nrow1 = ailen[row]; 563 low1 = 0; 564 high1 = nrow1; 565 lastcol2 = -1; 566 rp2 = PetscSafePointerPlusOffset(bj, bi[row]); 567 ap2 = PetscSafePointerPlusOffset(ba, bi[row]); 568 rmax2 = bimax[row]; 569 nrow2 = bilen[row]; 570 low2 = 0; 571 high2 = nrow2; 572 573 for (j = 0; j < n; j++) { 574 if (v) value = roworiented ? v[i * n + j] : v[i + j * m]; 575 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 576 if (in[j] >= cstart && in[j] < cend) { 577 col = in[j] - cstart; 578 nonew = a->nonew; 579 MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]); 580 } else if (in[j] < 0) { 581 continue; 582 } else { 583 PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1); 584 if (mat->was_assembled) { 585 if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 586 #if defined(PETSC_USE_CTABLE) 587 PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); /* map global col ids to local ones */ 588 col--; 589 #else 590 col = aij->colmap[in[j]] - 1; 591 #endif 592 if (col < 0 && !((Mat_SeqAIJ *)aij->B->data)->nonew) { /* col < 0 means in[j] is a new col for B */ 593 PetscCall(MatDisAssemble_MPIAIJ(mat, PETSC_FALSE)); /* Change aij->B from reduced/local format to expanded/global format */ 594 col = in[j]; 595 /* 
Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 596 B = aij->B; 597 b = (Mat_SeqAIJ *)B->data; 598 bimax = b->imax; 599 bi = b->i; 600 bilen = b->ilen; 601 bj = b->j; 602 ba = b->a; 603 rp2 = PetscSafePointerPlusOffset(bj, bi[row]); 604 ap2 = PetscSafePointerPlusOffset(ba, bi[row]); 605 rmax2 = bimax[row]; 606 nrow2 = bilen[row]; 607 low2 = 0; 608 high2 = nrow2; 609 bm = aij->B->rmap->n; 610 ba = b->a; 611 } else if (col < 0 && !(ignorezeroentries && value == 0.0)) { 612 if (1 == ((Mat_SeqAIJ *)aij->B->data)->nonew) { 613 PetscCall(PetscInfo(mat, "Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n", (double)PetscRealPart(value), im[i], in[j])); 614 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]); 615 } 616 } else col = in[j]; 617 nonew = b->nonew; 618 MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]); 619 } 620 } 621 } else { 622 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]); 623 if (!aij->donotstash) { 624 mat->assembled = PETSC_FALSE; 625 if (roworiented) { 626 PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i * n), (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 627 } else { 628 PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i), m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 629 } 630 } 631 } 632 } 633 PetscCall(MatSeqAIJRestoreArray(A, &aa)); /* aa, bb might have been free'd due to reallocation above. 
But we don't access them here */ 634 PetscCall(MatSeqAIJRestoreArray(B, &ba)); 635 PetscFunctionReturn(PETSC_SUCCESS); 636 } 637 638 /* 639 This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix. 640 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 641 No off-processor parts off the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE. 642 */ 643 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[]) 644 { 645 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 646 Mat A = aij->A; /* diagonal part of the matrix */ 647 Mat B = aij->B; /* off-diagonal part of the matrix */ 648 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 649 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 650 PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, col; 651 PetscInt *ailen = a->ilen, *aj = a->j; 652 PetscInt *bilen = b->ilen, *bj = b->j; 653 PetscInt am = aij->A->rmap->n, j; 654 PetscInt diag_so_far = 0, dnz; 655 PetscInt offd_so_far = 0, onz; 656 657 PetscFunctionBegin; 658 /* Iterate over all rows of the matrix */ 659 for (j = 0; j < am; j++) { 660 dnz = onz = 0; 661 /* Iterate over all non-zero columns of the current row */ 662 for (col = mat_i[j]; col < mat_i[j + 1]; col++) { 663 /* If column is in the diagonal */ 664 if (mat_j[col] >= cstart && mat_j[col] < cend) { 665 aj[diag_so_far++] = mat_j[col] - cstart; 666 dnz++; 667 } else { /* off-diagonal entries */ 668 bj[offd_so_far++] = mat_j[col]; 669 onz++; 670 } 671 } 672 ailen[j] = dnz; 673 bilen[j] = onz; 674 } 675 PetscFunctionReturn(PETSC_SUCCESS); 676 } 677 678 /* 679 This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix. 680 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 
681 No off-processor parts off the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ. 682 Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart; 683 would not be true and the more complex MatSetValues_MPIAIJ has to be used. 684 */ 685 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[], const PetscScalar mat_a[]) 686 { 687 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 688 Mat A = aij->A; /* diagonal part of the matrix */ 689 Mat B = aij->B; /* off-diagonal part of the matrix */ 690 Mat_SeqAIJ *aijd = (Mat_SeqAIJ *)aij->A->data, *aijo = (Mat_SeqAIJ *)aij->B->data; 691 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 692 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 693 PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend; 694 PetscInt *ailen = a->ilen, *aj = a->j; 695 PetscInt *bilen = b->ilen, *bj = b->j; 696 PetscInt am = aij->A->rmap->n, j; 697 PetscInt *full_diag_i = aijd->i, *full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point. 
*/ 698 PetscInt col, dnz_row, onz_row, rowstart_diag, rowstart_offd; 699 PetscScalar *aa = a->a, *ba = b->a; 700 701 PetscFunctionBegin; 702 /* Iterate over all rows of the matrix */ 703 for (j = 0; j < am; j++) { 704 dnz_row = onz_row = 0; 705 rowstart_offd = full_offd_i[j]; 706 rowstart_diag = full_diag_i[j]; 707 /* Iterate over all non-zero columns of the current row */ 708 for (col = mat_i[j]; col < mat_i[j + 1]; col++) { 709 /* If column is in the diagonal */ 710 if (mat_j[col] >= cstart && mat_j[col] < cend) { 711 aj[rowstart_diag + dnz_row] = mat_j[col] - cstart; 712 aa[rowstart_diag + dnz_row] = mat_a[col]; 713 dnz_row++; 714 } else { /* off-diagonal entries */ 715 bj[rowstart_offd + onz_row] = mat_j[col]; 716 ba[rowstart_offd + onz_row] = mat_a[col]; 717 onz_row++; 718 } 719 } 720 ailen[j] = dnz_row; 721 bilen[j] = onz_row; 722 } 723 PetscFunctionReturn(PETSC_SUCCESS); 724 } 725 726 static PetscErrorCode MatGetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[]) 727 { 728 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 729 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend; 730 PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col; 731 732 PetscFunctionBegin; 733 for (i = 0; i < m; i++) { 734 if (idxm[i] < 0) continue; /* negative row */ 735 PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1); 736 PetscCheck(idxm[i] >= rstart && idxm[i] < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported, row requested %" PetscInt_FMT " range [%" PetscInt_FMT " %" PetscInt_FMT ")", idxm[i], rstart, rend); 737 row = idxm[i] - rstart; 738 for (j = 0; j < n; j++) { 739 if (idxn[j] < 0) continue; /* negative column */ 740 PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, 
idxn[j], mat->cmap->N - 1); 741 if (idxn[j] >= cstart && idxn[j] < cend) { 742 col = idxn[j] - cstart; 743 PetscCall(MatGetValues(aij->A, 1, &row, 1, &col, v + i * n + j)); 744 } else { 745 if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 746 #if defined(PETSC_USE_CTABLE) 747 PetscCall(PetscHMapIGetWithDefault(aij->colmap, idxn[j] + 1, 0, &col)); 748 col--; 749 #else 750 col = aij->colmap[idxn[j]] - 1; 751 #endif 752 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v + i * n + j) = 0.0; 753 else PetscCall(MatGetValues(aij->B, 1, &row, 1, &col, v + i * n + j)); 754 } 755 } 756 } 757 PetscFunctionReturn(PETSC_SUCCESS); 758 } 759 760 static PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat, MatAssemblyType mode) 761 { 762 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 763 PetscInt nstash, reallocs; 764 765 PetscFunctionBegin; 766 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS); 767 768 PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range)); 769 PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs)); 770 PetscCall(PetscInfo(aij->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs)); 771 PetscFunctionReturn(PETSC_SUCCESS); 772 } 773 774 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat, MatAssemblyType mode) 775 { 776 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 777 PetscMPIInt n; 778 PetscInt i, j, rstart, ncols, flg; 779 PetscInt *row, *col; 780 PetscBool other_disassembled; 781 PetscScalar *val; 782 783 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 784 785 PetscFunctionBegin; 786 if (!aij->donotstash && !mat->nooffprocentries) { 787 while (1) { 788 PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg)); 789 if (!flg) break; 790 791 for (i = 0; i < n;) { 792 /* Now identify the consecutive vals belonging to the same row */ 793 for (j = i, rstart = row[j]; j < n; j++) { 794 if (row[j] != rstart) 
break; 795 } 796 if (j < n) ncols = j - i; 797 else ncols = n - i; 798 /* Now assemble all these values with a single function call */ 799 PetscCall(MatSetValues_MPIAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode)); 800 i = j; 801 } 802 } 803 PetscCall(MatStashScatterEnd_Private(&mat->stash)); 804 } 805 #if defined(PETSC_HAVE_DEVICE) 806 if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU; 807 /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */ 808 if (mat->boundtocpu) { 809 PetscCall(MatBindToCPU(aij->A, PETSC_TRUE)); 810 PetscCall(MatBindToCPU(aij->B, PETSC_TRUE)); 811 } 812 #endif 813 PetscCall(MatAssemblyBegin(aij->A, mode)); 814 PetscCall(MatAssemblyEnd(aij->A, mode)); 815 816 /* determine if any processor has disassembled, if so we must 817 also disassemble ourself, in order that we may reassemble. */ 818 /* 819 if nonzero structure of submatrix B cannot change then we know that 820 no processor disassembled thus we can skip this stuff 821 */ 822 if (!((Mat_SeqAIJ *)aij->B->data)->nonew) { 823 PetscCallMPI(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 824 if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globally it does not */ 825 PetscCall(MatDisAssemble_MPIAIJ(mat, PETSC_FALSE)); 826 } 827 } 828 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIAIJ(mat)); 829 PetscCall(MatSetOption(aij->B, MAT_USE_INODES, PETSC_FALSE)); 830 #if defined(PETSC_HAVE_DEVICE) 831 if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU; 832 #endif 833 PetscCall(MatAssemblyBegin(aij->B, mode)); 834 PetscCall(MatAssemblyEnd(aij->B, mode)); 835 836 PetscCall(PetscFree2(aij->rowvalues, aij->rowindices)); 
837 838 aij->rowvalues = NULL; 839 840 PetscCall(VecDestroy(&aij->diag)); 841 842 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 843 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ *)aij->A->data)->nonew) { 844 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 845 PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat))); 846 } 847 #if defined(PETSC_HAVE_DEVICE) 848 mat->offloadmask = PETSC_OFFLOAD_BOTH; 849 #endif 850 PetscFunctionReturn(PETSC_SUCCESS); 851 } 852 853 static PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 854 { 855 Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data; 856 857 PetscFunctionBegin; 858 PetscCall(MatZeroEntries(l->A)); 859 PetscCall(MatZeroEntries(l->B)); 860 PetscFunctionReturn(PETSC_SUCCESS); 861 } 862 863 static PetscErrorCode MatZeroRows_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 864 { 865 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 866 PetscInt *lrows; 867 PetscInt r, len; 868 PetscBool cong; 869 870 PetscFunctionBegin; 871 /* get locally owned rows */ 872 PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows)); 873 PetscCall(MatHasCongruentLayouts(A, &cong)); 874 /* fix right-hand side if needed */ 875 if (x && b) { 876 const PetscScalar *xx; 877 PetscScalar *bb; 878 879 PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout"); 880 PetscCall(VecGetArrayRead(x, &xx)); 881 PetscCall(VecGetArray(b, &bb)); 882 for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]]; 883 PetscCall(VecRestoreArrayRead(x, &xx)); 884 PetscCall(VecRestoreArray(b, &bb)); 885 } 886 887 if (diag != 0.0 && cong) { 888 PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL)); 889 PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 890 } else if (diag != 0.0) { /* non-square or non congruent layouts -> if 
keepnonzeropattern is false, we allow for new insertion */ 891 Mat_SeqAIJ *aijA = (Mat_SeqAIJ *)mat->A->data; 892 Mat_SeqAIJ *aijB = (Mat_SeqAIJ *)mat->B->data; 893 PetscInt nnwA, nnwB; 894 PetscBool nnzA, nnzB; 895 896 nnwA = aijA->nonew; 897 nnwB = aijB->nonew; 898 nnzA = aijA->keepnonzeropattern; 899 nnzB = aijB->keepnonzeropattern; 900 if (!nnzA) { 901 PetscCall(PetscInfo(mat->A, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n")); 902 aijA->nonew = 0; 903 } 904 if (!nnzB) { 905 PetscCall(PetscInfo(mat->B, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n")); 906 aijB->nonew = 0; 907 } 908 /* Must zero here before the next loop */ 909 PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL)); 910 PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 911 for (r = 0; r < len; ++r) { 912 const PetscInt row = lrows[r] + A->rmap->rstart; 913 if (row >= A->cmap->N) continue; 914 PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES)); 915 } 916 aijA->nonew = nnwA; 917 aijB->nonew = nnwB; 918 } else { 919 PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL)); 920 PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 921 } 922 PetscCall(PetscFree(lrows)); 923 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 924 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 925 926 /* only change matrix nonzero state if pattern was allowed to be changed */ 927 if (!((Mat_SeqAIJ *)mat->A->data)->keepnonzeropattern || !((Mat_SeqAIJ *)mat->A->data)->nonew) { 928 PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate; 929 PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A))); 930 } 931 PetscFunctionReturn(PETSC_SUCCESS); 932 } 933 934 static PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 
935 { 936 Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data; 937 PetscInt n = A->rmap->n; 938 PetscInt i, j, r, m, len = 0; 939 PetscInt *lrows, *owners = A->rmap->range; 940 PetscMPIInt p = 0; 941 PetscSFNode *rrows; 942 PetscSF sf; 943 const PetscScalar *xx; 944 PetscScalar *bb, *mask, *aij_a; 945 Vec xmask, lmask; 946 Mat_SeqAIJ *aij = (Mat_SeqAIJ *)l->B->data; 947 const PetscInt *aj, *ii, *ridx; 948 PetscScalar *aa; 949 950 PetscFunctionBegin; 951 /* Create SF where leaves are input rows and roots are owned rows */ 952 PetscCall(PetscMalloc1(n, &lrows)); 953 for (r = 0; r < n; ++r) lrows[r] = -1; 954 PetscCall(PetscMalloc1(N, &rrows)); 955 for (r = 0; r < N; ++r) { 956 const PetscInt idx = rows[r]; 957 PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N); 958 if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */ 959 PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p)); 960 } 961 rrows[r].rank = p; 962 rrows[r].index = rows[r] - owners[p]; 963 } 964 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 965 PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER)); 966 /* Collect flags for rows to be zeroed */ 967 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR)); 968 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR)); 969 PetscCall(PetscSFDestroy(&sf)); 970 /* Compress and put in row numbers */ 971 for (r = 0; r < n; ++r) 972 if (lrows[r] >= 0) lrows[len++] = r; 973 /* zero diagonal part of matrix */ 974 PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b)); 975 /* handle off-diagonal part of matrix */ 976 PetscCall(MatCreateVecs(A, &xmask, NULL)); 977 PetscCall(VecDuplicate(l->lvec, &lmask)); 978 PetscCall(VecGetArray(xmask, &bb)); 979 for (i = 0; i < len; i++) bb[lrows[i]] = 1; 980 PetscCall(VecRestoreArray(xmask, 
&bb)); 981 PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD)); 982 PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD)); 983 PetscCall(VecDestroy(&xmask)); 984 if (x && b) { /* this code is buggy when the row and column layout don't match */ 985 PetscBool cong; 986 987 PetscCall(MatHasCongruentLayouts(A, &cong)); 988 PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout"); 989 PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD)); 990 PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD)); 991 PetscCall(VecGetArrayRead(l->lvec, &xx)); 992 PetscCall(VecGetArray(b, &bb)); 993 } 994 PetscCall(VecGetArray(lmask, &mask)); 995 /* remove zeroed rows of off-diagonal matrix */ 996 PetscCall(MatSeqAIJGetArray(l->B, &aij_a)); 997 ii = aij->i; 998 for (i = 0; i < len; i++) PetscCall(PetscArrayzero(PetscSafePointerPlusOffset(aij_a, ii[lrows[i]]), ii[lrows[i] + 1] - ii[lrows[i]])); 999 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1000 if (aij->compressedrow.use) { 1001 m = aij->compressedrow.nrows; 1002 ii = aij->compressedrow.i; 1003 ridx = aij->compressedrow.rindex; 1004 for (i = 0; i < m; i++) { 1005 n = ii[i + 1] - ii[i]; 1006 aj = aij->j + ii[i]; 1007 aa = aij_a + ii[i]; 1008 1009 for (j = 0; j < n; j++) { 1010 if (PetscAbsScalar(mask[*aj])) { 1011 if (b) bb[*ridx] -= *aa * xx[*aj]; 1012 *aa = 0.0; 1013 } 1014 aa++; 1015 aj++; 1016 } 1017 ridx++; 1018 } 1019 } else { /* do not use compressed row format */ 1020 m = l->B->rmap->n; 1021 for (i = 0; i < m; i++) { 1022 n = ii[i + 1] - ii[i]; 1023 aj = aij->j + ii[i]; 1024 aa = aij_a + ii[i]; 1025 for (j = 0; j < n; j++) { 1026 if (PetscAbsScalar(mask[*aj])) { 1027 if (b) bb[i] -= *aa * xx[*aj]; 1028 *aa = 0.0; 1029 } 1030 aa++; 1031 aj++; 1032 } 1033 } 1034 } 1035 if (x && b) { 1036 PetscCall(VecRestoreArray(b, &bb)); 1037 
PetscCall(VecRestoreArrayRead(l->lvec, &xx)); 1038 } 1039 PetscCall(MatSeqAIJRestoreArray(l->B, &aij_a)); 1040 PetscCall(VecRestoreArray(lmask, &mask)); 1041 PetscCall(VecDestroy(&lmask)); 1042 PetscCall(PetscFree(lrows)); 1043 1044 /* only change matrix nonzero state if pattern was allowed to be changed */ 1045 if (!((Mat_SeqAIJ *)l->A->data)->nonew) { 1046 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1047 PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A))); 1048 } 1049 PetscFunctionReturn(PETSC_SUCCESS); 1050 } 1051 1052 static PetscErrorCode MatMult_MPIAIJ(Mat A, Vec xx, Vec yy) 1053 { 1054 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1055 PetscInt nt; 1056 VecScatter Mvctx = a->Mvctx; 1057 1058 PetscFunctionBegin; 1059 PetscCall(VecGetLocalSize(xx, &nt)); 1060 PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")", A->cmap->n, nt); 1061 PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1062 PetscUseTypeMethod(a->A, mult, xx, yy); 1063 PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1064 PetscUseTypeMethod(a->B, multadd, a->lvec, yy, yy); 1065 PetscFunctionReturn(PETSC_SUCCESS); 1066 } 1067 1068 static PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A, Vec bb, Vec xx) 1069 { 1070 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1071 1072 PetscFunctionBegin; 1073 PetscCall(MatMultDiagonalBlock(a->A, bb, xx)); 1074 PetscFunctionReturn(PETSC_SUCCESS); 1075 } 1076 1077 static PetscErrorCode MatMultAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz) 1078 { 1079 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1080 VecScatter Mvctx = a->Mvctx; 1081 1082 PetscFunctionBegin; 1083 PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1084 PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz)); 1085 PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, 
SCATTER_FORWARD)); 1086 PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz)); 1087 PetscFunctionReturn(PETSC_SUCCESS); 1088 } 1089 1090 static PetscErrorCode MatMultTranspose_MPIAIJ(Mat A, Vec xx, Vec yy) 1091 { 1092 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1093 1094 PetscFunctionBegin; 1095 /* do nondiagonal part */ 1096 PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec)); 1097 /* do local part */ 1098 PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy)); 1099 /* add partial results together */ 1100 PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE)); 1101 PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE)); 1102 PetscFunctionReturn(PETSC_SUCCESS); 1103 } 1104 1105 static PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat, Mat Bmat, PetscReal tol, PetscBool *f) 1106 { 1107 MPI_Comm comm; 1108 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)Amat->data, *Bij = (Mat_MPIAIJ *)Bmat->data; 1109 Mat Adia = Aij->A, Bdia = Bij->A, Aoff, Boff, *Aoffs, *Boffs; 1110 IS Me, Notme; 1111 PetscInt M, N, first, last, *notme, i; 1112 PetscBool lf; 1113 PetscMPIInt size; 1114 1115 PetscFunctionBegin; 1116 /* Easy test: symmetric diagonal block */ 1117 PetscCall(MatIsTranspose(Adia, Bdia, tol, &lf)); 1118 PetscCallMPI(MPIU_Allreduce(&lf, f, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)Amat))); 1119 if (!*f) PetscFunctionReturn(PETSC_SUCCESS); 1120 PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm)); 1121 PetscCallMPI(MPI_Comm_size(comm, &size)); 1122 if (size == 1) PetscFunctionReturn(PETSC_SUCCESS); 1123 1124 /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. 
*/ 1125 PetscCall(MatGetSize(Amat, &M, &N)); 1126 PetscCall(MatGetOwnershipRange(Amat, &first, &last)); 1127 PetscCall(PetscMalloc1(N - last + first, ¬me)); 1128 for (i = 0; i < first; i++) notme[i] = i; 1129 for (i = last; i < M; i++) notme[i - last + first] = i; 1130 PetscCall(ISCreateGeneral(MPI_COMM_SELF, N - last + first, notme, PETSC_COPY_VALUES, &Notme)); 1131 PetscCall(ISCreateStride(MPI_COMM_SELF, last - first, first, 1, &Me)); 1132 PetscCall(MatCreateSubMatrices(Amat, 1, &Me, &Notme, MAT_INITIAL_MATRIX, &Aoffs)); 1133 Aoff = Aoffs[0]; 1134 PetscCall(MatCreateSubMatrices(Bmat, 1, &Notme, &Me, MAT_INITIAL_MATRIX, &Boffs)); 1135 Boff = Boffs[0]; 1136 PetscCall(MatIsTranspose(Aoff, Boff, tol, f)); 1137 PetscCall(MatDestroyMatrices(1, &Aoffs)); 1138 PetscCall(MatDestroyMatrices(1, &Boffs)); 1139 PetscCall(ISDestroy(&Me)); 1140 PetscCall(ISDestroy(&Notme)); 1141 PetscCall(PetscFree(notme)); 1142 PetscFunctionReturn(PETSC_SUCCESS); 1143 } 1144 1145 static PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz) 1146 { 1147 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1148 1149 PetscFunctionBegin; 1150 /* do nondiagonal part */ 1151 PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec)); 1152 /* do local part */ 1153 PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz)); 1154 /* add partial results together */ 1155 PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE)); 1156 PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE)); 1157 PetscFunctionReturn(PETSC_SUCCESS); 1158 } 1159 1160 /* 1161 This only works correctly for square matrices where the subblock A->A is the 1162 diagonal block 1163 */ 1164 static PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A, Vec v) 1165 { 1166 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1167 1168 PetscFunctionBegin; 1169 PetscCheck(A->rmap->N == A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block"); 1170 
PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "row partition must equal col partition"); 1171 PetscCall(MatGetDiagonal(a->A, v)); 1172 PetscFunctionReturn(PETSC_SUCCESS); 1173 } 1174 1175 static PetscErrorCode MatScale_MPIAIJ(Mat A, PetscScalar aa) 1176 { 1177 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1178 1179 PetscFunctionBegin; 1180 PetscCall(MatScale(a->A, aa)); 1181 PetscCall(MatScale(a->B, aa)); 1182 PetscFunctionReturn(PETSC_SUCCESS); 1183 } 1184 1185 static PetscErrorCode MatView_MPIAIJ_Binary(Mat mat, PetscViewer viewer) 1186 { 1187 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1188 Mat_SeqAIJ *A = (Mat_SeqAIJ *)aij->A->data; 1189 Mat_SeqAIJ *B = (Mat_SeqAIJ *)aij->B->data; 1190 const PetscInt *garray = aij->garray; 1191 const PetscScalar *aa, *ba; 1192 PetscInt header[4], M, N, m, rs, cs, cnt, i, ja, jb; 1193 PetscInt64 nz, hnz; 1194 PetscInt *rowlens; 1195 PetscInt *colidxs; 1196 PetscScalar *matvals; 1197 PetscMPIInt rank; 1198 1199 PetscFunctionBegin; 1200 PetscCall(PetscViewerSetUp(viewer)); 1201 1202 M = mat->rmap->N; 1203 N = mat->cmap->N; 1204 m = mat->rmap->n; 1205 rs = mat->rmap->rstart; 1206 cs = mat->cmap->rstart; 1207 nz = A->nz + B->nz; 1208 1209 /* write matrix header */ 1210 header[0] = MAT_FILE_CLASSID; 1211 header[1] = M; 1212 header[2] = N; 1213 PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_INT64, MPI_SUM, 0, PetscObjectComm((PetscObject)mat))); 1214 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 1215 if (rank == 0) PetscCall(PetscIntCast(hnz, &header[3])); 1216 PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT)); 1217 1218 /* fill in and store row lengths */ 1219 PetscCall(PetscMalloc1(m, &rowlens)); 1220 for (i = 0; i < m; i++) rowlens[i] = A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i]; 1221 PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT)); 1222 PetscCall(PetscFree(rowlens)); 1223 1224 /* fill in and store column 
indices */ 1225 PetscCall(PetscMalloc1(nz, &colidxs)); 1226 for (cnt = 0, i = 0; i < m; i++) { 1227 for (jb = B->i[i]; jb < B->i[i + 1]; jb++) { 1228 if (garray[B->j[jb]] > cs) break; 1229 colidxs[cnt++] = garray[B->j[jb]]; 1230 } 1231 for (ja = A->i[i]; ja < A->i[i + 1]; ja++) colidxs[cnt++] = A->j[ja] + cs; 1232 for (; jb < B->i[i + 1]; jb++) colidxs[cnt++] = garray[B->j[jb]]; 1233 } 1234 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz); 1235 PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 1236 PetscCall(PetscFree(colidxs)); 1237 1238 /* fill in and store nonzero values */ 1239 PetscCall(MatSeqAIJGetArrayRead(aij->A, &aa)); 1240 PetscCall(MatSeqAIJGetArrayRead(aij->B, &ba)); 1241 PetscCall(PetscMalloc1(nz, &matvals)); 1242 for (cnt = 0, i = 0; i < m; i++) { 1243 for (jb = B->i[i]; jb < B->i[i + 1]; jb++) { 1244 if (garray[B->j[jb]] > cs) break; 1245 matvals[cnt++] = ba[jb]; 1246 } 1247 for (ja = A->i[i]; ja < A->i[i + 1]; ja++) matvals[cnt++] = aa[ja]; 1248 for (; jb < B->i[i + 1]; jb++) matvals[cnt++] = ba[jb]; 1249 } 1250 PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &aa)); 1251 PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &ba)); 1252 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz); 1253 PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 1254 PetscCall(PetscFree(matvals)); 1255 1256 /* write block size option to the viewer's .info file */ 1257 PetscCall(MatView_Binary_BlockSizes(mat, viewer)); 1258 PetscFunctionReturn(PETSC_SUCCESS); 1259 } 1260 1261 #include <petscdraw.h> 1262 static PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer) 1263 { 1264 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1265 PetscMPIInt rank = aij->rank, size = aij->size; 1266 PetscBool 
isdraw, iascii, isbinary; 1267 PetscViewer sviewer; 1268 PetscViewerFormat format; 1269 1270 PetscFunctionBegin; 1271 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1272 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 1273 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1274 if (iascii) { 1275 PetscCall(PetscViewerGetFormat(viewer, &format)); 1276 if (format == PETSC_VIEWER_LOAD_BALANCE) { 1277 PetscInt i, nmax = 0, nmin = PETSC_INT_MAX, navg = 0, *nz, nzlocal = ((Mat_SeqAIJ *)aij->A->data)->nz + ((Mat_SeqAIJ *)aij->B->data)->nz; 1278 PetscCall(PetscMalloc1(size, &nz)); 1279 PetscCallMPI(MPI_Allgather(&nzlocal, 1, MPIU_INT, nz, 1, MPIU_INT, PetscObjectComm((PetscObject)mat))); 1280 for (i = 0; i < size; i++) { 1281 nmax = PetscMax(nmax, nz[i]); 1282 nmin = PetscMin(nmin, nz[i]); 1283 navg += nz[i]; 1284 } 1285 PetscCall(PetscFree(nz)); 1286 navg = navg / size; 1287 PetscCall(PetscViewerASCIIPrintf(viewer, "Load Balance - Nonzeros: Min %" PetscInt_FMT " avg %" PetscInt_FMT " max %" PetscInt_FMT "\n", nmin, navg, nmax)); 1288 PetscFunctionReturn(PETSC_SUCCESS); 1289 } 1290 PetscCall(PetscViewerGetFormat(viewer, &format)); 1291 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1292 MatInfo info; 1293 PetscInt *inodes = NULL; 1294 1295 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 1296 PetscCall(MatGetInfo(mat, MAT_LOCAL, &info)); 1297 PetscCall(MatInodeGetInodeSizes(aij->A, NULL, &inodes, NULL)); 1298 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 1299 if (!inodes) { 1300 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, 1301 info.memory)); 1302 } else { 1303 PetscCall( 1304 PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" 
PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, info.memory)); 1305 } 1306 PetscCall(MatGetInfo(aij->A, MAT_LOCAL, &info)); 1307 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used)); 1308 PetscCall(MatGetInfo(aij->B, MAT_LOCAL, &info)); 1309 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used)); 1310 PetscCall(PetscViewerFlush(viewer)); 1311 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 1312 PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n")); 1313 PetscCall(VecScatterView(aij->Mvctx, viewer)); 1314 PetscFunctionReturn(PETSC_SUCCESS); 1315 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1316 PetscInt inodecount, inodelimit, *inodes; 1317 PetscCall(MatInodeGetInodeSizes(aij->A, &inodecount, &inodes, &inodelimit)); 1318 if (inodes) { 1319 PetscCall(PetscViewerASCIIPrintf(viewer, "using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n", inodecount, inodelimit)); 1320 } else { 1321 PetscCall(PetscViewerASCIIPrintf(viewer, "not using I-node (on process 0) routines\n")); 1322 } 1323 PetscFunctionReturn(PETSC_SUCCESS); 1324 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1325 PetscFunctionReturn(PETSC_SUCCESS); 1326 } 1327 } else if (isbinary) { 1328 if (size == 1) { 1329 PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name)); 1330 PetscCall(MatView(aij->A, viewer)); 1331 } else { 1332 PetscCall(MatView_MPIAIJ_Binary(mat, viewer)); 1333 } 1334 PetscFunctionReturn(PETSC_SUCCESS); 1335 } else if (iascii && size == 1) { 1336 PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name)); 1337 PetscCall(MatView(aij->A, viewer)); 1338 
PetscFunctionReturn(PETSC_SUCCESS); 1339 } else if (isdraw) { 1340 PetscDraw draw; 1341 PetscBool isnull; 1342 PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); 1343 PetscCall(PetscDrawIsNull(draw, &isnull)); 1344 if (isnull) PetscFunctionReturn(PETSC_SUCCESS); 1345 } 1346 1347 { /* assemble the entire matrix onto first processor */ 1348 Mat A = NULL, Av; 1349 IS isrow, iscol; 1350 1351 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->rmap->N : 0, 0, 1, &isrow)); 1352 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->cmap->N : 0, 0, 1, &iscol)); 1353 PetscCall(MatCreateSubMatrix(mat, isrow, iscol, MAT_INITIAL_MATRIX, &A)); 1354 PetscCall(MatMPIAIJGetSeqAIJ(A, &Av, NULL, NULL)); 1355 /* The commented code uses MatCreateSubMatrices instead */ 1356 /* 1357 Mat *AA, A = NULL, Av; 1358 IS isrow,iscol; 1359 1360 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow)); 1361 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? 
mat->cmap->N : 0,0,1,&iscol)); 1362 PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA)); 1363 if (rank == 0) { 1364 PetscCall(PetscObjectReference((PetscObject)AA[0])); 1365 A = AA[0]; 1366 Av = AA[0]; 1367 } 1368 PetscCall(MatDestroySubMatrices(1,&AA)); 1369 */ 1370 PetscCall(ISDestroy(&iscol)); 1371 PetscCall(ISDestroy(&isrow)); 1372 /* 1373 Everyone has to call to draw the matrix since the graphics waits are 1374 synchronized across all processors that share the PetscDraw object 1375 */ 1376 PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 1377 if (rank == 0) { 1378 if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)Av, ((PetscObject)mat)->name)); 1379 PetscCall(MatView_SeqAIJ(Av, sviewer)); 1380 } 1381 PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 1382 PetscCall(MatDestroy(&A)); 1383 } 1384 PetscFunctionReturn(PETSC_SUCCESS); 1385 } 1386 1387 PetscErrorCode MatView_MPIAIJ(Mat mat, PetscViewer viewer) 1388 { 1389 PetscBool iascii, isdraw, issocket, isbinary; 1390 1391 PetscFunctionBegin; 1392 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 1393 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1394 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1395 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket)); 1396 if (iascii || isdraw || isbinary || issocket) PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat, viewer)); 1397 PetscFunctionReturn(PETSC_SUCCESS); 1398 } 1399 1400 static PetscErrorCode MatSOR_MPIAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) 1401 { 1402 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data; 1403 Vec bb1 = NULL; 1404 PetscBool hasop; 1405 1406 PetscFunctionBegin; 1407 if (flag == SOR_APPLY_UPPER) { 1408 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, 
fshift, lits, 1, xx)); 1409 PetscFunctionReturn(PETSC_SUCCESS); 1410 } 1411 1412 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) PetscCall(VecDuplicate(bb, &bb1)); 1413 1414 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1415 if (flag & SOR_ZERO_INITIAL_GUESS) { 1416 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1417 its--; 1418 } 1419 1420 while (its--) { 1421 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1422 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1423 1424 /* update rhs: bb1 = bb - B*x */ 1425 PetscCall(VecScale(mat->lvec, -1.0)); 1426 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 1427 1428 /* local sweep */ 1429 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx)); 1430 } 1431 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1432 if (flag & SOR_ZERO_INITIAL_GUESS) { 1433 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1434 its--; 1435 } 1436 while (its--) { 1437 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1438 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1439 1440 /* update rhs: bb1 = bb - B*x */ 1441 PetscCall(VecScale(mat->lvec, -1.0)); 1442 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 1443 1444 /* local sweep */ 1445 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx)); 1446 } 1447 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1448 if (flag & SOR_ZERO_INITIAL_GUESS) { 1449 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1450 its--; 1451 } 1452 while (its--) { 1453 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1454 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1455 
1456 /* update rhs: bb1 = bb - B*x */ 1457 PetscCall(VecScale(mat->lvec, -1.0)); 1458 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 1459 1460 /* local sweep */ 1461 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx)); 1462 } 1463 } else if (flag & SOR_EISENSTAT) { 1464 Vec xx1; 1465 1466 PetscCall(VecDuplicate(bb, &xx1)); 1467 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx)); 1468 1469 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1470 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1471 if (!mat->diag) { 1472 PetscCall(MatCreateVecs(matin, &mat->diag, NULL)); 1473 PetscCall(MatGetDiagonal(matin, mat->diag)); 1474 } 1475 PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop)); 1476 if (hasop) { 1477 PetscCall(MatMultDiagonalBlock(matin, xx, bb1)); 1478 } else { 1479 PetscCall(VecPointwiseMult(bb1, mat->diag, xx)); 1480 } 1481 PetscCall(VecAYPX(bb1, (omega - 2.0) / omega, bb)); 1482 1483 PetscCall(MatMultAdd(mat->B, mat->lvec, bb1, bb1)); 1484 1485 /* local sweep */ 1486 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1)); 1487 PetscCall(VecAXPY(xx, 1.0, xx1)); 1488 PetscCall(VecDestroy(&xx1)); 1489 } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel SOR not supported"); 1490 1491 PetscCall(VecDestroy(&bb1)); 1492 1493 matin->factorerrortype = mat->A->factorerrortype; 1494 PetscFunctionReturn(PETSC_SUCCESS); 1495 } 1496 1497 static PetscErrorCode MatPermute_MPIAIJ(Mat A, IS rowp, IS colp, Mat *B) 1498 { 1499 Mat aA, aB, Aperm; 1500 const PetscInt *rwant, *cwant, *gcols, *ai, *bi, *aj, *bj; 1501 PetscScalar *aa, *ba; 1502 PetscInt i, j, m, n, ng, anz, bnz, *dnnz, *onnz, *tdnnz, *tonnz, *rdest, *cdest, *work, *gcdest; 1503 
PetscSF rowsf, sf; 1504 IS parcolp = NULL; 1505 PetscBool done; 1506 1507 PetscFunctionBegin; 1508 PetscCall(MatGetLocalSize(A, &m, &n)); 1509 PetscCall(ISGetIndices(rowp, &rwant)); 1510 PetscCall(ISGetIndices(colp, &cwant)); 1511 PetscCall(PetscMalloc3(PetscMax(m, n), &work, m, &rdest, n, &cdest)); 1512 1513 /* Invert row permutation to find out where my rows should go */ 1514 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &rowsf)); 1515 PetscCall(PetscSFSetGraphLayout(rowsf, A->rmap, A->rmap->n, NULL, PETSC_OWN_POINTER, rwant)); 1516 PetscCall(PetscSFSetFromOptions(rowsf)); 1517 for (i = 0; i < m; i++) work[i] = A->rmap->rstart + i; 1518 PetscCall(PetscSFReduceBegin(rowsf, MPIU_INT, work, rdest, MPI_REPLACE)); 1519 PetscCall(PetscSFReduceEnd(rowsf, MPIU_INT, work, rdest, MPI_REPLACE)); 1520 1521 /* Invert column permutation to find out where my columns should go */ 1522 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1523 PetscCall(PetscSFSetGraphLayout(sf, A->cmap, A->cmap->n, NULL, PETSC_OWN_POINTER, cwant)); 1524 PetscCall(PetscSFSetFromOptions(sf)); 1525 for (i = 0; i < n; i++) work[i] = A->cmap->rstart + i; 1526 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, work, cdest, MPI_REPLACE)); 1527 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, work, cdest, MPI_REPLACE)); 1528 PetscCall(PetscSFDestroy(&sf)); 1529 1530 PetscCall(ISRestoreIndices(rowp, &rwant)); 1531 PetscCall(ISRestoreIndices(colp, &cwant)); 1532 PetscCall(MatMPIAIJGetSeqAIJ(A, &aA, &aB, &gcols)); 1533 1534 /* Find out where my gcols should go */ 1535 PetscCall(MatGetSize(aB, NULL, &ng)); 1536 PetscCall(PetscMalloc1(ng, &gcdest)); 1537 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1538 PetscCall(PetscSFSetGraphLayout(sf, A->cmap, ng, NULL, PETSC_OWN_POINTER, gcols)); 1539 PetscCall(PetscSFSetFromOptions(sf)); 1540 PetscCall(PetscSFBcastBegin(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE)); 1541 PetscCall(PetscSFBcastEnd(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE)); 1542 
PetscCall(PetscSFDestroy(&sf)); 1543 1544 PetscCall(PetscCalloc4(m, &dnnz, m, &onnz, m, &tdnnz, m, &tonnz)); 1545 /* NOTE(review): the done flag filled in by MatGetRowIJ() is not inspected anywhere in this function */ PetscCall(MatGetRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done)); 1546 PetscCall(MatGetRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done)); 1547 /* count, per local row, how many permuted entries have destination row and destination column owned by the same rank (dnnz) and how many do not (onnz) */ for (i = 0; i < m; i++) { 1548 PetscInt row = rdest[i]; 1549 PetscMPIInt rowner; 1550 PetscCall(PetscLayoutFindOwner(A->rmap, row, &rowner)); 1551 for (j = ai[i]; j < ai[i + 1]; j++) { 1552 PetscInt col = cdest[aj[j]]; 1553 PetscMPIInt cowner; 1554 PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); /* Could build an index for the columns to eliminate this search */ 1555 if (rowner == cowner) dnnz[i]++; 1556 else onnz[i]++; 1557 } 1558 for (j = bi[i]; j < bi[i + 1]; j++) { 1559 PetscInt col = gcdest[bj[j]]; 1560 PetscMPIInt cowner; 1561 PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); 1562 if (rowner == cowner) dnnz[i]++; 1563 else onnz[i]++; 1564 } 1565 } 1566 /* move the counts over rowsf into tdnnz and tonnz, the arrays handed to MatCreateAIJ() as preallocation */ PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE)); 1567 PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE)); 1568 PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE)); 1569 PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE)); 1570 PetscCall(PetscSFDestroy(&rowsf)); 1571 1572 PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A), A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N, 0, tdnnz, 0, tonnz, &Aperm)); 1573 PetscCall(MatSeqAIJGetArray(aA, &aa)); 1574 PetscCall(MatSeqAIJGetArray(aB, &ba)); 1575 /* insert every local row of both blocks into Aperm at its destination row, with destination column indices */ for (i = 0; i < m; i++) { 1576 PetscInt *acols = dnnz, *bcols = onnz; /* Repurpose now-unneeded arrays */ 1577 PetscInt j0, rowlen; 1578 rowlen = ai[i + 1] - ai[i]; 1579 for (j0 = j = 0; j < rowlen; j0 = j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1580 for (; j < PetscMin(rowlen, j0 + m); j++) acols[j - j0] = cdest[aj[ai[i] + j]]; 1581 PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, acols, aa + ai[i] + j0, 
INSERT_VALUES)); 1582 } 1583 rowlen = bi[i + 1] - bi[i]; 1584 for (j0 = j = 0; j < rowlen; j0 = j) { 1585 for (; j < PetscMin(rowlen, j0 + m); j++) bcols[j - j0] = gcdest[bj[bi[i] + j]]; 1586 PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, bcols, ba + bi[i] + j0, INSERT_VALUES)); 1587 } 1588 } 1589 PetscCall(MatAssemblyBegin(Aperm, MAT_FINAL_ASSEMBLY)); 1590 PetscCall(MatAssemblyEnd(Aperm, MAT_FINAL_ASSEMBLY)); 1591 PetscCall(MatRestoreRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done)); 1592 PetscCall(MatRestoreRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done)); 1593 PetscCall(MatSeqAIJRestoreArray(aA, &aa)); 1594 PetscCall(MatSeqAIJRestoreArray(aB, &ba)); 1595 PetscCall(PetscFree4(dnnz, onnz, tdnnz, tonnz)); 1596 PetscCall(PetscFree3(work, rdest, cdest)); 1597 PetscCall(PetscFree(gcdest)); 1598 /* NOTE(review): parcolp is initialized to NULL at its declaration and never assigned in this function, so this branch is not taken */ if (parcolp) PetscCall(ISDestroy(&colp)); 1599 *B = Aperm; 1600 PetscFunctionReturn(PETSC_SUCCESS); 1601 } 1602 1603 /* MatGetGhosts_MPIAIJ - stores in *nghosts the column count of the off-diagonal block aij->B and, when ghosts is non-NULL, stores the pointer aij->garray in *ghosts (the array itself is not copied). */ static PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[]) 1604 { 1605 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1606 1607 PetscFunctionBegin; 1608 PetscCall(MatGetSize(aij->B, NULL, nghosts)); 1609 if (ghosts) *ghosts = aij->garray; 1610 PetscFunctionReturn(PETSC_SUCCESS); 1611 } 1612 1613 /* MatGetInfo_MPIAIJ - fills info with nz_used, nz_allocated, nz_unneeded, memory and mallocs summed over the diagonal block A and the off-diagonal block B. flag selects the per-process sums (MAT_LOCAL) or their MPI_MAX (MAT_GLOBAL_MAX) or MPI_SUM (MAT_GLOBAL_SUM) reduction over the communicator of matin. block_size is set to 1.0 and the fill-ratio and factor fields are set to 0. */ static PetscErrorCode MatGetInfo_MPIAIJ(Mat matin, MatInfoType flag, MatInfo *info) 1614 { 1615 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data; 1616 Mat A = mat->A, B = mat->B; 1617 PetscLogDouble isend[5], irecv[5]; 1618 1619 PetscFunctionBegin; 1620 info->block_size = 1.0; 1621 PetscCall(MatGetInfo(A, MAT_LOCAL, info)); 1622 1623 /* isend[] starts with the five counters of block A */ isend[0] = info->nz_used; 1624 isend[1] = info->nz_allocated; 1625 isend[2] = info->nz_unneeded; 1626 isend[3] = info->memory; 1627 isend[4] = info->mallocs; 1628 1629 PetscCall(MatGetInfo(B, MAT_LOCAL, info)); 1630 1631 /* and the counters of block B are added on top */ isend[0] += info->nz_used; 1632 isend[1] += info->nz_allocated; 1633 isend[2] += info->nz_unneeded; 1634 isend[3] += info->memory; 1635 isend[4] += info->mallocs; 
1636 /* isend[] holds the A plus B totals of this process; copy them out directly or reduce them first, depending on flag */ if (flag == MAT_LOCAL) { 1637 info->nz_used = isend[0]; 1638 info->nz_allocated = isend[1]; 1639 info->nz_unneeded = isend[2]; 1640 info->memory = isend[3]; 1641 info->mallocs = isend[4]; 1642 } else if (flag == MAT_GLOBAL_MAX) { 1643 PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin))); 1644 1645 info->nz_used = irecv[0]; 1646 info->nz_allocated = irecv[1]; 1647 info->nz_unneeded = irecv[2]; 1648 info->memory = irecv[3]; 1649 info->mallocs = irecv[4]; 1650 } else if (flag == MAT_GLOBAL_SUM) { 1651 PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin))); 1652 1653 info->nz_used = irecv[0]; 1654 info->nz_allocated = irecv[1]; 1655 info->nz_unneeded = irecv[2]; 1656 info->memory = irecv[3]; 1657 info->mallocs = irecv[4]; 1658 } 1659 /* NOTE(review): for any other flag value the five fields above keep what the preceding MatGetInfo() call on block B wrote into info */ info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1660 info->fill_ratio_needed = 0; 1661 info->factor_mallocs = 0; 1662 PetscFunctionReturn(PETSC_SUCCESS); 1663 } 1664 1665 /* MatSetOption_MPIAIJ - applies option op with value flg to A. Nonzero-pattern, inode, zero-entry, explicit-transpose and row-orientation options are forwarded to both sequential blocks after MatCheckPreallocated(); MAT_ROW_ORIENTED also sets a->roworiented; MAT_IGNORE_OFF_PROC_ENTRIES sets a->donotstash; the symmetry-type options are forwarded to the diagonal block only when it exists and the local row and column sizes are equal; MAT_SUBMAT_SINGLEIS sets A->submat_singleis; every other option is accepted without any action. */ PetscErrorCode MatSetOption_MPIAIJ(Mat A, MatOption op, PetscBool flg) 1666 { 1667 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1668 1669 PetscFunctionBegin; 1670 switch (op) { 1671 case MAT_NEW_NONZERO_LOCATIONS: 1672 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1673 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1674 case MAT_KEEP_NONZERO_PATTERN: 1675 case MAT_NEW_NONZERO_LOCATION_ERR: 1676 case MAT_USE_INODES: 1677 case MAT_IGNORE_ZERO_ENTRIES: 1678 case MAT_FORM_EXPLICIT_TRANSPOSE: 1679 MatCheckPreallocated(A, 1); 1680 PetscCall(MatSetOption(a->A, op, flg)); 1681 PetscCall(MatSetOption(a->B, op, flg)); 1682 break; 1683 case MAT_ROW_ORIENTED: 1684 MatCheckPreallocated(A, 1); 1685 a->roworiented = flg; 1686 1687 PetscCall(MatSetOption(a->A, op, flg)); 1688 PetscCall(MatSetOption(a->B, op, flg)); 1689 break; 1690 case MAT_IGNORE_OFF_PROC_ENTRIES: 1691 a->donotstash = flg; 1692 break; 1693 /* Symmetry flags are handled directly by MatSetOption() and they 
don't affect preallocation */ 1694 case MAT_SPD: 1695 case MAT_SYMMETRIC: 1696 case MAT_STRUCTURALLY_SYMMETRIC: 1697 case MAT_HERMITIAN: 1698 case MAT_SYMMETRY_ETERNAL: 1699 case MAT_STRUCTURAL_SYMMETRY_ETERNAL: 1700 case MAT_SPD_ETERNAL: 1701 /* if the diagonal matrix is square it inherits some of the properties above */ 1702 if (a->A && A->rmap->n == A->cmap->n) PetscCall(MatSetOption(a->A, op, flg)); 1703 break; 1704 case MAT_SUBMAT_SINGLEIS: 1705 A->submat_singleis = flg; 1706 break; 1707 default: 1708 break; 1709 } 1710 PetscFunctionReturn(PETSC_SUCCESS); 1711 } 1712 1713 /* MatGetRow_MPIAIJ - gives access to one locally owned row, addressed by its global index row. *nz receives the number of stored entries; when idx and/or v are non-NULL, *idx and *v are pointed at the global column indices and the values, merged from blocks A and B in increasing column order (both set to NULL for an empty row). The outputs live in the buffers mat->rowindices and mat->rowvalues, which are allocated on first use with the length of the longest local row. Only one row can be active at a time (PETSC_ERR_ARG_WRONGSTATE otherwise) and row must lie in [rstart, rend) (PETSC_ERR_ARG_OUTOFRANGE otherwise). */ PetscErrorCode MatGetRow_MPIAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1714 { 1715 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data; 1716 PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p; 1717 PetscInt i, *cworkA, *cworkB, **pcA, **pcB, cstart = matin->cmap->rstart; 1718 PetscInt nztot, nzA, nzB, lrow, rstart = matin->rmap->rstart, rend = matin->rmap->rend; 1719 PetscInt *cmap, *idx_p; 1720 1721 PetscFunctionBegin; 1722 PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active"); 1723 /* NOTE(review): the flag is raised before the row range check further down, so a failed range check leaves it set */ mat->getrowactive = PETSC_TRUE; 1724 1725 if (!mat->rowvalues && (idx || v)) { 1726 /* 1727 allocate enough space to hold information from the longest row. 
1728 */ 1729 Mat_SeqAIJ *Aa = (Mat_SeqAIJ *)mat->A->data, *Ba = (Mat_SeqAIJ *)mat->B->data; 1730 PetscInt max = 1, tmp; 1731 /* longest local row = maximum over rows of (entries in A) + (entries in B), with a floor of 1 */ for (i = 0; i < matin->rmap->n; i++) { 1732 tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i]; 1733 if (max < tmp) max = tmp; 1734 } 1735 PetscCall(PetscMalloc2(max, &mat->rowvalues, max, &mat->rowindices)); 1736 } 1737 1738 PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows"); 1739 lrow = row - rstart; 1740 1741 /* ask the block getrow routines only for what the caller wants; the B column indices stay requested whenever values are wanted, because the merge below reads cworkB to split the B entries around cstart */ pvA = &vworkA; 1742 pcA = &cworkA; 1743 pvB = &vworkB; 1744 pcB = &cworkB; 1745 if (!v) { 1746 pvA = NULL; 1747 pvB = NULL; 1748 } 1749 if (!idx) { 1750 pcA = NULL; 1751 if (!v) pcB = NULL; 1752 } 1753 PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA)); 1754 PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB)); 1755 nztot = nzA + nzB; 1756 1757 cmap = mat->garray; 1758 if (v || idx) { 1759 if (nztot) { 1760 /* Sort by increasing column numbers, assuming A and B already sorted */ 1761 /* imark becomes the number of leading B entries whose global column is below cstart; the merged row is B[0..imark), then all of A, then B[imark..nzB) */ PetscInt imark = -1; 1762 if (v) { 1763 *v = v_p = mat->rowvalues; 1764 for (i = 0; i < nzB; i++) { 1765 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1766 else break; 1767 } 1768 imark = i; 1769 for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i]; 1770 for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i]; 1771 } 1772 if (idx) { 1773 *idx = idx_p = mat->rowindices; 1774 /* reuse imark if the value pass already computed it, otherwise find it here */ if (imark > -1) { 1775 for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i]]; 1776 } else { 1777 for (i = 0; i < nzB; i++) { 1778 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1779 else break; 1780 } 1781 imark = i; 1782 } 1783 for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart + cworkA[i]; 1784 for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i]]; 1785 } 1786 } else { 1787 if (idx) *idx = NULL; 1788 if (v) *v = NULL; 1789 } 1790 } 1791 *nz = nztot; 1792 PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA)); 1793 PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, 
&nzB, pcB, pvB)); 1794 PetscFunctionReturn(PETSC_SUCCESS); 1795 } 1796 1797 /* MatRestoreRow_MPIAIJ - ends a MatGetRow_MPIAIJ() access by clearing aij->getrowactive; raises PETSC_ERR_ARG_WRONGSTATE if no row is active. The row, nz, idx and v arguments are not used. */ PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1798 { 1799 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1800 1801 PetscFunctionBegin; 1802 PetscCheck(aij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first"); 1803 aij->getrowactive = PETSC_FALSE; 1804 PetscFunctionReturn(PETSC_SUCCESS); 1805 } 1806 1807 /* MatNorm_MPIAIJ - computes into *norm the Frobenius norm, the 1-norm (largest column sum of absolute values) or the infinity norm (largest row sum of absolute values) of mat. When aij->size is 1 the call is delegated to the diagonal block; otherwise the local contributions of blocks A and B are combined across the communicator with an Allreduce. Any other norm type raises PETSC_ERR_SUP. */ static PetscErrorCode MatNorm_MPIAIJ(Mat mat, NormType type, PetscReal *norm) 1808 { 1809 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1810 Mat_SeqAIJ *amat = (Mat_SeqAIJ *)aij->A->data, *bmat = (Mat_SeqAIJ *)aij->B->data; 1811 PetscInt i, j, cstart = mat->cmap->rstart; 1812 PetscReal sum = 0.0; 1813 const MatScalar *v, *amata, *bmata; 1814 1815 PetscFunctionBegin; 1816 if (aij->size == 1) { 1817 PetscCall(MatNorm(aij->A, type, norm)); 1818 } else { 1819 PetscCall(MatSeqAIJGetArrayRead(aij->A, &amata)); 1820 PetscCall(MatSeqAIJGetArrayRead(aij->B, &bmata)); 1821 if (type == NORM_FROBENIUS) { 1822 /* local sum of squared magnitudes over both blocks, summed globally, then the square root */ v = amata; 1823 for (i = 0; i < amat->nz; i++) { 1824 sum += PetscRealPart(PetscConj(*v) * (*v)); 1825 v++; 1826 } 1827 v = bmata; 1828 for (i = 0; i < bmat->nz; i++) { 1829 sum += PetscRealPart(PetscConj(*v) * (*v)); 1830 v++; 1831 } 1832 PetscCallMPI(MPIU_Allreduce(&sum, norm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 1833 *norm = PetscSqrtReal(*norm); 1834 PetscCall(PetscLogFlops(2.0 * amat->nz + 2.0 * bmat->nz)); 1835 } else if (type == NORM_1) { /* max column norm */ 1836 PetscReal *tmp; 1837 PetscInt *jj, *garray = aij->garray; 1838 /* tmp is a zeroed accumulator indexed by global column (allocated with one spare slot) */ PetscCall(PetscCalloc1(mat->cmap->N + 1, &tmp)); 1839 *norm = 0.0; 1840 v = amata; 1841 jj = amat->j; 1842 for (j = 0; j < amat->nz; j++) { 1843 tmp[cstart + *jj++] += PetscAbsScalar(*v); 1844 v++; 1845 } 1846 v = bmata; 1847 jj = bmat->j; 1848 for (j = 0; j < bmat->nz; j++) { 1849 tmp[garray[*jj++]] += PetscAbsScalar(*v); 1850 v++; 1851 } 1852 
PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, tmp, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 1853 /* tmp[] now holds the column sums accumulated over all processes; the norm is their maximum */ for (j = 0; j < mat->cmap->N; j++) { 1854 if (tmp[j] > *norm) *norm = tmp[j]; 1855 } 1856 PetscCall(PetscFree(tmp)); 1857 PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0))); 1858 } else if (type == NORM_INFINITY) { /* max row norm */ 1859 PetscReal ntemp = 0.0; 1860 /* each local row sum adds the absolute values from block A and block B; ntemp keeps the local maximum */ for (j = 0; j < aij->A->rmap->n; j++) { 1861 v = PetscSafePointerPlusOffset(amata, amat->i[j]); 1862 sum = 0.0; 1863 for (i = 0; i < amat->i[j + 1] - amat->i[j]; i++) { 1864 sum += PetscAbsScalar(*v); 1865 v++; 1866 } 1867 v = PetscSafePointerPlusOffset(bmata, bmat->i[j]); 1868 for (i = 0; i < bmat->i[j + 1] - bmat->i[j]; i++) { 1869 sum += PetscAbsScalar(*v); 1870 v++; 1871 } 1872 if (sum > ntemp) ntemp = sum; 1873 } 1874 PetscCallMPI(MPIU_Allreduce(&ntemp, norm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat))); 1875 PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0))); 1876 } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for two norm"); 1877 PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &amata)); 1878 PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &bmata)); 1879 } 1880 PetscFunctionReturn(PETSC_SUCCESS); 1881 } 1882 1883 /* MatTranspose_MPIAIJ - forms the transpose of A. For MAT_INITIAL_MATRIX, or when *matout is A itself, a new matrix B of the same type is created with swapped sizes and preallocated from per-column counts of A; otherwise *matout is reused as B. The diagonal block is transposed locally with MatTranspose(), the off-diagonal entries are inserted with MatSetValues(), and after assembly B is returned in *matout (initial or reuse) or merged into A with MatHeaderMerge(). */ static PetscErrorCode MatTranspose_MPIAIJ(Mat A, MatReuse reuse, Mat *matout) 1884 { 1885 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *b; 1886 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *)a->A->data, *Bloc = (Mat_SeqAIJ *)a->B->data, *sub_B_diag; 1887 PetscInt M = A->rmap->N, N = A->cmap->N, ma, na, mb, nb, row, *cols, *cols_tmp, *B_diag_ilen, i, ncol, A_diag_ncol; 1888 const PetscInt *ai, *aj, *bi, *bj, *B_diag_i; 1889 Mat B, A_diag, *B_diag; 1890 const MatScalar *pbv, *bv; 1891 1892 PetscFunctionBegin; 1893 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout)); 1894 ma = A->rmap->n; 1895 na = A->cmap->n; 1896 mb = a->B->rmap->n; 1897 nb = a->B->cmap->n; 1898 ai = Aloc->i; 1899 aj = 
Aloc->j; 1900 bi = Bloc->i; 1901 bj = Bloc->j; 1902 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1903 PetscInt *d_nnz, *g_nnz, *o_nnz; 1904 PetscSFNode *oloc; 1905 PETSC_UNUSED PetscSF sf; 1906 1907 /* NOTE(review): oloc is allocated here and released below without being read or written in between */ PetscCall(PetscMalloc4(na, &d_nnz, na, &o_nnz, nb, &g_nnz, nb, &oloc)); 1908 /* compute d_nnz for preallocation */ 1909 PetscCall(PetscArrayzero(d_nnz, na)); 1910 for (i = 0; i < ai[ma]; i++) d_nnz[aj[i]]++; 1911 /* compute local off-diagonal contributions */ 1912 PetscCall(PetscArrayzero(g_nnz, nb)); 1913 for (i = 0; i < bi[ma]; i++) g_nnz[bj[i]]++; 1914 /* map those to global */ 1915 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1916 PetscCall(PetscSFSetGraphLayout(sf, A->cmap, nb, NULL, PETSC_USE_POINTER, a->garray)); 1917 PetscCall(PetscSFSetFromOptions(sf)); 1918 PetscCall(PetscArrayzero(o_nnz, na)); 1919 /* sum the per-ghost-column counts g_nnz into o_nnz over the star forest built from a->garray */ PetscCall(PetscSFReduceBegin(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM)); 1920 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM)); 1921 PetscCall(PetscSFDestroy(&sf)); 1922 1923 /* B gets the transposed layout: local and global row and column sizes and block sizes are swapped */ PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 1924 PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M)); 1925 PetscCall(MatSetBlockSizes(B, A->cmap->bs, A->rmap->bs)); 1926 PetscCall(MatSetType(B, ((PetscObject)A)->type_name)); 1927 PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz)); 1928 PetscCall(PetscFree4(d_nnz, o_nnz, g_nnz, oloc)); 1929 } else { 1930 B = *matout; 1931 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 1932 } 1933 1934 /* from here on B is the destination, either freshly preallocated above or the reused *matout */ b = (Mat_MPIAIJ *)B->data; 1935 A_diag = a->A; 1936 B_diag = &b->A; 1937 sub_B_diag = (Mat_SeqAIJ *)(*B_diag)->data; 1938 A_diag_ncol = A_diag->cmap->N; 1939 B_diag_ilen = sub_B_diag->ilen; 1940 B_diag_i = sub_B_diag->i; 1941 1942 /* Set ilen for diagonal of B */ 1943 for (i = 0; i < A_diag_ncol; i++) B_diag_ilen[i] = B_diag_i[i + 1] - B_diag_i[i]; 1944 1945 /* Transpose the diagonal part of the matrix. 
In contrast to the off-diagonal part, this can be done 1946 very quickly (=without using MatSetValues), because all writes are local. */ 1947 PetscCall(MatTransposeSetPrecursor(A_diag, *B_diag)); 1948 PetscCall(MatTranspose(A_diag, MAT_REUSE_MATRIX, B_diag)); 1949 1950 /* copy over the B part */ 1951 PetscCall(PetscMalloc1(bi[mb], &cols)); 1952 PetscCall(MatSeqAIJGetArrayRead(a->B, &bv)); 1953 pbv = bv; 1954 row = A->rmap->rstart; 1955 /* translate the compressed column indices of a->B to global columns through a->garray */ for (i = 0; i < bi[mb]; i++) cols[i] = a->garray[bj[i]]; 1956 cols_tmp = cols; 1957 /* row i of a->B is inserted as ncol entries in the single column row of B, i.e. with row and column roles exchanged */ for (i = 0; i < mb; i++) { 1958 ncol = bi[i + 1] - bi[i]; 1959 PetscCall(MatSetValues(B, ncol, cols_tmp, 1, &row, pbv, INSERT_VALUES)); 1960 row++; 1961 if (pbv) pbv += ncol; 1962 if (cols_tmp) cols_tmp += ncol; 1963 } 1964 PetscCall(PetscFree(cols)); 1965 PetscCall(MatSeqAIJRestoreArrayRead(a->B, &bv)); 1966 1967 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 1968 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 1969 /* any other reuse value takes the else branch and replaces the contents of A by B */ if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1970 *matout = B; 1971 } else { 1972 PetscCall(MatHeaderMerge(A, &B)); 1973 } 1974 PetscFunctionReturn(PETSC_SUCCESS); 1975 } 1976 1977 /* MatDiagonalScale_MPIAIJ - scales mat on the left by the vector ll and on the right by the vector rr; either may be NULL. The local sizes of ll and rr are checked against the local row and column sizes of mat (PETSC_ERR_ARG_SIZ on mismatch). The scatter of rr into aij->lvec is started first, the left scaling of the off-diagonal block and the full scaling of the diagonal block run while it is in flight, and the off-diagonal block is right-scaled by aij->lvec once the scatter has ended. */ static PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat, Vec ll, Vec rr) 1978 { 1979 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1980 Mat a = aij->A, b = aij->B; 1981 PetscInt s1, s2, s3; 1982 1983 PetscFunctionBegin; 1984 PetscCall(MatGetLocalSize(mat, &s2, &s3)); 1985 if (rr) { 1986 PetscCall(VecGetLocalSize(rr, &s1)); 1987 PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size"); 1988 /* Overlap communication with computation. 
*/ 1989 PetscCall(VecScatterBegin(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1990 } 1991 if (ll) { 1992 PetscCall(VecGetLocalSize(ll, &s1)); 1993 PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size"); 1994 /* left-scale the off-diagonal block only; its right scaling waits for the scatter to finish */ PetscUseTypeMethod(b, diagonalscale, ll, NULL); 1995 } 1996 /* scale the diagonal block */ 1997 PetscUseTypeMethod(a, diagonalscale, ll, rr); 1998 1999 if (rr) { 2000 /* Do a scatter end and then right scale the off-diagonal block */ 2001 PetscCall(VecScatterEnd(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2002 PetscUseTypeMethod(b, diagonalscale, NULL, aij->lvec); 2003 } 2004 PetscFunctionReturn(PETSC_SUCCESS); 2005 } 2006 2007 /* MatSetUnfactored_MPIAIJ - forwards MatSetUnfactored() to the diagonal block a->A; the off-diagonal block is not touched. */ static PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2008 { 2009 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2010 2011 PetscFunctionBegin; 2012 PetscCall(MatSetUnfactored(a->A)); 2013 PetscFunctionReturn(PETSC_SUCCESS); 2014 } 2015 2016 /* MatEqual_MPIAIJ - compares A and B block by block. Locally the diagonal blocks are compared first and the off-diagonal blocks only if those matched; *flag receives the logical AND of the local results over the communicator of A. */ static PetscErrorCode MatEqual_MPIAIJ(Mat A, Mat B, PetscBool *flag) 2017 { 2018 Mat_MPIAIJ *matB = (Mat_MPIAIJ *)B->data, *matA = (Mat_MPIAIJ *)A->data; 2019 Mat a, b, c, d; 2020 PetscBool flg; 2021 2022 PetscFunctionBegin; 2023 /* a and b are the diagonal and off-diagonal blocks of A; c and d are those of B */ a = matA->A; 2024 b = matA->B; 2025 c = matB->A; 2026 d = matB->B; 2027 2028 PetscCall(MatEqual(a, c, &flg)); 2029 if (flg) PetscCall(MatEqual(b, d, &flg)); 2030 PetscCallMPI(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A))); 2031 PetscFunctionReturn(PETSC_SUCCESS); 2032 } 2033 2034 /* MatCopy_MPIAIJ - copies A into B. The block-by-block copy is used only when str is SAME_NONZERO_PATTERN and both matrices have the same ops->copy implementation; every other case goes through MatCopy_Basic(). The object state of B is increased afterwards in both cases. */ static PetscErrorCode MatCopy_MPIAIJ(Mat A, Mat B, MatStructure str) 2035 { 2036 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2037 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2038 2039 PetscFunctionBegin; 2040 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
*/ 2041 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2042 /* because of the column compression in the off-processor part of the matrix a->B, 2043 the number of columns in a->B and b->B may be different, hence we cannot call 2044 the MatCopy() directly on the two parts. If need be, we can provide a more 2045 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2046 then copying the submatrices */ 2047 PetscCall(MatCopy_Basic(A, B, str)); 2048 } else { 2049 PetscCall(MatCopy(a->A, b->A, str)); 2050 PetscCall(MatCopy(a->B, b->B, str)); 2051 } 2052 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 2053 PetscFunctionReturn(PETSC_SUCCESS); 2054 } 2055 2056 /* 2057 Computes the number of nonzeros per row needed for preallocation when X and Y 2058 have different nonzero structure. 2059 */ /* Inputs are the CSR row pointers (xi, yi) and column indices (xj, yj) of X and Y for m rows, plus the maps xltog and yltog that translate each matrix column index to a common numbering. Output nnz[i] is the number of distinct translated columns appearing in row i of X or Y. Each row is processed in a single two-pointer merge. NOTE(review): the merge appears to assume that the translated columns of every row are in increasing order - confirm with callers. */ 2060 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m, const PetscInt *xi, const PetscInt *xj, const PetscInt *xltog, const PetscInt *yi, const PetscInt *yj, const PetscInt *yltog, PetscInt *nnz) 2061 { 2062 PetscInt i, j, k, nzx, nzy; 2063 2064 PetscFunctionBegin; 2065 /* Set the number of nonzeros in the new matrix */ 2066 for (i = 0; i < m; i++) { 2067 const PetscInt *xjj = PetscSafePointerPlusOffset(xj, xi[i]), *yjj = PetscSafePointerPlusOffset(yj, yi[i]); 2068 nzx = xi[i + 1] - xi[i]; 2069 nzy = yi[i + 1] - yi[i]; 2070 nnz[i] = 0; 2071 for (j = 0, k = 0; j < nzx; j++) { /* Point in X */ 2072 for (; k < nzy && yltog[yjj[k]] < xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2073 if (k < nzy && yltog[yjj[k]] == xltog[xjj[j]]) k++; /* Skip duplicate */ 2074 nnz[i]++; 2075 } 2076 /* whatever is left in the Y row lies beyond the last X column and is counted as is */ for (; k < nzy; k++) nnz[i]++; 2077 } 2078 PetscFunctionReturn(PETSC_SUCCESS); 2079 } 2080 2081 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ /* Thin wrapper: hands the row pointers and column indices of the sequential matrices X and Y, with Y->rmap->N as the row count, to MatAXPYGetPreallocation_MPIX_private(). */ 2082 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz) 2083 { 
2084 PetscInt m = Y->rmap->N; 2085 Mat_SeqAIJ *x = (Mat_SeqAIJ *)X->data; 2086 Mat_SeqAIJ *y = (Mat_SeqAIJ *)Y->data; 2087 2088 PetscFunctionBegin; 2089 PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz)); 2090 PetscFunctionReturn(PETSC_SUCCESS); 2091 } 2092 2093 /* MatAXPY_MPIAIJ - performs the AXPY update of Y with scalar a and matrix X (Y = a*X + Y in the PETSc MatAXPY convention). With SAME_NONZERO_PATTERN the update is applied to the diagonal and off-diagonal blocks separately; with SUBSET_NONZERO_PATTERN it goes through MatAXPY_Basic(); for any other str a new matrix B with the layouts, type and name of Y is preallocated for the merged pattern, filled by MatAXPY_BasicWithPreallocation() and then merged into Y with MatHeaderMerge(). */ static PetscErrorCode MatAXPY_MPIAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str) 2094 { 2095 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data, *yy = (Mat_MPIAIJ *)Y->data; 2096 2097 PetscFunctionBegin; 2098 if (str == SAME_NONZERO_PATTERN) { 2099 PetscCall(MatAXPY(yy->A, a, xx->A, str)); 2100 PetscCall(MatAXPY(yy->B, a, xx->B, str)); 2101 } else if (str == SUBSET_NONZERO_PATTERN) { /* the nonzeros of X are a subset of those of Y */ 2102 PetscCall(MatAXPY_Basic(Y, a, X, str)); 2103 } else { 2104 Mat B; 2105 PetscInt *nnz_d, *nnz_o; 2106 2107 /* one count per row of the diagonal block and one per row of the off-diagonal block */ PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d)); 2108 PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o)); 2109 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B)); 2110 PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name)); 2111 PetscCall(MatSetLayouts(B, Y->rmap, Y->cmap)); 2112 PetscCall(MatSetType(B, ((PetscObject)Y)->type_name)); 2113 PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A, xx->A, nnz_d)); 2114 /* the off-diagonal blocks use compressed columns, so their garray maps are passed along for the comparison */ PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o)); 2115 PetscCall(MatMPIAIJSetPreallocation(B, 0, nnz_d, 0, nnz_o)); 2116 PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str)); 2117 PetscCall(MatHeaderMerge(Y, &B)); 2118 PetscCall(PetscFree(nnz_d)); 2119 PetscCall(PetscFree(nnz_o)); 2120 } 2121 PetscFunctionReturn(PETSC_SUCCESS); 2122 } 2123 2124 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat); 2125 2126 /* MatConjugate_MPIAIJ - when PetscDefined(USE_COMPLEX) holds, applies MatConjugate_SeqAIJ() to the diagonal and the off-diagonal block; otherwise it does nothing. */ static PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2127 { 2128 PetscFunctionBegin; 2129 if (PetscDefined(USE_COMPLEX)) { 2130 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2131 2132 PetscCall(MatConjugate_SeqAIJ(aij->A)); 2133 PetscCall(MatConjugate_SeqAIJ(aij->B)); 2134 } 2135 
PetscFunctionReturn(PETSC_SUCCESS); 2136 } 2137 2138 /* MatRealPart_MPIAIJ - forwards MatRealPart() to the diagonal block and then to the off-diagonal block. */ static PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2139 { 2140 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2141 2142 PetscFunctionBegin; 2143 PetscCall(MatRealPart(a->A)); 2144 PetscCall(MatRealPart(a->B)); 2145 PetscFunctionReturn(PETSC_SUCCESS); 2146 } 2147 2148 /* MatImaginaryPart_MPIAIJ - forwards MatImaginaryPart() to the diagonal block and then to the off-diagonal block. */ static PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2149 { 2150 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2151 2152 PetscFunctionBegin; 2153 PetscCall(MatImaginaryPart(a->A)); 2154 PetscCall(MatImaginaryPart(a->B)); 2155 PetscFunctionReturn(PETSC_SUCCESS); 2156 } 2157 2158 /* MatGetRowMaxAbs_MPIAIJ - for each local row, stores in v whichever of the per-block results of MatGetRowMaxAbs() on blocks A and B has the larger absolute value (the A result on a tie). When idx is non-NULL it receives the matching global column: A indices are shifted by cmap->rstart where the A result is nonzero, B indices are translated through a->garray, and on a tie the smaller global column is kept provided B reported a valid index. */ static PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2159 { 2160 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2161 PetscInt i, *idxb = NULL, m = A->rmap->n; 2162 PetscScalar *vv; 2163 Vec vB, vA; 2164 const PetscScalar *va, *vb; 2165 2166 PetscFunctionBegin; 2167 PetscCall(MatCreateVecs(a->A, NULL, &vA)); 2168 PetscCall(MatGetRowMaxAbs(a->A, vA, idx)); 2169 2170 PetscCall(VecGetArrayRead(vA, &va)); 2171 if (idx) { 2172 /* turn local A column indices into global ones, but only for rows whose A result is nonzero */ for (i = 0; i < m; i++) { 2173 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2174 } 2175 } 2176 2177 PetscCall(MatCreateVecs(a->B, NULL, &vB)); 2178 PetscCall(PetscMalloc1(m, &idxb)); 2179 PetscCall(MatGetRowMaxAbs(a->B, vB, idxb)); 2180 2181 PetscCall(VecGetArrayWrite(v, &vv)); 2182 PetscCall(VecGetArrayRead(vB, &vb)); 2183 /* merge the two per-block candidates row by row */ for (i = 0; i < m; i++) { 2184 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2185 vv[i] = vb[i]; 2186 if (idx) idx[i] = a->garray[idxb[i]]; 2187 } else { 2188 vv[i] = va[i]; 2189 if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) idx[i] = a->garray[idxb[i]]; 2190 } 2191 } 2192 PetscCall(VecRestoreArrayWrite(v, &vv)); 2193 PetscCall(VecRestoreArrayRead(vA, &va)); 2194 PetscCall(VecRestoreArrayRead(vB, &vb)); 2195 PetscCall(PetscFree(idxb)); 2196 PetscCall(VecDestroy(&vA)); 2197 PetscCall(VecDestroy(&vB)); 2198 PetscFunctionReturn(PETSC_SUCCESS); 2199 } 2200 2201 /* MatGetRowSumAbs_MPIAIJ - stores in v, for each local row, the sum of the results of MatGetRowSumAbs() on the diagonal and the off-diagonal block. */ static 
PetscErrorCode MatGetRowSumAbs_MPIAIJ(Mat A, Vec v) 2202 { 2203 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2204 Vec vB, vA; 2205 2206 PetscFunctionBegin; 2207 PetscCall(MatCreateVecs(a->A, NULL, &vA)); 2208 PetscCall(MatGetRowSumAbs(a->A, vA)); 2209 PetscCall(MatCreateVecs(a->B, NULL, &vB)); 2210 PetscCall(MatGetRowSumAbs(a->B, vB)); 2211 /* vA += vB, after which the combined row sums are copied into v */ PetscCall(VecAXPY(vA, 1.0, vB)); 2212 PetscCall(VecDestroy(&vB)); 2213 PetscCall(VecCopy(vA, v)); 2214 PetscCall(VecDestroy(&vA)); 2215 PetscFunctionReturn(PETSC_SUCCESS); 2216 } 2217 2218 /* MatGetRowMinAbs_MPIAIJ - for each local row, stores in v the candidate of smaller absolute value, chosen between the result of MatGetRowMinAbs() on the diagonal block and the smallest-magnitude value of the off-diagonal row, where columns not stored in the off-diagonal block count as implicit 0.0. When idx is non-NULL it receives the global column of the chosen value; on a tie the smaller global column wins. Shortcuts: if this process owns all columns only the diagonal block is queried, and if it owns no columns every row gets 0.0 with index -1. */ static PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2219 { 2220 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 2221 PetscInt m = A->rmap->n, n = A->cmap->n; 2222 PetscInt cstart = A->cmap->rstart, cend = A->cmap->rend; 2223 PetscInt *cmap = mat->garray; 2224 PetscInt *diagIdx, *offdiagIdx; 2225 Vec diagV, offdiagV; 2226 PetscScalar *a, *diagA, *offdiagA; 2227 const PetscScalar *ba, *bav; 2228 PetscInt r, j, col, ncols, *bi, *bj; 2229 Mat B = mat->B; 2230 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 2231 2232 PetscFunctionBegin; 2233 /* When a process holds entire A and other processes have no entry */ 2234 if (A->cmap->N == n) { 2235 PetscCall(VecGetArrayWrite(v, &diagA)); 2236 /* wrap the storage of v in a sequential vector so the diagonal block writes its result straight into v */ PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV)); 2237 PetscCall(MatGetRowMinAbs(mat->A, diagV, idx)); 2238 PetscCall(VecDestroy(&diagV)); 2239 PetscCall(VecRestoreArrayWrite(v, &diagA)); 2240 PetscFunctionReturn(PETSC_SUCCESS); 2241 } else if (n == 0) { 2242 if (m) { 2243 PetscCall(VecGetArrayWrite(v, &a)); 2244 for (r = 0; r < m; r++) { 2245 a[r] = 0.0; 2246 if (idx) idx[r] = -1; 2247 } 2248 PetscCall(VecRestoreArrayWrite(v, &a)); 2249 } 2250 PetscFunctionReturn(PETSC_SUCCESS); 2251 } 2252 2253 /* general case: per-row candidates from the diagonal block (diagV, diagIdx) and from the off-diagonal block (offdiagV, offdiagIdx) */ PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx)); 2254 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2255 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2256 PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx)); 2257 2258 /* Get 
offdiagIdx[] for implicit 0.0 */ 2259 PetscCall(MatSeqAIJGetArrayRead(B, &bav)); 2260 ba = bav; 2261 bi = b->i; 2262 bj = b->j; 2263 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2264 for (r = 0; r < m; r++) { 2265 ncols = bi[r + 1] - bi[r]; 2266 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2267 offdiagA[r] = *ba; 2268 offdiagIdx[r] = cmap[0]; 2269 } else { /* Brow is sparse, so it contains an implicit 0.0 and the smallest absolute value is 0.0 */ 2270 offdiagA[r] = 0.0; 2271 2272 /* Find first hole in the cmap */ 2273 for (j = 0; j < ncols; j++) { 2274 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2275 if (col > j && j < cstart) { 2276 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2277 break; 2278 } else if (col > j + n && j >= cstart) { 2279 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2280 break; 2281 } 2282 } 2283 if (j == ncols && ncols < A->cmap->N - n) { 2284 /* a hole is outside compressed Bcols */ 2285 if (ncols == 0) { 2286 if (cstart) { 2287 offdiagIdx[r] = 0; 2288 } else offdiagIdx[r] = cend; 2289 } else { /* ncols > 0 */ 2290 offdiagIdx[r] = cmap[ncols - 1] + 1; 2291 /* the candidate column must not fall inside the locally owned range, so step past it */ if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2292 } 2293 } 2294 } 2295 2296 /* scan the stored entries of this row; ba and bj are running pointers that advance through the B storage from one row to the next */ for (j = 0; j < ncols; j++) { 2297 if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) { 2298 offdiagA[r] = *ba; 2299 offdiagIdx[r] = cmap[*bj]; 2300 } 2301 ba++; 2302 bj++; 2303 } 2304 } 2305 2306 PetscCall(VecGetArrayWrite(v, &a)); 2307 PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA)); 2308 /* merge: take the candidate of smaller magnitude; on a tie keep the diagonal value and the smaller global column */ for (r = 0; r < m; ++r) { 2309 if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) { 2310 a[r] = diagA[r]; 2311 if (idx) idx[r] = cstart + diagIdx[r]; 2312 } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) { 2313 a[r] = diagA[r]; 2314 if (idx) { 2315 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2316 idx[r] = cstart + diagIdx[r]; 2317 } else idx[r] = offdiagIdx[r]; 2318 } 2319 } else { 2320 a[r] = offdiagA[r]; 2321 if (idx) 
idx[r] = offdiagIdx[r]; 2322 } 2323 } 2324 /* release the borrowed arrays, then the work vectors and index buffers */ PetscCall(MatSeqAIJRestoreArrayRead(B, &bav)); 2325 PetscCall(VecRestoreArrayWrite(v, &a)); 2326 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA)); 2327 PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 2328 PetscCall(VecDestroy(&diagV)); 2329 PetscCall(VecDestroy(&offdiagV)); 2330 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2331 PetscFunctionReturn(PETSC_SUCCESS); 2332 } 2333 2334 /* MatGetRowMin_MPIAIJ - for each local row, stores in v the candidate with the smaller real part, chosen between the result of MatGetRowMin() on the diagonal block and the minimum over the off-diagonal row, where columns not stored in the off-diagonal block count as implicit 0.0. When idx is non-NULL it receives the global column of the chosen value; on a tie the smaller global column wins. Shortcuts: if this process owns all columns only the diagonal block is queried, and if it owns no columns every row gets PETSC_MAX_REAL with index -1. */ static PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2335 { 2336 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 2337 PetscInt m = A->rmap->n, n = A->cmap->n; 2338 PetscInt cstart = A->cmap->rstart, cend = A->cmap->rend; 2339 PetscInt *cmap = mat->garray; 2340 PetscInt *diagIdx, *offdiagIdx; 2341 Vec diagV, offdiagV; 2342 PetscScalar *a, *diagA, *offdiagA; 2343 const PetscScalar *ba, *bav; 2344 PetscInt r, j, col, ncols, *bi, *bj; 2345 Mat B = mat->B; 2346 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 2347 2348 PetscFunctionBegin; 2349 /* When a process holds entire A and other processes have no entry */ 2350 if (A->cmap->N == n) { 2351 PetscCall(VecGetArrayWrite(v, &diagA)); 2352 /* wrap the storage of v in a sequential vector so the diagonal block writes its result straight into v */ PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV)); 2353 PetscCall(MatGetRowMin(mat->A, diagV, idx)); 2354 PetscCall(VecDestroy(&diagV)); 2355 PetscCall(VecRestoreArrayWrite(v, &diagA)); 2356 PetscFunctionReturn(PETSC_SUCCESS); 2357 } else if (n == 0) { 2358 if (m) { 2359 PetscCall(VecGetArrayWrite(v, &a)); 2360 for (r = 0; r < m; r++) { 2361 a[r] = PETSC_MAX_REAL; 2362 if (idx) idx[r] = -1; 2363 } 2364 PetscCall(VecRestoreArrayWrite(v, &a)); 2365 } 2366 PetscFunctionReturn(PETSC_SUCCESS); 2367 } 2368 2369 /* general case: per-row candidates from the diagonal block (diagV, diagIdx) and from the off-diagonal block (offdiagV, offdiagIdx) */ PetscCall(PetscCalloc2(m, &diagIdx, m, &offdiagIdx)); 2370 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2371 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2372 PetscCall(MatGetRowMin(mat->A, diagV, diagIdx)); 2373 2374 /* Get offdiagIdx[] for implicit 0.0 */ 2375 PetscCall(MatSeqAIJGetArrayRead(B, 
&bav)); 2376 ba = bav; 2377 bi = b->i; 2378 bj = b->j; 2379 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2380 for (r = 0; r < m; r++) { 2381 ncols = bi[r + 1] - bi[r]; 2382 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2383 offdiagA[r] = *ba; 2384 offdiagIdx[r] = cmap[0]; 2385 } else { /* Brow is sparse, so it contains an implicit 0.0 and the row minimum is 0.0 or lower */ 2386 offdiagA[r] = 0.0; 2387 2388 /* Find first hole in the cmap */ 2389 for (j = 0; j < ncols; j++) { 2390 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2391 if (col > j && j < cstart) { 2392 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2393 break; 2394 } else if (col > j + n && j >= cstart) { 2395 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2396 break; 2397 } 2398 } 2399 if (j == ncols && ncols < A->cmap->N - n) { 2400 /* a hole is outside compressed Bcols */ 2401 if (ncols == 0) { 2402 if (cstart) { 2403 offdiagIdx[r] = 0; 2404 } else offdiagIdx[r] = cend; 2405 } else { /* ncols > 0 */ 2406 offdiagIdx[r] = cmap[ncols - 1] + 1; 2407 /* the candidate column must not fall inside the locally owned range, so step past it */ if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2408 } 2409 } 2410 } 2411 2412 /* scan the stored entries of this row for a smaller real part; ba and bj are running pointers that advance through the B storage from one row to the next */ for (j = 0; j < ncols; j++) { 2413 if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) { 2414 offdiagA[r] = *ba; 2415 offdiagIdx[r] = cmap[*bj]; 2416 } 2417 ba++; 2418 bj++; 2419 } 2420 } 2421 2422 PetscCall(VecGetArrayWrite(v, &a)); 2423 PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA)); 2424 /* merge: take the candidate with the smaller real part; on a tie keep the diagonal value and the smaller global column */ for (r = 0; r < m; ++r) { 2425 if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) { 2426 a[r] = diagA[r]; 2427 if (idx) idx[r] = cstart + diagIdx[r]; 2428 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2429 a[r] = diagA[r]; 2430 if (idx) { 2431 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2432 idx[r] = cstart + diagIdx[r]; 2433 } else idx[r] = offdiagIdx[r]; 2434 } 2435 } else { 2436 a[r] = offdiagA[r]; 2437 if (idx) idx[r] = offdiagIdx[r]; 2438 } 2439 } 2440 PetscCall(MatSeqAIJRestoreArrayRead(B, 
&bav)); 2441 PetscCall(VecRestoreArrayWrite(v, &a)); 2442 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA)); 2443 PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 2444 PetscCall(VecDestroy(&diagV)); 2445 PetscCall(VecDestroy(&offdiagV)); 2446 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2447 PetscFunctionReturn(PETSC_SUCCESS); 2448 } 2449 2450 static PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2451 { 2452 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 2453 PetscInt m = A->rmap->n, n = A->cmap->n; 2454 PetscInt cstart = A->cmap->rstart, cend = A->cmap->rend; 2455 PetscInt *cmap = mat->garray; 2456 PetscInt *diagIdx, *offdiagIdx; 2457 Vec diagV, offdiagV; 2458 PetscScalar *a, *diagA, *offdiagA; 2459 const PetscScalar *ba, *bav; 2460 PetscInt r, j, col, ncols, *bi, *bj; 2461 Mat B = mat->B; 2462 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 2463 2464 PetscFunctionBegin; 2465 /* When a process holds entire A and other processes have no entry */ 2466 if (A->cmap->N == n) { 2467 PetscCall(VecGetArrayWrite(v, &diagA)); 2468 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV)); 2469 PetscCall(MatGetRowMax(mat->A, diagV, idx)); 2470 PetscCall(VecDestroy(&diagV)); 2471 PetscCall(VecRestoreArrayWrite(v, &diagA)); 2472 PetscFunctionReturn(PETSC_SUCCESS); 2473 } else if (n == 0) { 2474 if (m) { 2475 PetscCall(VecGetArrayWrite(v, &a)); 2476 for (r = 0; r < m; r++) { 2477 a[r] = PETSC_MIN_REAL; 2478 if (idx) idx[r] = -1; 2479 } 2480 PetscCall(VecRestoreArrayWrite(v, &a)); 2481 } 2482 PetscFunctionReturn(PETSC_SUCCESS); 2483 } 2484 2485 PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx)); 2486 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2487 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2488 PetscCall(MatGetRowMax(mat->A, diagV, diagIdx)); 2489 2490 /* Get offdiagIdx[] for implicit 0.0 */ 2491 PetscCall(MatSeqAIJGetArrayRead(B, &bav)); 2492 ba = bav; 2493 bi = b->i; 2494 bj = b->j; 2495 
PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2496 for (r = 0; r < m; r++) { 2497 ncols = bi[r + 1] - bi[r]; 2498 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2499 offdiagA[r] = *ba; 2500 offdiagIdx[r] = cmap[0]; 2501 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2502 offdiagA[r] = 0.0; 2503 2504 /* Find first hole in the cmap */ 2505 for (j = 0; j < ncols; j++) { 2506 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2507 if (col > j && j < cstart) { 2508 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2509 break; 2510 } else if (col > j + n && j >= cstart) { 2511 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2512 break; 2513 } 2514 } 2515 if (j == ncols && ncols < A->cmap->N - n) { 2516 /* a hole is outside compressed Bcols */ 2517 if (ncols == 0) { 2518 if (cstart) { 2519 offdiagIdx[r] = 0; 2520 } else offdiagIdx[r] = cend; 2521 } else { /* ncols > 0 */ 2522 offdiagIdx[r] = cmap[ncols - 1] + 1; 2523 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2524 } 2525 } 2526 } 2527 2528 for (j = 0; j < ncols; j++) { 2529 if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) { 2530 offdiagA[r] = *ba; 2531 offdiagIdx[r] = cmap[*bj]; 2532 } 2533 ba++; 2534 bj++; 2535 } 2536 } 2537 2538 PetscCall(VecGetArrayWrite(v, &a)); 2539 PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA)); 2540 for (r = 0; r < m; ++r) { 2541 if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) { 2542 a[r] = diagA[r]; 2543 if (idx) idx[r] = cstart + diagIdx[r]; 2544 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2545 a[r] = diagA[r]; 2546 if (idx) { 2547 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2548 idx[r] = cstart + diagIdx[r]; 2549 } else idx[r] = offdiagIdx[r]; 2550 } 2551 } else { 2552 a[r] = offdiagA[r]; 2553 if (idx) idx[r] = offdiagIdx[r]; 2554 } 2555 } 2556 PetscCall(MatSeqAIJRestoreArrayRead(B, &bav)); 2557 PetscCall(VecRestoreArrayWrite(v, &a)); 2558 
PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA)); 2559 PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 2560 PetscCall(VecDestroy(&diagV)); 2561 PetscCall(VecDestroy(&offdiagV)); 2562 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2563 PetscFunctionReturn(PETSC_SUCCESS); 2564 } 2565 2566 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat, Mat *newmat) 2567 { 2568 Mat *dummy; 2569 2570 PetscFunctionBegin; 2571 PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat, MAT_DO_NOT_GET_VALUES, MAT_INITIAL_MATRIX, &dummy)); 2572 *newmat = *dummy; 2573 PetscCall(PetscFree(dummy)); 2574 PetscFunctionReturn(PETSC_SUCCESS); 2575 } 2576 2577 static PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A, const PetscScalar **values) 2578 { 2579 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2580 2581 PetscFunctionBegin; 2582 PetscCall(MatInvertBlockDiagonal(a->A, values)); 2583 A->factorerrortype = a->A->factorerrortype; 2584 PetscFunctionReturn(PETSC_SUCCESS); 2585 } 2586 2587 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x, PetscRandom rctx) 2588 { 2589 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)x->data; 2590 2591 PetscFunctionBegin; 2592 PetscCheck(x->assembled || x->preallocated, PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed"); 2593 PetscCall(MatSetRandom(aij->A, rctx)); 2594 if (x->assembled) { 2595 PetscCall(MatSetRandom(aij->B, rctx)); 2596 } else { 2597 PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B, x->cmap->rstart, x->cmap->rend, rctx)); 2598 } 2599 PetscCall(MatAssemblyBegin(x, MAT_FINAL_ASSEMBLY)); 2600 PetscCall(MatAssemblyEnd(x, MAT_FINAL_ASSEMBLY)); 2601 PetscFunctionReturn(PETSC_SUCCESS); 2602 } 2603 2604 static PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A, PetscBool sc) 2605 { 2606 PetscFunctionBegin; 2607 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2608 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2609 
PetscFunctionReturn(PETSC_SUCCESS); 2610 } 2611 2612 /*@ 2613 MatMPIAIJGetNumberNonzeros - gets the number of nonzeros in the matrix on this MPI rank 2614 2615 Not Collective 2616 2617 Input Parameter: 2618 . A - the matrix 2619 2620 Output Parameter: 2621 . nz - the number of nonzeros 2622 2623 Level: advanced 2624 2625 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ` 2626 @*/ 2627 PetscErrorCode MatMPIAIJGetNumberNonzeros(Mat A, PetscCount *nz) 2628 { 2629 Mat_MPIAIJ *maij = (Mat_MPIAIJ *)A->data; 2630 Mat_SeqAIJ *aaij = (Mat_SeqAIJ *)maij->A->data, *baij = (Mat_SeqAIJ *)maij->B->data; 2631 PetscBool isaij; 2632 2633 PetscFunctionBegin; 2634 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIAIJ, &isaij)); 2635 PetscCheck(isaij, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)A)->type_name); 2636 *nz = aaij->i[A->rmap->n] + baij->i[A->rmap->n]; 2637 PetscFunctionReturn(PETSC_SUCCESS); 2638 } 2639 2640 /*@ 2641 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2642 2643 Collective 2644 2645 Input Parameters: 2646 + A - the matrix 2647 - sc - `PETSC_TRUE` indicates use the scalable algorithm (default is not to use the scalable algorithm) 2648 2649 Level: advanced 2650 2651 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ` 2652 @*/ 2653 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A, PetscBool sc) 2654 { 2655 PetscFunctionBegin; 2656 PetscTryMethod(A, "MatMPIAIJSetUseScalableIncreaseOverlap_C", (Mat, PetscBool), (A, sc)); 2657 PetscFunctionReturn(PETSC_SUCCESS); 2658 } 2659 2660 PetscErrorCode MatSetFromOptions_MPIAIJ(Mat A, PetscOptionItems PetscOptionsObject) 2661 { 2662 PetscBool sc = PETSC_FALSE, flg; 2663 2664 PetscFunctionBegin; 2665 PetscOptionsHeadBegin(PetscOptionsObject, "MPIAIJ options"); 2666 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2667 PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable", "Use 
a scalable algorithm to compute the overlap", "MatIncreaseOverlap", sc, &sc, &flg)); 2668 if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, sc)); 2669 PetscOptionsHeadEnd(); 2670 PetscFunctionReturn(PETSC_SUCCESS); 2671 } 2672 2673 static PetscErrorCode MatShift_MPIAIJ(Mat Y, PetscScalar a) 2674 { 2675 Mat_MPIAIJ *maij = (Mat_MPIAIJ *)Y->data; 2676 Mat_SeqAIJ *aij = (Mat_SeqAIJ *)maij->A->data; 2677 2678 PetscFunctionBegin; 2679 if (!Y->preallocated) { 2680 PetscCall(MatMPIAIJSetPreallocation(Y, 1, NULL, 0, NULL)); 2681 } else if (!aij->nz) { /* It does not matter if diagonals of Y only partially lie in maij->A. We just need an estimated preallocation. */ 2682 PetscInt nonew = aij->nonew; 2683 PetscCall(MatSeqAIJSetPreallocation(maij->A, 1, NULL)); 2684 aij->nonew = nonew; 2685 } 2686 PetscCall(MatShift_Basic(Y, a)); 2687 PetscFunctionReturn(PETSC_SUCCESS); 2688 } 2689 2690 static PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A, PetscBool *missing, PetscInt *d) 2691 { 2692 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2693 2694 PetscFunctionBegin; 2695 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices"); 2696 PetscCall(MatMissingDiagonal(a->A, missing, d)); 2697 if (d) { 2698 PetscInt rstart; 2699 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 2700 *d += rstart; 2701 } 2702 PetscFunctionReturn(PETSC_SUCCESS); 2703 } 2704 2705 static PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A, PetscInt nblocks, const PetscInt *bsizes, PetscScalar *diag) 2706 { 2707 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2708 2709 PetscFunctionBegin; 2710 PetscCall(MatInvertVariableBlockDiagonal(a->A, nblocks, bsizes, diag)); 2711 PetscFunctionReturn(PETSC_SUCCESS); 2712 } 2713 2714 static PetscErrorCode MatEliminateZeros_MPIAIJ(Mat A, PetscBool keep) 2715 { 2716 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2717 2718 PetscFunctionBegin; 2719 PetscCall(MatEliminateZeros_SeqAIJ(a->A, keep)); // possibly keep zero diagonal coefficients 
PetscCall(MatEliminateZeros_SeqAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Function table for this matrix type. The initializer is positional: every entry's meaning is given
  by its position in struct _MatOps, so entries must never be reordered, inserted or removed without
  matching the struct definition. The numeric markers are running position counters only.
  NULL means no type-specific implementation is registered in that slot.
*/
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
                                       MatGetRow_MPIAIJ,
                                       MatRestoreRow_MPIAIJ,
                                       MatMult_MPIAIJ,
                                       /* 4*/ MatMultAdd_MPIAIJ,
                                       MatMultTranspose_MPIAIJ,
                                       MatMultTransposeAdd_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*10*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_MPIAIJ,
                                       MatTranspose_MPIAIJ,
                                       /*15*/ MatGetInfo_MPIAIJ,
                                       MatEqual_MPIAIJ,
                                       MatGetDiagonal_MPIAIJ,
                                       MatDiagonalScale_MPIAIJ,
                                       MatNorm_MPIAIJ,
                                       /*20*/ MatAssemblyBegin_MPIAIJ,
                                       MatAssemblyEnd_MPIAIJ,
                                       MatSetOption_MPIAIJ,
                                       MatZeroEntries_MPIAIJ,
                                       /*24*/ MatZeroRows_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*29*/ MatSetUp_MPI_Hash,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIAIJ,
                                       NULL,
                                       /*34*/ MatDuplicate_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*39*/ MatAXPY_MPIAIJ,
                                       MatCreateSubMatrices_MPIAIJ,
                                       MatIncreaseOverlap_MPIAIJ,
                                       MatGetValues_MPIAIJ,
                                       MatCopy_MPIAIJ,
                                       /*44*/ MatGetRowMax_MPIAIJ,
                                       MatScale_MPIAIJ,
                                       MatShift_MPIAIJ,
                                       MatDiagonalSet_MPIAIJ,
                                       MatZeroRowsColumns_MPIAIJ,
                                       /*49*/ MatSetRandom_MPIAIJ,
                                       MatGetRowIJ_MPIAIJ,
                                       MatRestoreRowIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*54*/ MatFDColoringCreate_MPIXAIJ,
                                       NULL,
                                       MatSetUnfactored_MPIAIJ,
                                       MatPermute_MPIAIJ,
                                       NULL,
                                       /*59*/ MatCreateSubMatrix_MPIAIJ,
                                       MatDestroy_MPIAIJ,
                                       MatView_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*64*/ MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatGetRowMaxAbs_MPIAIJ,
                                       /*69*/ MatGetRowMinAbs_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFDColoringApply_AIJ,
                                       MatSetFromOptions_MPIAIJ,
                                       MatFindZeroDiagonals_MPIAIJ,
                                       /*75*/ NULL,
                                       NULL,
                                       NULL,
                                       MatLoad_MPIAIJ,
                                       NULL,
                                       /*80*/ NULL,
                                       NULL,
                                       NULL,
                                       /*83*/ NULL,
                                       NULL,
                                       MatMatMultNumeric_MPIAIJ_MPIAIJ,
                                       MatPtAPNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*89*/ MatBindToCPU_MPIAIJ,
                                       MatProductSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIAIJ,
                                       /*94*/ NULL,
                                       MatSetValuesRow_MPIAIJ,
                                       MatRealPart_MPIAIJ,
                                       MatImaginaryPart_MPIAIJ,
                                       NULL,
                                       /*99*/ NULL,
                                       NULL,
                                       NULL,
                                       MatGetRowMin_MPIAIJ,
                                       NULL,
                                       /*104*/ MatMissingDiagonal_MPIAIJ,
                                       MatGetSeqNonzeroStructure_MPIAIJ,
                                       NULL,
                                       MatGetGhosts_MPIAIJ,
                                       NULL,
                                       /*109*/ NULL,
                                       MatMultDiagonalBlock_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*114*/ MatGetMultiProcBlock_MPIAIJ,
                                       MatFindNonzeroRows_MPIAIJ,
                                       MatGetColumnReductions_MPIAIJ,
                                       MatInvertBlockDiagonal_MPIAIJ,
                                       MatInvertVariableBlockDiagonal_MPIAIJ,
                                       /*119*/ MatCreateSubMatricesMPI_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       /*124*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSetBlockSizes_MPIAIJ,
                                       NULL,
                                       /*129*/ MatFDColoringSetUp_MPIXAIJ,
                                       MatFindOffBlockDiagonalEntries_MPIAIJ,
                                       MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*134*/ NULL,
                                       MatCreateGraph_Simple_AIJ,
                                       NULL,
                                       MatEliminateZeros_MPIAIJ,
                                       MatGetRowSumAbs_MPIAIJ,
                                       /*139*/ NULL,
                                       NULL,
                                       NULL,
                                       MatCopyHashToXAIJ_MPI_Hash};

/* MatStoreValues_MPIAIJ - Calls MatStoreValues() on the diagonal block and then on the off-diagonal block. */
static PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatStoreValues(aij->A));
  PetscCall(MatStoreValues(aij->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatRetrieveValues_MPIAIJ - Calls MatRetrieveValues() on the diagonal block and then on the off-diagonal block. */
static PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatRetrieveValues(aij->A));
  PetscCall(MatRetrieveValues(aij->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode
MatMPIAIJSetPreallocation_MPIAIJ(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  /*
    Discards the current local blocks and communication data of B and creates freshly preallocated ones.

    d_nz/d_nnz are handed to MatSeqAIJSetPreallocation() for the diagonal block b->A and
    o_nz/o_nnz for the off-diagonal block b->B. On return B is flagged preallocated and not assembled.
  */
  Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
  PetscMPIInt size;

  PetscFunctionBegin;
  /* Leaving hash-based insertion: put back the operation table saved in b->cops */
  if (B->hash_active) {
    B->ops[0] = b->cops;
    B->hash_active = PETSC_FALSE;
  }
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));

  /* Column map, ghost vector and scatter are all dropped here */
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&b->colmap));
#else
  PetscCall(PetscFree(b->colmap));
#endif
  PetscCall(PetscFree(b->garray));
  PetscCall(VecDestroy(&b->lvec));
  PetscCall(VecScatterDestroy(&b->Mvctx));

  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));

  /* Off-diagonal block: as wide as the whole matrix on more than one rank, zero columns on a single rank */
  MatSeqXAIJGetOptions_Private(b->B);
  PetscCall(MatDestroy(&b->B));
  PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
  PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
  PetscCall(MatSetBlockSizesFromMats(b->B, B, B));
  PetscCall(MatSetType(b->B, MATSEQAIJ));
  MatSeqXAIJRestoreOptions_Private(b->B);

  /* Diagonal block: local rows by local columns */
  MatSeqXAIJGetOptions_Private(b->A);
  PetscCall(MatDestroy(&b->A));
  PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
  PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
  PetscCall(MatSetBlockSizesFromMats(b->A, B, B));
  PetscCall(MatSetType(b->A, MATSEQAIJ));
  MatSeqXAIJRestoreOptions_Private(b->A);

  PetscCall(MatSeqAIJSetPreallocation(b->A, d_nz, d_nnz));
  PetscCall(MatSeqAIJSetPreallocation(b->B, o_nz, o_nnz));
  B->preallocated = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatResetPreallocation_MPIAIJ - Resets the preallocation of both local blocks of B.

  Errors with PETSC_ERR_SUP if values have been set without a following assembly. Returns immediately
  if B was never assembled (num_ass == 0). Otherwise both blocks are reset; if any rank reports that a
  reset took place (logical OR over B's communicator), B is disassembled, its scatter is destroyed,
  it is flagged preallocated and not assembled, and its object state is increased.
*/
static PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
{
  Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
  PetscBool   ondiagreset, offdiagreset, memoryreset;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscCheck(B->insertmode == NOT_SET_VALUES, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot reset preallocation after setting some values but not yet calling MatAssemblyBegin()/MatAssemblyEnd()");
  if (B->num_ass == 0) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(MatResetPreallocation_SeqAIJ_Private(b->A, &ondiagreset));
  PetscCall(MatResetPreallocation_SeqAIJ_Private(b->B, &offdiagreset));
  memoryreset = (PetscBool)(ondiagreset || offdiagreset);
  /* Every rank must take the same branch below, so agree on the flag first */
  PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &memoryreset, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)B)));
  if (!memoryreset) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  PetscCheck(B->assembled || B->was_assembled, PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_WRONGSTATE, "Should not need to reset preallocation if the matrix was never assembled");
  PetscCall(MatDisAssemble_MPIAIJ(B, PETSC_TRUE));
  PetscCall(VecScatterDestroy(&b->Mvctx));

  B->preallocated = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled = PETSC_FALSE;
  /* Log that the state of this object has changed; this will help guarantee that preconditioners get re-setup */
  PetscCall(PetscObjectStateIncrease((PetscObject)B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatDuplicate_MPIAIJ - Creates a new matrix of the same type, sizes, block sizes and layouts as matin.

  If matin is still in hash-insertion mode the copy is only set up with MatSetUp(). Otherwise the
  column map, garray, ghost vector and the two local blocks are duplicated (the blocks according to
  cpvalues) and the scatter context is shared by reference. The composed-function list of matin is
  duplicated onto the copy. *newmat is NULL until the copy has been fully built.
*/
PetscErrorCode MatDuplicate_MPIAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
{
  Mat         mat;
  Mat_MPIAIJ *a, *oldmat = (Mat_MPIAIJ *)matin->data;

  PetscFunctionBegin;
  *newmat = NULL;
  PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
  PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
  PetscCall(MatSetBlockSizesFromMats(mat, matin, matin));
  PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
  a = (Mat_MPIAIJ *)mat->data;

  mat->factortype = matin->factortype;
  mat->assembled = matin->assembled;
  mat->insertmode = NOT_SET_VALUES;

  a->size = oldmat->size;
  a->rank = oldmat->rank;
  a->donotstash = oldmat->donotstash;
  a->roworiented = oldmat->roworiented;
  a->rowindices = NULL;
  a->rowvalues = NULL;
  a->getrowactive = PETSC_FALSE;

  PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
  PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));
  if (matin->hash_active) {
    PetscCall(MatSetUp(mat));
  } else {
    mat->preallocated = matin->preallocated;
    if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
      PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
#else
      PetscCall(PetscMalloc1(mat->cmap->N, &a->colmap));
      PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, mat->cmap->N));
#endif
    } else a->colmap = NULL;
    if (oldmat->garray) {
      PetscInt len;
      len = oldmat->B->cmap->n;
      /* One extra slot is allocated, so the request is never zero-length */
      PetscCall(PetscMalloc1(len + 1, &a->garray));
      if (len) PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
    } else a->garray = NULL;

    /* It may happen MatDuplicate is called with a non-assembled matrix
       In fact, MatDuplicate only requires the matrix to be preallocated
       This may happen inside a DMCreateMatrix_Shell */
    if (oldmat->lvec) PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
    if (oldmat->Mvctx) {
      /* The scatter is shared, not copied: take an extra reference */
      a->Mvctx = oldmat->Mvctx;
      PetscCall(PetscObjectReference((PetscObject)oldmat->Mvctx));
    }
    PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
    PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
  }
  PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
  *newmat = mat;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatLoad_MPIAIJ - Dispatches loading of newMat on the viewer type: binary viewers go to
  MatLoad_MPIAIJ_Binary(), HDF5 viewers to MatLoad_AIJ_HDF5() when PETSC_HAVE_HDF5 is defined.
  Any other viewer type, or HDF5 without that support, raises PETSC_ERR_SUP.
*/
PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
{
  PetscBool isbinary, ishdf5;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
  PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
  /* force binary viewer to load .info file if it has not yet done so */
  PetscCall(PetscViewerSetUp(viewer));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
  if (isbinary) {
    PetscCall(MatLoad_MPIAIJ_Binary(newMat, viewer));
  } else if (ishdf5) {
#if defined(PETSC_HAVE_HDF5)
    PetscCall(MatLoad_AIJ_HDF5(newMat, viewer));
#else
    SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
#endif
  } else {
    SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatLoad_MPIAIJ_Binary - Reads a matrix from a binary viewer into mat.

  The four-entry header is read as (class id, M, N, nz). Row lengths are read next and turned into
  CSR row offsets by a running sum, then the column indices and values are read and handed to
  MatMPIAIJSetPreallocationCSR(). Raises PETSC_ERR_FILE_UNEXPECTED when the class id is not
  MAT_FILE_CLASSID, when a header size is negative, when the sizes differ from those already set on
  mat, or when the row lengths do not add up to nz (that last check is skipped when nz equals
  PETSC_INT_MAX).
*/
PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
{
  PetscInt     header[4], M, N, m, nz, rows, cols, sum, i;
  PetscInt    *rowidxs, *colidxs;
  PetscScalar *matvals;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  /* read in matrix header */
  PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
  PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
  M = header[1];
  N = header[2];
  nz = header[3];
  PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
  PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
  PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIAIJ");

  /* set block sizes from the viewer's .info file */
  PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
  /* set global sizes if not set already */
  if (mat->rmap->N < 0) mat->rmap->N = M;
  if (mat->cmap->N < 0) mat->cmap->N = N;
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));

  /* check if the matrix sizes are correct */
  PetscCall(MatGetSize(mat, &rows, &cols));
  PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);

  /* read in row lengths and build row indices */
  PetscCall(MatGetLocalSize(mat, &m, NULL));
  PetscCall(PetscMalloc1(m + 1, &rowidxs));
  PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT));
  rowidxs[0] = 0;
  for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
  if (nz != PETSC_INT_MAX) {
    PetscCallMPI(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer)));
    PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);
  }

  /* read in column indices and matrix values */
  PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals));
  PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
  PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
  /* store matrix indices and values */
  PetscCall(MatMPIAIJSetPreallocationCSR(mat, rowidxs, colidxs, matvals));
  PetscCall(PetscFree(rowidxs));
  PetscCall(PetscFree2(colidxs,
matvals)); 3103 PetscFunctionReturn(PETSC_SUCCESS); 3104 } 3105 3106 /* Not scalable because of ISAllGather() unless getting all columns. */ 3107 static PetscErrorCode ISGetSeqIS_Private(Mat mat, IS iscol, IS *isseq) 3108 { 3109 IS iscol_local; 3110 PetscBool isstride; 3111 PetscMPIInt gisstride = 0; 3112 3113 PetscFunctionBegin; 3114 /* check if we are grabbing all columns*/ 3115 PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &isstride)); 3116 3117 if (isstride) { 3118 PetscInt start, len, mstart, mlen; 3119 PetscCall(ISStrideGetInfo(iscol, &start, NULL)); 3120 PetscCall(ISGetLocalSize(iscol, &len)); 3121 PetscCall(MatGetOwnershipRangeColumn(mat, &mstart, &mlen)); 3122 if (mstart == start && mlen - mstart == len) gisstride = 1; 3123 } 3124 3125 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &gisstride, 1, MPI_INT, MPI_MIN, PetscObjectComm((PetscObject)mat))); 3126 if (gisstride) { 3127 PetscInt N; 3128 PetscCall(MatGetSize(mat, NULL, &N)); 3129 PetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &iscol_local)); 3130 PetscCall(ISSetIdentity(iscol_local)); 3131 PetscCall(PetscInfo(mat, "Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n")); 3132 } else { 3133 PetscInt cbs; 3134 PetscCall(ISGetBlockSize(iscol, &cbs)); 3135 PetscCall(ISAllGather(iscol, &iscol_local)); 3136 PetscCall(ISSetBlockSize(iscol_local, cbs)); 3137 } 3138 3139 *isseq = iscol_local; 3140 PetscFunctionReturn(PETSC_SUCCESS); 3141 } 3142 3143 /* 3144 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local 3145 (see MatCreateSubMatrix_MPIAIJ_nonscalable) 3146 3147 Input Parameters: 3148 + mat - matrix 3149 . 
isrow - parallel row index set; its local indices are a subset of local rows of `mat`,
           i.e., mat->rstart <= isrow[i] < mat->rend
-   iscol - parallel column index set; its local indices are a subset of local columns of `mat`,
           i.e., mat->cstart <= iscol[i] < mat->cend

 Output Parameters:
+   isrow_d - sequential row index set for retrieving mat->A
.   iscol_d - sequential column index set for retrieving mat->A
.   iscol_o - sequential column index set for retrieving mat->B
-   garray - column map; garray[i] indicates global location of iscol_o[i] in `iscol`
 */
static PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat, IS isrow, IS iscol, IS *isrow_d, IS *iscol_d, IS *iscol_o, PetscInt *garray[])
{
  Vec             x, cmap;
  const PetscInt *is_idx;
  PetscScalar    *xarray, *cmaparray;
  PetscInt        ncols, isstart, *idx, m, rstart, *cmap1, count;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ *)mat->data;
  Mat             B = a->B;
  Vec             lvec = a->lvec, lcmap;
  PetscInt        i, cstart, cend, Bn = B->cmap->N;
  MPI_Comm        comm;
  VecScatter      Mvctx = a->Mvctx;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  PetscCall(ISGetLocalSize(iscol, &ncols));

  /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
  PetscCall(MatCreateVecs(mat, &x, NULL));
  PetscCall(VecSet(x, -1.0));
  PetscCall(VecDuplicate(x, &cmap));
  PetscCall(VecSet(cmap, -1.0));

  /* Get start indices */
  /* MPI_Scan yields the inclusive prefix sum; subtracting ncols gives this rank's starting offset */
  PetscCallMPI(MPI_Scan(&ncols, &isstart, 1, MPIU_INT, MPI_SUM, comm));
  isstart -= ncols;
  PetscCall(MatGetOwnershipRangeColumn(mat, &cstart, &cend));

  PetscCall(ISGetIndices(iscol, &is_idx));
  PetscCall(VecGetArray(x, &xarray));
  PetscCall(VecGetArray(cmap, &cmaparray));
  PetscCall(PetscMalloc1(ncols, &idx));
  for (i = 0; i < ncols; i++) {
    xarray[is_idx[i] - cstart] = (PetscScalar)is_idx[i];
    cmaparray[is_idx[i] - cstart] = i + isstart; /* global index of iscol[i] */
    idx[i] = is_idx[i] - cstart; /* local index of iscol[i] */
  }
  PetscCall(VecRestoreArray(x, &xarray));
  PetscCall(VecRestoreArray(cmap, &cmaparray));
  PetscCall(ISRestoreIndices(iscol, &is_idx));

  /* Get iscol_d */
  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, iscol_d));
  PetscCall(ISGetBlockSize(iscol, &i));
  PetscCall(ISSetBlockSize(*iscol_d, i));

  /* Get isrow_d */
  PetscCall(ISGetLocalSize(isrow, &m));
  rstart = mat->rmap->rstart;
  PetscCall(PetscMalloc1(m, &idx));
  PetscCall(ISGetIndices(isrow, &is_idx));
  for (i = 0; i < m; i++) idx[i] = is_idx[i] - rstart;
  PetscCall(ISRestoreIndices(isrow, &is_idx));

  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idx, PETSC_OWN_POINTER, isrow_d));
  PetscCall(ISGetBlockSize(isrow, &i));
  PetscCall(ISSetBlockSize(*isrow_d, i));

  /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
  PetscCall(VecScatterBegin(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));

  PetscCall(VecDuplicate(lvec, &lcmap));

  PetscCall(VecScatterBegin(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));

  /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
  /* off-process column indices */
  count = 0;
  PetscCall(PetscMalloc1(Bn, &idx));
  PetscCall(PetscMalloc1(Bn, &cmap1));

  PetscCall(VecGetArray(lvec, &xarray));
  PetscCall(VecGetArray(lcmap, &cmaparray));
  /* Entries still at the -1 padding value were not selected by iscol on their owning rank */
  for (i = 0; i < Bn; i++) {
    if (PetscRealPart(xarray[i]) > -1.0) {
      idx[count] = i; /* local column index in off-diagonal part B */
      cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */
      count++;
    }
  }
  PetscCall(VecRestoreArray(lvec, &xarray));
  PetscCall(VecRestoreArray(lcmap, &cmaparray));

  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_COPY_VALUES, iscol_o));
  /* cannot ensure iscol_o has same blocksize as iscol! */

  PetscCall(PetscFree(idx));
  /* cmap1 was allocated with Bn entries but only the first count are filled; the caller frees it */
  *garray = cmap1;

  PetscCall(VecDestroy(&x));
  PetscCall(VecDestroy(&cmap));
  PetscCall(VecDestroy(&lcmap));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat.

  With MAT_REUSE_MATRIX the index sets composed on *submat under the names "isrow_d", "iscol_d" and
  "iscol_o" are looked up (PETSC_ERR_ARG_WRONGSTATE if any is absent) and the two local blocks of
  *submat are refreshed in place. With any other value of call the index sets are built, the local
  sub-blocks are extracted, a new parallel matrix is assembled from them and the three index sets are
  composed on it for later reuse.
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *submat)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data, *asub;
  Mat         M = NULL;
  MPI_Comm    comm;
  IS          iscol_d, isrow_d, iscol_o;
  Mat         Asub = NULL, Bsub = NULL;
  PetscInt    n, count, M_size, N_size;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));

  if (call == MAT_REUSE_MATRIX) {
    /* Retrieve isrow_d, iscol_d and iscol_o from submat */
    PetscCall(PetscObjectQuery((PetscObject)*submat, "isrow_d", (PetscObject *)&isrow_d));
    PetscCheck(isrow_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "isrow_d passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_d", (PetscObject *)&iscol_d));
    PetscCheck(iscol_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_d passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_o", (PetscObject *)&iscol_o));
    PetscCheck(iscol_o, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_o passed in was not used before, cannot reuse");

    /* Update diagonal and off-diagonal portions of submat */
    asub = (Mat_MPIAIJ *)(*submat)->data;
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->A));
    PetscCall(ISGetLocalSize(iscol_o, &n));
    if (n) PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->B));
    PetscCall(MatAssemblyBegin(*submat, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(*submat, MAT_FINAL_ASSEMBLY));

  } else { /* call == MAT_INITIAL_MATRIX) */
    PetscInt *garray, *garray_compact;
    PetscInt  BsubN;

    /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
    PetscCall(ISGetSeqIS_SameColDist_Private(mat, isrow, iscol, &isrow_d, &iscol_d, &iscol_o, &garray));

    /* Create local submatrices Asub and Bsub */
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Asub));
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Bsub));

    // Compact garray so its not of size Bn
    PetscCall(ISGetSize(iscol_o, &count));
    PetscCall(PetscMalloc1(count, &garray_compact));
    PetscCall(PetscArraycpy(garray_compact, garray, count));

    /* Create submatrix M */
    PetscCall(ISGetSize(isrow, &M_size));
    PetscCall(ISGetSize(iscol, &N_size));
    /* NOTE(review): garray_compact is not freed in this function, so MatCreateMPIAIJWithSeqAIJ() presumably takes ownership - confirm */
    PetscCall(MatCreateMPIAIJWithSeqAIJ(comm, M_size, N_size, Asub, Bsub, garray_compact, &M));

    /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
    asub = (Mat_MPIAIJ *)M->data;

    PetscCall(ISGetLocalSize(iscol_o, &BsubN));
    n = asub->B->cmap->N;
    if (BsubN > n) {
      /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
      const PetscInt *idx;
      PetscInt        i, j, *idx_new, *subgarray = asub->garray;
      PetscCall(PetscInfo(M, "submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n", n, BsubN));

      PetscCall(PetscMalloc1(n, &idx_new));
      j = 0;
      PetscCall(ISGetIndices(iscol_o, &idx));
      /* Walk subgarray and garray in step, keeping the iscol_o entry for every column that survived in M.
         NOTE(review): the inner scan has no j < BsubN bound and relies on every subgarray entry
         also occurring in garray - confirm that invariant holds. */
      for (i = 0; i < n; i++) {
        if (j >= BsubN) break;
        while (subgarray[i] > garray[j]) j++;

        if (subgarray[i] == garray[j]) {
          idx_new[i] = idx[j++];
        } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT, i, subgarray[i], j, garray[j]);
      }
      PetscCall(ISRestoreIndices(iscol_o, &idx));

      PetscCall(ISDestroy(&iscol_o));
      PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx_new, PETSC_OWN_POINTER, &iscol_o));

    } else if (BsubN < n) {
      SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")", BsubN, asub->B->cmap->N);
    }

    PetscCall(PetscFree(garray));
    *submat = M;

    /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
    /* M holds the index sets after composing, so the local handles are released right away */
    PetscCall(PetscObjectCompose((PetscObject)M, "isrow_d", (PetscObject)isrow_d));
    PetscCall(ISDestroy(&isrow_d));

    PetscCall(PetscObjectCompose((PetscObject)M, "iscol_d", (PetscObject)iscol_d));
    PetscCall(ISDestroy(&iscol_d));

    PetscCall(PetscObjectCompose((PetscObject)M, "iscol_o", (PetscObject)iscol_o));
    PetscCall(ISDestroy(&iscol_o));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
{
  IS        iscol_local = NULL, isrow_d;
  PetscInt  csize;
  PetscInt  n, i, j, start, end;
  PetscBool sameRowDist = PETSC_FALSE, sameDist[2], tsameDist[2];
  MPI_Comm  comm;

  PetscFunctionBegin;
  /* If isrow has same processor distribution as mat,
     call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "isrow_d", (PetscObject *)&isrow_d));
    if (isrow_d) {
      sameRowDist = PETSC_TRUE;
      tsameDist[1] = PETSC_TRUE; /* sameColDist */
    } else {
      PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_local));
      if (iscol_local) {
        sameRowDist = PETSC_TRUE;
        tsameDist[1] = PETSC_FALSE; /* !sameColDist */
      }
    }
  } else {
    /* Check if isrow has same processor distribution as mat */
    sameDist[0] = PETSC_FALSE;
    PetscCall(ISGetLocalSize(isrow, &n));
    if (!n) {
      sameDist[0] = PETSC_TRUE;
    } else {
PetscCall(ISGetMinMax(isrow, &i, &j)); 3388 PetscCall(MatGetOwnershipRange(mat, &start, &end)); 3389 if (i >= start && j < end) sameDist[0] = PETSC_TRUE; 3390 } 3391 3392 /* Check if iscol has same processor distribution as mat */ 3393 sameDist[1] = PETSC_FALSE; 3394 PetscCall(ISGetLocalSize(iscol, &n)); 3395 if (!n) { 3396 sameDist[1] = PETSC_TRUE; 3397 } else { 3398 PetscCall(ISGetMinMax(iscol, &i, &j)); 3399 PetscCall(MatGetOwnershipRangeColumn(mat, &start, &end)); 3400 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3401 } 3402 3403 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3404 PetscCallMPI(MPIU_Allreduce(&sameDist, &tsameDist, 2, MPIU_BOOL, MPI_LAND, comm)); 3405 sameRowDist = tsameDist[0]; 3406 } 3407 3408 if (sameRowDist) { 3409 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3410 /* isrow and iscol have same processor distribution as mat */ 3411 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat, isrow, iscol, call, newmat)); 3412 PetscFunctionReturn(PETSC_SUCCESS); 3413 } else { /* sameRowDist */ 3414 /* isrow has same processor distribution as mat */ 3415 if (call == MAT_INITIAL_MATRIX) { 3416 PetscBool sorted; 3417 PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local)); 3418 PetscCall(ISGetLocalSize(iscol_local, &n)); /* local size of iscol_local = global columns of newmat */ 3419 PetscCall(ISGetSize(iscol, &i)); 3420 PetscCheck(n == i, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT, n, i); 3421 3422 PetscCall(ISSorted(iscol_local, &sorted)); 3423 if (sorted) { 3424 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3425 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, iscol_local, MAT_INITIAL_MATRIX, newmat)); 3426 PetscFunctionReturn(PETSC_SUCCESS); 3427 } 3428 } else { /* call == MAT_REUSE_MATRIX */ 3429 IS iscol_sub; 3430 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject 
*)&iscol_sub)); 3431 if (iscol_sub) { 3432 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, NULL, call, newmat)); 3433 PetscFunctionReturn(PETSC_SUCCESS); 3434 } 3435 } 3436 } 3437 } 3438 3439 /* General case: iscol -> iscol_local which has global size of iscol */ 3440 if (call == MAT_REUSE_MATRIX) { 3441 PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local)); 3442 PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 3443 } else { 3444 if (!iscol_local) PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local)); 3445 } 3446 3447 PetscCall(ISGetLocalSize(iscol, &csize)); 3448 PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat, isrow, iscol_local, csize, call, newmat)); 3449 3450 if (call == MAT_INITIAL_MATRIX) { 3451 PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local)); 3452 PetscCall(ISDestroy(&iscol_local)); 3453 } 3454 PetscFunctionReturn(PETSC_SUCCESS); 3455 } 3456 3457 /*@C 3458 MatCreateMPIAIJWithSeqAIJ - creates a `MATMPIAIJ` matrix using `MATSEQAIJ` matrices that contain the "diagonal" 3459 and "off-diagonal" part of the matrix in CSR format. 3460 3461 Collective 3462 3463 Input Parameters: 3464 + comm - MPI communicator 3465 . M - the global row size 3466 . N - the global column size 3467 . A - "diagonal" portion of matrix 3468 . B - if garray is `NULL`, B should be the offdiag matrix using global col ids and of size N - if garray is not `NULL`, B should be the offdiag matrix using local col ids and of size garray 3469 - garray - either `NULL` or the global index of `B` columns. If not `NULL`, it should be allocated by `PetscMalloc1()` and will be owned by `mat` thereafter. 3470 3471 Output Parameter: 3472 . 
mat - the matrix, with input `A` as its local diagonal matrix 3473 3474 Level: advanced 3475 3476 Notes: 3477 See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3478 3479 `A` and `B` becomes part of output mat. The user cannot use `A` and `B` anymore. 3480 3481 If `garray` is `NULL`, `B` will be compacted to use local indices. In this sense, `B`'s sparsity pattern (nonzerostate) will be changed. If `B` is a device matrix, we need to somehow also update 3482 `B`'s copy on device. We do so by increasing `B`'s nonzerostate. In use of `B` on device, device matrix types should detect this change (ref. internal routines `MatSeqAIJCUSPARSECopyToGPU()` or 3483 `MatAssemblyEnd_SeqAIJKokkos()`) and will just destroy and then recreate the device copy of `B`. It is not optimal, but is easy to implement and less hacky. To avoid this overhead, try to compute `garray` 3484 yourself, see algorithms in the private function `MatSetUpMultiply_MPIAIJ()`. 3485 3486 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateMPIAIJWithSplitArrays()` 3487 @*/ 3488 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm, PetscInt M, PetscInt N, Mat A, Mat B, PetscInt *garray, Mat *mat) 3489 { 3490 PetscInt m, n; 3491 MatType mpi_mat_type; 3492 Mat_MPIAIJ *mpiaij; 3493 Mat C; 3494 3495 PetscFunctionBegin; 3496 PetscCall(MatCreate(comm, &C)); 3497 PetscCall(MatGetSize(A, &m, &n)); 3498 PetscCheck(m == B->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Am %" PetscInt_FMT " != Bm %" PetscInt_FMT, m, B->rmap->N); 3499 PetscCheck(A->rmap->bs == B->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT, A->rmap->bs, B->rmap->bs); 3500 3501 PetscCall(MatSetSizes(C, m, n, M, N)); 3502 /* Determine the type of MPI matrix that should be created from the type of matrix A, which holds the "diagonal" portion. 
*/ 3503 PetscCall(MatGetMPIMatType_Private(A, &mpi_mat_type)); 3504 PetscCall(MatSetType(C, mpi_mat_type)); 3505 if (!garray) { 3506 const PetscScalar *ba; 3507 3508 B->nonzerostate++; 3509 PetscCall(MatSeqAIJGetArrayRead(B, &ba)); /* Since we will destroy B's device copy, we need to make sure the host copy is up to date */ 3510 PetscCall(MatSeqAIJRestoreArrayRead(B, &ba)); 3511 } 3512 3513 PetscCall(MatSetBlockSizes(C, A->rmap->bs, A->cmap->bs)); 3514 PetscCall(PetscLayoutSetUp(C->rmap)); 3515 PetscCall(PetscLayoutSetUp(C->cmap)); 3516 3517 mpiaij = (Mat_MPIAIJ *)C->data; 3518 mpiaij->A = A; 3519 mpiaij->B = B; 3520 mpiaij->garray = garray; 3521 C->preallocated = PETSC_TRUE; 3522 C->nooffprocentries = PETSC_TRUE; /* See MatAssemblyBegin_MPIAIJ. In effect, making MatAssemblyBegin a nop */ 3523 3524 PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 3525 PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 3526 /* MatAssemblyEnd is critical here. It sets mat->offloadmask according to A and B's, and 3527 also gets mpiaij->B compacted (if garray is NULL), with its col ids and size reduced 3528 */ 3529 PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 3530 PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE)); 3531 PetscCall(MatSetOption(C, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 3532 *mat = C; 3533 PetscFunctionReturn(PETSC_SUCCESS); 3534 } 3535 3536 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat, PetscInt, const IS[], const IS[], MatReuse, PetscBool, Mat *); 3537 3538 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat, IS isrow, IS iscol, IS iscol_local, MatReuse call, Mat *newmat) 3539 { 3540 PetscInt i, m, n, rstart, row, rend, nz, j, bs, cbs; 3541 PetscInt *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal; 3542 Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data; 3543 Mat M, Msub, B = a->B; 3544 MatScalar *aa; 3545 Mat_SeqAIJ *aij; 3546 PetscInt *garray = a->garray, *colsub, Ncols; 3547 PetscInt count, Bn = 
B->cmap->N, cstart = mat->cmap->rstart, cend = mat->cmap->rend; 3548 IS iscol_sub, iscmap; 3549 const PetscInt *is_idx, *cmap; 3550 PetscBool allcolumns = PETSC_FALSE; 3551 MPI_Comm comm; 3552 3553 PetscFunctionBegin; 3554 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3555 if (call == MAT_REUSE_MATRIX) { 3556 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub)); 3557 PetscCheck(iscol_sub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "SubIScol passed in was not used before, cannot reuse"); 3558 PetscCall(ISGetLocalSize(iscol_sub, &count)); 3559 3560 PetscCall(PetscObjectQuery((PetscObject)*newmat, "Subcmap", (PetscObject *)&iscmap)); 3561 PetscCheck(iscmap, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Subcmap passed in was not used before, cannot reuse"); 3562 3563 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Msub)); 3564 PetscCheck(Msub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 3565 3566 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_REUSE_MATRIX, PETSC_FALSE, &Msub)); 3567 3568 } else { /* call == MAT_INITIAL_MATRIX) */ 3569 PetscBool flg; 3570 3571 PetscCall(ISGetLocalSize(iscol, &n)); 3572 PetscCall(ISGetSize(iscol, &Ncols)); 3573 3574 /* (1) iscol -> nonscalable iscol_local */ 3575 /* Check for special case: each processor gets entire matrix columns */ 3576 PetscCall(ISIdentity(iscol_local, &flg)); 3577 if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3578 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 3579 if (allcolumns) { 3580 iscol_sub = iscol_local; 3581 PetscCall(PetscObjectReference((PetscObject)iscol_local)); 3582 PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &iscmap)); 3583 3584 } else { 3585 /* (2) iscol_local -> iscol_sub and iscmap. 
Implementation below requires iscol_local be sorted, it can have duplicate indices */ 3586 PetscInt *idx, *cmap1, k; 3587 PetscCall(PetscMalloc1(Ncols, &idx)); 3588 PetscCall(PetscMalloc1(Ncols, &cmap1)); 3589 PetscCall(ISGetIndices(iscol_local, &is_idx)); 3590 count = 0; 3591 k = 0; 3592 for (i = 0; i < Ncols; i++) { 3593 j = is_idx[i]; 3594 if (j >= cstart && j < cend) { 3595 /* diagonal part of mat */ 3596 idx[count] = j; 3597 cmap1[count++] = i; /* column index in submat */ 3598 } else if (Bn) { 3599 /* off-diagonal part of mat */ 3600 if (j == garray[k]) { 3601 idx[count] = j; 3602 cmap1[count++] = i; /* column index in submat */ 3603 } else if (j > garray[k]) { 3604 while (j > garray[k] && k < Bn - 1) k++; 3605 if (j == garray[k]) { 3606 idx[count] = j; 3607 cmap1[count++] = i; /* column index in submat */ 3608 } 3609 } 3610 } 3611 } 3612 PetscCall(ISRestoreIndices(iscol_local, &is_idx)); 3613 3614 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_OWN_POINTER, &iscol_sub)); 3615 PetscCall(ISGetBlockSize(iscol, &cbs)); 3616 PetscCall(ISSetBlockSize(iscol_sub, cbs)); 3617 3618 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local), count, cmap1, PETSC_OWN_POINTER, &iscmap)); 3619 } 3620 3621 /* (3) Create sequential Msub */ 3622 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_INITIAL_MATRIX, allcolumns, &Msub)); 3623 } 3624 3625 PetscCall(ISGetLocalSize(iscol_sub, &count)); 3626 aij = (Mat_SeqAIJ *)Msub->data; 3627 ii = aij->i; 3628 PetscCall(ISGetIndices(iscmap, &cmap)); 3629 3630 /* 3631 m - number of local rows 3632 Ncols - number of columns (same on all processors) 3633 rstart - first row in new global matrix generated 3634 */ 3635 PetscCall(MatGetSize(Msub, &m, NULL)); 3636 3637 if (call == MAT_INITIAL_MATRIX) { 3638 /* (4) Create parallel newmat */ 3639 PetscMPIInt rank, size; 3640 PetscInt csize; 3641 3642 PetscCallMPI(MPI_Comm_size(comm, &size)); 3643 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 
3644 3645 /* 3646 Determine the number of non-zeros in the diagonal and off-diagonal 3647 portions of the matrix in order to do correct preallocation 3648 */ 3649 3650 /* first get start and end of "diagonal" columns */ 3651 PetscCall(ISGetLocalSize(iscol, &csize)); 3652 if (csize == PETSC_DECIDE) { 3653 PetscCall(ISGetSize(isrow, &mglobal)); 3654 if (mglobal == Ncols) { /* square matrix */ 3655 nlocal = m; 3656 } else { 3657 nlocal = Ncols / size + ((Ncols % size) > rank); 3658 } 3659 } else { 3660 nlocal = csize; 3661 } 3662 PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm)); 3663 rstart = rend - nlocal; 3664 PetscCheck(rank != size - 1 || rend == Ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, Ncols); 3665 3666 /* next, compute all the lengths */ 3667 jj = aij->j; 3668 PetscCall(PetscMalloc1(2 * m + 1, &dlens)); 3669 olens = dlens + m; 3670 for (i = 0; i < m; i++) { 3671 jend = ii[i + 1] - ii[i]; 3672 olen = 0; 3673 dlen = 0; 3674 for (j = 0; j < jend; j++) { 3675 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 3676 else dlen++; 3677 jj++; 3678 } 3679 olens[i] = olen; 3680 dlens[i] = dlen; 3681 } 3682 3683 PetscCall(ISGetBlockSize(isrow, &bs)); 3684 PetscCall(ISGetBlockSize(iscol, &cbs)); 3685 3686 PetscCall(MatCreate(comm, &M)); 3687 PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, Ncols)); 3688 PetscCall(MatSetBlockSizes(M, bs, cbs)); 3689 PetscCall(MatSetType(M, ((PetscObject)mat)->type_name)); 3690 PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens)); 3691 PetscCall(PetscFree(dlens)); 3692 3693 } else { /* call == MAT_REUSE_MATRIX */ 3694 M = *newmat; 3695 PetscCall(MatGetLocalSize(M, &i, NULL)); 3696 PetscCheck(i == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request"); 3697 PetscCall(MatZeroEntries(M)); 3698 /* 3699 The next two lines are needed so we may call MatSetValues_MPIAIJ() below 
directly, 3700 rather than the slower MatSetValues(). 3701 */ 3702 M->was_assembled = PETSC_TRUE; 3703 M->assembled = PETSC_FALSE; 3704 } 3705 3706 /* (5) Set values of Msub to *newmat */ 3707 PetscCall(PetscMalloc1(count, &colsub)); 3708 PetscCall(MatGetOwnershipRange(M, &rstart, NULL)); 3709 3710 jj = aij->j; 3711 PetscCall(MatSeqAIJGetArrayRead(Msub, (const PetscScalar **)&aa)); 3712 for (i = 0; i < m; i++) { 3713 row = rstart + i; 3714 nz = ii[i + 1] - ii[i]; 3715 for (j = 0; j < nz; j++) colsub[j] = cmap[jj[j]]; 3716 PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, colsub, aa, INSERT_VALUES)); 3717 jj += nz; 3718 aa += nz; 3719 } 3720 PetscCall(MatSeqAIJRestoreArrayRead(Msub, (const PetscScalar **)&aa)); 3721 PetscCall(ISRestoreIndices(iscmap, &cmap)); 3722 3723 PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY)); 3724 PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY)); 3725 3726 PetscCall(PetscFree(colsub)); 3727 3728 /* save Msub, iscol_sub and iscmap used in processor for next request */ 3729 if (call == MAT_INITIAL_MATRIX) { 3730 *newmat = M; 3731 PetscCall(PetscObjectCompose((PetscObject)*newmat, "SubMatrix", (PetscObject)Msub)); 3732 PetscCall(MatDestroy(&Msub)); 3733 3734 PetscCall(PetscObjectCompose((PetscObject)*newmat, "SubIScol", (PetscObject)iscol_sub)); 3735 PetscCall(ISDestroy(&iscol_sub)); 3736 3737 PetscCall(PetscObjectCompose((PetscObject)*newmat, "Subcmap", (PetscObject)iscmap)); 3738 PetscCall(ISDestroy(&iscmap)); 3739 3740 if (iscol_local) { 3741 PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local)); 3742 PetscCall(ISDestroy(&iscol_local)); 3743 } 3744 } 3745 PetscFunctionReturn(PETSC_SUCCESS); 3746 } 3747 3748 /* 3749 Not great since it makes two copies of the submatrix, first an SeqAIJ 3750 in local and then by concatenating the local matrices the end result. 3751 Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3752 3753 This requires a sequential iscol with all indices. 
3754 */ 3755 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat) 3756 { 3757 PetscMPIInt rank, size; 3758 PetscInt i, m, n, rstart, row, rend, nz, *cwork, j, bs, cbs; 3759 PetscInt *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal; 3760 Mat M, Mreuse; 3761 MatScalar *aa, *vwork; 3762 MPI_Comm comm; 3763 Mat_SeqAIJ *aij; 3764 PetscBool colflag, allcolumns = PETSC_FALSE; 3765 3766 PetscFunctionBegin; 3767 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3768 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 3769 PetscCallMPI(MPI_Comm_size(comm, &size)); 3770 3771 /* Check for special case: each processor gets entire matrix columns */ 3772 PetscCall(ISIdentity(iscol, &colflag)); 3773 PetscCall(ISGetLocalSize(iscol, &n)); 3774 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3775 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 3776 3777 if (call == MAT_REUSE_MATRIX) { 3778 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse)); 3779 PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 3780 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_REUSE_MATRIX, allcolumns, &Mreuse)); 3781 } else { 3782 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_INITIAL_MATRIX, allcolumns, &Mreuse)); 3783 } 3784 3785 /* 3786 m - number of local rows 3787 n - number of columns (same on all processors) 3788 rstart - first row in new global matrix generated 3789 */ 3790 PetscCall(MatGetSize(Mreuse, &m, &n)); 3791 PetscCall(MatGetBlockSizes(Mreuse, &bs, &cbs)); 3792 if (call == MAT_INITIAL_MATRIX) { 3793 aij = (Mat_SeqAIJ *)Mreuse->data; 3794 ii = aij->i; 3795 jj = aij->j; 3796 3797 /* 3798 Determine the number of non-zeros in the diagonal and off-diagonal 3799 portions of the 
matrix in order to do correct preallocation 3800 */ 3801 3802 /* first get start and end of "diagonal" columns */ 3803 if (csize == PETSC_DECIDE) { 3804 PetscCall(ISGetSize(isrow, &mglobal)); 3805 if (mglobal == n) { /* square matrix */ 3806 nlocal = m; 3807 } else { 3808 nlocal = n / size + ((n % size) > rank); 3809 } 3810 } else { 3811 nlocal = csize; 3812 } 3813 PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm)); 3814 rstart = rend - nlocal; 3815 PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n); 3816 3817 /* next, compute all the lengths */ 3818 PetscCall(PetscMalloc1(2 * m + 1, &dlens)); 3819 olens = dlens + m; 3820 for (i = 0; i < m; i++) { 3821 jend = ii[i + 1] - ii[i]; 3822 olen = 0; 3823 dlen = 0; 3824 for (j = 0; j < jend; j++) { 3825 if (*jj < rstart || *jj >= rend) olen++; 3826 else dlen++; 3827 jj++; 3828 } 3829 olens[i] = olen; 3830 dlens[i] = dlen; 3831 } 3832 PetscCall(MatCreate(comm, &M)); 3833 PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, n)); 3834 PetscCall(MatSetBlockSizes(M, bs, cbs)); 3835 PetscCall(MatSetType(M, ((PetscObject)mat)->type_name)); 3836 PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens)); 3837 PetscCall(PetscFree(dlens)); 3838 } else { 3839 PetscInt ml, nl; 3840 3841 M = *newmat; 3842 PetscCall(MatGetLocalSize(M, &ml, &nl)); 3843 PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request"); 3844 PetscCall(MatZeroEntries(M)); 3845 /* 3846 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3847 rather than the slower MatSetValues(). 
3848 */ 3849 M->was_assembled = PETSC_TRUE; 3850 M->assembled = PETSC_FALSE; 3851 } 3852 PetscCall(MatGetOwnershipRange(M, &rstart, &rend)); 3853 aij = (Mat_SeqAIJ *)Mreuse->data; 3854 ii = aij->i; 3855 jj = aij->j; 3856 3857 /* trigger copy to CPU if needed */ 3858 PetscCall(MatSeqAIJGetArrayRead(Mreuse, (const PetscScalar **)&aa)); 3859 for (i = 0; i < m; i++) { 3860 row = rstart + i; 3861 nz = ii[i + 1] - ii[i]; 3862 cwork = jj; 3863 jj = PetscSafePointerPlusOffset(jj, nz); 3864 vwork = aa; 3865 aa = PetscSafePointerPlusOffset(aa, nz); 3866 PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES)); 3867 } 3868 PetscCall(MatSeqAIJRestoreArrayRead(Mreuse, (const PetscScalar **)&aa)); 3869 3870 PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY)); 3871 PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY)); 3872 *newmat = M; 3873 3874 /* save submatrix used in processor for next request */ 3875 if (call == MAT_INITIAL_MATRIX) { 3876 PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse)); 3877 PetscCall(MatDestroy(&Mreuse)); 3878 } 3879 PetscFunctionReturn(PETSC_SUCCESS); 3880 } 3881 3882 static PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[]) 3883 { 3884 PetscInt m, cstart, cend, j, nnz, i, d, *ld; 3885 PetscInt *d_nnz, *o_nnz, nnz_max = 0, rstart, ii, irstart; 3886 const PetscInt *JJ; 3887 PetscBool nooffprocentries; 3888 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)B->data; 3889 3890 PetscFunctionBegin; 3891 PetscCall(PetscLayoutSetUp(B->rmap)); 3892 PetscCall(PetscLayoutSetUp(B->cmap)); 3893 m = B->rmap->n; 3894 cstart = B->cmap->rstart; 3895 cend = B->cmap->rend; 3896 rstart = B->rmap->rstart; 3897 irstart = Ii[0]; 3898 3899 PetscCall(PetscCalloc2(m, &d_nnz, m, &o_nnz)); 3900 3901 if (PetscDefined(USE_DEBUG)) { 3902 for (i = 0; i < m; i++) { 3903 nnz = Ii[i + 1] - Ii[i]; 3904 JJ = PetscSafePointerPlusOffset(J, Ii[i] - irstart); 3905 PetscCheck(nnz >= 0, 
PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns", i, nnz); 3906 PetscCheck(!nnz || !(JJ[0] < 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT, i, JJ[0]); 3907 PetscCheck(!nnz || !(JJ[nnz - 1] >= B->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")", i, JJ[nnz - 1], B->cmap->N); 3908 } 3909 } 3910 3911 for (i = 0; i < m; i++) { 3912 nnz = Ii[i + 1] - Ii[i]; 3913 JJ = PetscSafePointerPlusOffset(J, Ii[i] - irstart); 3914 nnz_max = PetscMax(nnz_max, nnz); 3915 d = 0; 3916 for (j = 0; j < nnz; j++) { 3917 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3918 } 3919 d_nnz[i] = d; 3920 o_nnz[i] = nnz - d; 3921 } 3922 PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz)); 3923 PetscCall(PetscFree2(d_nnz, o_nnz)); 3924 3925 for (i = 0; i < m; i++) { 3926 ii = i + rstart; 3927 PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, Ii[i + 1] - Ii[i], PetscSafePointerPlusOffset(J, Ii[i] - irstart), PetscSafePointerPlusOffset(v, Ii[i] - irstart), INSERT_VALUES)); 3928 } 3929 nooffprocentries = B->nooffprocentries; 3930 B->nooffprocentries = PETSC_TRUE; 3931 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 3932 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 3933 B->nooffprocentries = nooffprocentries; 3934 3935 /* count number of entries below block diagonal */ 3936 PetscCall(PetscFree(Aij->ld)); 3937 PetscCall(PetscCalloc1(m, &ld)); 3938 Aij->ld = ld; 3939 for (i = 0; i < m; i++) { 3940 nnz = Ii[i + 1] - Ii[i]; 3941 j = 0; 3942 while (j < nnz && J[j] < cstart) j++; 3943 ld[i] = j; 3944 if (J) J += nnz; 3945 } 3946 3947 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 3948 PetscFunctionReturn(PETSC_SUCCESS); 3949 } 3950 3951 /*@ 3952 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in 
`MATAIJ` format
  (the default parallel PETSc format).

  Collective

  Input Parameters:
+ B - the matrix
. i - the indices into `j` for the start of each local row (indices start with zero)
. j - the column indices for each local row (indices start with zero)
- v - optional values in the matrix

  Level: developer

  Notes:
  The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc;
  thus you CANNOT change the matrix entries by changing the values of `v` after you have
  called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.

  The `i` and `j` indices are 0 based, and the `i` indices are offsets into the local `j` array.

  A convenience routine for this functionality is `MatCreateMPIAIJWithArrays()`.

  You can update the matrix with new numerical values using `MatUpdateMPIAIJWithArrays()` after this call if the column indices in `j` are sorted.

  If you do **not** use `MatUpdateMPIAIJWithArrays()`, the column indices in `j` do not need to be sorted. If you will use
  `MatUpdateMPIAIJWithArrays()`, the column indices **must** be sorted.

  The format used for the sparse matrix input is equivalent to a
  row-major ordering,
i.e for the following matrix, the input data expected is 3981 as shown 3982 .vb 3983 1 0 0 3984 2 0 3 P0 3985 ------- 3986 4 5 6 P1 3987 3988 Process0 [P0] rows_owned=[0,1] 3989 i = {0,1,3} [size = nrow+1 = 2+1] 3990 j = {0,0,2} [size = 3] 3991 v = {1,2,3} [size = 3] 3992 3993 Process1 [P1] rows_owned=[2] 3994 i = {0,3} [size = nrow+1 = 1+1] 3995 j = {0,1,2} [size = 3] 3996 v = {4,5,6} [size = 3] 3997 .ve 3998 3999 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`, 4000 `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()`, `MatCreateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4001 @*/ 4002 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B, const PetscInt i[], const PetscInt j[], const PetscScalar v[]) 4003 { 4004 PetscFunctionBegin; 4005 PetscTryMethod(B, "MatMPIAIJSetPreallocationCSR_C", (Mat, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, i, j, v)); 4006 PetscFunctionReturn(PETSC_SUCCESS); 4007 } 4008 4009 /*@ 4010 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in `MATMPIAIJ` format 4011 (the default parallel PETSc format). For good matrix assembly performance 4012 the user should preallocate the matrix storage by setting the parameters 4013 `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). 4014 4015 Collective 4016 4017 Input Parameters: 4018 + B - the matrix 4019 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4020 (same value is used for all local rows) 4021 . d_nnz - array containing the number of nonzeros in the various rows of the 4022 DIAGONAL portion of the local submatrix (possibly different for each row) 4023 or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `d_nz` is used to specify the nonzero structure. 4024 The size of this array is equal to the number of local rows, i.e 'm'. 
           For matrices that will be factored, you must leave room for (and set)
           the diagonal entry even if it is zero.
. o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
           submatrix (same value is used for all local rows).
- o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `o_nz` is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e., 'm'.

  Example Usage:
  Consider the following 8x8 matrix with 34 non-zero values, that is
  assembled across 3 processors. Let's assume that proc0 owns 3 rows,
  proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
  as follows

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

  This can be represented as a collection of submatrices as
.vb
      A B C
      D E F
      G H I
.ve

  Where the submatrices A,B,C are owned by proc0, D,E,F are
  owned by proc1, G,H,I are owned by proc2.

  The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
  The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
  The 'M','N' parameters are 8,8, and have the same values on all procs.

  The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
  submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
  corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
  Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
  part as `MATSEQAIJ` matrices.
For example, proc1 will store [E] as a `MATSEQAIJ`
  matrix, and [DF] as another `MATSEQAIJ` matrix.

  When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
  allocated for every row of the local DIAGONAL submatrix, and `o_nz`
  storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
  One way to choose `d_nz` and `o_nz` is to use the maximum number of nonzeros over
  the local rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
  In this case, the values of `d_nz`, `o_nz` are
.vb
     proc0  d_nz = 2, o_nz = 2
     proc1  d_nz = 3, o_nz = 2
     proc2  d_nz = 1, o_nz = 4
.ve
  We are allocating `m`*(`d_nz`+`o_nz`) storage locations for every proc. This
  translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
  for proc2, i.e., we are using 12+15+10=37 storage locations to store
  34 values.

  When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
  for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
  In the above case the values for `d_nnz`, `o_nnz` are
.vb
     proc0  d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1  d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2  d_nnz = [1,1]   and o_nnz = [4,4]
.ve
  Here the space allocated is the sum of all the above values, i.e., 34, and
  hence pre-allocation is perfect.

  Level: intermediate

  Notes:
  If the *_nnz parameter is given then the *_nz parameter is ignored

  The `MATAIJ` format, also called compressed row storage (CSR), is compatible with standard Fortran
  storage.  The stored row and column indices begin with zero.
  See [Sparse Matrices](sec_matsparse) for details.

  The parallel matrix is partitioned such that the first m0 rows belong to
  process 0, the next m1 rows belong to process 1, the next m2 rows belong
  to process 2, etc., where m0,m1,m2... are the input parameter 'm'.
4114 4115 The DIAGONAL portion of the local submatrix of a processor can be defined 4116 as the submatrix which is obtained by extraction the part corresponding to 4117 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 4118 first row that belongs to the processor, r2 is the last row belonging to 4119 the this processor, and c1-c2 is range of indices of the local part of a 4120 vector suitable for applying the matrix to. This is an mxn matrix. In the 4121 common case of a square matrix, the row and column ranges are the same and 4122 the DIAGONAL part is also square. The remaining portion of the local 4123 submatrix (mxN) constitute the OFF-DIAGONAL portion. 4124 4125 If `o_nnz` and `d_nnz` are specified, then `o_nz` and `d_nz` are ignored. 4126 4127 You can call `MatGetInfo()` to get information on how effective the preallocation was; 4128 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 4129 You can also run with the option `-info` and look for messages with the string 4130 malloc in them to see if additional memory allocation was needed. 4131 4132 .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MATMPIAIJ`, `MATAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`, 4133 `MatGetInfo()`, `PetscSplitOwnership()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4134 @*/ 4135 PetscErrorCode MatMPIAIJSetPreallocation(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[]) 4136 { 4137 PetscFunctionBegin; 4138 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 4139 PetscValidType(B, 1); 4140 PetscTryMethod(B, "MatMPIAIJSetPreallocation_C", (Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, d_nz, d_nnz, o_nz, o_nnz)); 4141 PetscFunctionReturn(PETSC_SUCCESS); 4142 } 4143 4144 /*@ 4145 MatCreateMPIAIJWithArrays - creates a `MATMPIAIJ` matrix using arrays that contain in standard 4146 CSR format for the local rows. 

  Collective

  Input Parameters:
+ comm - MPI communicator
. m    - number of local rows (Cannot be `PETSC_DECIDE`)
. n    - This value should be the same as the local size used in creating the
         x vector for the matrix-vector product $ y = Ax $ (or `PETSC_DECIDE` to have it
         calculated if `N` is given). For square matrices `n` is almost always `m`.
. M    - number of global rows (or `PETSC_DETERMINE` to have it calculated if `m` is given)
. N    - number of global columns (or `PETSC_DETERMINE` to have it calculated if `n` is given)
. i    - row indices (of length m+1); that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
. j    - global column indices
- a    - optional matrix values

  Output Parameter:
. mat - the matrix

  Level: intermediate

  Notes:
  The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc;
  thus you CANNOT change the matrix entries by changing the values of `a[]` after you have
  called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.

  The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array.

  Once you have created the matrix you can update it with new numerical values using `MatUpdateMPIAIJWithArray()`

  If you do **not** use `MatUpdateMPIAIJWithArray()` (or the deprecated `MatUpdateMPIAIJWithArrays()`), the column
  indices in `j` do not need to be sorted. If you will use either of them, the column indices **must** be sorted.
4178 4179 The format which is used for the sparse matrix input, is equivalent to a 4180 row-major ordering, i.e., for the following matrix, the input data expected is 4181 as shown 4182 .vb 4183 1 0 0 4184 2 0 3 P0 4185 ------- 4186 4 5 6 P1 4187 4188 Process0 [P0] rows_owned=[0,1] 4189 i = {0,1,3} [size = nrow+1 = 2+1] 4190 j = {0,0,2} [size = 3] 4191 v = {1,2,3} [size = 3] 4192 4193 Process1 [P1] rows_owned=[2] 4194 i = {0,3} [size = nrow+1 = 1+1] 4195 j = {0,1,2} [size = 3] 4196 v = {4,5,6} [size = 3] 4197 .ve 4198 4199 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4200 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4201 @*/ 4202 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat) 4203 { 4204 PetscFunctionBegin; 4205 PetscCheck(!i || !i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 4206 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4207 PetscCall(MatCreate(comm, mat)); 4208 PetscCall(MatSetSizes(*mat, m, n, M, N)); 4209 /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */ 4210 PetscCall(MatSetType(*mat, MATMPIAIJ)); 4211 PetscCall(MatMPIAIJSetPreallocationCSR(*mat, i, j, a)); 4212 PetscFunctionReturn(PETSC_SUCCESS); 4213 } 4214 4215 /*@ 4216 MatUpdateMPIAIJWithArrays - updates a `MATMPIAIJ` matrix using arrays that contain in standard 4217 CSR format for the local rows. 
Only the numerical values are updated the other arrays must be identical to what was passed 4218 from `MatCreateMPIAIJWithArrays()` 4219 4220 Deprecated: Use `MatUpdateMPIAIJWithArray()` 4221 4222 Collective 4223 4224 Input Parameters: 4225 + mat - the matrix 4226 . m - number of local rows (Cannot be `PETSC_DECIDE`) 4227 . n - This value should be the same as the local size used in creating the 4228 x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have 4229 calculated if N is given) For square matrices n is almost always m. 4230 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 4231 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 4232 . Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix 4233 . J - column indices 4234 - v - matrix values 4235 4236 Level: deprecated 4237 4238 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4239 `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4240 @*/ 4241 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[]) 4242 { 4243 PetscInt nnz, i; 4244 PetscBool nooffprocentries; 4245 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)mat->data; 4246 Mat_SeqAIJ *Ad = (Mat_SeqAIJ *)Aij->A->data; 4247 PetscScalar *ad, *ao; 4248 PetscInt ldi, Iii, md; 4249 const PetscInt *Adi = Ad->i; 4250 PetscInt *ld = Aij->ld; 4251 4252 PetscFunctionBegin; 4253 PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 4254 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4255 PetscCheck(m == mat->rmap->n, 
PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()"); 4256 PetscCheck(n == mat->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()"); 4257 4258 PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad)); 4259 PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao)); 4260 4261 for (i = 0; i < m; i++) { 4262 if (PetscDefined(USE_DEBUG)) { 4263 for (PetscInt j = Ii[i] + 1; j < Ii[i + 1]; ++j) { 4264 PetscCheck(J[j] >= J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is not sorted", j - Ii[i], J[j], i); 4265 PetscCheck(J[j] != J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is identical to previous entry", j - Ii[i], J[j], i); 4266 } 4267 } 4268 nnz = Ii[i + 1] - Ii[i]; 4269 Iii = Ii[i]; 4270 ldi = ld[i]; 4271 md = Adi[i + 1] - Adi[i]; 4272 PetscCall(PetscArraycpy(ao, v + Iii, ldi)); 4273 PetscCall(PetscArraycpy(ad, v + Iii + ldi, md)); 4274 PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md)); 4275 ad += md; 4276 ao += nnz - md; 4277 } 4278 nooffprocentries = mat->nooffprocentries; 4279 mat->nooffprocentries = PETSC_TRUE; 4280 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad)); 4281 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao)); 4282 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A)); 4283 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B)); 4284 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 4285 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 4286 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 4287 mat->nooffprocentries = nooffprocentries; 4288 PetscFunctionReturn(PETSC_SUCCESS); 4289 } 4290 4291 /*@ 4292 MatUpdateMPIAIJWithArray - updates an `MATMPIAIJ` matrix using an array that contains the 
nonzero values 4293 4294 Collective 4295 4296 Input Parameters: 4297 + mat - the matrix 4298 - v - matrix values, stored by row 4299 4300 Level: intermediate 4301 4302 Notes: 4303 The matrix must have been obtained with `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` 4304 4305 The column indices in the call to `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` must have been sorted for this call to work correctly 4306 4307 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4308 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4309 @*/ 4310 PetscErrorCode MatUpdateMPIAIJWithArray(Mat mat, const PetscScalar v[]) 4311 { 4312 PetscInt nnz, i, m; 4313 PetscBool nooffprocentries; 4314 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)mat->data; 4315 Mat_SeqAIJ *Ad = (Mat_SeqAIJ *)Aij->A->data; 4316 Mat_SeqAIJ *Ao = (Mat_SeqAIJ *)Aij->B->data; 4317 PetscScalar *ad, *ao; 4318 const PetscInt *Adi = Ad->i, *Adj = Ao->i; 4319 PetscInt ldi, Iii, md; 4320 PetscInt *ld = Aij->ld; 4321 4322 PetscFunctionBegin; 4323 m = mat->rmap->n; 4324 4325 PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad)); 4326 PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao)); 4327 Iii = 0; 4328 for (i = 0; i < m; i++) { 4329 nnz = Adi[i + 1] - Adi[i] + Adj[i + 1] - Adj[i]; 4330 ldi = ld[i]; 4331 md = Adi[i + 1] - Adi[i]; 4332 PetscCall(PetscArraycpy(ad, v + Iii + ldi, md)); 4333 ad += md; 4334 if (ao) { 4335 PetscCall(PetscArraycpy(ao, v + Iii, ldi)); 4336 PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md)); 4337 ao += nnz - md; 4338 } 4339 Iii += nnz; 4340 } 4341 nooffprocentries = mat->nooffprocentries; 4342 mat->nooffprocentries = PETSC_TRUE; 4343 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad)); 4344 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao)); 4345 
PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A)); 4346 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B)); 4347 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 4348 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 4349 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 4350 mat->nooffprocentries = nooffprocentries; 4351 PetscFunctionReturn(PETSC_SUCCESS); 4352 } 4353 4354 /*@ 4355 MatCreateAIJ - Creates a sparse parallel matrix in `MATAIJ` format 4356 (the default parallel PETSc format). For good matrix assembly performance 4357 the user should preallocate the matrix storage by setting the parameters 4358 `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). 4359 4360 Collective 4361 4362 Input Parameters: 4363 + comm - MPI communicator 4364 . m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given) 4365 This value should be the same as the local size used in creating the 4366 y vector for the matrix-vector product y = Ax. 4367 . n - This value should be the same as the local size used in creating the 4368 x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have 4369 calculated if N is given) For square matrices n is almost always m. 4370 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 4371 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 4372 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4373 (same value is used for all local rows) 4374 . d_nnz - array containing the number of nonzeros in the various rows of the 4375 DIAGONAL portion of the local submatrix (possibly different for each row) 4376 or `NULL`, if `d_nz` is used to specify the nonzero structure. 4377 The size of this array is equal to the number of local rows, i.e 'm'. 4378 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4379 submatrix (same value is used for all local rows). 
4380 - o_nnz - array containing the number of nonzeros in the various rows of the 4381 OFF-DIAGONAL portion of the local submatrix (possibly different for 4382 each row) or `NULL`, if `o_nz` is used to specify the nonzero 4383 structure. The size of this array is equal to the number 4384 of local rows, i.e 'm'. 4385 4386 Output Parameter: 4387 . A - the matrix 4388 4389 Options Database Keys: 4390 + -mat_no_inode - Do not use inodes 4391 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4392 - -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in `MatMult()` of sparse parallel matrices. 4393 See viewer types in manual of `MatView()`. Of them, ascii_matlab, draw or binary cause the `VecScatter` 4394 to be viewed as a matrix. Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one `MatMult()` call. 4395 4396 Level: intermediate 4397 4398 Notes: 4399 It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, 4400 MatXXXXSetPreallocation() paradigm instead of this routine directly. 4401 [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`] 4402 4403 If the *_nnz parameter is given then the *_nz parameter is ignored 4404 4405 The `m`,`n`,`M`,`N` parameters specify the size of the matrix, and its partitioning across 4406 processors, while `d_nz`,`d_nnz`,`o_nz`,`o_nnz` parameters specify the approximate 4407 storage requirements for this matrix. 4408 4409 If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one 4410 processor than it must be used on all processors that share the object for 4411 that argument. 4412 4413 If `m` and `n` are not `PETSC_DECIDE`, then the values determine the `PetscLayout` of the matrix and the ranges returned by 4414 `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, and `MatGetOwnershipRangesColumn()`. 
4415 4416 The user MUST specify either the local or global matrix dimensions 4417 (possibly both). 4418 4419 The parallel matrix is partitioned across processors such that the 4420 first `m0` rows belong to process 0, the next `m1` rows belong to 4421 process 1, the next `m2` rows belong to process 2, etc., where 4422 `m0`, `m1`, `m2`... are the input parameter `m` on each MPI process. I.e., each MPI process stores 4423 values corresponding to [m x N] submatrix. 4424 4425 The columns are logically partitioned with the n0 columns belonging 4426 to 0th partition, the next n1 columns belonging to the next 4427 partition etc.. where n0,n1,n2... are the input parameter 'n'. 4428 4429 The DIAGONAL portion of the local submatrix on any given processor 4430 is the submatrix corresponding to the rows and columns m,n 4431 corresponding to the given processor. i.e diagonal matrix on 4432 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4433 etc. The remaining portion of the local submatrix [m x (N-n)] 4434 constitute the OFF-DIAGONAL portion. The example below better 4435 illustrates this concept. The two matrices, the DIAGONAL portion and 4436 the OFF-DIAGONAL portion are each stored as `MATSEQAIJ` matrices. 4437 4438 For a square global matrix we define each processor's diagonal portion 4439 to be its local rows and the corresponding columns (a square submatrix); 4440 each processor's off-diagonal portion encompasses the remainder of the 4441 local matrix (a rectangular submatrix). 4442 4443 If `o_nnz`, `d_nnz` are specified, then `o_nz`, and `d_nz` are ignored. 4444 4445 When calling this routine with a single process communicator, a matrix of 4446 type `MATSEQAIJ` is returned. 
If a matrix of type `MATMPIAIJ` is desired for this 4447 type of communicator, use the construction mechanism 4448 .vb 4449 MatCreate(..., &A); 4450 MatSetType(A, MATMPIAIJ); 4451 MatSetSizes(A, m, n, M, N); 4452 MatMPIAIJSetPreallocation(A, ...); 4453 .ve 4454 4455 By default, this format uses inodes (identical nodes) when possible. 4456 We search for consecutive rows with the same nonzero structure, thereby 4457 reusing matrix information to achieve increased efficiency. 4458 4459 Example Usage: 4460 Consider the following 8x8 matrix with 34 non-zero values, that is 4461 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4462 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4463 as follows 4464 4465 .vb 4466 1 2 0 | 0 3 0 | 0 4 4467 Proc0 0 5 6 | 7 0 0 | 8 0 4468 9 0 10 | 11 0 0 | 12 0 4469 ------------------------------------- 4470 13 0 14 | 15 16 17 | 0 0 4471 Proc1 0 18 0 | 19 20 21 | 0 0 4472 0 0 0 | 22 23 0 | 24 0 4473 ------------------------------------- 4474 Proc2 25 26 27 | 0 0 28 | 29 0 4475 30 0 0 | 31 32 33 | 0 34 4476 .ve 4477 4478 This can be represented as a collection of submatrices as 4479 4480 .vb 4481 A B C 4482 D E F 4483 G H I 4484 .ve 4485 4486 Where the submatrices A,B,C are owned by proc0, D,E,F are 4487 owned by proc1, G,H,I are owned by proc2. 4488 4489 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4490 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4491 The 'M','N' parameters are 8,8, and have the same values on all procs. 4492 4493 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4494 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4495 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4496 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4497 part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ` 4498 matrix, and [DF] as another SeqAIJ matrix. 

  When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
  allocated for every row of the local DIAGONAL submatrix, and `o_nz`
  storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
  One way to choose `d_nz` and `o_nz` is to use the maximum number of nonzeros over
  the local rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
  In this case, the values of `d_nz`, `o_nz` are
.vb
     proc0  d_nz = 2, o_nz = 2
     proc1  d_nz = 3, o_nz = 2
     proc2  d_nz = 1, o_nz = 4
.ve
  We are allocating `m`*(`d_nz`+`o_nz`) storage locations for every proc. This
  translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
  for proc2, i.e., we are using 12+15+10=37 storage locations to store
  34 values.

  When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
  for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
  In the above case the values for `d_nnz`, `o_nnz` are
.vb
     proc0  d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1  d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2  d_nnz = [1,1]   and o_nnz = [4,4]
.ve
  Here the space allocated is the sum of all the above values, i.e., 34, and
  hence pre-allocation is perfect.
4526 4527 .seealso: [](ch_matrices), `Mat`, [Sparse Matrix Creation](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4528 `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()`, `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, 4529 `MatGetOwnershipRangesColumn()`, `PetscLayout` 4530 @*/ 4531 PetscErrorCode MatCreateAIJ(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A) 4532 { 4533 PetscMPIInt size; 4534 4535 PetscFunctionBegin; 4536 PetscCall(MatCreate(comm, A)); 4537 PetscCall(MatSetSizes(*A, m, n, M, N)); 4538 PetscCallMPI(MPI_Comm_size(comm, &size)); 4539 if (size > 1) { 4540 PetscCall(MatSetType(*A, MATMPIAIJ)); 4541 PetscCall(MatMPIAIJSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz)); 4542 } else { 4543 PetscCall(MatSetType(*A, MATSEQAIJ)); 4544 PetscCall(MatSeqAIJSetPreallocation(*A, d_nz, d_nnz)); 4545 } 4546 PetscFunctionReturn(PETSC_SUCCESS); 4547 } 4548 4549 /*@C 4550 MatMPIAIJGetSeqAIJ - Returns the local pieces of this distributed matrix 4551 4552 Not Collective 4553 4554 Input Parameter: 4555 . A - The `MATMPIAIJ` matrix 4556 4557 Output Parameters: 4558 + Ad - The local diagonal block as a `MATSEQAIJ` matrix 4559 . Ao - The local off-diagonal block as a `MATSEQAIJ` matrix 4560 - colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix 4561 4562 Level: intermediate 4563 4564 Note: 4565 The rows in `Ad` and `Ao` are in [0, Nr), where Nr is the number of local rows on this process. The columns 4566 in `Ad` are in [0, Nc) where Nc is the number of local columns. The columns are `Ao` are in [0, Nco), where Nco is 4567 the number of nonzero columns in the local off-diagonal piece of the matrix `A`. The array colmap maps these 4568 local column numbers to global column numbers in the original matrix. 
4569 4570 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATSEQAIJ` 4571 @*/ 4572 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[]) 4573 { 4574 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 4575 PetscBool flg; 4576 4577 PetscFunctionBegin; 4578 PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &flg)); 4579 PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIAIJ matrix as input"); 4580 if (Ad) *Ad = a->A; 4581 if (Ao) *Ao = a->B; 4582 if (colmap) *colmap = a->garray; 4583 PetscFunctionReturn(PETSC_SUCCESS); 4584 } 4585 4586 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 4587 { 4588 PetscInt m, N, i, rstart, nnz, Ii; 4589 PetscInt *indx; 4590 PetscScalar *values; 4591 MatType rootType; 4592 4593 PetscFunctionBegin; 4594 PetscCall(MatGetSize(inmat, &m, &N)); 4595 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4596 PetscInt *dnz, *onz, sum, bs, cbs; 4597 4598 if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N)); 4599 /* Check sum(n) = N */ 4600 PetscCallMPI(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm)); 4601 PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N); 4602 4603 PetscCallMPI(MPI_Scan(&m, &rstart, 1, MPIU_INT, MPI_SUM, comm)); 4604 rstart -= m; 4605 4606 MatPreallocateBegin(comm, m, n, dnz, onz); 4607 for (i = 0; i < m; i++) { 4608 PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, NULL)); 4609 PetscCall(MatPreallocateSet(i + rstart, nnz, indx, dnz, onz)); 4610 PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, NULL)); 4611 } 4612 4613 PetscCall(MatCreate(comm, outmat)); 4614 PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 4615 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 
4616 PetscCall(MatSetBlockSizes(*outmat, bs, cbs)); 4617 PetscCall(MatGetRootType_Private(inmat, &rootType)); 4618 PetscCall(MatSetType(*outmat, rootType)); 4619 PetscCall(MatSeqAIJSetPreallocation(*outmat, 0, dnz)); 4620 PetscCall(MatMPIAIJSetPreallocation(*outmat, 0, dnz, 0, onz)); 4621 MatPreallocateEnd(dnz, onz); 4622 PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 4623 } 4624 4625 /* numeric phase */ 4626 PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL)); 4627 for (i = 0; i < m; i++) { 4628 PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, &values)); 4629 Ii = i + rstart; 4630 PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES)); 4631 PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, &values)); 4632 } 4633 PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY)); 4634 PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY)); 4635 PetscFunctionReturn(PETSC_SUCCESS); 4636 } 4637 4638 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void **data) 4639 { 4640 Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)*data; 4641 4642 PetscFunctionBegin; 4643 if (!merge) PetscFunctionReturn(PETSC_SUCCESS); 4644 PetscCall(PetscFree(merge->id_r)); 4645 PetscCall(PetscFree(merge->len_s)); 4646 PetscCall(PetscFree(merge->len_r)); 4647 PetscCall(PetscFree(merge->bi)); 4648 PetscCall(PetscFree(merge->bj)); 4649 PetscCall(PetscFree(merge->buf_ri[0])); 4650 PetscCall(PetscFree(merge->buf_ri)); 4651 PetscCall(PetscFree(merge->buf_rj[0])); 4652 PetscCall(PetscFree(merge->buf_rj)); 4653 PetscCall(PetscFree(merge->coi)); 4654 PetscCall(PetscFree(merge->coj)); 4655 PetscCall(PetscFree(merge->owners_co)); 4656 PetscCall(PetscLayoutDestroy(&merge->rowmap)); 4657 PetscCall(PetscFree(merge)); 4658 PetscFunctionReturn(PETSC_SUCCESS); 4659 } 4660 4661 #include <../src/mat/utils/freespace.h> 4662 #include <petscbt.h> 4663 4664 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat, Mat mpimat) 4665 { 4666 MPI_Comm comm; 4667 Mat_SeqAIJ 
*a = (Mat_SeqAIJ *)seqmat->data; 4668 PetscMPIInt size, rank, taga, *len_s; 4669 PetscInt N = mpimat->cmap->N, i, j, *owners, *ai = a->i, *aj, m; 4670 PetscMPIInt proc, k; 4671 PetscInt **buf_ri, **buf_rj; 4672 PetscInt anzi, *bj_i, *bi, *bj, arow, bnzi, nextaj; 4673 PetscInt nrows, **buf_ri_k, **nextrow, **nextai; 4674 MPI_Request *s_waits, *r_waits; 4675 MPI_Status *status; 4676 const MatScalar *aa, *a_a; 4677 MatScalar **abuf_r, *ba_i; 4678 Mat_Merge_SeqsToMPI *merge; 4679 PetscContainer container; 4680 4681 PetscFunctionBegin; 4682 PetscCall(PetscObjectGetComm((PetscObject)mpimat, &comm)); 4683 PetscCall(PetscLogEventBegin(MAT_Seqstompinum, seqmat, 0, 0, 0)); 4684 4685 PetscCallMPI(MPI_Comm_size(comm, &size)); 4686 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 4687 4688 PetscCall(PetscObjectQuery((PetscObject)mpimat, "MatMergeSeqsToMPI", (PetscObject *)&container)); 4689 PetscCheck(container, PetscObjectComm((PetscObject)mpimat), PETSC_ERR_PLIB, "Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic"); 4690 PetscCall(PetscContainerGetPointer(container, (void **)&merge)); 4691 PetscCall(MatSeqAIJGetArrayRead(seqmat, &a_a)); 4692 aa = a_a; 4693 4694 bi = merge->bi; 4695 bj = merge->bj; 4696 buf_ri = merge->buf_ri; 4697 buf_rj = merge->buf_rj; 4698 4699 PetscCall(PetscMalloc1(size, &status)); 4700 owners = merge->rowmap->range; 4701 len_s = merge->len_s; 4702 4703 /* send and recv matrix values */ 4704 PetscCall(PetscObjectGetNewTag((PetscObject)mpimat, &taga)); 4705 PetscCall(PetscPostIrecvScalar(comm, taga, merge->nrecv, merge->id_r, merge->len_r, &abuf_r, &r_waits)); 4706 4707 PetscCall(PetscMalloc1(merge->nsend + 1, &s_waits)); 4708 for (proc = 0, k = 0; proc < size; proc++) { 4709 if (!len_s[proc]) continue; 4710 i = owners[proc]; 4711 PetscCallMPI(MPIU_Isend(aa + ai[i], len_s[proc], MPIU_MATSCALAR, proc, taga, comm, s_waits + k)); 4712 k++; 4713 } 4714 4715 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, r_waits, status)); 4716 if (merge->nsend) 
PetscCallMPI(MPI_Waitall(merge->nsend, s_waits, status)); 4717 PetscCall(PetscFree(status)); 4718 4719 PetscCall(PetscFree(s_waits)); 4720 PetscCall(PetscFree(r_waits)); 4721 4722 /* insert mat values of mpimat */ 4723 PetscCall(PetscMalloc1(N, &ba_i)); 4724 PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai)); 4725 4726 for (k = 0; k < merge->nrecv; k++) { 4727 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4728 nrows = *buf_ri_k[k]; 4729 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4730 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4731 } 4732 4733 /* set values of ba */ 4734 m = merge->rowmap->n; 4735 for (i = 0; i < m; i++) { 4736 arow = owners[rank] + i; 4737 bj_i = bj + bi[i]; /* col indices of the i-th row of mpimat */ 4738 bnzi = bi[i + 1] - bi[i]; 4739 PetscCall(PetscArrayzero(ba_i, bnzi)); 4740 4741 /* add local non-zero vals of this proc's seqmat into ba */ 4742 anzi = ai[arow + 1] - ai[arow]; 4743 aj = a->j + ai[arow]; 4744 aa = a_a + ai[arow]; 4745 nextaj = 0; 4746 for (j = 0; nextaj < anzi; j++) { 4747 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4748 ba_i[j] += aa[nextaj++]; 4749 } 4750 } 4751 4752 /* add received vals into ba */ 4753 for (k = 0; k < merge->nrecv; k++) { /* k-th received message */ 4754 /* i-th row */ 4755 if (i == *nextrow[k]) { 4756 anzi = *(nextai[k] + 1) - *nextai[k]; 4757 aj = buf_rj[k] + *nextai[k]; 4758 aa = abuf_r[k] + *nextai[k]; 4759 nextaj = 0; 4760 for (j = 0; nextaj < anzi; j++) { 4761 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4762 ba_i[j] += aa[nextaj++]; 4763 } 4764 } 4765 nextrow[k]++; 4766 nextai[k]++; 4767 } 4768 } 4769 PetscCall(MatSetValues(mpimat, 1, &arow, bnzi, bj_i, ba_i, INSERT_VALUES)); 4770 } 4771 PetscCall(MatSeqAIJRestoreArrayRead(seqmat, &a_a)); 4772 PetscCall(MatAssemblyBegin(mpimat, MAT_FINAL_ASSEMBLY)); 4773 
PetscCall(MatAssemblyEnd(mpimat, MAT_FINAL_ASSEMBLY)); 4774 4775 PetscCall(PetscFree(abuf_r[0])); 4776 PetscCall(PetscFree(abuf_r)); 4777 PetscCall(PetscFree(ba_i)); 4778 PetscCall(PetscFree3(buf_ri_k, nextrow, nextai)); 4779 PetscCall(PetscLogEventEnd(MAT_Seqstompinum, seqmat, 0, 0, 0)); 4780 PetscFunctionReturn(PETSC_SUCCESS); 4781 } 4782 4783 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, Mat *mpimat) 4784 { 4785 Mat B_mpi; 4786 Mat_SeqAIJ *a = (Mat_SeqAIJ *)seqmat->data; 4787 PetscMPIInt size, rank, tagi, tagj, *len_s, *len_si, *len_ri; 4788 PetscInt **buf_rj, **buf_ri, **buf_ri_k; 4789 PetscInt M = seqmat->rmap->n, N = seqmat->cmap->n, i, *owners, *ai = a->i, *aj = a->j; 4790 PetscInt len, *dnz, *onz, bs, cbs; 4791 PetscInt k, anzi, *bi, *bj, *lnk, nlnk, arow, bnzi; 4792 PetscInt nrows, *buf_s, *buf_si, *buf_si_i, **nextrow, **nextai; 4793 MPI_Request *si_waits, *sj_waits, *ri_waits, *rj_waits; 4794 MPI_Status *status; 4795 PetscFreeSpaceList free_space = NULL, current_space = NULL; 4796 PetscBT lnkbt; 4797 Mat_Merge_SeqsToMPI *merge; 4798 PetscContainer container; 4799 4800 PetscFunctionBegin; 4801 PetscCall(PetscLogEventBegin(MAT_Seqstompisym, seqmat, 0, 0, 0)); 4802 4803 /* make sure it is a PETSc comm */ 4804 PetscCall(PetscCommDuplicate(comm, &comm, NULL)); 4805 PetscCallMPI(MPI_Comm_size(comm, &size)); 4806 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 4807 4808 PetscCall(PetscNew(&merge)); 4809 PetscCall(PetscMalloc1(size, &status)); 4810 4811 /* determine row ownership */ 4812 PetscCall(PetscLayoutCreate(comm, &merge->rowmap)); 4813 PetscCall(PetscLayoutSetLocalSize(merge->rowmap, m)); 4814 PetscCall(PetscLayoutSetSize(merge->rowmap, M)); 4815 PetscCall(PetscLayoutSetBlockSize(merge->rowmap, 1)); 4816 PetscCall(PetscLayoutSetUp(merge->rowmap)); 4817 PetscCall(PetscMalloc1(size, &len_si)); 4818 PetscCall(PetscMalloc1(size, &merge->len_s)); 4819 4820 m = merge->rowmap->n; 4821 owners = merge->rowmap->range; 
4822 4823 /* determine the number of messages to send, their lengths */ 4824 len_s = merge->len_s; 4825 4826 len = 0; /* length of buf_si[] */ 4827 merge->nsend = 0; 4828 for (PetscMPIInt proc = 0; proc < size; proc++) { 4829 len_si[proc] = 0; 4830 if (proc == rank) { 4831 len_s[proc] = 0; 4832 } else { 4833 PetscCall(PetscMPIIntCast(owners[proc + 1] - owners[proc] + 1, &len_si[proc])); 4834 PetscCall(PetscMPIIntCast(ai[owners[proc + 1]] - ai[owners[proc]], &len_s[proc])); /* num of rows to be sent to [proc] */ 4835 } 4836 if (len_s[proc]) { 4837 merge->nsend++; 4838 nrows = 0; 4839 for (i = owners[proc]; i < owners[proc + 1]; i++) { 4840 if (ai[i + 1] > ai[i]) nrows++; 4841 } 4842 PetscCall(PetscMPIIntCast(2 * (nrows + 1), &len_si[proc])); 4843 len += len_si[proc]; 4844 } 4845 } 4846 4847 /* determine the number and length of messages to receive for ij-structure */ 4848 PetscCall(PetscGatherNumberOfMessages(comm, NULL, len_s, &merge->nrecv)); 4849 PetscCall(PetscGatherMessageLengths2(comm, merge->nsend, merge->nrecv, len_s, len_si, &merge->id_r, &merge->len_r, &len_ri)); 4850 4851 /* post the Irecv of j-structure */ 4852 PetscCall(PetscCommGetNewTag(comm, &tagj)); 4853 PetscCall(PetscPostIrecvInt(comm, tagj, merge->nrecv, merge->id_r, merge->len_r, &buf_rj, &rj_waits)); 4854 4855 /* post the Isend of j-structure */ 4856 PetscCall(PetscMalloc2(merge->nsend, &si_waits, merge->nsend, &sj_waits)); 4857 4858 for (PetscMPIInt proc = 0, k = 0; proc < size; proc++) { 4859 if (!len_s[proc]) continue; 4860 i = owners[proc]; 4861 PetscCallMPI(MPIU_Isend(aj + ai[i], len_s[proc], MPIU_INT, proc, tagj, comm, sj_waits + k)); 4862 k++; 4863 } 4864 4865 /* receives and sends of j-structure are complete */ 4866 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, rj_waits, status)); 4867 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, sj_waits, status)); 4868 4869 /* send and recv i-structure */ 4870 PetscCall(PetscCommGetNewTag(comm, &tagi)); 4871 
PetscCall(PetscPostIrecvInt(comm, tagi, merge->nrecv, merge->id_r, len_ri, &buf_ri, &ri_waits)); 4872 4873 PetscCall(PetscMalloc1(len + 1, &buf_s)); 4874 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4875 for (PetscMPIInt proc = 0, k = 0; proc < size; proc++) { 4876 if (!len_s[proc]) continue; 4877 /* form outgoing message for i-structure: 4878 buf_si[0]: nrows to be sent 4879 [1:nrows]: row index (global) 4880 [nrows+1:2*nrows+1]: i-structure index 4881 */ 4882 nrows = len_si[proc] / 2 - 1; 4883 buf_si_i = buf_si + nrows + 1; 4884 buf_si[0] = nrows; 4885 buf_si_i[0] = 0; 4886 nrows = 0; 4887 for (i = owners[proc]; i < owners[proc + 1]; i++) { 4888 anzi = ai[i + 1] - ai[i]; 4889 if (anzi) { 4890 buf_si_i[nrows + 1] = buf_si_i[nrows] + anzi; /* i-structure */ 4891 buf_si[nrows + 1] = i - owners[proc]; /* local row index */ 4892 nrows++; 4893 } 4894 } 4895 PetscCallMPI(MPIU_Isend(buf_si, len_si[proc], MPIU_INT, proc, tagi, comm, si_waits + k)); 4896 k++; 4897 buf_si += len_si[proc]; 4898 } 4899 4900 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, ri_waits, status)); 4901 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, si_waits, status)); 4902 4903 PetscCall(PetscInfo(seqmat, "nsend: %d, nrecv: %d\n", merge->nsend, merge->nrecv)); 4904 for (i = 0; i < merge->nrecv; i++) PetscCall(PetscInfo(seqmat, "recv len_ri=%d, len_rj=%d from [%d]\n", len_ri[i], merge->len_r[i], merge->id_r[i])); 4905 4906 PetscCall(PetscFree(len_si)); 4907 PetscCall(PetscFree(len_ri)); 4908 PetscCall(PetscFree(rj_waits)); 4909 PetscCall(PetscFree2(si_waits, sj_waits)); 4910 PetscCall(PetscFree(ri_waits)); 4911 PetscCall(PetscFree(buf_s)); 4912 PetscCall(PetscFree(status)); 4913 4914 /* compute a local seq matrix in each processor */ 4915 /* allocate bi array and free space for accumulating nonzero column info */ 4916 PetscCall(PetscMalloc1(m + 1, &bi)); 4917 bi[0] = 0; 4918 4919 /* create and initialize a linked list */ 4920 nlnk = N + 1; 4921 
PetscCall(PetscLLCreate(N, N, nlnk, lnk, lnkbt)); 4922 4923 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4924 len = ai[owners[rank + 1]] - ai[owners[rank]]; 4925 PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2, len) + 1, &free_space)); 4926 4927 current_space = free_space; 4928 4929 /* determine symbolic info for each local row */ 4930 PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai)); 4931 4932 for (k = 0; k < merge->nrecv; k++) { 4933 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4934 nrows = *buf_ri_k[k]; 4935 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4936 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4937 } 4938 4939 MatPreallocateBegin(comm, m, n, dnz, onz); 4940 len = 0; 4941 for (i = 0; i < m; i++) { 4942 bnzi = 0; 4943 /* add local non-zero cols of this proc's seqmat into lnk */ 4944 arow = owners[rank] + i; 4945 anzi = ai[arow + 1] - ai[arow]; 4946 aj = a->j + ai[arow]; 4947 PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt)); 4948 bnzi += nlnk; 4949 /* add received col data into lnk */ 4950 for (k = 0; k < merge->nrecv; k++) { /* k-th received message */ 4951 if (i == *nextrow[k]) { /* i-th row */ 4952 anzi = *(nextai[k] + 1) - *nextai[k]; 4953 aj = buf_rj[k] + *nextai[k]; 4954 PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt)); 4955 bnzi += nlnk; 4956 nextrow[k]++; 4957 nextai[k]++; 4958 } 4959 } 4960 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4961 4962 /* if free space is not available, make more free space */ 4963 if (current_space->local_remaining < bnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi, current_space->total_array_size), ¤t_space)); 4964 /* copy data into free space, then initialize lnk */ 4965 PetscCall(PetscLLClean(N, N, bnzi, lnk, current_space->array, lnkbt)); 4966 PetscCall(MatPreallocateSet(i + owners[rank], bnzi, 
current_space->array, dnz, onz)); 4967 4968 current_space->array += bnzi; 4969 current_space->local_used += bnzi; 4970 current_space->local_remaining -= bnzi; 4971 4972 bi[i + 1] = bi[i] + bnzi; 4973 } 4974 4975 PetscCall(PetscFree3(buf_ri_k, nextrow, nextai)); 4976 4977 PetscCall(PetscMalloc1(bi[m] + 1, &bj)); 4978 PetscCall(PetscFreeSpaceContiguous(&free_space, bj)); 4979 PetscCall(PetscLLDestroy(lnk, lnkbt)); 4980 4981 /* create symbolic parallel matrix B_mpi */ 4982 PetscCall(MatGetBlockSizes(seqmat, &bs, &cbs)); 4983 PetscCall(MatCreate(comm, &B_mpi)); 4984 if (n == PETSC_DECIDE) { 4985 PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, N)); 4986 } else { 4987 PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 4988 } 4989 PetscCall(MatSetBlockSizes(B_mpi, bs, cbs)); 4990 PetscCall(MatSetType(B_mpi, MATMPIAIJ)); 4991 PetscCall(MatMPIAIJSetPreallocation(B_mpi, 0, dnz, 0, onz)); 4992 MatPreallocateEnd(dnz, onz); 4993 PetscCall(MatSetOption(B_mpi, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE)); 4994 4995 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4996 B_mpi->assembled = PETSC_FALSE; 4997 merge->bi = bi; 4998 merge->bj = bj; 4999 merge->buf_ri = buf_ri; 5000 merge->buf_rj = buf_rj; 5001 merge->coi = NULL; 5002 merge->coj = NULL; 5003 merge->owners_co = NULL; 5004 5005 PetscCall(PetscCommDestroy(&comm)); 5006 5007 /* attach the supporting struct to B_mpi for reuse */ 5008 PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container)); 5009 PetscCall(PetscContainerSetPointer(container, merge)); 5010 PetscCall(PetscContainerSetCtxDestroy(container, MatDestroy_MPIAIJ_SeqsToMPI)); 5011 PetscCall(PetscObjectCompose((PetscObject)B_mpi, "MatMergeSeqsToMPI", (PetscObject)container)); 5012 PetscCall(PetscContainerDestroy(&container)); 5013 *mpimat = B_mpi; 5014 5015 PetscCall(PetscLogEventEnd(MAT_Seqstompisym, seqmat, 0, 0, 0)); 5016 PetscFunctionReturn(PETSC_SUCCESS); 5017 } 5018 5019 /*@ 5020 
MatCreateMPIAIJSumSeqAIJ - Creates a `MATMPIAIJ` matrix by adding sequential 5021 matrices from each processor 5022 5023 Collective 5024 5025 Input Parameters: 5026 + comm - the communicators the parallel matrix will live on 5027 . seqmat - the input sequential matrices 5028 . m - number of local rows (or `PETSC_DECIDE`) 5029 . n - number of local columns (or `PETSC_DECIDE`) 5030 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5031 5032 Output Parameter: 5033 . mpimat - the parallel matrix generated 5034 5035 Level: advanced 5036 5037 Note: 5038 The dimensions of the sequential matrix in each processor MUST be the same. 5039 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 5040 destroyed when `mpimat` is destroyed. Call `PetscObjectQuery()` to access `seqmat`. 5041 5042 .seealso: [](ch_matrices), `Mat`, `MatCreateAIJ()` 5043 @*/ 5044 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, MatReuse scall, Mat *mpimat) 5045 { 5046 PetscMPIInt size; 5047 5048 PetscFunctionBegin; 5049 PetscCallMPI(MPI_Comm_size(comm, &size)); 5050 if (size == 1) { 5051 PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0)); 5052 if (scall == MAT_INITIAL_MATRIX) { 5053 PetscCall(MatDuplicate(seqmat, MAT_COPY_VALUES, mpimat)); 5054 } else { 5055 PetscCall(MatCopy(seqmat, *mpimat, SAME_NONZERO_PATTERN)); 5056 } 5057 PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0)); 5058 PetscFunctionReturn(PETSC_SUCCESS); 5059 } 5060 PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0)); 5061 if (scall == MAT_INITIAL_MATRIX) PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm, seqmat, m, n, mpimat)); 5062 PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat, *mpimat)); 5063 PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0)); 5064 PetscFunctionReturn(PETSC_SUCCESS); 5065 } 5066 5067 /*@ 5068 MatAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATAIJ` matrix. 
5069 5070 Not Collective 5071 5072 Input Parameter: 5073 . A - the matrix 5074 5075 Output Parameter: 5076 . A_loc - the local sequential matrix generated 5077 5078 Level: developer 5079 5080 Notes: 5081 The matrix is created by taking `A`'s local rows and putting them into a sequential matrix 5082 with `mlocal` rows and `n` columns. Where `mlocal` is obtained with `MatGetLocalSize()` and 5083 `n` is the global column count obtained with `MatGetSize()` 5084 5085 In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix. 5086 5087 For parallel matrices this creates an entirely new matrix. If the matrix is sequential it merely increases the reference count. 5088 5089 Destroy the matrix with `MatDestroy()` 5090 5091 .seealso: [](ch_matrices), `Mat`, `MatMPIAIJGetLocalMat()` 5092 @*/ 5093 PetscErrorCode MatAIJGetLocalMat(Mat A, Mat *A_loc) 5094 { 5095 PetscBool mpi; 5096 5097 PetscFunctionBegin; 5098 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &mpi)); 5099 if (mpi) { 5100 PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, A_loc)); 5101 } else { 5102 *A_loc = A; 5103 PetscCall(PetscObjectReference((PetscObject)*A_loc)); 5104 } 5105 PetscFunctionReturn(PETSC_SUCCESS); 5106 } 5107 5108 /*@ 5109 MatMPIAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix. 5110 5111 Not Collective 5112 5113 Input Parameters: 5114 + A - the matrix 5115 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5116 5117 Output Parameter: 5118 . A_loc - the local sequential matrix generated 5119 5120 Level: developer 5121 5122 Notes: 5123 The matrix is created by taking all `A`'s local rows and putting them into a sequential 5124 matrix with `mlocal` rows and `n` columns.`mlocal` is the row count obtained with 5125 `MatGetLocalSize()` and `n` is the global column count obtained with `MatGetSize()`. 5126 5127 In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix. 
5128 5129 When `A` is sequential and `MAT_INITIAL_MATRIX` is requested, the matrix returned is the diagonal part of `A` (which contains the entire matrix), 5130 with its reference count increased by one. Hence changing values of `A_loc` changes `A`. If `MAT_REUSE_MATRIX` is requested on a sequential matrix 5131 then `MatCopy`(Adiag,*`A_loc`,`SAME_NONZERO_PATTERN`) is called to fill `A_loc`. Thus one can preallocate the appropriate sequential matrix `A_loc` 5132 and then call this routine with `MAT_REUSE_MATRIX`. In this case, one can modify the values of `A_loc` without affecting the original sequential matrix. 5133 5134 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()` 5135 @*/ 5136 PetscErrorCode MatMPIAIJGetLocalMat(Mat A, MatReuse scall, Mat *A_loc) 5137 { 5138 Mat_MPIAIJ *mpimat = (Mat_MPIAIJ *)A->data; 5139 Mat_SeqAIJ *mat, *a, *b; 5140 PetscInt *ai, *aj, *bi, *bj, *cmap = mpimat->garray; 5141 const PetscScalar *aa, *ba, *aav, *bav; 5142 PetscScalar *ca, *cam; 5143 PetscMPIInt size; 5144 PetscInt am = A->rmap->n, i, j, k, cstart = A->cmap->rstart; 5145 PetscInt *ci, *cj, col, ncols_d, ncols_o, jo; 5146 PetscBool match; 5147 5148 PetscFunctionBegin; 5149 PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &match)); 5150 PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input"); 5151 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 5152 if (size == 1) { 5153 if (scall == MAT_INITIAL_MATRIX) { 5154 PetscCall(PetscObjectReference((PetscObject)mpimat->A)); 5155 *A_loc = mpimat->A; 5156 } else if (scall == MAT_REUSE_MATRIX) { 5157 PetscCall(MatCopy(mpimat->A, *A_loc, SAME_NONZERO_PATTERN)); 5158 } 5159 PetscFunctionReturn(PETSC_SUCCESS); 5160 } 5161 5162 PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0)); 5163 a = (Mat_SeqAIJ *)mpimat->A->data; 5164 b = (Mat_SeqAIJ *)mpimat->B->data; 5165 
ai = a->i; 5166 aj = a->j; 5167 bi = b->i; 5168 bj = b->j; 5169 PetscCall(MatSeqAIJGetArrayRead(mpimat->A, &aav)); 5170 PetscCall(MatSeqAIJGetArrayRead(mpimat->B, &bav)); 5171 aa = aav; 5172 ba = bav; 5173 if (scall == MAT_INITIAL_MATRIX) { 5174 PetscCall(PetscMalloc1(1 + am, &ci)); 5175 ci[0] = 0; 5176 for (i = 0; i < am; i++) ci[i + 1] = ci[i] + (ai[i + 1] - ai[i]) + (bi[i + 1] - bi[i]); 5177 PetscCall(PetscMalloc1(1 + ci[am], &cj)); 5178 PetscCall(PetscMalloc1(1 + ci[am], &ca)); 5179 k = 0; 5180 for (i = 0; i < am; i++) { 5181 ncols_o = bi[i + 1] - bi[i]; 5182 ncols_d = ai[i + 1] - ai[i]; 5183 /* off-diagonal portion of A */ 5184 for (jo = 0; jo < ncols_o; jo++) { 5185 col = cmap[*bj]; 5186 if (col >= cstart) break; 5187 cj[k] = col; 5188 bj++; 5189 ca[k++] = *ba++; 5190 } 5191 /* diagonal portion of A */ 5192 for (j = 0; j < ncols_d; j++) { 5193 cj[k] = cstart + *aj++; 5194 ca[k++] = *aa++; 5195 } 5196 /* off-diagonal portion of A */ 5197 for (j = jo; j < ncols_o; j++) { 5198 cj[k] = cmap[*bj++]; 5199 ca[k++] = *ba++; 5200 } 5201 } 5202 /* put together the new matrix */ 5203 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, A->cmap->N, ci, cj, ca, A_loc)); 5204 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5205 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5206 mat = (Mat_SeqAIJ *)(*A_loc)->data; 5207 mat->free_a = PETSC_TRUE; 5208 mat->free_ij = PETSC_TRUE; 5209 mat->nonew = 0; 5210 } else if (scall == MAT_REUSE_MATRIX) { 5211 mat = (Mat_SeqAIJ *)(*A_loc)->data; 5212 ci = mat->i; 5213 cj = mat->j; 5214 PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &cam)); 5215 for (i = 0; i < am; i++) { 5216 /* off-diagonal portion of A */ 5217 ncols_o = bi[i + 1] - bi[i]; 5218 for (jo = 0; jo < ncols_o; jo++) { 5219 col = cmap[*bj]; 5220 if (col >= cstart) break; 5221 *cam++ = *ba++; 5222 bj++; 5223 } 5224 /* diagonal portion of A */ 5225 ncols_d = ai[i + 1] - ai[i]; 5226 for (j = 0; j < ncols_d; j++) *cam++ = *aa++; 5227 /* off-diagonal portion of A */ 5228 for (j = jo; j < ncols_o; j++) { 5229 *cam++ = *ba++; 5230 bj++; 5231 } 5232 } 5233 PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &cam)); 5234 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall); 5235 PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A, &aav)); 5236 PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B, &bav)); 5237 PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0)); 5238 PetscFunctionReturn(PETSC_SUCCESS); 5239 } 5240 5241 /*@ 5242 MatMPIAIJGetLocalMatMerge - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into a sequential matrix with 5243 mlocal rows and n columns. 
Where n is the sum of the number of columns of the diagonal and off-diagonal part 5244 5245 Not Collective 5246 5247 Input Parameters: 5248 + A - the matrix 5249 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5250 5251 Output Parameters: 5252 + glob - sequential `IS` with global indices associated with the columns of the local sequential matrix generated (can be `NULL`) 5253 - A_loc - the local sequential matrix generated 5254 5255 Level: developer 5256 5257 Note: 5258 This is different from `MatMPIAIJGetLocalMat()` since the first columns in the returning matrix are those associated with the diagonal 5259 part, then those associated with the off-diagonal part (in its local ordering) 5260 5261 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()` 5262 @*/ 5263 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A, MatReuse scall, IS *glob, Mat *A_loc) 5264 { 5265 Mat Ao, Ad; 5266 const PetscInt *cmap; 5267 PetscMPIInt size; 5268 PetscErrorCode (*f)(Mat, MatReuse, IS *, Mat *); 5269 5270 PetscFunctionBegin; 5271 PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &cmap)); 5272 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 5273 if (size == 1) { 5274 if (scall == MAT_INITIAL_MATRIX) { 5275 PetscCall(PetscObjectReference((PetscObject)Ad)); 5276 *A_loc = Ad; 5277 } else if (scall == MAT_REUSE_MATRIX) { 5278 PetscCall(MatCopy(Ad, *A_loc, SAME_NONZERO_PATTERN)); 5279 } 5280 if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad), Ad->cmap->n, Ad->cmap->rstart, 1, glob)); 5281 PetscFunctionReturn(PETSC_SUCCESS); 5282 } 5283 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJGetLocalMatMerge_C", &f)); 5284 PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0)); 5285 if (f) { 5286 PetscCall((*f)(A, scall, glob, A_loc)); 5287 } else { 5288 Mat_SeqAIJ *a = (Mat_SeqAIJ *)Ad->data; 5289 Mat_SeqAIJ *b = (Mat_SeqAIJ *)Ao->data; 5290 Mat_SeqAIJ *c; 5291 
PetscInt *ai = a->i, *aj = a->j; 5292 PetscInt *bi = b->i, *bj = b->j; 5293 PetscInt *ci, *cj; 5294 const PetscScalar *aa, *ba; 5295 PetscScalar *ca; 5296 PetscInt i, j, am, dn, on; 5297 5298 PetscCall(MatGetLocalSize(Ad, &am, &dn)); 5299 PetscCall(MatGetLocalSize(Ao, NULL, &on)); 5300 PetscCall(MatSeqAIJGetArrayRead(Ad, &aa)); 5301 PetscCall(MatSeqAIJGetArrayRead(Ao, &ba)); 5302 if (scall == MAT_INITIAL_MATRIX) { 5303 PetscInt k; 5304 PetscCall(PetscMalloc1(1 + am, &ci)); 5305 PetscCall(PetscMalloc1(ai[am] + bi[am], &cj)); 5306 PetscCall(PetscMalloc1(ai[am] + bi[am], &ca)); 5307 ci[0] = 0; 5308 for (i = 0, k = 0; i < am; i++) { 5309 const PetscInt ncols_o = bi[i + 1] - bi[i]; 5310 const PetscInt ncols_d = ai[i + 1] - ai[i]; 5311 ci[i + 1] = ci[i] + ncols_o + ncols_d; 5312 /* diagonal portion of A */ 5313 for (j = 0; j < ncols_d; j++, k++) { 5314 cj[k] = *aj++; 5315 ca[k] = *aa++; 5316 } 5317 /* off-diagonal portion of A */ 5318 for (j = 0; j < ncols_o; j++, k++) { 5319 cj[k] = dn + *bj++; 5320 ca[k] = *ba++; 5321 } 5322 } 5323 /* put together the new matrix */ 5324 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, dn + on, ci, cj, ca, A_loc)); 5325 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5326 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5327 c = (Mat_SeqAIJ *)(*A_loc)->data; 5328 c->free_a = PETSC_TRUE; 5329 c->free_ij = PETSC_TRUE; 5330 c->nonew = 0; 5331 PetscCall(MatSetType(*A_loc, ((PetscObject)Ad)->type_name)); 5332 } else if (scall == MAT_REUSE_MATRIX) { 5333 PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &ca)); 5334 for (i = 0; i < am; i++) { 5335 const PetscInt ncols_d = ai[i + 1] - ai[i]; 5336 const PetscInt ncols_o = bi[i + 1] - bi[i]; 5337 /* diagonal portion of A */ 5338 for (j = 0; j < ncols_d; j++) *ca++ = *aa++; 5339 /* off-diagonal portion of A */ 5340 for (j = 0; j < ncols_o; j++) *ca++ = *ba++; 5341 } 5342 PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &ca)); 5343 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall); 5344 PetscCall(MatSeqAIJRestoreArrayRead(Ad, &aa)); 5345 PetscCall(MatSeqAIJRestoreArrayRead(Ao, &aa)); 5346 if (glob) { 5347 PetscInt cst, *gidx; 5348 5349 PetscCall(MatGetOwnershipRangeColumn(A, &cst, NULL)); 5350 PetscCall(PetscMalloc1(dn + on, &gidx)); 5351 for (i = 0; i < dn; i++) gidx[i] = cst + i; 5352 for (i = 0; i < on; i++) gidx[i + dn] = cmap[i]; 5353 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad), dn + on, gidx, PETSC_OWN_POINTER, glob)); 5354 } 5355 } 5356 PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0)); 5357 PetscFunctionReturn(PETSC_SUCCESS); 5358 } 5359 5360 /*@C 5361 MatMPIAIJGetLocalMatCondensed - Creates a `MATSEQAIJ` matrix from an `MATMPIAIJ` matrix by taking all its local rows and NON-ZERO columns 5362 5363 Not Collective 5364 5365 Input Parameters: 5366 + A - the matrix 5367 . scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5368 . row - index set of rows to extract (or `NULL`) 5369 - col - index set of columns to extract (or `NULL`) 5370 5371 Output Parameter: 5372 . 
A_loc - the local sequential matrix generated 5373 5374 Level: developer 5375 5376 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()` 5377 @*/ 5378 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A, MatReuse scall, IS *row, IS *col, Mat *A_loc) 5379 { 5380 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5381 PetscInt i, start, end, ncols, nzA, nzB, *cmap, imark, *idx; 5382 IS isrowa, iscola; 5383 Mat *aloc; 5384 PetscBool match; 5385 5386 PetscFunctionBegin; 5387 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &match)); 5388 PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input"); 5389 PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed, A, 0, 0, 0)); 5390 if (!row) { 5391 start = A->rmap->rstart; 5392 end = A->rmap->rend; 5393 PetscCall(ISCreateStride(PETSC_COMM_SELF, end - start, start, 1, &isrowa)); 5394 } else { 5395 isrowa = *row; 5396 } 5397 if (!col) { 5398 start = A->cmap->rstart; 5399 cmap = a->garray; 5400 nzA = a->A->cmap->n; 5401 nzB = a->B->cmap->n; 5402 PetscCall(PetscMalloc1(nzA + nzB, &idx)); 5403 ncols = 0; 5404 for (i = 0; i < nzB; i++) { 5405 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5406 else break; 5407 } 5408 imark = i; 5409 for (i = 0; i < nzA; i++) idx[ncols++] = start + i; 5410 for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; 5411 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &iscola)); 5412 } else { 5413 iscola = *col; 5414 } 5415 if (scall != MAT_INITIAL_MATRIX) { 5416 PetscCall(PetscMalloc1(1, &aloc)); 5417 aloc[0] = *A_loc; 5418 } 5419 PetscCall(MatCreateSubMatrices(A, 1, &isrowa, &iscola, scall, &aloc)); 5420 if (!col) { /* attach global id of condensed columns */ 5421 PetscCall(PetscObjectCompose((PetscObject)aloc[0], "_petsc_GetLocalMatCondensed_iscol", (PetscObject)iscola)); 5422 } 5423 *A_loc = aloc[0]; 5424 PetscCall(PetscFree(aloc)); 5425 if (!row) PetscCall(ISDestroy(&isrowa)); 5426 if (!col) 
PetscCall(ISDestroy(&iscola)); 5427 PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed, A, 0, 0, 0)); 5428 PetscFunctionReturn(PETSC_SUCCESS); 5429 } 5430 5431 /* 5432 * Create a sequential AIJ matrix based on row indices. a whole column is extracted once a row is matched. 5433 * Row could be local or remote.The routine is designed to be scalable in memory so that nothing is based 5434 * on a global size. 5435 * */ 5436 static PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P, IS rows, Mat *P_oth) 5437 { 5438 Mat_MPIAIJ *p = (Mat_MPIAIJ *)P->data; 5439 Mat_SeqAIJ *pd = (Mat_SeqAIJ *)p->A->data, *po = (Mat_SeqAIJ *)p->B->data, *p_oth; 5440 PetscInt plocalsize, nrows, *ilocal, *oilocal, i, lidx, *nrcols, *nlcols, ncol; 5441 PetscMPIInt owner; 5442 PetscSFNode *iremote, *oiremote; 5443 const PetscInt *lrowindices; 5444 PetscSF sf, osf; 5445 PetscInt pcstart, *roffsets, *loffsets, *pnnz, j; 5446 PetscInt ontotalcols, dntotalcols, ntotalcols, nout; 5447 MPI_Comm comm; 5448 ISLocalToGlobalMapping mapping; 5449 const PetscScalar *pd_a, *po_a; 5450 5451 PetscFunctionBegin; 5452 PetscCall(PetscObjectGetComm((PetscObject)P, &comm)); 5453 /* plocalsize is the number of roots 5454 * nrows is the number of leaves 5455 * */ 5456 PetscCall(MatGetLocalSize(P, &plocalsize, NULL)); 5457 PetscCall(ISGetLocalSize(rows, &nrows)); 5458 PetscCall(PetscCalloc1(nrows, &iremote)); 5459 PetscCall(ISGetIndices(rows, &lrowindices)); 5460 for (i = 0; i < nrows; i++) { 5461 /* Find a remote index and an owner for a row 5462 * The row could be local or remote 5463 * */ 5464 owner = 0; 5465 lidx = 0; 5466 PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, &lidx)); 5467 iremote[i].index = lidx; 5468 iremote[i].rank = owner; 5469 } 5470 /* Create SF to communicate how many nonzero columns for each row */ 5471 PetscCall(PetscSFCreate(comm, &sf)); 5472 /* SF will figure out the number of nonzero columns for each row, and their 5473 * offsets 5474 * */ 5475 
PetscCall(PetscSFSetGraph(sf, plocalsize, nrows, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 5476 PetscCall(PetscSFSetFromOptions(sf)); 5477 PetscCall(PetscSFSetUp(sf)); 5478 5479 PetscCall(PetscCalloc1(2 * (plocalsize + 1), &roffsets)); 5480 PetscCall(PetscCalloc1(2 * plocalsize, &nrcols)); 5481 PetscCall(PetscCalloc1(nrows, &pnnz)); 5482 roffsets[0] = 0; 5483 roffsets[1] = 0; 5484 for (i = 0; i < plocalsize; i++) { 5485 /* diagonal */ 5486 nrcols[i * 2 + 0] = pd->i[i + 1] - pd->i[i]; 5487 /* off-diagonal */ 5488 nrcols[i * 2 + 1] = po->i[i + 1] - po->i[i]; 5489 /* compute offsets so that we relative location for each row */ 5490 roffsets[(i + 1) * 2 + 0] = roffsets[i * 2 + 0] + nrcols[i * 2 + 0]; 5491 roffsets[(i + 1) * 2 + 1] = roffsets[i * 2 + 1] + nrcols[i * 2 + 1]; 5492 } 5493 PetscCall(PetscCalloc1(2 * nrows, &nlcols)); 5494 PetscCall(PetscCalloc1(2 * nrows, &loffsets)); 5495 /* 'r' means root, and 'l' means leaf */ 5496 PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE)); 5497 PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE)); 5498 PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE)); 5499 PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE)); 5500 PetscCall(PetscSFDestroy(&sf)); 5501 PetscCall(PetscFree(roffsets)); 5502 PetscCall(PetscFree(nrcols)); 5503 dntotalcols = 0; 5504 ontotalcols = 0; 5505 ncol = 0; 5506 for (i = 0; i < nrows; i++) { 5507 pnnz[i] = nlcols[i * 2 + 0] + nlcols[i * 2 + 1]; 5508 ncol = PetscMax(pnnz[i], ncol); 5509 /* diagonal */ 5510 dntotalcols += nlcols[i * 2 + 0]; 5511 /* off-diagonal */ 5512 ontotalcols += nlcols[i * 2 + 1]; 5513 } 5514 /* We do not need to figure the right number of columns 5515 * since all the calculations will be done by going through the raw data 5516 * */ 5517 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, nrows, ncol, 0, pnnz, P_oth)); 5518 PetscCall(MatSetUp(*P_oth)); 5519 PetscCall(PetscFree(pnnz)); 5520 p_oth = 
(Mat_SeqAIJ *)(*P_oth)->data; 5521 /* diagonal */ 5522 PetscCall(PetscCalloc1(dntotalcols, &iremote)); 5523 /* off-diagonal */ 5524 PetscCall(PetscCalloc1(ontotalcols, &oiremote)); 5525 /* diagonal */ 5526 PetscCall(PetscCalloc1(dntotalcols, &ilocal)); 5527 /* off-diagonal */ 5528 PetscCall(PetscCalloc1(ontotalcols, &oilocal)); 5529 dntotalcols = 0; 5530 ontotalcols = 0; 5531 ntotalcols = 0; 5532 for (i = 0; i < nrows; i++) { 5533 owner = 0; 5534 PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, NULL)); 5535 /* Set iremote for diag matrix */ 5536 for (j = 0; j < nlcols[i * 2 + 0]; j++) { 5537 iremote[dntotalcols].index = loffsets[i * 2 + 0] + j; 5538 iremote[dntotalcols].rank = owner; 5539 /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */ 5540 ilocal[dntotalcols++] = ntotalcols++; 5541 } 5542 /* off-diagonal */ 5543 for (j = 0; j < nlcols[i * 2 + 1]; j++) { 5544 oiremote[ontotalcols].index = loffsets[i * 2 + 1] + j; 5545 oiremote[ontotalcols].rank = owner; 5546 oilocal[ontotalcols++] = ntotalcols++; 5547 } 5548 } 5549 PetscCall(ISRestoreIndices(rows, &lrowindices)); 5550 PetscCall(PetscFree(loffsets)); 5551 PetscCall(PetscFree(nlcols)); 5552 PetscCall(PetscSFCreate(comm, &sf)); 5553 /* P serves as roots and P_oth is leaves 5554 * Diag matrix 5555 * */ 5556 PetscCall(PetscSFSetGraph(sf, pd->i[plocalsize], dntotalcols, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 5557 PetscCall(PetscSFSetFromOptions(sf)); 5558 PetscCall(PetscSFSetUp(sf)); 5559 5560 PetscCall(PetscSFCreate(comm, &osf)); 5561 /* off-diagonal */ 5562 PetscCall(PetscSFSetGraph(osf, po->i[plocalsize], ontotalcols, oilocal, PETSC_OWN_POINTER, oiremote, PETSC_OWN_POINTER)); 5563 PetscCall(PetscSFSetFromOptions(osf)); 5564 PetscCall(PetscSFSetUp(osf)); 5565 PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a)); 5566 PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a)); 5567 /* operate on the matrix internal data to save memory */ 5568 
PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5569 PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5570 PetscCall(MatGetOwnershipRangeColumn(P, &pcstart, NULL)); 5571 /* Convert to global indices for diag matrix */ 5572 for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] += pcstart; 5573 PetscCall(PetscSFBcastBegin(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE)); 5574 /* We want P_oth store global indices */ 5575 PetscCall(ISLocalToGlobalMappingCreate(comm, 1, p->B->cmap->n, p->garray, PETSC_COPY_VALUES, &mapping)); 5576 /* Use memory scalable approach */ 5577 PetscCall(ISLocalToGlobalMappingSetType(mapping, ISLOCALTOGLOBALMAPPINGHASH)); 5578 PetscCall(ISLocalToGlobalMappingApply(mapping, po->i[plocalsize], po->j, po->j)); 5579 PetscCall(PetscSFBcastBegin(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE)); 5580 PetscCall(PetscSFBcastEnd(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE)); 5581 /* Convert back to local indices */ 5582 for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] -= pcstart; 5583 PetscCall(PetscSFBcastEnd(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE)); 5584 nout = 0; 5585 PetscCall(ISGlobalToLocalMappingApply(mapping, IS_GTOLM_DROP, po->i[plocalsize], po->j, &nout, po->j)); 5586 PetscCheck(nout == po->i[plocalsize], comm, PETSC_ERR_ARG_INCOMP, "n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ", po->i[plocalsize], nout); 5587 PetscCall(ISLocalToGlobalMappingDestroy(&mapping)); 5588 /* Exchange values */ 5589 PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5590 PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5591 PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a)); 5592 PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a)); 5593 /* Stop PETSc from shrinking memory */ 5594 for (i = 0; i < nrows; i++) p_oth->ilen[i] = p_oth->imax[i]; 5595 PetscCall(MatAssemblyBegin(*P_oth, MAT_FINAL_ASSEMBLY)); 5596 PetscCall(MatAssemblyEnd(*P_oth, 
MAT_FINAL_ASSEMBLY)); 5597 /* Attach PetscSF objects to P_oth so that we can reuse it later */ 5598 PetscCall(PetscObjectCompose((PetscObject)*P_oth, "diagsf", (PetscObject)sf)); 5599 PetscCall(PetscObjectCompose((PetscObject)*P_oth, "offdiagsf", (PetscObject)osf)); 5600 PetscCall(PetscSFDestroy(&sf)); 5601 PetscCall(PetscSFDestroy(&osf)); 5602 PetscFunctionReturn(PETSC_SUCCESS); 5603 } 5604 5605 /* 5606 * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5607 * This supports MPIAIJ and MAIJ 5608 * */ 5609 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A, Mat P, PetscInt dof, MatReuse reuse, Mat *P_oth) 5610 { 5611 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *p = (Mat_MPIAIJ *)P->data; 5612 Mat_SeqAIJ *p_oth; 5613 IS rows, map; 5614 PetscHMapI hamp; 5615 PetscInt i, htsize, *rowindices, off, *mapping, key, count; 5616 MPI_Comm comm; 5617 PetscSF sf, osf; 5618 PetscBool has; 5619 5620 PetscFunctionBegin; 5621 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 5622 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, P, 0, 0)); 5623 /* If it is the first time, create an index set of off-diag nonzero columns of A, 5624 * and then create a submatrix (that often is an overlapping matrix) 5625 * */ 5626 if (reuse == MAT_INITIAL_MATRIX) { 5627 /* Use a hash table to figure out unique keys */ 5628 PetscCall(PetscHMapICreateWithSize(a->B->cmap->n, &hamp)); 5629 PetscCall(PetscCalloc1(a->B->cmap->n, &mapping)); 5630 count = 0; 5631 /* Assume that a->g is sorted, otherwise the following does not make sense */ 5632 for (i = 0; i < a->B->cmap->n; i++) { 5633 key = a->garray[i] / dof; 5634 PetscCall(PetscHMapIHas(hamp, key, &has)); 5635 if (!has) { 5636 mapping[i] = count; 5637 PetscCall(PetscHMapISet(hamp, key, count++)); 5638 } else { 5639 /* Current 'i' has the same value the previous step */ 5640 mapping[i] = count - 1; 5641 } 5642 } 5643 PetscCall(ISCreateGeneral(comm, a->B->cmap->n, mapping, PETSC_OWN_POINTER, &map)); 5644 
PetscCall(PetscHMapIGetSize(hamp, &htsize)); 5645 PetscCheck(htsize == count, comm, PETSC_ERR_ARG_INCOMP, " Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT, htsize, count); 5646 PetscCall(PetscCalloc1(htsize, &rowindices)); 5647 off = 0; 5648 PetscCall(PetscHMapIGetKeys(hamp, &off, rowindices)); 5649 PetscCall(PetscHMapIDestroy(&hamp)); 5650 PetscCall(PetscSortInt(htsize, rowindices)); 5651 PetscCall(ISCreateGeneral(comm, htsize, rowindices, PETSC_OWN_POINTER, &rows)); 5652 /* In case, the matrix was already created but users want to recreate the matrix */ 5653 PetscCall(MatDestroy(P_oth)); 5654 PetscCall(MatCreateSeqSubMatrixWithRows_Private(P, rows, P_oth)); 5655 PetscCall(PetscObjectCompose((PetscObject)*P_oth, "aoffdiagtopothmapping", (PetscObject)map)); 5656 PetscCall(ISDestroy(&map)); 5657 PetscCall(ISDestroy(&rows)); 5658 } else if (reuse == MAT_REUSE_MATRIX) { 5659 /* If matrix was already created, we simply update values using SF objects 5660 * that as attached to the matrix earlier. 
5661 */ 5662 const PetscScalar *pd_a, *po_a; 5663 5664 PetscCall(PetscObjectQuery((PetscObject)*P_oth, "diagsf", (PetscObject *)&sf)); 5665 PetscCall(PetscObjectQuery((PetscObject)*P_oth, "offdiagsf", (PetscObject *)&osf)); 5666 PetscCheck(sf && osf, comm, PETSC_ERR_ARG_NULL, "Matrix is not initialized yet"); 5667 p_oth = (Mat_SeqAIJ *)(*P_oth)->data; 5668 /* Update values in place */ 5669 PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a)); 5670 PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a)); 5671 PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5672 PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5673 PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5674 PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5675 PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a)); 5676 PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a)); 5677 } else SETERRQ(comm, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown reuse type"); 5678 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, P, 0, 0)); 5679 PetscFunctionReturn(PETSC_SUCCESS); 5680 } 5681 5682 /*@C 5683 MatGetBrowsOfAcols - Returns `IS` that contain rows of `B` that equal to nonzero columns of local `A` 5684 5685 Collective 5686 5687 Input Parameters: 5688 + A - the first matrix in `MATMPIAIJ` format 5689 . B - the second matrix in `MATMPIAIJ` format 5690 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5691 5692 Output Parameters: 5693 + rowb - On input index sets of rows of B to extract (or `NULL`), modified on output 5694 . 
colb - On input index sets of columns of B to extract (or `NULL`), modified on output 5695 - B_seq - the sequential matrix generated 5696 5697 Level: developer 5698 5699 .seealso: `Mat`, `MATMPIAIJ`, `IS`, `MatReuse` 5700 @*/ 5701 PetscErrorCode MatGetBrowsOfAcols(Mat A, Mat B, MatReuse scall, IS *rowb, IS *colb, Mat *B_seq) 5702 { 5703 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5704 PetscInt *idx, i, start, ncols, nzA, nzB, *cmap, imark; 5705 IS isrowb, iscolb; 5706 Mat *bseq = NULL; 5707 5708 PetscFunctionBegin; 5709 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 5710 A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 5711 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols, A, B, 0, 0)); 5712 5713 if (scall == MAT_INITIAL_MATRIX) { 5714 start = A->cmap->rstart; 5715 cmap = a->garray; 5716 nzA = a->A->cmap->n; 5717 nzB = a->B->cmap->n; 5718 PetscCall(PetscMalloc1(nzA + nzB, &idx)); 5719 ncols = 0; 5720 for (i = 0; i < nzB; i++) { /* row < local row index */ 5721 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5722 else break; 5723 } 5724 imark = i; 5725 for (i = 0; i < nzA; i++) idx[ncols++] = start + i; /* local rows */ 5726 for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5727 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &isrowb)); 5728 PetscCall(ISCreateStride(PETSC_COMM_SELF, B->cmap->N, 0, 1, &iscolb)); 5729 } else { 5730 PetscCheck(rowb && colb, PETSC_COMM_SELF, PETSC_ERR_SUP, "IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5731 isrowb = *rowb; 5732 iscolb = *colb; 5733 PetscCall(PetscMalloc1(1, &bseq)); 5734 bseq[0] = *B_seq; 5735 } 5736 PetscCall(MatCreateSubMatrices(B, 1, &isrowb, &iscolb, scall, &bseq)); 5737 *B_seq = bseq[0]; 5738 PetscCall(PetscFree(bseq)); 5739 if (!rowb) { 5740 
PetscCall(ISDestroy(&isrowb)); 5741 } else { 5742 *rowb = isrowb; 5743 } 5744 if (!colb) { 5745 PetscCall(ISDestroy(&iscolb)); 5746 } else { 5747 *colb = iscolb; 5748 } 5749 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols, A, B, 0, 0)); 5750 PetscFunctionReturn(PETSC_SUCCESS); 5751 } 5752 5753 /* 5754 MatGetBrowsOfAoCols_MPIAIJ - Creates a `MATSEQAIJ` matrix by taking rows of B that equal to nonzero columns 5755 of the OFF-DIAGONAL portion of local A 5756 5757 Collective 5758 5759 Input Parameters: 5760 + A,B - the matrices in `MATMPIAIJ` format 5761 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5762 5763 Output Parameter: 5764 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5765 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5766 . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5767 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5768 5769 Developer Note: 5770 This directly accesses information inside the VecScatter associated with the matrix-vector product 5771 for this matrix. This is not desirable.. 
5772 5773 Level: developer 5774 5775 */ 5776 5777 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A, Mat B, MatReuse scall, PetscInt **startsj_s, PetscInt **startsj_r, MatScalar **bufa_ptr, Mat *B_oth) 5778 { 5779 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5780 VecScatter ctx; 5781 MPI_Comm comm; 5782 const PetscMPIInt *rprocs, *sprocs; 5783 PetscMPIInt nrecvs, nsends; 5784 const PetscInt *srow, *rstarts, *sstarts; 5785 PetscInt *rowlen, *bufj, *bufJ, ncols = 0, aBn = a->B->cmap->n, row, *b_othi, *b_othj, *rvalues = NULL, *svalues = NULL, *cols, sbs, rbs; 5786 PetscInt i, j, k = 0, l, ll, nrows, *rstartsj = NULL, *sstartsj, len; 5787 PetscScalar *b_otha, *bufa, *bufA, *vals = NULL; 5788 MPI_Request *reqs = NULL, *rwaits = NULL, *swaits = NULL; 5789 PetscMPIInt size, tag, rank, nreqs; 5790 5791 PetscFunctionBegin; 5792 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 5793 PetscCallMPI(MPI_Comm_size(comm, &size)); 5794 5795 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 5796 A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 5797 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, B, 0, 0)); 5798 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 5799 5800 if (size == 1) { 5801 startsj_s = NULL; 5802 bufa_ptr = NULL; 5803 *B_oth = NULL; 5804 PetscFunctionReturn(PETSC_SUCCESS); 5805 } 5806 5807 ctx = a->Mvctx; 5808 tag = ((PetscObject)ctx)->tag; 5809 5810 PetscCall(VecScatterGetRemote_Private(ctx, PETSC_TRUE /*send*/, &nsends, &sstarts, &srow, &sprocs, &sbs)); 5811 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5812 PetscCall(VecScatterGetRemoteOrdered_Private(ctx, PETSC_FALSE /*recv*/, &nrecvs, &rstarts, NULL /*indices not needed*/, &rprocs, &rbs)); 5813 
PetscCall(PetscMPIIntCast(nsends + nrecvs, &nreqs)); 5814 PetscCall(PetscMalloc1(nreqs, &reqs)); 5815 rwaits = reqs; 5816 swaits = PetscSafePointerPlusOffset(reqs, nrecvs); 5817 5818 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5819 if (scall == MAT_INITIAL_MATRIX) { 5820 /* i-array */ 5821 /* post receives */ 5822 if (nrecvs) PetscCall(PetscMalloc1(rbs * (rstarts[nrecvs] - rstarts[0]), &rvalues)); /* rstarts can be NULL when nrecvs=0 */ 5823 for (i = 0; i < nrecvs; i++) { 5824 rowlen = rvalues + rstarts[i] * rbs; 5825 nrows = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of indices to be received */ 5826 PetscCallMPI(MPIU_Irecv(rowlen, nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i)); 5827 } 5828 5829 /* pack the outgoing message */ 5830 PetscCall(PetscMalloc2(nsends + 1, &sstartsj, nrecvs + 1, &rstartsj)); 5831 5832 sstartsj[0] = 0; 5833 rstartsj[0] = 0; 5834 len = 0; /* total length of j or a array to be sent */ 5835 if (nsends) { 5836 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5837 PetscCall(PetscMalloc1(sbs * (sstarts[nsends] - sstarts[0]), &svalues)); 5838 } 5839 for (i = 0; i < nsends; i++) { 5840 rowlen = svalues + (sstarts[i] - sstarts[0]) * sbs; 5841 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5842 for (j = 0; j < nrows; j++) { 5843 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5844 for (l = 0; l < sbs; l++) { 5845 PetscCall(MatGetRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); /* rowlength */ 5846 5847 rowlen[j * sbs + l] = ncols; 5848 5849 len += ncols; 5850 PetscCall(MatRestoreRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); 5851 } 5852 k++; 5853 } 5854 PetscCallMPI(MPIU_Isend(rowlen, nrows * sbs, MPIU_INT, sprocs[i], tag, comm, swaits + i)); 5855 5856 sstartsj[i + 1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5857 } 5858 /* recvs and sends of i-array are completed */ 5859 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5860 
PetscCall(PetscFree(svalues)); 5861 5862 /* allocate buffers for sending j and a arrays */ 5863 PetscCall(PetscMalloc1(len + 1, &bufj)); 5864 PetscCall(PetscMalloc1(len + 1, &bufa)); 5865 5866 /* create i-array of B_oth */ 5867 PetscCall(PetscMalloc1(aBn + 2, &b_othi)); 5868 5869 b_othi[0] = 0; 5870 len = 0; /* total length of j or a array to be received */ 5871 k = 0; 5872 for (i = 0; i < nrecvs; i++) { 5873 rowlen = rvalues + (rstarts[i] - rstarts[0]) * rbs; 5874 nrows = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of rows to be received */ 5875 for (j = 0; j < nrows; j++) { 5876 b_othi[k + 1] = b_othi[k] + rowlen[j]; 5877 PetscCall(PetscIntSumError(rowlen[j], len, &len)); 5878 k++; 5879 } 5880 rstartsj[i + 1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5881 } 5882 PetscCall(PetscFree(rvalues)); 5883 5884 /* allocate space for j and a arrays of B_oth */ 5885 PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_othj)); 5886 PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_otha)); 5887 5888 /* j-array */ 5889 /* post receives of j-array */ 5890 for (i = 0; i < nrecvs; i++) { 5891 nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */ 5892 PetscCallMPI(MPIU_Irecv(b_othj + rstartsj[i], nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i)); 5893 } 5894 5895 /* pack the outgoing message j-array */ 5896 if (nsends) k = sstarts[0]; 5897 for (i = 0; i < nsends; i++) { 5898 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5899 bufJ = bufj + sstartsj[i]; 5900 for (j = 0; j < nrows; j++) { 5901 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5902 for (ll = 0; ll < sbs; ll++) { 5903 PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL)); 5904 for (l = 0; l < ncols; l++) *bufJ++ = cols[l]; 5905 PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL)); 5906 } 5907 } 5908 PetscCallMPI(MPIU_Isend(bufj + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_INT, sprocs[i], tag, comm, swaits + i)); 5909 } 5910 5911 /* 
recvs and sends of j-array are completed */ 5912 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5913 } else if (scall == MAT_REUSE_MATRIX) { 5914 sstartsj = *startsj_s; 5915 rstartsj = *startsj_r; 5916 bufa = *bufa_ptr; 5917 PetscCall(MatSeqAIJGetArrayWrite(*B_oth, &b_otha)); 5918 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container"); 5919 5920 /* a-array */ 5921 /* post receives of a-array */ 5922 for (i = 0; i < nrecvs; i++) { 5923 nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */ 5924 PetscCallMPI(MPIU_Irecv(b_otha + rstartsj[i], nrows, MPIU_SCALAR, rprocs[i], tag, comm, rwaits + i)); 5925 } 5926 5927 /* pack the outgoing message a-array */ 5928 if (nsends) k = sstarts[0]; 5929 for (i = 0; i < nsends; i++) { 5930 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5931 bufA = bufa + sstartsj[i]; 5932 for (j = 0; j < nrows; j++) { 5933 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5934 for (ll = 0; ll < sbs; ll++) { 5935 PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals)); 5936 for (l = 0; l < ncols; l++) *bufA++ = vals[l]; 5937 PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals)); 5938 } 5939 } 5940 PetscCallMPI(MPIU_Isend(bufa + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_SCALAR, sprocs[i], tag, comm, swaits + i)); 5941 } 5942 /* recvs and sends of a-array are completed */ 5943 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5944 PetscCall(PetscFree(reqs)); 5945 5946 if (scall == MAT_INITIAL_MATRIX) { 5947 Mat_SeqAIJ *b_oth; 5948 5949 /* put together the new matrix */ 5950 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, aBn, B->cmap->N, b_othi, b_othj, b_otha, B_oth)); 5951 5952 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5953 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5954 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 5955 b_oth->free_a = PETSC_TRUE; 5956 b_oth->free_ij = PETSC_TRUE; 5957 b_oth->nonew = 0; 5958 5959 PetscCall(PetscFree(bufj)); 5960 if (!startsj_s || !bufa_ptr) { 5961 PetscCall(PetscFree2(sstartsj, rstartsj)); 5962 PetscCall(PetscFree(bufa_ptr)); 5963 } else { 5964 *startsj_s = sstartsj; 5965 *startsj_r = rstartsj; 5966 *bufa_ptr = bufa; 5967 } 5968 } else if (scall == MAT_REUSE_MATRIX) { 5969 PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth, &b_otha)); 5970 } 5971 5972 PetscCall(VecScatterRestoreRemote_Private(ctx, PETSC_TRUE, &nsends, &sstarts, &srow, &sprocs, &sbs)); 5973 PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx, PETSC_FALSE, &nrecvs, &rstarts, NULL, &rprocs, &rbs)); 5974 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, B, 0, 0)); 5975 PetscFunctionReturn(PETSC_SUCCESS); 5976 } 5977 5978 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat, MatType, MatReuse, Mat *); 5979 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat, MatType, MatReuse, Mat *); 5980 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat, MatType, MatReuse, Mat *); 5981 #if defined(PETSC_HAVE_MKL_SPARSE) 5982 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat, MatType, MatReuse, Mat *); 5983 #endif 5984 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat, MatType, MatReuse, Mat *); 5985 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *); 5986 #if defined(PETSC_HAVE_ELEMENTAL) 5987 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat, MatType, MatReuse, Mat *); 5988 #endif 5989 #if defined(PETSC_HAVE_SCALAPACK) 5990 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *); 5991 #endif 5992 #if defined(PETSC_HAVE_HYPRE) 5993 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *); 5994 #endif 5995 #if defined(PETSC_HAVE_CUDA) 5996 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat, MatType, 
MatReuse, Mat *); 5997 #endif 5998 #if defined(PETSC_HAVE_HIP) 5999 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJHIPSPARSE(Mat, MatType, MatReuse, Mat *); 6000 #endif 6001 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 6002 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat, MatType, MatReuse, Mat *); 6003 #endif 6004 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat, MatType, MatReuse, Mat *); 6005 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *); 6006 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat); 6007 6008 /* 6009 Computes (B'*A')' since computing B*A directly is untenable 6010 6011 n p p 6012 [ ] [ ] [ ] 6013 m [ A ] * n [ B ] = m [ C ] 6014 [ ] [ ] [ ] 6015 6016 */ 6017 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A, Mat B, Mat C) 6018 { 6019 Mat At, Bt, Ct; 6020 6021 PetscFunctionBegin; 6022 PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &At)); 6023 PetscCall(MatTranspose(B, MAT_INITIAL_MATRIX, &Bt)); 6024 PetscCall(MatMatMult(Bt, At, MAT_INITIAL_MATRIX, PETSC_CURRENT, &Ct)); 6025 PetscCall(MatDestroy(&At)); 6026 PetscCall(MatDestroy(&Bt)); 6027 PetscCall(MatTransposeSetPrecursor(Ct, C)); 6028 PetscCall(MatTranspose(Ct, MAT_REUSE_MATRIX, &C)); 6029 PetscCall(MatDestroy(&Ct)); 6030 PetscFunctionReturn(PETSC_SUCCESS); 6031 } 6032 6033 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A, Mat B, PetscReal fill, Mat C) 6034 { 6035 PetscBool cisdense; 6036 6037 PetscFunctionBegin; 6038 PetscCheck(A->cmap->n == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT, A->cmap->n, B->rmap->n); 6039 PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N)); 6040 PetscCall(MatSetBlockSizesFromMats(C, A, B)); 6041 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, MATMPIDENSEHIP, "")); 6042 if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 6043 
PetscCall(MatSetUp(C)); 6044 6045 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 6046 PetscFunctionReturn(PETSC_SUCCESS); 6047 } 6048 6049 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C) 6050 { 6051 Mat_Product *product = C->product; 6052 Mat A = product->A, B = product->B; 6053 6054 PetscFunctionBegin; 6055 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 6056 A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 6057 C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ; 6058 C->ops->productsymbolic = MatProductSymbolic_AB; 6059 PetscFunctionReturn(PETSC_SUCCESS); 6060 } 6061 6062 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C) 6063 { 6064 Mat_Product *product = C->product; 6065 6066 PetscFunctionBegin; 6067 if (product->type == MATPRODUCT_AB) PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C)); 6068 PetscFunctionReturn(PETSC_SUCCESS); 6069 } 6070 6071 /* 6072 Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix 6073 6074 Input Parameters: 6075 6076 j1,rowBegin1,rowEnd1,jmap1: describe the first set of nonzeros (Set1) 6077 j2,rowBegin2,rowEnd2,jmap2: describe the second set of nonzeros (Set2) 6078 6079 mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat 6080 6081 For Set1, j1[] contains column indices of the nonzeros. 6082 For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k 6083 respectively (note rowEnd1[k] is not necessarily equal to rwoBegin1[k+1]). Indices in this range of j1[] are sorted, 6084 but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1. 6085 6086 Similar for Set2. 
6087 6088 This routine merges the two sets of nonzeros row by row and removes repeats. 6089 6090 Output Parameters: (memory is allocated by the caller) 6091 6092 i[],j[]: the CSR of the merged matrix, which has m rows. 6093 imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix. 6094 imap2[]: similar to imap1[], but for Set2. 6095 Note we order nonzeros row-by-row and from left to right. 6096 */ 6097 static PetscErrorCode MatMergeEntries_Internal(Mat mat, const PetscInt j1[], const PetscInt j2[], const PetscCount rowBegin1[], const PetscCount rowEnd1[], const PetscCount rowBegin2[], const PetscCount rowEnd2[], const PetscCount jmap1[], const PetscCount jmap2[], PetscCount imap1[], PetscCount imap2[], PetscInt i[], PetscInt j[]) 6098 { 6099 PetscInt r, m; /* Row index of mat */ 6100 PetscCount t, t1, t2, b1, e1, b2, e2; 6101 6102 PetscFunctionBegin; 6103 PetscCall(MatGetLocalSize(mat, &m, NULL)); 6104 t1 = t2 = t = 0; /* Count unique nonzeros of in Set1, Set1 and the merged respectively */ 6105 i[0] = 0; 6106 for (r = 0; r < m; r++) { /* Do row by row merging */ 6107 b1 = rowBegin1[r]; 6108 e1 = rowEnd1[r]; 6109 b2 = rowBegin2[r]; 6110 e2 = rowEnd2[r]; 6111 while (b1 < e1 && b2 < e2) { 6112 if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */ 6113 j[t] = j1[b1]; 6114 imap1[t1] = t; 6115 imap2[t2] = t; 6116 b1 += jmap1[t1 + 1] - jmap1[t1]; /* Jump to next unique local nonzero */ 6117 b2 += jmap2[t2 + 1] - jmap2[t2]; /* Jump to next unique remote nonzero */ 6118 t1++; 6119 t2++; 6120 t++; 6121 } else if (j1[b1] < j2[b2]) { 6122 j[t] = j1[b1]; 6123 imap1[t1] = t; 6124 b1 += jmap1[t1 + 1] - jmap1[t1]; 6125 t1++; 6126 t++; 6127 } else { 6128 j[t] = j2[b2]; 6129 imap2[t2] = t; 6130 b2 += jmap2[t2 + 1] - jmap2[t2]; 6131 t2++; 6132 t++; 6133 } 6134 } 6135 /* Merge the remaining in either j1[] or j2[] */ 6136 while (b1 < e1) { 6137 j[t] = j1[b1]; 6138 imap1[t1] = t; 6139 b1 += jmap1[t1 + 1] - 
jmap1[t1]; 6140 t1++; 6141 t++; 6142 } 6143 while (b2 < e2) { 6144 j[t] = j2[b2]; 6145 imap2[t2] = t; 6146 b2 += jmap2[t2 + 1] - jmap2[t2]; 6147 t2++; 6148 t++; 6149 } 6150 PetscCall(PetscIntCast(t, i + r + 1)); 6151 } 6152 PetscFunctionReturn(PETSC_SUCCESS); 6153 } 6154 6155 /* 6156 Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block 6157 6158 Input Parameters: 6159 mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m. 6160 n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[] 6161 respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n. 6162 6163 i[] is already sorted, but within a row, j[] is not sorted and might have repeats. 6164 i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting. 6165 6166 Output Parameters: 6167 j[],perm[]: the routine needs to sort j[] within each row along with perm[]. 6168 rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller. 6169 They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block, 6170 and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block. 6171 6172 Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine. 6173 Atot: number of entries belonging to the diagonal block. 6174 Annz: number of unique nonzeros belonging to the diagonal block. 6175 Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count 6176 repeats (i.e., same 'i,j' pair). 6177 Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. 
More precisely, Ajmap[t+1] - Ajmap[t] 6178 is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0. 6179 6180 Atot: number of entries belonging to the diagonal block 6181 Annz: number of unique nonzeros belonging to the diagonal block. 6182 6183 Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block. 6184 6185 Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1(). 6186 */ 6187 static PetscErrorCode MatSplitEntries_Internal(Mat mat, PetscCount n, const PetscInt i[], PetscInt j[], PetscCount perm[], PetscCount rowBegin[], PetscCount rowMid[], PetscCount rowEnd[], PetscCount *Atot_, PetscCount **Aperm_, PetscCount *Annz_, PetscCount **Ajmap_, PetscCount *Btot_, PetscCount **Bperm_, PetscCount *Bnnz_, PetscCount **Bjmap_) 6188 { 6189 PetscInt cstart, cend, rstart, rend, row, col; 6190 PetscCount Atot = 0, Btot = 0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */ 6191 PetscCount Annz = 0, Bnnz = 0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */ 6192 PetscCount k, m, p, q, r, s, mid; 6193 PetscCount *Aperm, *Bperm, *Ajmap, *Bjmap; 6194 6195 PetscFunctionBegin; 6196 PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend)); 6197 PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend)); 6198 m = rend - rstart; 6199 6200 /* Skip negative rows */ 6201 for (k = 0; k < n; k++) 6202 if (i[k] >= 0) break; 6203 6204 /* Process [k,n): sort and partition each local row into diag and offdiag portions, 6205 fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz. 6206 */ 6207 while (k < n) { 6208 row = i[k]; 6209 /* Entries in [k,s) are in one row. 
Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */ 6210 for (s = k; s < n; s++) 6211 if (i[s] != row) break; 6212 6213 /* Shift diag columns to range of [-PETSC_INT_MAX, -1] */ 6214 for (p = k; p < s; p++) { 6215 if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_INT_MAX; 6216 } 6217 PetscCall(PetscSortIntWithCountArray(s - k, j + k, perm + k)); 6218 PetscCall(PetscSortedIntUpperBound(j, k, s, -1, &mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */ 6219 rowBegin[row - rstart] = k; 6220 rowMid[row - rstart] = mid; 6221 rowEnd[row - rstart] = s; 6222 PetscCheck(k == s || j[s - 1] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index %" PetscInt_FMT " is >= matrix column size %" PetscInt_FMT, j[s - 1], mat->cmap->N); 6223 6224 /* Count nonzeros of this diag/offdiag row, which might have repeats */ 6225 Atot += mid - k; 6226 Btot += s - mid; 6227 6228 /* Count unique nonzeros of this diag row */ 6229 for (p = k; p < mid;) { 6230 col = j[p]; 6231 do { 6232 j[p] += PETSC_INT_MAX; /* Revert the modified diagonal indices */ 6233 p++; 6234 } while (p < mid && j[p] == col); 6235 Annz++; 6236 } 6237 6238 /* Count unique nonzeros of this offdiag row */ 6239 for (p = mid; p < s;) { 6240 col = j[p]; 6241 do { 6242 p++; 6243 } while (p < s && j[p] == col); 6244 Bnnz++; 6245 } 6246 k = s; 6247 } 6248 6249 /* Allocation according to Atot, Btot, Annz, Bnnz */ 6250 PetscCall(PetscMalloc1(Atot, &Aperm)); 6251 PetscCall(PetscMalloc1(Btot, &Bperm)); 6252 PetscCall(PetscMalloc1(Annz + 1, &Ajmap)); 6253 PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap)); 6254 6255 /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */ 6256 Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0; 6257 for (r = 0; r < m; r++) { 6258 k = rowBegin[r]; 6259 mid = rowMid[r]; 6260 s = rowEnd[r]; 6261 PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Aperm, Atot), 
PetscSafePointerPlusOffset(perm, k), mid - k)); 6262 PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Bperm, Btot), PetscSafePointerPlusOffset(perm, mid), s - mid)); 6263 Atot += mid - k; 6264 Btot += s - mid; 6265 6266 /* Scan column indices in this row and find out how many repeats each unique nonzero has */ 6267 for (p = k; p < mid;) { 6268 col = j[p]; 6269 q = p; 6270 do { 6271 p++; 6272 } while (p < mid && j[p] == col); 6273 Ajmap[Annz + 1] = Ajmap[Annz] + (p - q); 6274 Annz++; 6275 } 6276 6277 for (p = mid; p < s;) { 6278 col = j[p]; 6279 q = p; 6280 do { 6281 p++; 6282 } while (p < s && j[p] == col); 6283 Bjmap[Bnnz + 1] = Bjmap[Bnnz] + (p - q); 6284 Bnnz++; 6285 } 6286 } 6287 /* Output */ 6288 *Aperm_ = Aperm; 6289 *Annz_ = Annz; 6290 *Atot_ = Atot; 6291 *Ajmap_ = Ajmap; 6292 *Bperm_ = Bperm; 6293 *Bnnz_ = Bnnz; 6294 *Btot_ = Btot; 6295 *Bjmap_ = Bjmap; 6296 PetscFunctionReturn(PETSC_SUCCESS); 6297 } 6298 6299 /* 6300 Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix 6301 6302 Input Parameters: 6303 nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[] 6304 nnz: number of unique nonzeros in the merged matrix 6305 imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix 6306 jmap[nnz1+1]: i-th nonzero in the set has jmap[i+1] - jmap[i] repeats in the set 6307 6308 Output Parameter: (memory is allocated by the caller) 6309 jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set 6310 6311 Example: 6312 nnz1 = 4 6313 nnz = 6 6314 imap = [1,3,4,5] 6315 jmap = [0,3,5,6,7] 6316 then, 6317 jmap_new = [0,0,3,3,5,6,7] 6318 */ 6319 static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1, PetscCount nnz, const PetscCount imap[], const PetscCount jmap[], PetscCount jmap_new[]) 6320 { 6321 PetscCount k, p; 6322 6323 PetscFunctionBegin; 6324 jmap_new[0] = 0; 6325 p = nnz; /* p loops over jmap_new[] backwards */ 6326 for (k = nnz1 - 1; k 
>= 0; k--) { /* k loops over imap[] */ 6327 for (; p > imap[k]; p--) jmap_new[p] = jmap[k + 1]; 6328 } 6329 for (; p >= 0; p--) jmap_new[p] = jmap[0]; 6330 PetscFunctionReturn(PETSC_SUCCESS); 6331 } 6332 6333 static PetscErrorCode MatCOOStructDestroy_MPIAIJ(void **data) 6334 { 6335 MatCOOStruct_MPIAIJ *coo = (MatCOOStruct_MPIAIJ *)*data; 6336 6337 PetscFunctionBegin; 6338 PetscCall(PetscSFDestroy(&coo->sf)); 6339 PetscCall(PetscFree(coo->Aperm1)); 6340 PetscCall(PetscFree(coo->Bperm1)); 6341 PetscCall(PetscFree(coo->Ajmap1)); 6342 PetscCall(PetscFree(coo->Bjmap1)); 6343 PetscCall(PetscFree(coo->Aimap2)); 6344 PetscCall(PetscFree(coo->Bimap2)); 6345 PetscCall(PetscFree(coo->Aperm2)); 6346 PetscCall(PetscFree(coo->Bperm2)); 6347 PetscCall(PetscFree(coo->Ajmap2)); 6348 PetscCall(PetscFree(coo->Bjmap2)); 6349 PetscCall(PetscFree(coo->Cperm1)); 6350 PetscCall(PetscFree2(coo->sendbuf, coo->recvbuf)); 6351 PetscCall(PetscFree(coo)); 6352 PetscFunctionReturn(PETSC_SUCCESS); 6353 } 6354 6355 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, PetscInt coo_i[], PetscInt coo_j[]) 6356 { 6357 MPI_Comm comm; 6358 PetscMPIInt rank, size; 6359 PetscInt m, n, M, N, rstart, rend, cstart, cend; /* Sizes, indices of row/col, therefore with type PetscInt */ 6360 PetscCount k, p, q, rem; /* Loop variables over coo arrays */ 6361 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)mat->data; 6362 PetscContainer container; 6363 MatCOOStruct_MPIAIJ *coo; 6364 6365 PetscFunctionBegin; 6366 PetscCall(PetscFree(mpiaij->garray)); 6367 PetscCall(VecDestroy(&mpiaij->lvec)); 6368 #if defined(PETSC_USE_CTABLE) 6369 PetscCall(PetscHMapIDestroy(&mpiaij->colmap)); 6370 #else 6371 PetscCall(PetscFree(mpiaij->colmap)); 6372 #endif 6373 PetscCall(VecScatterDestroy(&mpiaij->Mvctx)); 6374 mat->assembled = PETSC_FALSE; 6375 mat->was_assembled = PETSC_FALSE; 6376 6377 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 6378 PetscCallMPI(MPI_Comm_size(comm, &size)); 6379 
PetscCallMPI(MPI_Comm_rank(comm, &rank)); 6380 PetscCall(PetscLayoutSetUp(mat->rmap)); 6381 PetscCall(PetscLayoutSetUp(mat->cmap)); 6382 PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend)); 6383 PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend)); 6384 PetscCall(MatGetLocalSize(mat, &m, &n)); 6385 PetscCall(MatGetSize(mat, &M, &N)); 6386 6387 /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */ 6388 /* entries come first, then local rows, then remote rows. */ 6389 PetscCount n1 = coo_n, *perm1; 6390 PetscInt *i1 = coo_i, *j1 = coo_j; 6391 6392 PetscCall(PetscMalloc1(n1, &perm1)); 6393 for (k = 0; k < n1; k++) perm1[k] = k; 6394 6395 /* Manipulate indices so that entries with negative row or col indices will have smallest 6396 row indices, local entries will have greater but negative row indices, and remote entries 6397 will have positive row indices. 6398 */ 6399 for (k = 0; k < n1; k++) { 6400 if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_INT_MIN; /* e.g., -2^31, minimal to move them ahead */ 6401 else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_INT_MAX; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_INT_MAX, -1] */ 6402 else { 6403 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_USER_INPUT, "MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows"); 6404 if (mpiaij->donotstash) i1[k] = PETSC_INT_MIN; /* Ignore offproc entries as if they had negative indices */ 6405 } 6406 } 6407 6408 /* Sort by row; after that, [0,k) have ignored entries, [k,rem) have local rows and [rem,n1) have remote rows */ 6409 PetscCall(PetscSortIntWithIntCountArrayPair(n1, i1, j1, perm1)); 6410 6411 /* Advance k to the first entry we need to take care of */ 6412 for (k = 0; k < n1; k++) 6413 if (i1[k] > PETSC_INT_MIN) break; 6414 PetscCount i1start = k; 6415 6416 PetscCall(PetscSortedIntUpperBound(i1, k, n1, rend - 1 - PETSC_INT_MAX, &rem)); /* rem is upper bound of the last local row */ 6417 for (; k < rem; k++) 
i1[k] += PETSC_INT_MAX; /* Revert row indices of local rows*/ 6418 6419 PetscCheck(i1 == NULL || i1[n1 - 1] < M, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "COO row index %" PetscInt_FMT " is >= the matrix row size %" PetscInt_FMT, i1[n1 - 1], M); 6420 6421 /* Send remote rows to their owner */ 6422 /* Find which rows should be sent to which remote ranks*/ 6423 PetscInt nsend = 0; /* Number of MPI ranks to send data to */ 6424 PetscMPIInt *sendto; /* [nsend], storing remote ranks */ 6425 PetscInt *nentries; /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */ 6426 const PetscInt *ranges; 6427 PetscInt maxNsend = size >= 128 ? 128 : size; /* Assume max 128 neighbors; realloc when needed */ 6428 6429 PetscCall(PetscLayoutGetRanges(mat->rmap, &ranges)); 6430 PetscCall(PetscMalloc2(maxNsend, &sendto, maxNsend, &nentries)); 6431 for (k = rem; k < n1;) { 6432 PetscMPIInt owner; 6433 PetscInt firstRow, lastRow; 6434 6435 /* Locate a row range */ 6436 firstRow = i1[k]; /* first row of this owner */ 6437 PetscCall(PetscLayoutFindOwner(mat->rmap, firstRow, &owner)); 6438 lastRow = ranges[owner + 1] - 1; /* last row of this owner */ 6439 6440 /* Find the first index 'p' in [k,n) with i1[p] belonging to next owner */ 6441 PetscCall(PetscSortedIntUpperBound(i1, k, n1, lastRow, &p)); 6442 6443 /* All entries in [k,p) belong to this remote owner */ 6444 if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */ 6445 PetscMPIInt *sendto2; 6446 PetscInt *nentries2; 6447 PetscInt maxNsend2 = (maxNsend <= size / 2) ? 
maxNsend * 2 : size; 6448 6449 PetscCall(PetscMalloc2(maxNsend2, &sendto2, maxNsend2, &nentries2)); 6450 PetscCall(PetscArraycpy(sendto2, sendto, maxNsend)); 6451 PetscCall(PetscArraycpy(nentries2, nentries2, maxNsend + 1)); 6452 PetscCall(PetscFree2(sendto, nentries2)); 6453 sendto = sendto2; 6454 nentries = nentries2; 6455 maxNsend = maxNsend2; 6456 } 6457 sendto[nsend] = owner; 6458 PetscCall(PetscIntCast(p - k, &nentries[nsend])); 6459 nsend++; 6460 k = p; 6461 } 6462 6463 /* Build 1st SF to know offsets on remote to send data */ 6464 PetscSF sf1; 6465 PetscInt nroots = 1, nroots2 = 0; 6466 PetscInt nleaves = nsend, nleaves2 = 0; 6467 PetscInt *offsets; 6468 PetscSFNode *iremote; 6469 6470 PetscCall(PetscSFCreate(comm, &sf1)); 6471 PetscCall(PetscMalloc1(nsend, &iremote)); 6472 PetscCall(PetscMalloc1(nsend, &offsets)); 6473 for (k = 0; k < nsend; k++) { 6474 iremote[k].rank = sendto[k]; 6475 iremote[k].index = 0; 6476 nleaves2 += nentries[k]; 6477 PetscCheck(nleaves2 >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF leaves is too large for PetscInt"); 6478 } 6479 PetscCall(PetscSFSetGraph(sf1, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 6480 PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1, MPIU_INT, PETSC_MEMTYPE_HOST, &nroots2 /*rootdata*/, PETSC_MEMTYPE_HOST, nentries /*leafdata*/, PETSC_MEMTYPE_HOST, offsets /*leafupdate*/, MPI_SUM)); 6481 PetscCall(PetscSFFetchAndOpEnd(sf1, MPIU_INT, &nroots2, nentries, offsets, MPI_SUM)); /* Would nroots2 overflow, we check offsets[] below */ 6482 PetscCall(PetscSFDestroy(&sf1)); 6483 PetscAssert(nleaves2 == n1 - rem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT, nleaves2, n1 - rem); 6484 6485 /* Build 2nd SF to send remote COOs to their owner */ 6486 PetscSF sf2; 6487 nroots = nroots2; 6488 nleaves = nleaves2; 6489 PetscCall(PetscSFCreate(comm, &sf2)); 6490 PetscCall(PetscSFSetFromOptions(sf2)); 6491 
PetscCall(PetscMalloc1(nleaves, &iremote)); 6492 p = 0; 6493 for (k = 0; k < nsend; k++) { 6494 PetscCheck(offsets[k] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF roots is too large for PetscInt"); 6495 for (q = 0; q < nentries[k]; q++, p++) { 6496 iremote[p].rank = sendto[k]; 6497 PetscCall(PetscIntCast(offsets[k] + q, &iremote[p].index)); 6498 } 6499 } 6500 PetscCall(PetscSFSetGraph(sf2, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 6501 6502 /* Send the remote COOs to their owner */ 6503 PetscInt n2 = nroots, *i2, *j2; /* Buffers for received COOs from other ranks, along with a permutation array */ 6504 PetscCount *perm2; /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */ 6505 PetscCall(PetscMalloc3(n2, &i2, n2, &j2, n2, &perm2)); 6506 PetscAssert(rem == 0 || i1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null"); 6507 PetscAssert(rem == 0 || j1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null"); 6508 PetscInt *i1prem = PetscSafePointerPlusOffset(i1, rem); 6509 PetscInt *j1prem = PetscSafePointerPlusOffset(j1, rem); 6510 PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, i1prem, PETSC_MEMTYPE_HOST, i2, MPI_REPLACE)); 6511 PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, i1prem, i2, MPI_REPLACE)); 6512 PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, j1prem, PETSC_MEMTYPE_HOST, j2, MPI_REPLACE)); 6513 PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, j1prem, j2, MPI_REPLACE)); 6514 6515 PetscCall(PetscFree(offsets)); 6516 PetscCall(PetscFree2(sendto, nentries)); 6517 6518 /* Sort received COOs by row along with the permutation array */ 6519 for (k = 0; k < n2; k++) perm2[k] = k; 6520 PetscCall(PetscSortIntWithIntCountArrayPair(n2, i2, j2, perm2)); 6521 6522 /* sf2 only sends contiguous leafdata to contiguous rootdata. 
We record the permutation which will be used to fill leafdata */ 6523 PetscCount *Cperm1; 6524 PetscAssert(rem == 0 || perm1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null"); 6525 PetscCount *perm1prem = PetscSafePointerPlusOffset(perm1, rem); 6526 PetscCall(PetscMalloc1(nleaves, &Cperm1)); 6527 PetscCall(PetscArraycpy(Cperm1, perm1prem, nleaves)); 6528 6529 /* Support for HYPRE matrices, kind of a hack. 6530 Swap min column with diagonal so that diagonal values will go first */ 6531 PetscBool hypre; 6532 PetscCall(PetscStrcmp("_internal_COO_mat_for_hypre", ((PetscObject)mat)->name, &hypre)); 6533 if (hypre) { 6534 PetscInt *minj; 6535 PetscBT hasdiag; 6536 6537 PetscCall(PetscBTCreate(m, &hasdiag)); 6538 PetscCall(PetscMalloc1(m, &minj)); 6539 for (k = 0; k < m; k++) minj[k] = PETSC_INT_MAX; 6540 for (k = i1start; k < rem; k++) { 6541 if (j1[k] < cstart || j1[k] >= cend) continue; 6542 const PetscInt rindex = i1[k] - rstart; 6543 if ((j1[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex)); 6544 minj[rindex] = PetscMin(minj[rindex], j1[k]); 6545 } 6546 for (k = 0; k < n2; k++) { 6547 if (j2[k] < cstart || j2[k] >= cend) continue; 6548 const PetscInt rindex = i2[k] - rstart; 6549 if ((j2[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex)); 6550 minj[rindex] = PetscMin(minj[rindex], j2[k]); 6551 } 6552 for (k = i1start; k < rem; k++) { 6553 const PetscInt rindex = i1[k] - rstart; 6554 if (j1[k] < cstart || j1[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue; 6555 if (j1[k] == minj[rindex]) j1[k] = i1[k] + (cstart - rstart); 6556 else if ((j1[k] - cstart) == rindex) j1[k] = minj[rindex]; 6557 } 6558 for (k = 0; k < n2; k++) { 6559 const PetscInt rindex = i2[k] - rstart; 6560 if (j2[k] < cstart || j2[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue; 6561 if (j2[k] == minj[rindex]) j2[k] = i2[k] + (cstart - rstart); 6562 else if ((j2[k] - cstart) == rindex) j2[k] = minj[rindex]; 6563 } 6564 
PetscCall(PetscBTDestroy(&hasdiag)); 6565 PetscCall(PetscFree(minj)); 6566 } 6567 6568 /* Split local COOs and received COOs into diag/offdiag portions */ 6569 PetscCount *rowBegin1, *rowMid1, *rowEnd1; 6570 PetscCount *Ajmap1, *Aperm1, *Bjmap1, *Bperm1; 6571 PetscCount Annz1, Bnnz1, Atot1, Btot1; 6572 PetscCount *rowBegin2, *rowMid2, *rowEnd2; 6573 PetscCount *Ajmap2, *Aperm2, *Bjmap2, *Bperm2; 6574 PetscCount Annz2, Bnnz2, Atot2, Btot2; 6575 6576 PetscCall(PetscCalloc3(m, &rowBegin1, m, &rowMid1, m, &rowEnd1)); 6577 PetscCall(PetscCalloc3(m, &rowBegin2, m, &rowMid2, m, &rowEnd2)); 6578 PetscCall(MatSplitEntries_Internal(mat, rem, i1, j1, perm1, rowBegin1, rowMid1, rowEnd1, &Atot1, &Aperm1, &Annz1, &Ajmap1, &Btot1, &Bperm1, &Bnnz1, &Bjmap1)); 6579 PetscCall(MatSplitEntries_Internal(mat, n2, i2, j2, perm2, rowBegin2, rowMid2, rowEnd2, &Atot2, &Aperm2, &Annz2, &Ajmap2, &Btot2, &Bperm2, &Bnnz2, &Bjmap2)); 6580 6581 /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */ 6582 PetscInt *Ai, *Bi; 6583 PetscInt *Aj, *Bj; 6584 6585 PetscCall(PetscMalloc1(m + 1, &Ai)); 6586 PetscCall(PetscMalloc1(m + 1, &Bi)); 6587 PetscCall(PetscMalloc1(Annz1 + Annz2, &Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */ 6588 PetscCall(PetscMalloc1(Bnnz1 + Bnnz2, &Bj)); 6589 6590 PetscCount *Aimap1, *Bimap1, *Aimap2, *Bimap2; 6591 PetscCall(PetscMalloc1(Annz1, &Aimap1)); 6592 PetscCall(PetscMalloc1(Bnnz1, &Bimap1)); 6593 PetscCall(PetscMalloc1(Annz2, &Aimap2)); 6594 PetscCall(PetscMalloc1(Bnnz2, &Bimap2)); 6595 6596 PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowBegin1, rowMid1, rowBegin2, rowMid2, Ajmap1, Ajmap2, Aimap1, Aimap2, Ai, Aj)); 6597 PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowMid1, rowEnd1, rowMid2, rowEnd2, Bjmap1, Bjmap2, Bimap1, Bimap2, Bi, Bj)); 6598 6599 /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we */ 6600 /* expect nonzeros in A/B most likely have local contributing 
entries */ 6601 PetscInt Annz = Ai[m]; 6602 PetscInt Bnnz = Bi[m]; 6603 PetscCount *Ajmap1_new, *Bjmap1_new; 6604 6605 PetscCall(PetscMalloc1(Annz + 1, &Ajmap1_new)); 6606 PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap1_new)); 6607 6608 PetscCall(ExpandJmap_Internal(Annz1, Annz, Aimap1, Ajmap1, Ajmap1_new)); 6609 PetscCall(ExpandJmap_Internal(Bnnz1, Bnnz, Bimap1, Bjmap1, Bjmap1_new)); 6610 6611 PetscCall(PetscFree(Aimap1)); 6612 PetscCall(PetscFree(Ajmap1)); 6613 PetscCall(PetscFree(Bimap1)); 6614 PetscCall(PetscFree(Bjmap1)); 6615 PetscCall(PetscFree3(rowBegin1, rowMid1, rowEnd1)); 6616 PetscCall(PetscFree3(rowBegin2, rowMid2, rowEnd2)); 6617 PetscCall(PetscFree(perm1)); 6618 PetscCall(PetscFree3(i2, j2, perm2)); 6619 6620 Ajmap1 = Ajmap1_new; 6621 Bjmap1 = Bjmap1_new; 6622 6623 /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */ 6624 if (Annz < Annz1 + Annz2) { 6625 PetscInt *Aj_new; 6626 PetscCall(PetscMalloc1(Annz, &Aj_new)); 6627 PetscCall(PetscArraycpy(Aj_new, Aj, Annz)); 6628 PetscCall(PetscFree(Aj)); 6629 Aj = Aj_new; 6630 } 6631 6632 if (Bnnz < Bnnz1 + Bnnz2) { 6633 PetscInt *Bj_new; 6634 PetscCall(PetscMalloc1(Bnnz, &Bj_new)); 6635 PetscCall(PetscArraycpy(Bj_new, Bj, Bnnz)); 6636 PetscCall(PetscFree(Bj)); 6637 Bj = Bj_new; 6638 } 6639 6640 /* Create new submatrices for on-process and off-process coupling */ 6641 PetscScalar *Aa, *Ba; 6642 MatType rtype; 6643 Mat_SeqAIJ *a, *b; 6644 PetscObjectState state; 6645 PetscCall(PetscCalloc1(Annz, &Aa)); /* Zero matrix on device */ 6646 PetscCall(PetscCalloc1(Bnnz, &Ba)); 6647 /* make Aj[] local, i.e, based off the start column of the diagonal portion */ 6648 if (cstart) { 6649 for (k = 0; k < Annz; k++) Aj[k] -= cstart; 6650 } 6651 6652 PetscCall(MatGetRootType_Private(mat, &rtype)); 6653 6654 MatSeqXAIJGetOptions_Private(mpiaij->A); 6655 PetscCall(MatDestroy(&mpiaij->A)); 6656 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, Ai, Aj, Aa, &mpiaij->A)); 6657 
PetscCall(MatSetBlockSizesFromMats(mpiaij->A, mat, mat)); 6658 MatSeqXAIJRestoreOptions_Private(mpiaij->A); 6659 6660 MatSeqXAIJGetOptions_Private(mpiaij->B); 6661 PetscCall(MatDestroy(&mpiaij->B)); 6662 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, mat->cmap->N, Bi, Bj, Ba, &mpiaij->B)); 6663 PetscCall(MatSetBlockSizesFromMats(mpiaij->B, mat, mat)); 6664 MatSeqXAIJRestoreOptions_Private(mpiaij->B); 6665 6666 PetscCall(MatSetUpMultiply_MPIAIJ(mat)); 6667 mat->was_assembled = PETSC_TRUE; // was_assembled in effect means the Mvctx is built; doing so avoids redundant MatSetUpMultiply_MPIAIJ 6668 state = mpiaij->A->nonzerostate + mpiaij->B->nonzerostate; 6669 PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat))); 6670 6671 a = (Mat_SeqAIJ *)mpiaij->A->data; 6672 b = (Mat_SeqAIJ *)mpiaij->B->data; 6673 a->free_a = PETSC_TRUE; 6674 a->free_ij = PETSC_TRUE; 6675 b->free_a = PETSC_TRUE; 6676 b->free_ij = PETSC_TRUE; 6677 a->maxnz = a->nz; 6678 b->maxnz = b->nz; 6679 6680 /* conversion must happen AFTER multiply setup */ 6681 PetscCall(MatConvert(mpiaij->A, rtype, MAT_INPLACE_MATRIX, &mpiaij->A)); 6682 PetscCall(MatConvert(mpiaij->B, rtype, MAT_INPLACE_MATRIX, &mpiaij->B)); 6683 PetscCall(VecDestroy(&mpiaij->lvec)); 6684 PetscCall(MatCreateVecs(mpiaij->B, &mpiaij->lvec, NULL)); 6685 6686 // Put the COO struct in a container and then attach that to the matrix 6687 PetscCall(PetscMalloc1(1, &coo)); 6688 coo->n = coo_n; 6689 coo->sf = sf2; 6690 coo->sendlen = nleaves; 6691 coo->recvlen = nroots; 6692 coo->Annz = Annz; 6693 coo->Bnnz = Bnnz; 6694 coo->Annz2 = Annz2; 6695 coo->Bnnz2 = Bnnz2; 6696 coo->Atot1 = Atot1; 6697 coo->Atot2 = Atot2; 6698 coo->Btot1 = Btot1; 6699 coo->Btot2 = Btot2; 6700 coo->Ajmap1 = Ajmap1; 6701 coo->Aperm1 = Aperm1; 6702 coo->Bjmap1 = Bjmap1; 6703 coo->Bperm1 = Bperm1; 6704 coo->Aimap2 = Aimap2; 6705 coo->Ajmap2 = Ajmap2; 6706 coo->Aperm2 = Aperm2; 6707 coo->Bimap2 = Bimap2; 6708 
coo->Bjmap2 = Bjmap2; 6709 coo->Bperm2 = Bperm2; 6710 coo->Cperm1 = Cperm1; 6711 // Allocate in preallocation. If not used, it has zero cost on host 6712 PetscCall(PetscMalloc2(coo->sendlen, &coo->sendbuf, coo->recvlen, &coo->recvbuf)); 6713 PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container)); 6714 PetscCall(PetscContainerSetPointer(container, coo)); 6715 PetscCall(PetscContainerSetCtxDestroy(container, MatCOOStructDestroy_MPIAIJ)); 6716 PetscCall(PetscObjectCompose((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject)container)); 6717 PetscCall(PetscContainerDestroy(&container)); 6718 PetscFunctionReturn(PETSC_SUCCESS); 6719 } 6720 6721 static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat, const PetscScalar v[], InsertMode imode) 6722 { 6723 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)mat->data; 6724 Mat A = mpiaij->A, B = mpiaij->B; 6725 PetscScalar *Aa, *Ba; 6726 PetscScalar *sendbuf, *recvbuf; 6727 const PetscCount *Ajmap1, *Ajmap2, *Aimap2; 6728 const PetscCount *Bjmap1, *Bjmap2, *Bimap2; 6729 const PetscCount *Aperm1, *Aperm2, *Bperm1, *Bperm2; 6730 const PetscCount *Cperm1; 6731 PetscContainer container; 6732 MatCOOStruct_MPIAIJ *coo; 6733 6734 PetscFunctionBegin; 6735 PetscCall(PetscObjectQuery((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject *)&container)); 6736 PetscCheck(container, PetscObjectComm((PetscObject)mat), PETSC_ERR_PLIB, "Not found MatCOOStruct on this matrix"); 6737 PetscCall(PetscContainerGetPointer(container, (void **)&coo)); 6738 sendbuf = coo->sendbuf; 6739 recvbuf = coo->recvbuf; 6740 Ajmap1 = coo->Ajmap1; 6741 Ajmap2 = coo->Ajmap2; 6742 Aimap2 = coo->Aimap2; 6743 Bjmap1 = coo->Bjmap1; 6744 Bjmap2 = coo->Bjmap2; 6745 Bimap2 = coo->Bimap2; 6746 Aperm1 = coo->Aperm1; 6747 Aperm2 = coo->Aperm2; 6748 Bperm1 = coo->Bperm1; 6749 Bperm2 = coo->Bperm2; 6750 Cperm1 = coo->Cperm1; 6751 6752 PetscCall(MatSeqAIJGetArray(A, &Aa)); /* Might read and write matrix values */ 6753 PetscCall(MatSeqAIJGetArray(B, &Ba)); 6754 6755 /* Pack 
entries to be sent to remote */ 6756 for (PetscCount i = 0; i < coo->sendlen; i++) sendbuf[i] = v[Cperm1[i]]; 6757 6758 /* Send remote entries to their owner and overlap the communication with local computation */ 6759 PetscCall(PetscSFReduceWithMemTypeBegin(coo->sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, sendbuf, PETSC_MEMTYPE_HOST, recvbuf, MPI_REPLACE)); 6760 /* Add local entries to A and B */ 6761 for (PetscCount i = 0; i < coo->Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */ 6762 PetscScalar sum = 0.0; /* Do partial summation first to improve numerical stability */ 6763 for (PetscCount k = Ajmap1[i]; k < Ajmap1[i + 1]; k++) sum += v[Aperm1[k]]; 6764 Aa[i] = (imode == INSERT_VALUES ? 0.0 : Aa[i]) + sum; 6765 } 6766 for (PetscCount i = 0; i < coo->Bnnz; i++) { 6767 PetscScalar sum = 0.0; 6768 for (PetscCount k = Bjmap1[i]; k < Bjmap1[i + 1]; k++) sum += v[Bperm1[k]]; 6769 Ba[i] = (imode == INSERT_VALUES ? 0.0 : Ba[i]) + sum; 6770 } 6771 PetscCall(PetscSFReduceEnd(coo->sf, MPIU_SCALAR, sendbuf, recvbuf, MPI_REPLACE)); 6772 6773 /* Add received remote entries to A and B */ 6774 for (PetscCount i = 0; i < coo->Annz2; i++) { 6775 for (PetscCount k = Ajmap2[i]; k < Ajmap2[i + 1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]]; 6776 } 6777 for (PetscCount i = 0; i < coo->Bnnz2; i++) { 6778 for (PetscCount k = Bjmap2[i]; k < Bjmap2[i + 1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]]; 6779 } 6780 PetscCall(MatSeqAIJRestoreArray(A, &Aa)); 6781 PetscCall(MatSeqAIJRestoreArray(B, &Ba)); 6782 PetscFunctionReturn(PETSC_SUCCESS); 6783 } 6784 6785 /*MC 6786 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 6787 6788 Options Database Keys: 6789 . 
-mat_type mpiaij - sets the matrix type to `MATMPIAIJ` during a call to `MatSetFromOptions()` 6790 6791 Level: beginner 6792 6793 Notes: 6794 `MatSetValues()` may be called for this matrix type with a `NULL` argument for the numerical values, 6795 in this case the values associated with the rows and columns one passes in are set to zero 6796 in the matrix 6797 6798 `MatSetOptions`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this no 6799 space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored 6800 6801 .seealso: [](ch_matrices), `Mat`, `MATSEQAIJ`, `MATAIJ`, `MatCreateAIJ()` 6802 M*/ 6803 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 6804 { 6805 Mat_MPIAIJ *b; 6806 PetscMPIInt size; 6807 6808 PetscFunctionBegin; 6809 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 6810 6811 PetscCall(PetscNew(&b)); 6812 B->data = (void *)b; 6813 B->ops[0] = MatOps_Values; 6814 B->assembled = PETSC_FALSE; 6815 B->insertmode = NOT_SET_VALUES; 6816 b->size = size; 6817 6818 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank)); 6819 6820 /* build cache for off array entries formed */ 6821 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash)); 6822 6823 b->donotstash = PETSC_FALSE; 6824 b->colmap = NULL; 6825 b->garray = NULL; 6826 b->roworiented = PETSC_TRUE; 6827 6828 /* stuff used for matrix vector multiply */ 6829 b->lvec = NULL; 6830 b->Mvctx = NULL; 6831 6832 /* stuff for MatGetRow() */ 6833 b->rowindices = NULL; 6834 b->rowvalues = NULL; 6835 b->getrowactive = PETSC_FALSE; 6836 6837 /* flexible pointer used in CUSPARSE classes */ 6838 b->spptr = NULL; 6839 6840 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetUseScalableIncreaseOverlap_C", MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ)); 6841 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIAIJ)); 6842 
PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIAIJ)); 6843 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_MPIAIJ)); 6844 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocation_C", MatMPIAIJSetPreallocation_MPIAIJ)); 6845 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetPreallocation_C", MatResetPreallocation_MPIAIJ)); 6846 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetHash_C", MatResetHash_MPIAIJ)); 6847 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocationCSR_C", MatMPIAIJSetPreallocationCSR_MPIAIJ)); 6848 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIAIJ)); 6849 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijperm_C", MatConvert_MPIAIJ_MPIAIJPERM)); 6850 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijsell_C", MatConvert_MPIAIJ_MPIAIJSELL)); 6851 #if defined(PETSC_HAVE_CUDA) 6852 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcusparse_C", MatConvert_MPIAIJ_MPIAIJCUSPARSE)); 6853 #endif 6854 #if defined(PETSC_HAVE_HIP) 6855 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijhipsparse_C", MatConvert_MPIAIJ_MPIAIJHIPSPARSE)); 6856 #endif 6857 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 6858 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijkokkos_C", MatConvert_MPIAIJ_MPIAIJKokkos)); 6859 #endif 6860 #if defined(PETSC_HAVE_MKL_SPARSE) 6861 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijmkl_C", MatConvert_MPIAIJ_MPIAIJMKL)); 6862 #endif 6863 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcrl_C", MatConvert_MPIAIJ_MPIAIJCRL)); 6864 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpibaij_C", MatConvert_MPIAIJ_MPIBAIJ)); 
6865 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisbaij_C", MatConvert_MPIAIJ_MPISBAIJ)); 6866 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense)); 6867 #if defined(PETSC_HAVE_ELEMENTAL) 6868 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_elemental_C", MatConvert_MPIAIJ_Elemental)); 6869 #endif 6870 #if defined(PETSC_HAVE_SCALAPACK) 6871 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_scalapack_C", MatConvert_AIJ_ScaLAPACK)); 6872 #endif 6873 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_is_C", MatConvert_XAIJ_IS)); 6874 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisell_C", MatConvert_MPIAIJ_MPISELL)); 6875 #if defined(PETSC_HAVE_HYPRE) 6876 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_hypre_C", MatConvert_AIJ_HYPRE)); 6877 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", MatProductSetFromOptions_Transpose_AIJ_AIJ)); 6878 #endif 6879 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_is_mpiaij_C", MatProductSetFromOptions_IS_XAIJ)); 6880 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpiaij_C", MatProductSetFromOptions_MPIAIJ)); 6881 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetPreallocationCOO_C", MatSetPreallocationCOO_MPIAIJ)); 6882 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetValuesCOO_C", MatSetValuesCOO_MPIAIJ)); 6883 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIAIJ)); 6884 PetscFunctionReturn(PETSC_SUCCESS); 6885 } 6886 6887 /*@ 6888 MatCreateMPIAIJWithSplitArrays - creates a `MATMPIAIJ` matrix using arrays that contain the "diagonal" 6889 and "off-diagonal" part of the matrix in CSR format. 
6890 6891 Collective 6892 6893 Input Parameters: 6894 + comm - MPI communicator 6895 . m - number of local rows (Cannot be `PETSC_DECIDE`) 6896 . n - This value should be the same as the local size used in creating the 6897 x vector for the matrix-vector product $y = Ax$. (or `PETSC_DECIDE` to have 6898 calculated if `N` is given) For square matrices `n` is almost always `m`. 6899 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given) 6900 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given) 6901 . i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 6902 . j - column indices, which must be local, i.e., based off the start column of the diagonal portion 6903 . a - matrix values 6904 . oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix 6905 . oj - column indices, which must be global, representing global columns in the `MATMPIAIJ` matrix 6906 - oa - matrix values 6907 6908 Output Parameter: 6909 . mat - the matrix 6910 6911 Level: advanced 6912 6913 Notes: 6914 The `i`, `j`, and `a` arrays ARE NOT copied by this routine into the internal format used by PETSc (even in Fortran). The user 6915 must free the arrays once the matrix has been destroyed and not before. 6916 6917 The `i` and `j` indices are 0 based 6918 6919 See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix 6920 6921 This sets local rows and cannot be used to set off-processor values. 6922 6923 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 6924 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 6925 not easily support in-place reassembly. 
It is recommended to use MatSetValues() (or a variant thereof) because 6926 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 6927 keep track of the underlying array. Use `MatSetOption`(A,`MAT_NO_OFF_PROC_ENTRIES`,`PETSC_TRUE`) to disable all 6928 communication if it is known that only local entries will be set. 6929 6930 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 6931 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()` 6932 @*/ 6933 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt i[], PetscInt j[], PetscScalar a[], PetscInt oi[], PetscInt oj[], PetscScalar oa[], Mat *mat) 6934 { 6935 Mat_MPIAIJ *maij; 6936 6937 PetscFunctionBegin; 6938 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 6939 PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 6940 PetscCheck(oi[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "oi (row indices) must start with 0"); 6941 PetscCall(MatCreate(comm, mat)); 6942 PetscCall(MatSetSizes(*mat, m, n, M, N)); 6943 PetscCall(MatSetType(*mat, MATMPIAIJ)); 6944 maij = (Mat_MPIAIJ *)(*mat)->data; 6945 6946 (*mat)->preallocated = PETSC_TRUE; 6947 6948 PetscCall(PetscLayoutSetUp((*mat)->rmap)); 6949 PetscCall(PetscLayoutSetUp((*mat)->cmap)); 6950 6951 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, i, j, a, &maij->A)); 6952 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, (*mat)->cmap->N, oi, oj, oa, &maij->B)); 6953 6954 PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 6955 PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY)); 6956 PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY)); 6957 PetscCall(MatSetOption(*mat, 
MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE)); 6958 PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 6959 PetscFunctionReturn(PETSC_SUCCESS); 6960 } 6961 6962 typedef struct { 6963 Mat *mp; /* intermediate products */ 6964 PetscBool *mptmp; /* is the intermediate product temporary ? */ 6965 PetscInt cp; /* number of intermediate products */ 6966 6967 /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */ 6968 PetscInt *startsj_s, *startsj_r; 6969 PetscScalar *bufa; 6970 Mat P_oth; 6971 6972 /* may take advantage of merging product->B */ 6973 Mat Bloc; /* B-local by merging diag and off-diag */ 6974 6975 /* cusparse does not have support to split between symbolic and numeric phases. 6976 When api_user is true, we don't need to update the numerical values 6977 of the temporary storage */ 6978 PetscBool reusesym; 6979 6980 /* support for COO values insertion */ 6981 PetscScalar *coo_v, *coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */ 6982 PetscInt **own; /* own[i] points to address of on-process COO indices for Mat mp[i] */ 6983 PetscInt **off; /* off[i] points to address of off-process COO indices for Mat mp[i] */ 6984 PetscBool hasoffproc; /* if true, have off-process values insertion (i.e. 
AtB or PtAP) */ 6985 PetscSF sf; /* used for non-local values insertion and memory malloc */ 6986 PetscMemType mtype; 6987 6988 /* customization */ 6989 PetscBool abmerge; 6990 PetscBool P_oth_bind; 6991 } MatMatMPIAIJBACKEND; 6992 6993 static PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data) 6994 { 6995 MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND *)data; 6996 PetscInt i; 6997 6998 PetscFunctionBegin; 6999 PetscCall(PetscFree2(mmdata->startsj_s, mmdata->startsj_r)); 7000 PetscCall(PetscFree(mmdata->bufa)); 7001 PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_v)); 7002 PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_w)); 7003 PetscCall(MatDestroy(&mmdata->P_oth)); 7004 PetscCall(MatDestroy(&mmdata->Bloc)); 7005 PetscCall(PetscSFDestroy(&mmdata->sf)); 7006 for (i = 0; i < mmdata->cp; i++) PetscCall(MatDestroy(&mmdata->mp[i])); 7007 PetscCall(PetscFree2(mmdata->mp, mmdata->mptmp)); 7008 PetscCall(PetscFree(mmdata->own[0])); 7009 PetscCall(PetscFree(mmdata->own)); 7010 PetscCall(PetscFree(mmdata->off[0])); 7011 PetscCall(PetscFree(mmdata->off)); 7012 PetscCall(PetscFree(mmdata)); 7013 PetscFunctionReturn(PETSC_SUCCESS); 7014 } 7015 7016 /* Copy selected n entries with indices in idx[] of A to v[]. 
7017 If idx is NULL, copy the whole data array of A to v[] 7018 */ 7019 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[]) 7020 { 7021 PetscErrorCode (*f)(Mat, PetscInt, const PetscInt[], PetscScalar[]); 7022 7023 PetscFunctionBegin; 7024 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJCopySubArray_C", &f)); 7025 if (f) { 7026 PetscCall((*f)(A, n, idx, v)); 7027 } else { 7028 const PetscScalar *vv; 7029 7030 PetscCall(MatSeqAIJGetArrayRead(A, &vv)); 7031 if (n && idx) { 7032 PetscScalar *w = v; 7033 const PetscInt *oi = idx; 7034 PetscInt j; 7035 7036 for (j = 0; j < n; j++) *w++ = vv[*oi++]; 7037 } else { 7038 PetscCall(PetscArraycpy(v, vv, n)); 7039 } 7040 PetscCall(MatSeqAIJRestoreArrayRead(A, &vv)); 7041 } 7042 PetscFunctionReturn(PETSC_SUCCESS); 7043 } 7044 7045 static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C) 7046 { 7047 MatMatMPIAIJBACKEND *mmdata; 7048 PetscInt i, n_d, n_o; 7049 7050 PetscFunctionBegin; 7051 MatCheckProduct(C, 1); 7052 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 7053 mmdata = (MatMatMPIAIJBACKEND *)C->product->data; 7054 if (!mmdata->reusesym) { /* update temporary matrices */ 7055 if (mmdata->P_oth) PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A, C->product->B, MAT_REUSE_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7056 if (mmdata->Bloc) PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B, MAT_REUSE_MATRIX, NULL, &mmdata->Bloc)); 7057 } 7058 mmdata->reusesym = PETSC_FALSE; 7059 7060 for (i = 0; i < mmdata->cp; i++) { 7061 PetscCheck(mmdata->mp[i]->ops->productnumeric, PetscObjectComm((PetscObject)mmdata->mp[i]), PETSC_ERR_PLIB, "Missing numeric op for %s", MatProductTypes[mmdata->mp[i]->product->type]); 7062 PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i])); 7063 } 7064 for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) { 7065 PetscInt noff; 7066 7067 
PetscCall(PetscIntCast(mmdata->off[i + 1] - mmdata->off[i], &noff)); 7068 if (mmdata->mptmp[i]) continue; 7069 if (noff) { 7070 PetscInt nown; 7071 7072 PetscCall(PetscIntCast(mmdata->own[i + 1] - mmdata->own[i], &nown)); 7073 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], noff, mmdata->off[i], mmdata->coo_w + n_o)); 7074 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], nown, mmdata->own[i], mmdata->coo_v + n_d)); 7075 n_o += noff; 7076 n_d += nown; 7077 } else { 7078 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mmdata->mp[i]->data; 7079 7080 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], mm->nz, NULL, mmdata->coo_v + n_d)); 7081 n_d += mm->nz; 7082 } 7083 } 7084 if (mmdata->hasoffproc) { /* offprocess insertion */ 7085 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d)); 7086 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d)); 7087 } 7088 PetscCall(MatSetValuesCOO(C, mmdata->coo_v, INSERT_VALUES)); 7089 PetscFunctionReturn(PETSC_SUCCESS); 7090 } 7091 7092 /* Support for Pt * A, A * P, or Pt * A * P */ 7093 #define MAX_NUMBER_INTERMEDIATE 4 7094 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C) 7095 { 7096 Mat_Product *product = C->product; 7097 Mat A, P, mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */ 7098 Mat_MPIAIJ *a, *p; 7099 MatMatMPIAIJBACKEND *mmdata; 7100 ISLocalToGlobalMapping P_oth_l2g = NULL; 7101 IS glob = NULL; 7102 const char *prefix; 7103 char pprefix[256]; 7104 const PetscInt *globidx, *P_oth_idx; 7105 PetscInt i, j, cp, m, n, M, N, *coo_i, *coo_j; 7106 PetscCount ncoo, ncoo_d, ncoo_o, ncoo_oown; 7107 PetscInt cmapt[MAX_NUMBER_INTERMEDIATE], rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. 
*/ 7108 /* type-0: consecutive, start from 0; type-1: consecutive with */ 7109 /* a base offset; type-2: sparse with a local to global map table */ 7110 const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE], *rmapa[MAX_NUMBER_INTERMEDIATE]; /* col/row local to global map array (table) for type-2 map type */ 7111 7112 MatProductType ptype; 7113 PetscBool mptmp[MAX_NUMBER_INTERMEDIATE], hasoffproc = PETSC_FALSE, iscuda, iship, iskokk; 7114 PetscMPIInt size; 7115 7116 PetscFunctionBegin; 7117 MatCheckProduct(C, 1); 7118 PetscCheck(!product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 7119 ptype = product->type; 7120 if (product->A->symmetric == PETSC_BOOL3_TRUE && ptype == MATPRODUCT_AtB) { 7121 ptype = MATPRODUCT_AB; 7122 product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE; 7123 } 7124 switch (ptype) { 7125 case MATPRODUCT_AB: 7126 A = product->A; 7127 P = product->B; 7128 m = A->rmap->n; 7129 n = P->cmap->n; 7130 M = A->rmap->N; 7131 N = P->cmap->N; 7132 hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */ 7133 break; 7134 case MATPRODUCT_AtB: 7135 P = product->A; 7136 A = product->B; 7137 m = P->cmap->n; 7138 n = A->cmap->n; 7139 M = P->cmap->N; 7140 N = A->cmap->N; 7141 hasoffproc = PETSC_TRUE; 7142 break; 7143 case MATPRODUCT_PtAP: 7144 A = product->A; 7145 P = product->B; 7146 m = P->cmap->n; 7147 n = P->cmap->n; 7148 M = P->cmap->N; 7149 N = P->cmap->N; 7150 hasoffproc = PETSC_TRUE; 7151 break; 7152 default: 7153 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]); 7154 } 7155 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C), &size)); 7156 if (size == 1) hasoffproc = PETSC_FALSE; 7157 7158 /* defaults */ 7159 for (i = 0; i < MAX_NUMBER_INTERMEDIATE; i++) { 7160 mp[i] = NULL; 7161 mptmp[i] = PETSC_FALSE; 7162 rmapt[i] = -1; 7163 cmapt[i] = -1; 7164 rmapa[i] = NULL; 7165 cmapa[i] = NULL; 7166 } 7167 7168 /* customization */ 
7169 PetscCall(PetscNew(&mmdata)); 7170 mmdata->reusesym = product->api_user; 7171 if (ptype == MATPRODUCT_AB) { 7172 if (product->api_user) { 7173 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatMult", "Mat"); 7174 PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL)); 7175 PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7176 PetscOptionsEnd(); 7177 } else { 7178 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat"); 7179 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL)); 7180 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7181 PetscOptionsEnd(); 7182 } 7183 } else if (ptype == MATPRODUCT_PtAP) { 7184 if (product->api_user) { 7185 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatPtAP", "Mat"); 7186 PetscCall(PetscOptionsBool("-matptap_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7187 PetscOptionsEnd(); 7188 } else { 7189 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat"); 7190 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7191 PetscOptionsEnd(); 7192 } 7193 } 7194 a = (Mat_MPIAIJ *)A->data; 7195 p = (Mat_MPIAIJ *)P->data; 7196 PetscCall(MatSetSizes(C, m, n, M, N)); 7197 PetscCall(PetscLayoutSetUp(C->rmap)); 7198 PetscCall(PetscLayoutSetUp(C->cmap)); 7199 PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 7200 
PetscCall(MatGetOptionsPrefix(C, &prefix)); 7201 7202 cp = 0; 7203 switch (ptype) { 7204 case MATPRODUCT_AB: /* A * P */ 7205 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7206 7207 /* A_diag * P_local (merged or not) */ 7208 if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */ 7209 /* P is product->B */ 7210 PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7211 PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp])); 7212 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7213 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7214 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7215 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7216 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7217 mp[cp]->product->api_user = product->api_user; 7218 PetscCall(MatProductSetFromOptions(mp[cp])); 7219 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7220 PetscCall(ISGetIndices(glob, &globidx)); 7221 rmapt[cp] = 1; 7222 cmapt[cp] = 2; 7223 cmapa[cp] = globidx; 7224 mptmp[cp] = PETSC_FALSE; 7225 cp++; 7226 } else { /* A_diag * P_diag and A_diag * P_off */ 7227 PetscCall(MatProductCreate(a->A, p->A, NULL, &mp[cp])); 7228 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7229 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7230 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7231 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7232 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7233 mp[cp]->product->api_user = product->api_user; 7234 PetscCall(MatProductSetFromOptions(mp[cp])); 7235 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7236 rmapt[cp] = 1; 7237 cmapt[cp] = 1; 7238 mptmp[cp] = PETSC_FALSE; 7239 cp++; 7240 PetscCall(MatProductCreate(a->A, p->B, NULL, &mp[cp])); 7241 
PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7242 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7243 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7244 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7245 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7246 mp[cp]->product->api_user = product->api_user; 7247 PetscCall(MatProductSetFromOptions(mp[cp])); 7248 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7249 rmapt[cp] = 1; 7250 cmapt[cp] = 2; 7251 cmapa[cp] = p->garray; 7252 mptmp[cp] = PETSC_FALSE; 7253 cp++; 7254 } 7255 7256 /* A_off * P_other */ 7257 if (mmdata->P_oth) { 7258 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); /* make P_oth use local col ids */ 7259 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx)); 7260 PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)a->B)->type_name)); 7261 PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind)); 7262 PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp])); 7263 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7264 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7265 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7266 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7267 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7268 mp[cp]->product->api_user = product->api_user; 7269 PetscCall(MatProductSetFromOptions(mp[cp])); 7270 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7271 rmapt[cp] = 1; 7272 cmapt[cp] = 2; 7273 cmapa[cp] = P_oth_idx; 7274 mptmp[cp] = PETSC_FALSE; 7275 cp++; 7276 } 7277 break; 7278 7279 case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */ 7280 /* A is product->B */ 7281 PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7282 if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */ 7283 PetscCall(MatProductCreate(mmdata->Bloc, mmdata->Bloc, 
NULL, &mp[cp])); 7284 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7285 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7286 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7287 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7288 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7289 mp[cp]->product->api_user = product->api_user; 7290 PetscCall(MatProductSetFromOptions(mp[cp])); 7291 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7292 PetscCall(ISGetIndices(glob, &globidx)); 7293 rmapt[cp] = 2; 7294 rmapa[cp] = globidx; 7295 cmapt[cp] = 2; 7296 cmapa[cp] = globidx; 7297 mptmp[cp] = PETSC_FALSE; 7298 cp++; 7299 } else { 7300 PetscCall(MatProductCreate(p->A, mmdata->Bloc, NULL, &mp[cp])); 7301 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7302 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7303 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7304 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7305 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7306 mp[cp]->product->api_user = product->api_user; 7307 PetscCall(MatProductSetFromOptions(mp[cp])); 7308 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7309 PetscCall(ISGetIndices(glob, &globidx)); 7310 rmapt[cp] = 1; 7311 cmapt[cp] = 2; 7312 cmapa[cp] = globidx; 7313 mptmp[cp] = PETSC_FALSE; 7314 cp++; 7315 PetscCall(MatProductCreate(p->B, mmdata->Bloc, NULL, &mp[cp])); 7316 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7317 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7318 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7319 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7320 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7321 mp[cp]->product->api_user = product->api_user; 7322 PetscCall(MatProductSetFromOptions(mp[cp])); 7323 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7324 rmapt[cp] = 2; 7325 rmapa[cp] = p->garray; 7326 cmapt[cp] = 
2; 7327 cmapa[cp] = globidx; 7328 mptmp[cp] = PETSC_FALSE; 7329 cp++; 7330 } 7331 break; 7332 case MATPRODUCT_PtAP: 7333 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7334 /* P is product->B */ 7335 PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7336 PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp])); 7337 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_PtAP)); 7338 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7339 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7340 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7341 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7342 mp[cp]->product->api_user = product->api_user; 7343 PetscCall(MatProductSetFromOptions(mp[cp])); 7344 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7345 PetscCall(ISGetIndices(glob, &globidx)); 7346 rmapt[cp] = 2; 7347 rmapa[cp] = globidx; 7348 cmapt[cp] = 2; 7349 cmapa[cp] = globidx; 7350 mptmp[cp] = PETSC_FALSE; 7351 cp++; 7352 if (mmdata->P_oth) { 7353 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); 7354 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx)); 7355 PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)a->B)->type_name)); 7356 PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind)); 7357 PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp])); 7358 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7359 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7360 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7361 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7362 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7363 mp[cp]->product->api_user = product->api_user; 7364 PetscCall(MatProductSetFromOptions(mp[cp])); 7365 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7366 mptmp[cp] = PETSC_TRUE; 
7367 cp++; 7368 PetscCall(MatProductCreate(mmdata->Bloc, mp[1], NULL, &mp[cp])); 7369 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7370 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7371 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7372 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7373 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7374 mp[cp]->product->api_user = product->api_user; 7375 PetscCall(MatProductSetFromOptions(mp[cp])); 7376 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7377 rmapt[cp] = 2; 7378 rmapa[cp] = globidx; 7379 cmapt[cp] = 2; 7380 cmapa[cp] = P_oth_idx; 7381 mptmp[cp] = PETSC_FALSE; 7382 cp++; 7383 } 7384 break; 7385 default: 7386 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]); 7387 } 7388 /* sanity check */ 7389 if (size > 1) 7390 for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected offproc map type for product %" PetscInt_FMT, i); 7391 7392 PetscCall(PetscMalloc2(cp, &mmdata->mp, cp, &mmdata->mptmp)); 7393 for (i = 0; i < cp; i++) { 7394 mmdata->mp[i] = mp[i]; 7395 mmdata->mptmp[i] = mptmp[i]; 7396 } 7397 mmdata->cp = cp; 7398 C->product->data = mmdata; 7399 C->product->destroy = MatDestroy_MatMatMPIAIJBACKEND; 7400 C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND; 7401 7402 /* memory type */ 7403 mmdata->mtype = PETSC_MEMTYPE_HOST; 7404 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iscuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, "")); 7405 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iship, MATSEQAIJHIPSPARSE, MATMPIAIJHIPSPARSE, "")); 7406 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iskokk, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, "")); 7407 if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA; 7408 else if (iship) mmdata->mtype = PETSC_MEMTYPE_HIP; 7409 else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS; 7410 7411 /* prepare coo 
coordinates for values insertion */ 7412 7413 /* count total nonzeros of those intermediate seqaij Mats 7414 ncoo_d: # of nonzeros of matrices that do not have offproc entries 7415 ncoo_o: # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs 7416 ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally 7417 */ 7418 for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) { 7419 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7420 if (mptmp[cp]) continue; 7421 if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scatter to all processes (might include self) */ 7422 const PetscInt *rmap = rmapa[cp]; 7423 const PetscInt mr = mp[cp]->rmap->n; 7424 const PetscInt rs = C->rmap->rstart; 7425 const PetscInt re = C->rmap->rend; 7426 const PetscInt *ii = mm->i; 7427 for (i = 0; i < mr; i++) { 7428 const PetscInt gr = rmap[i]; 7429 const PetscInt nz = ii[i + 1] - ii[i]; 7430 if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */ 7431 else ncoo_oown += nz; /* this row is local */ 7432 } 7433 } else ncoo_d += mm->nz; 7434 } 7435 7436 /* 7437 ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc 7438 7439 ncoo = ncoo_d + ncoo_oown + ncoo2, which ncoo2 is number of nonzeros inserted to me by other procs. 7440 7441 off[0] points to a big index array, which is shared by off[1,2,...]. Similarly, for own[0]. 7442 7443 off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others 7444 own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally 7445 so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others. 7446 7447 coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc. 7448 Ex. 
coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaining part stores i of nonzeros I will receive. 7449 */ 7450 PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->off)); /* +1 to make a csr-like data structure */ 7451 PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->own)); 7452 7453 /* gather (i,j) of nonzeros inserted by remote procs */ 7454 if (hasoffproc) { 7455 PetscSF msf; 7456 PetscInt ncoo2, *coo_i2, *coo_j2; 7457 7458 PetscCall(PetscMalloc1(ncoo_o, &mmdata->off[0])); 7459 PetscCall(PetscMalloc1(ncoo_oown, &mmdata->own[0])); 7460 PetscCall(PetscMalloc2(ncoo_o, &coo_i, ncoo_o, &coo_j)); /* to collect (i,j) of entries to be sent to others */ 7461 7462 for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) { 7463 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7464 PetscInt *idxoff = mmdata->off[cp]; 7465 PetscInt *idxown = mmdata->own[cp]; 7466 if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */ 7467 const PetscInt *rmap = rmapa[cp]; 7468 const PetscInt *cmap = cmapa[cp]; 7469 const PetscInt *ii = mm->i; 7470 PetscInt *coi = coo_i + ncoo_o; 7471 PetscInt *coj = coo_j + ncoo_o; 7472 const PetscInt mr = mp[cp]->rmap->n; 7473 const PetscInt rs = C->rmap->rstart; 7474 const PetscInt re = C->rmap->rend; 7475 const PetscInt cs = C->cmap->rstart; 7476 for (i = 0; i < mr; i++) { 7477 const PetscInt *jj = mm->j + ii[i]; 7478 const PetscInt gr = rmap[i]; 7479 const PetscInt nz = ii[i + 1] - ii[i]; 7480 if (gr < rs || gr >= re) { /* this is an offproc row */ 7481 for (j = ii[i]; j < ii[i + 1]; j++) { 7482 *coi++ = gr; 7483 *idxoff++ = j; 7484 } 7485 if (!cmapt[cp]) { /* already global */ 7486 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7487 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7488 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7489 } else { /* offdiag */ 7490 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7491 } 7492 ncoo_o += nz; 7493 } else { /* this is a local row */ 7494 for (j = ii[i]; j < 
ii[i + 1]; j++) *idxown++ = j; 7495 } 7496 } 7497 } 7498 mmdata->off[cp + 1] = idxoff; 7499 mmdata->own[cp + 1] = idxown; 7500 } 7501 7502 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf)); 7503 PetscInt incoo_o; 7504 PetscCall(PetscIntCast(ncoo_o, &incoo_o)); 7505 PetscCall(PetscSFSetGraphLayout(mmdata->sf, C->rmap, incoo_o /*nleaves*/, NULL /*ilocal*/, PETSC_OWN_POINTER, coo_i)); 7506 PetscCall(PetscSFGetMultiSF(mmdata->sf, &msf)); 7507 PetscCall(PetscSFGetGraph(msf, &ncoo2 /*nroots*/, NULL, NULL, NULL)); 7508 ncoo = ncoo_d + ncoo_oown + ncoo2; 7509 PetscCall(PetscMalloc2(ncoo, &coo_i2, ncoo, &coo_j2)); 7510 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */ 7511 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); 7512 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown)); 7513 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown)); 7514 PetscCall(PetscFree2(coo_i, coo_j)); 7515 /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */ 7516 PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo_o * sizeof(PetscScalar), (void **)&mmdata->coo_w)); 7517 coo_i = coo_i2; 7518 coo_j = coo_j2; 7519 } else { /* no offproc values insertion */ 7520 ncoo = ncoo_d; 7521 PetscCall(PetscMalloc2(ncoo, &coo_i, ncoo, &coo_j)); 7522 7523 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf)); 7524 PetscCall(PetscSFSetGraph(mmdata->sf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER)); 7525 PetscCall(PetscSFSetUp(mmdata->sf)); 7526 } 7527 mmdata->hasoffproc = hasoffproc; 7528 7529 /* gather (i,j) of nonzeros inserted locally */ 7530 for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) { 7531 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7532 PetscInt *coi = coo_i + ncoo_d; 7533 PetscInt *coj = coo_j + ncoo_d; 7534 const PetscInt *jj = 
mm->j; 7535 const PetscInt *ii = mm->i; 7536 const PetscInt *cmap = cmapa[cp]; 7537 const PetscInt *rmap = rmapa[cp]; 7538 const PetscInt mr = mp[cp]->rmap->n; 7539 const PetscInt rs = C->rmap->rstart; 7540 const PetscInt re = C->rmap->rend; 7541 const PetscInt cs = C->cmap->rstart; 7542 7543 if (mptmp[cp]) continue; 7544 if (rmapt[cp] == 1) { /* consecutive rows */ 7545 /* fill coo_i */ 7546 for (i = 0; i < mr; i++) { 7547 const PetscInt gr = i + rs; 7548 for (j = ii[i]; j < ii[i + 1]; j++) coi[j] = gr; 7549 } 7550 /* fill coo_j */ 7551 if (!cmapt[cp]) { /* type-0, already global */ 7552 PetscCall(PetscArraycpy(coj, jj, mm->nz)); 7553 } else if (cmapt[cp] == 1) { /* type-1, local to global for consecutive columns of C */ 7554 for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */ 7555 } else { /* type-2, local to global for sparse columns */ 7556 for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]]; 7557 } 7558 ncoo_d += mm->nz; 7559 } else if (rmapt[cp] == 2) { /* sparse rows */ 7560 for (i = 0; i < mr; i++) { 7561 const PetscInt *jj = mm->j + ii[i]; 7562 const PetscInt gr = rmap[i]; 7563 const PetscInt nz = ii[i + 1] - ii[i]; 7564 if (gr >= rs && gr < re) { /* local rows */ 7565 for (j = ii[i]; j < ii[i + 1]; j++) *coi++ = gr; 7566 if (!cmapt[cp]) { /* type-0, already global */ 7567 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7568 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7569 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7570 } else { /* type-2, local to global for sparse columns */ 7571 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7572 } 7573 ncoo_d += nz; 7574 } 7575 } 7576 } 7577 } 7578 if (glob) PetscCall(ISRestoreIndices(glob, &globidx)); 7579 PetscCall(ISDestroy(&glob)); 7580 if (P_oth_l2g) PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g, &P_oth_idx)); 7581 PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g)); 7582 /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to 
this proc */ 7583 PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo * sizeof(PetscScalar), (void **)&mmdata->coo_v)); 7584 7585 /* set block sizes */ 7586 A = product->A; 7587 P = product->B; 7588 switch (ptype) { 7589 case MATPRODUCT_PtAP: 7590 PetscCall(MatSetBlockSizes(C, P->cmap->bs, P->cmap->bs)); 7591 break; 7592 case MATPRODUCT_RARt: 7593 PetscCall(MatSetBlockSizes(C, P->rmap->bs, P->rmap->bs)); 7594 break; 7595 case MATPRODUCT_ABC: 7596 PetscCall(MatSetBlockSizesFromMats(C, A, product->C)); 7597 break; 7598 case MATPRODUCT_AB: 7599 PetscCall(MatSetBlockSizesFromMats(C, A, P)); 7600 break; 7601 case MATPRODUCT_AtB: 7602 PetscCall(MatSetBlockSizes(C, A->cmap->bs, P->cmap->bs)); 7603 break; 7604 case MATPRODUCT_ABt: 7605 PetscCall(MatSetBlockSizes(C, A->rmap->bs, P->rmap->bs)); 7606 break; 7607 default: 7608 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for ProductType %s", MatProductTypes[ptype]); 7609 } 7610 7611 /* preallocate with COO data */ 7612 PetscCall(MatSetPreallocationCOO(C, ncoo, coo_i, coo_j)); 7613 PetscCall(PetscFree2(coo_i, coo_j)); 7614 PetscFunctionReturn(PETSC_SUCCESS); 7615 } 7616 7617 PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat) 7618 { 7619 Mat_Product *product = mat->product; 7620 #if defined(PETSC_HAVE_DEVICE) 7621 PetscBool match = PETSC_FALSE; 7622 PetscBool usecpu = PETSC_FALSE; 7623 #else 7624 PetscBool match = PETSC_TRUE; 7625 #endif 7626 7627 PetscFunctionBegin; 7628 MatCheckProduct(mat, 1); 7629 #if defined(PETSC_HAVE_DEVICE) 7630 if (!product->A->boundtocpu && !product->B->boundtocpu) PetscCall(PetscObjectTypeCompare((PetscObject)product->B, ((PetscObject)product->A)->type_name, &match)); 7631 if (match) { /* we can always fallback to the CPU if requested */ 7632 switch (product->type) { 7633 case MATPRODUCT_AB: 7634 if (product->api_user) { 7635 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatMatMult", "Mat"); 7636 
PetscCall(PetscOptionsBool("-matmatmult_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL)); 7637 PetscOptionsEnd(); 7638 } else { 7639 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AB", "Mat"); 7640 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL)); 7641 PetscOptionsEnd(); 7642 } 7643 break; 7644 case MATPRODUCT_AtB: 7645 if (product->api_user) { 7646 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatTransposeMatMult", "Mat"); 7647 PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL)); 7648 PetscOptionsEnd(); 7649 } else { 7650 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AtB", "Mat"); 7651 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL)); 7652 PetscOptionsEnd(); 7653 } 7654 break; 7655 case MATPRODUCT_PtAP: 7656 if (product->api_user) { 7657 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatPtAP", "Mat"); 7658 PetscCall(PetscOptionsBool("-matptap_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL)); 7659 PetscOptionsEnd(); 7660 } else { 7661 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_PtAP", "Mat"); 7662 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL)); 7663 PetscOptionsEnd(); 7664 } 7665 break; 7666 default: 7667 break; 7668 } 7669 match = (PetscBool)!usecpu; 7670 } 7671 #endif 7672 if (match) { 7673 switch (product->type) { 7674 case MATPRODUCT_AB: 7675 case MATPRODUCT_AtB: 7676 case MATPRODUCT_PtAP: 7677 mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND; 7678 break; 7679 default: 7680 break; 7681 } 7682 } 7683 /* fallback to 
MPIAIJ ops */ 7684 if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat)); 7685 PetscFunctionReturn(PETSC_SUCCESS); 7686 } 7687 7688 /* 7689 Produces a set of block column indices of the matrix row, one for each block represented in the original row 7690 7691 n - the number of block indices in cc[] 7692 cc - the block indices (must be large enough to contain the indices) 7693 */ 7694 static inline PetscErrorCode MatCollapseRow(Mat Amat, PetscInt row, PetscInt bs, PetscInt *n, PetscInt *cc) 7695 { 7696 PetscInt cnt = -1, nidx, j; 7697 const PetscInt *idx; 7698 7699 PetscFunctionBegin; 7700 PetscCall(MatGetRow(Amat, row, &nidx, &idx, NULL)); 7701 if (nidx) { 7702 cnt = 0; 7703 cc[cnt] = idx[0] / bs; 7704 for (j = 1; j < nidx; j++) { 7705 if (cc[cnt] < idx[j] / bs) cc[++cnt] = idx[j] / bs; 7706 } 7707 } 7708 PetscCall(MatRestoreRow(Amat, row, &nidx, &idx, NULL)); 7709 *n = cnt + 1; 7710 PetscFunctionReturn(PETSC_SUCCESS); 7711 } 7712 7713 /* 7714 Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows 7715 7716 ncollapsed - the number of block indices 7717 collapsed - the block indices (must be large enough to contain the indices) 7718 */ 7719 static inline PetscErrorCode MatCollapseRows(Mat Amat, PetscInt start, PetscInt bs, PetscInt *w0, PetscInt *w1, PetscInt *w2, PetscInt *ncollapsed, PetscInt **collapsed) 7720 { 7721 PetscInt i, nprev, *cprev = w0, ncur = 0, *ccur = w1, *merged = w2, *cprevtmp; 7722 7723 PetscFunctionBegin; 7724 PetscCall(MatCollapseRow(Amat, start, bs, &nprev, cprev)); 7725 for (i = start + 1; i < start + bs; i++) { 7726 PetscCall(MatCollapseRow(Amat, i, bs, &ncur, ccur)); 7727 PetscCall(PetscMergeIntArray(nprev, cprev, ncur, ccur, &nprev, &merged)); 7728 cprevtmp = cprev; 7729 cprev = merged; 7730 merged = cprevtmp; 7731 } 7732 *ncollapsed = nprev; 7733 if (collapsed) *collapsed = cprev; 7734 PetscFunctionReturn(PETSC_SUCCESS); 7735 } 7736 7737 /* 7738 
MatCreateGraph_Simple_AIJ - create simple scalar matrix (graph) from potentially blocked matrix 7739 7740 Input Parameter: 7741 . Amat - matrix 7742 - symmetrize - make the result symmetric 7743 + scale - scale with diagonal 7744 7745 Output Parameter: 7746 . a_Gmat - output scalar graph >= 0 7747 7748 */ 7749 PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, PetscReal filter, PetscInt index_size, PetscInt index[], Mat *a_Gmat) 7750 { 7751 PetscInt Istart, Iend, Ii, jj, kk, ncols, nloc, NN, MM, bs; 7752 MPI_Comm comm; 7753 Mat Gmat; 7754 PetscBool ismpiaij, isseqaij; 7755 Mat a, b, c; 7756 MatType jtype; 7757 7758 PetscFunctionBegin; 7759 PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm)); 7760 PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend)); 7761 PetscCall(MatGetSize(Amat, &MM, &NN)); 7762 PetscCall(MatGetBlockSize(Amat, &bs)); 7763 nloc = (Iend - Istart) / bs; 7764 7765 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATSEQAIJ, &isseqaij)); 7766 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATMPIAIJ, &ismpiaij)); 7767 PetscCheck(isseqaij || ismpiaij, comm, PETSC_ERR_USER, "Require (MPI)AIJ matrix type"); 7768 7769 /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */ 7770 /* A solution consists in providing a new API, MatAIJGetCollapsedAIJ, and each class can provide a fast 7771 implementation */ 7772 if (bs > 1) { 7773 PetscCall(MatGetType(Amat, &jtype)); 7774 PetscCall(MatCreate(comm, &Gmat)); 7775 PetscCall(MatSetType(Gmat, jtype)); 7776 PetscCall(MatSetSizes(Gmat, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE)); 7777 PetscCall(MatSetBlockSizes(Gmat, 1, 1)); 7778 if (isseqaij || ((Mat_MPIAIJ *)Amat->data)->garray) { 7779 PetscInt *d_nnz, *o_nnz; 7780 MatScalar *aa, val, *AA; 7781 PetscInt *aj, *ai, *AJ, nc, nmax = 0; 7782 7783 if (isseqaij) { 7784 a = Amat; 7785 b = NULL; 7786 } else { 7787 Mat_MPIAIJ *d = (Mat_MPIAIJ *)Amat->data; 
7788 a = d->A; 7789 b = d->B; 7790 } 7791 PetscCall(PetscInfo(Amat, "New bs>1 Graph. nloc=%" PetscInt_FMT "\n", nloc)); 7792 PetscCall(PetscMalloc2(nloc, &d_nnz, (isseqaij ? 0 : nloc), &o_nnz)); 7793 for (c = a, kk = 0; c && kk < 2; c = b, kk++) { 7794 PetscInt *nnz = (c == a) ? d_nnz : o_nnz; 7795 const PetscInt *cols1, *cols2; 7796 7797 for (PetscInt brow = 0, nc1, nc2, ok = 1; brow < nloc * bs; brow += bs) { // block rows 7798 PetscCall(MatGetRow(c, brow, &nc2, &cols2, NULL)); 7799 nnz[brow / bs] = nc2 / bs; 7800 if (nc2 % bs) ok = 0; 7801 if (nnz[brow / bs] > nmax) nmax = nnz[brow / bs]; 7802 for (PetscInt ii = 1; ii < bs; ii++) { // check for non-dense blocks 7803 PetscCall(MatGetRow(c, brow + ii, &nc1, &cols1, NULL)); 7804 if (nc1 != nc2) ok = 0; 7805 else { 7806 for (PetscInt jj = 0; jj < nc1 && ok == 1; jj++) { 7807 if (cols1[jj] != cols2[jj]) ok = 0; 7808 if (cols1[jj] % bs != jj % bs) ok = 0; 7809 } 7810 } 7811 PetscCall(MatRestoreRow(c, brow + ii, &nc1, &cols1, NULL)); 7812 } 7813 PetscCall(MatRestoreRow(c, brow, &nc2, &cols2, NULL)); 7814 if (!ok) { 7815 PetscCall(PetscFree2(d_nnz, o_nnz)); 7816 PetscCall(PetscInfo(Amat, "Found sparse blocks - revert to slow method\n")); 7817 goto old_bs; 7818 } 7819 } 7820 } 7821 PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz)); 7822 PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz)); 7823 PetscCall(PetscFree2(d_nnz, o_nnz)); 7824 PetscCall(PetscMalloc2(nmax, &AA, nmax, &AJ)); 7825 // diag 7826 for (PetscInt brow = 0, n, grow; brow < nloc * bs; brow += bs) { // block rows 7827 Mat_SeqAIJ *aseq = (Mat_SeqAIJ *)a->data; 7828 7829 ai = aseq->i; 7830 n = ai[brow + 1] - ai[brow]; 7831 aj = aseq->j + ai[brow]; 7832 for (PetscInt k = 0; k < n; k += bs) { // block columns 7833 AJ[k / bs] = aj[k] / bs + Istart / bs; // diag starts at (Istart,Istart) 7834 val = 0; 7835 if (index_size == 0) { 7836 for (PetscInt ii = 0; ii < bs; ii++) { // rows in block 7837 aa = aseq->a + ai[brow + ii] + k; 7838 for (PetscInt jj = 
0; jj < bs; jj++) { // columns in block 7839 val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm 7840 } 7841 } 7842 } else { // use (index,index) value if provided 7843 for (PetscInt iii = 0; iii < index_size; iii++) { // rows in block 7844 PetscInt ii = index[iii]; 7845 aa = aseq->a + ai[brow + ii] + k; 7846 for (PetscInt jjj = 0; jjj < index_size; jjj++) { // columns in block 7847 PetscInt jj = index[jjj]; 7848 val += PetscAbs(PetscRealPart(aa[jj])); 7849 } 7850 } 7851 } 7852 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%" PetscInt_FMT ") >= nmax (%" PetscInt_FMT ")", k / bs, nmax); 7853 AA[k / bs] = val; 7854 } 7855 grow = Istart / bs + brow / bs; 7856 PetscCall(MatSetValues(Gmat, 1, &grow, n / bs, AJ, AA, ADD_VALUES)); 7857 } 7858 // off-diag 7859 if (ismpiaij) { 7860 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Amat->data; 7861 const PetscScalar *vals; 7862 const PetscInt *cols, *garray = aij->garray; 7863 7864 PetscCheck(garray, PETSC_COMM_SELF, PETSC_ERR_USER, "No garray ?"); 7865 for (PetscInt brow = 0, grow; brow < nloc * bs; brow += bs) { // block rows 7866 PetscCall(MatGetRow(b, brow, &ncols, &cols, NULL)); 7867 for (PetscInt k = 0, cidx = 0; k < ncols; k += bs, cidx++) { 7868 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs >= nmax"); 7869 AA[k / bs] = 0; 7870 AJ[cidx] = garray[cols[k]] / bs; 7871 } 7872 nc = ncols / bs; 7873 PetscCall(MatRestoreRow(b, brow, &ncols, &cols, NULL)); 7874 if (index_size == 0) { 7875 for (PetscInt ii = 0; ii < bs; ii++) { // rows in block 7876 PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals)); 7877 for (PetscInt k = 0; k < ncols; k += bs) { 7878 for (PetscInt jj = 0; jj < bs; jj++) { // cols in block 7879 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%" PetscInt_FMT ") >= nmax (%" PetscInt_FMT ")", k / bs, nmax); 7880 AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj])); 7881 } 7882 } 7883 PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals)); 7884 } 7885 } else { // use (index,index) 
value if provided 7886 for (PetscInt iii = 0; iii < index_size; iii++) { // rows in block 7887 PetscInt ii = index[iii]; 7888 PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals)); 7889 for (PetscInt k = 0; k < ncols; k += bs) { 7890 for (PetscInt jjj = 0; jjj < index_size; jjj++) { // cols in block 7891 PetscInt jj = index[jjj]; 7892 AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj])); 7893 } 7894 } 7895 PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals)); 7896 } 7897 } 7898 grow = Istart / bs + brow / bs; 7899 PetscCall(MatSetValues(Gmat, 1, &grow, nc, AJ, AA, ADD_VALUES)); 7900 } 7901 } 7902 PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY)); 7903 PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY)); 7904 PetscCall(PetscFree2(AA, AJ)); 7905 } else { 7906 const PetscScalar *vals; 7907 const PetscInt *idx; 7908 PetscInt *d_nnz, *o_nnz, *w0, *w1, *w2; 7909 old_bs: 7910 /* 7911 Determine the preallocation needed for the scalar matrix derived from the vector matrix. 7912 */ 7913 PetscCall(PetscInfo(Amat, "OLD bs>1 CreateGraph\n")); 7914 PetscCall(PetscMalloc2(nloc, &d_nnz, (isseqaij ? 
0 : nloc), &o_nnz)); 7915 if (isseqaij) { 7916 PetscInt max_d_nnz; 7917 7918 /* 7919 Determine exact preallocation count for (sequential) scalar matrix 7920 */ 7921 PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat, &max_d_nnz)); 7922 max_d_nnz = PetscMin(nloc, bs * max_d_nnz); 7923 PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2)); 7924 for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) PetscCall(MatCollapseRows(Amat, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL)); 7925 PetscCall(PetscFree3(w0, w1, w2)); 7926 } else if (ismpiaij) { 7927 Mat Daij, Oaij; 7928 const PetscInt *garray; 7929 PetscInt max_d_nnz; 7930 7931 PetscCall(MatMPIAIJGetSeqAIJ(Amat, &Daij, &Oaij, &garray)); 7932 /* 7933 Determine exact preallocation count for diagonal block portion of scalar matrix 7934 */ 7935 PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij, &max_d_nnz)); 7936 max_d_nnz = PetscMin(nloc, bs * max_d_nnz); 7937 PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2)); 7938 for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) PetscCall(MatCollapseRows(Daij, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL)); 7939 PetscCall(PetscFree3(w0, w1, w2)); 7940 /* 7941 Over estimate (usually grossly over), preallocation count for off-diagonal portion of scalar matrix 7942 */ 7943 for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) { 7944 o_nnz[jj] = 0; 7945 for (kk = 0; kk < bs; kk++) { /* rows that get collapsed to a single row */ 7946 PetscCall(MatGetRow(Oaij, Ii + kk, &ncols, NULL, NULL)); 7947 o_nnz[jj] += ncols; 7948 PetscCall(MatRestoreRow(Oaij, Ii + kk, &ncols, NULL, NULL)); 7949 } 7950 if (o_nnz[jj] > (NN / bs - nloc)) o_nnz[jj] = NN / bs - nloc; 7951 } 7952 } else SETERRQ(comm, PETSC_ERR_USER, "Require AIJ matrix type"); 7953 /* get scalar copy (norms) of matrix */ 7954 PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz)); 7955 PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz)); 7956 PetscCall(PetscFree2(d_nnz, o_nnz)); 7957 for (Ii = Istart; Ii < Iend; Ii++) { 
7958 PetscInt dest_row = Ii / bs; 7959 7960 PetscCall(MatGetRow(Amat, Ii, &ncols, &idx, &vals)); 7961 for (jj = 0; jj < ncols; jj++) { 7962 PetscInt dest_col = idx[jj] / bs; 7963 PetscScalar sv = PetscAbs(PetscRealPart(vals[jj])); 7964 7965 PetscCall(MatSetValues(Gmat, 1, &dest_row, 1, &dest_col, &sv, ADD_VALUES)); 7966 } 7967 PetscCall(MatRestoreRow(Amat, Ii, &ncols, &idx, &vals)); 7968 } 7969 PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY)); 7970 PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY)); 7971 } 7972 } else { 7973 if (symmetrize || filter >= 0 || scale) PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat)); 7974 else { 7975 Gmat = Amat; 7976 PetscCall(PetscObjectReference((PetscObject)Gmat)); 7977 } 7978 if (isseqaij) { 7979 a = Gmat; 7980 b = NULL; 7981 } else { 7982 Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data; 7983 a = d->A; 7984 b = d->B; 7985 } 7986 if (filter >= 0 || scale) { 7987 /* take absolute value of each entry */ 7988 for (c = a, kk = 0; c && kk < 2; c = b, kk++) { 7989 MatInfo info; 7990 PetscScalar *avals; 7991 7992 PetscCall(MatGetInfo(c, MAT_LOCAL, &info)); 7993 PetscCall(MatSeqAIJGetArray(c, &avals)); 7994 for (int jj = 0; jj < info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]); 7995 PetscCall(MatSeqAIJRestoreArray(c, &avals)); 7996 } 7997 } 7998 } 7999 if (symmetrize) { 8000 PetscBool isset, issym; 8001 8002 PetscCall(MatIsSymmetricKnown(Amat, &isset, &issym)); 8003 if (!isset || !issym) { 8004 Mat matTrans; 8005 8006 PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans)); 8007 PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric == PETSC_BOOL3_TRUE ? 
SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN)); 8008 PetscCall(MatDestroy(&matTrans)); 8009 } 8010 PetscCall(MatSetOption(Gmat, MAT_SYMMETRIC, PETSC_TRUE)); 8011 } else if (Amat != Gmat) PetscCall(MatPropagateSymmetryOptions(Amat, Gmat)); 8012 if (scale) { 8013 /* scale c for all diagonal values = 1 or -1 */ 8014 Vec diag; 8015 8016 PetscCall(MatCreateVecs(Gmat, &diag, NULL)); 8017 PetscCall(MatGetDiagonal(Gmat, diag)); 8018 PetscCall(VecReciprocal(diag)); 8019 PetscCall(VecSqrtAbs(diag)); 8020 PetscCall(MatDiagonalScale(Gmat, diag, diag)); 8021 PetscCall(VecDestroy(&diag)); 8022 } 8023 PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view")); 8024 if (filter >= 0) { 8025 PetscCall(MatFilter(Gmat, filter, PETSC_TRUE, PETSC_TRUE)); 8026 PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_filter_graph_view")); 8027 } 8028 *a_Gmat = Gmat; 8029 PetscFunctionReturn(PETSC_SUCCESS); 8030 } 8031 8032 /* 8033 Special version for direct calls from Fortran 8034 */ 8035 8036 /* Change these macros so can be used in void function */ 8037 /* Identical to PetscCallVoid, except it assigns to *_ierr */ 8038 #undef PetscCall 8039 #define PetscCall(...) \ 8040 do { \ 8041 PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \ 8042 if (PetscUnlikely(ierr_msv_mpiaij)) { \ 8043 *_ierr = PetscError(PETSC_COMM_SELF, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr_msv_mpiaij, PETSC_ERROR_REPEAT, " "); \ 8044 return; \ 8045 } \ 8046 } while (0) 8047 8048 #undef SETERRQ 8049 #define SETERRQ(comm, ierr, ...) 
\ 8050 do { \ 8051 *_ierr = PetscError(comm, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr, PETSC_ERROR_INITIAL, __VA_ARGS__); \ 8052 return; \ 8053 } while (0) 8054 8055 #if defined(PETSC_HAVE_FORTRAN_CAPS) 8056 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 8057 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 8058 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 8059 #else 8060 #endif 8061 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat, PetscInt *mm, const PetscInt im[], PetscInt *mn, const PetscInt in[], const PetscScalar v[], InsertMode *maddv, PetscErrorCode *_ierr) 8062 { 8063 Mat mat = *mmat; 8064 PetscInt m = *mm, n = *mn; 8065 InsertMode addv = *maddv; 8066 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 8067 PetscScalar value; 8068 8069 MatCheckPreallocated(mat, 1); 8070 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 8071 else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values"); 8072 { 8073 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend; 8074 PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col; 8075 PetscBool roworiented = aij->roworiented; 8076 8077 /* Some Variables required in the macro */ 8078 Mat A = aij->A; 8079 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 8080 PetscInt *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j; 8081 MatScalar *aa; 8082 PetscBool ignorezeroentries = ((a->ignorezeroentries && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 8083 Mat B = aij->B; 8084 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 8085 PetscInt *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n; 8086 MatScalar *ba; 8087 /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we 8088 * cannot use "#if defined" inside a macro. 
*/ 8089 PETSC_UNUSED PetscBool inserted = PETSC_FALSE; 8090 8091 PetscInt *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2; 8092 PetscInt nonew = a->nonew; 8093 MatScalar *ap1, *ap2; 8094 8095 PetscFunctionBegin; 8096 PetscCall(MatSeqAIJGetArray(A, &aa)); 8097 PetscCall(MatSeqAIJGetArray(B, &ba)); 8098 for (i = 0; i < m; i++) { 8099 if (im[i] < 0) continue; 8100 PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1); 8101 if (im[i] >= rstart && im[i] < rend) { 8102 row = im[i] - rstart; 8103 lastcol1 = -1; 8104 rp1 = aj + ai[row]; 8105 ap1 = aa + ai[row]; 8106 rmax1 = aimax[row]; 8107 nrow1 = ailen[row]; 8108 low1 = 0; 8109 high1 = nrow1; 8110 lastcol2 = -1; 8111 rp2 = bj + bi[row]; 8112 ap2 = ba + bi[row]; 8113 rmax2 = bimax[row]; 8114 nrow2 = bilen[row]; 8115 low2 = 0; 8116 high2 = nrow2; 8117 8118 for (j = 0; j < n; j++) { 8119 if (roworiented) value = v[i * n + j]; 8120 else value = v[i + j * m]; 8121 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 8122 if (in[j] >= cstart && in[j] < cend) { 8123 col = in[j] - cstart; 8124 MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]); 8125 } else if (in[j] < 0) continue; 8126 else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) { 8127 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1); 8128 } else { 8129 if (mat->was_assembled) { 8130 if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 8131 #if defined(PETSC_USE_CTABLE) 8132 PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); 8133 col--; 8134 #else 8135 col = aij->colmap[in[j]] - 1; 8136 #endif 8137 if (col < 0 && !((Mat_SeqAIJ *)aij->A->data)->nonew) { 8138 PetscCall(MatDisAssemble_MPIAIJ(mat, PETSC_FALSE)); 8139 col = in[j]; 8140 /* 
Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 8141 B = aij->B; 8142 b = (Mat_SeqAIJ *)B->data; 8143 bimax = b->imax; 8144 bi = b->i; 8145 bilen = b->ilen; 8146 bj = b->j; 8147 rp2 = bj + bi[row]; 8148 ap2 = ba + bi[row]; 8149 rmax2 = bimax[row]; 8150 nrow2 = bilen[row]; 8151 low2 = 0; 8152 high2 = nrow2; 8153 bm = aij->B->rmap->n; 8154 ba = b->a; 8155 inserted = PETSC_FALSE; 8156 } 8157 } else col = in[j]; 8158 MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]); 8159 } 8160 } 8161 } else if (!aij->donotstash) { 8162 if (roworiented) { 8163 PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 8164 } else { 8165 PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 8166 } 8167 } 8168 } 8169 PetscCall(MatSeqAIJRestoreArray(A, &aa)); 8170 PetscCall(MatSeqAIJRestoreArray(B, &ba)); 8171 } 8172 PetscFunctionReturnVoid(); 8173 } 8174 8175 /* Undefining these here since they were redefined from their original definition above! No 8176 * other PETSc functions should be defined past this point, as it is impossible to recover the 8177 * original definitions */ 8178 #undef PetscCall 8179 #undef SETERRQ 8180