#include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
#include <petsc/private/vecimpl.h>
#include <petsc/private/sfimpl.h>
#include <petsc/private/isimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>
#include <petsc/private/hashmapi.h>

/* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and MatAssemblyEnd_MPI_Hash() */
#define TYPE AIJ
#define TYPE_AIJ
#include "../src/mat/impls/aij/mpi/mpihashmat.h"
#undef TYPE
#undef TYPE_AIJ

/*
  Frees all internal storage of an MPIAIJ matrix: the stash, the diagonal (A) and
  off-diagonal (B) sequential blocks, the global-to-local column map, garray, the
  communication objects (lvec, Mvctx) and the MatGetRow() work arrays.  mat->data
  itself is NOT freed, so the matrix can either be rebuilt (MatResetHash_MPIAIJ())
  or fully destroyed (MatDestroy_MPIAIJ()).
*/
static PetscErrorCode MatReset_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
  PetscCall(MatStashDestroy_Private(&mat->stash));
  PetscCall(VecDestroy(&aij->diag));
  PetscCall(MatDestroy(&aij->A));
  PetscCall(MatDestroy(&aij->B));
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&aij->colmap));
#else
  PetscCall(PetscFree(aij->colmap));
#endif
  PetscCall(PetscFree(aij->garray));
  PetscCall(VecDestroy(&aij->lvec));
  PetscCall(VecScatterDestroy(&aij->Mvctx));
  PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));
  PetscCall(PetscFree(aij->ld));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Discards the matrix's current storage and returns it to the hash-table (unassembled)
  insertion mode provided by MatSetUp_MPI_Hash().
*/
static PetscErrorCode MatResetHash_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  /* Save the nonzero states of the component matrices because those are what are used to determine
     the nonzero state of mat */
  PetscObjectState Astate = aij->A->nonzerostate, Bstate = aij->B->nonzerostate;

  PetscFunctionBegin;
  PetscCall(MatReset_MPIAIJ(mat));
  PetscCall(MatSetUp_MPI_Hash(mat));
  /* bump the saved states so the reset registers as a nonzero-structure change */
  aij->A->nonzerostate = ++Astate, aij->B->nonzerostate = ++Bstate;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Destructor for MATMPIAIJ: releases all internal data, then clears every composed
  method/function so no dangling pointers into this implementation remain on the object.
*/
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  PetscFunctionBegin;
  PetscCall(MatReset_MPIAIJ(mat));

  PetscCall(PetscFree(mat->data));

  /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
  PetscCall(PetscObjectCompose((PetscObject)mat, "MatMergeSeqsToMPI", NULL));

  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatIsTranspose_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetHash_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocationCSR_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpibaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisbaij_C", NULL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcusparse_C", NULL));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijhipsparse_C", NULL));
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijkokkos_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_elemental_C", NULL));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_scalapack_C", NULL));
#endif
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_hypre_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_is_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetUseScalableIncreaseOverlap_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijperm_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijsell_C", NULL));
#if defined(PETSC_HAVE_MKL_SPARSE)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijmkl_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcrl_C", NULL));
  /* NOTE(review): "MatConvert_mpiaij_is_C" is cleared a second time here; harmless but redundant */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisell_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetPreallocationCOO_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetValuesCOO_C", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Implements MatGetRowIJ() by building a merged sequential copy of the parallel matrix
  with MatMPIAIJGetLocalMat() and returning that copy's IJ structure.  The copy is
  composed on A (which takes a reference) so MatRestoreRowIJ_MPIAIJ() can find it;
  the local reference is dropped immediately.
*/
static PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
{
  Mat B;

  PetscFunctionBegin;
  PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &B));
  PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject)B));
  PetscCall(MatGetRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
  PetscCall(MatDestroy(&B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Companion to MatGetRowIJ_MPIAIJ(): retrieves the composed sequential copy,
   restores its IJ arrays, then drops the composed reference. */
static PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
{
  Mat B;

  PetscFunctionBegin;
  PetscCall(PetscObjectQuery((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject *)&B));
  PetscCall(MatRestoreRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
  PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to `MATSEQAIJ` when constructed with a single process communicator,
   and `MATMPIAIJ` otherwise. As a result, for single process communicators,
   `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Key:
.  -mat_type aij - sets the matrix type to `MATAIJ` during a call to `MatSetFromOptions()`

   Level: beginner

   Developer Note:
   Subclasses include `MATAIJCUSPARSE`, `MATAIJPERM`, `MATAIJSELL`, `MATAIJMKL`, `MATAIJCRL`, `MATAIJKOKKOS`, and also automatically switches over to use inodes when
   enough exist.

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateAIJ()`, `MatCreateSeqAIJ()`, `MATSEQAIJ`, `MATMPIAIJ`
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to `MATSEQAIJCRL` when constructed with a single process communicator,
   and `MATMPIAIJCRL` otherwise.
   As a result, for single process communicators,
   `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Key:
.  -mat_type aijcrl - sets the matrix type to `MATMPIAIJCRL` during a call to `MatSetFromOptions()`

   Level: beginner

.seealso: [](ch_matrices), `Mat`, `MatCreateMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL`
M*/

/* Binds (or unbinds) the parallel matrix and all of its parts to the CPU. */
static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A, PetscBool flg)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) || defined(PETSC_HAVE_VIENNACL)
  A->boundtocpu = flg;
#endif
  if (a->A) PetscCall(MatBindToCPU(a->A, flg));
  if (a->B) PetscCall(MatBindToCPU(a->B, flg));

  /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
   * This maybe seems a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
   * to differ from the parent matrix.
   */
  if (a->lvec) PetscCall(VecBindToCPU(a->lvec, flg));
  if (a->diag) PetscCall(VecBindToCPU(a->diag, flg));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Propagates block sizes to the diagonal block; the off-diagonal block always keeps
   column block size 1 (its columns are the scattered ghost columns). */
static PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data;

  PetscFunctionBegin;
  if (mat->A) {
    PetscCall(MatSetBlockSizes(mat->A, rbs, cbs));
    PetscCall(MatSetBlockSizes(mat->B, rbs, 1));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Returns an IS of the locally owned rows that contain at least one stored nonzero value
  (in either the diagonal or off-diagonal block).  If no process has an entirely-zero row,
  *keptrows is left NULL.  Collective: the MPIU_Allreduce() must be reached by all ranks.
*/
static PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M, IS *keptrows)
{
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ *)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ *)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ *)mat->B->data;
  const PetscInt  *ia, *ib;
  const MatScalar *aa, *bb, *aav, *bav;
  PetscInt         na, nb, i, j, *rows, cnt = 0, n0rows;
  PetscInt         m = M->rmap->n, rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = NULL;

  ia = a->i;
  ib = b->i;
  PetscCall(MatSeqAIJGetArrayRead(mat->A, &aav));
  PetscCall(MatSeqAIJGetArrayRead(mat->B, &bav));
  /* first pass: cnt = number of local rows whose stored values are all zero (or empty) */
  for (i = 0; i < m; i++) {
    na = ia[i + 1] - ia[i];
    nb = ib[i + 1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = aav + ia[i];
    for (j = 0; j < na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = PetscSafePointerPlusOffset(bav, ib[i]);
    for (j = 0; j < nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
  ok1:;
  }
  PetscCallMPI(MPIU_Allreduce(&cnt, &n0rows, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)M)));
  if (!n0rows) {
    PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
    PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  /* second pass: collect the global indices of the rows that are kept */
  PetscCall(PetscMalloc1(M->rmap->n - cnt, &rows));
  cnt = 0;
  for (i = 0; i < m; i++) {
    na = ia[i + 1] - ia[i];
    nb = ib[i + 1] - ib[i];
    if (!na && !nb) continue;
    aa = aav + ia[i];
    for (j = 0; j < na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = PetscSafePointerPlusOffset(bav, ib[i]);
    for (j = 0; j < nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
  ok2:;
  }
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), cnt, rows, PETSC_OWN_POINTER, keptrows));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Sets the diagonal of Y from D.  Takes the fast path through the diagonal block
   only when Y is assembled and row/column layouts are congruent. */
static PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y, Vec D, InsertMode is)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Y->data;
  PetscBool   cong;

  PetscFunctionBegin;
  PetscCall(MatHasCongruentLayouts(Y, &cong));
  if (Y->assembled && cong) {
    PetscCall(MatDiagonalSet(aij->A, D, is));
  } else {
    PetscCall(MatDiagonalSet_Default(Y, D, is));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Returns an IS (global numbering) of the locally owned rows whose diagonal entry is
   missing or zero; the search is delegated to the sequential diagonal block. */
static PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M, IS *zrows)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)M->data;
  PetscInt    i, rstart, nrows, *rows;

  PetscFunctionBegin;
  *zrows = NULL;
  PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A, &nrows, &rows));
  PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
  for (i = 0; i < nrows; i++) rows[i] += rstart; /* convert local row numbers to global */
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), nrows, rows, PETSC_OWN_POINTER, zrows));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Computes a per-column reduction (norms, sums, or means of real/imaginary parts) over the
  whole parallel matrix.  Each rank accumulates its local entries into a length-n work array
  indexed by global column (garray maps off-diagonal local columns to global), then a single
  MPIU_Allreduce combines the ranks.  Collective.
*/
static PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A, PetscInt type, PetscReal *reductions)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)A->data;
  PetscInt           i, m, n, *garray = aij->garray;
  Mat_SeqAIJ        *a_aij = (Mat_SeqAIJ *)aij->A->data;
  Mat_SeqAIJ        *b_aij = (Mat_SeqAIJ *)aij->B->data;
  PetscReal         *work;
  const PetscScalar *dummy;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A, &m, &n));
  PetscCall(PetscCalloc1(n, &work));
  /* NOTE(review): these get/restore pairs appear to exist only to force an up-to-date
     host copy of the values before a_aij->a/b_aij->a are read directly — confirm */
  PetscCall(MatSeqAIJGetArrayRead(aij->A, &dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &dummy));
  PetscCall(MatSeqAIJGetArrayRead(aij->B, &dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &dummy));
  if (type == NORM_2) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i] * a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i] * b_aij->a[i]);
  } else if (type == NORM_1) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
  } else if (type == NORM_INFINITY) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]), work[garray[b_aij->j[i]]]);
  } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]);
  } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]);
  } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
  if (type == NORM_INFINITY) {
    PetscCallMPI(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
  } else {
    PetscCallMPI(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscCall(PetscFree(work));
  if (type == NORM_2) {
    for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < n; i++) reductions[i] /= m; /* mean over the global number of rows */
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Returns an IS of the locally owned rows that have an entry outside the block diagonal:
  the union of the off-block-diagonal rows of the diagonal block A and the nonzero rows
  of the off-diagonal block B, sorted with duplicates removed, shifted to global numbering.
*/
static PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A, IS *is)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
  IS              sis, gis;
  const PetscInt *isis, *igis;
  PetscInt        n, *iis, nsis, ngis, rstart, i;

  PetscFunctionBegin;
  PetscCall(MatFindOffBlockDiagonalEntries(a->A, &sis));
  PetscCall(MatFindNonzeroRows(a->B, &gis));
  PetscCall(ISGetSize(gis, &ngis));
  PetscCall(ISGetSize(sis, &nsis));
  PetscCall(ISGetIndices(sis, &isis));
  PetscCall(ISGetIndices(gis, &igis));

  PetscCall(PetscMalloc1(ngis + nsis, &iis));
  PetscCall(PetscArraycpy(iis, igis, ngis));
  PetscCall(PetscArraycpy(iis + ngis, isis, nsis));
  n = ngis + nsis;
  PetscCall(PetscSortRemoveDupsInt(&n, iis));
  PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
  for (i = 0; i < n; i++) iis[i] += rstart;
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A), n, iis, PETSC_OWN_POINTER, is));

  PetscCall(ISRestoreIndices(sis, &isis));
  PetscCall(ISRestoreIndices(gis, &igis));
  PetscCall(ISDestroy(&sis));
  PetscCall(ISDestroy(&gis));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it it is not scalable (each processor
  has an order N integer array) but is fast to access.
*/
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt    n = aij->B->cmap->n, i;

  PetscFunctionBegin;
  PetscCheck(!n || aij->garray, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  /* keys and values are stored +1 so that 0 can act as "not present" */
  PetscCall(PetscHMapICreateWithSize(n, &aij->colmap));
  for (i = 0; i < n; i++) PetscCall(PetscHMapISet(aij->colmap, aij->garray[i] + 1, i + 1));
#else
  /* dense map over all global columns; entry 0 means "not present", hence the +1 */
  PetscCall(PetscCalloc1(mat->cmap->N + 1, &aij->colmap));
  for (i = 0; i < n; i++) aij->colmap[aij->garray[i]] = i + 1;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Inserts/adds a single value into the diagonal block A.  Relies on caller-scope variables
  (rp1/ap1/nrow1/low1/high1/lastcol1/rmax1, aa/ai/aj/aimax/ailen, nonew, ignorezeroentries,
  am, a, A, N, t, _i) set up by MatSetValues_MPIAIJ().  Performs a bounded binary search then
  a linear scan; reallocates the row with MatSeqXAIJReallocateAIJ() when a new nonzero must
  be inserted.  Note: on the diagonal block an explicit zero on the matrix diagonal
  (row == col) is always stored even when ignorezeroentries is set.
*/
#define MatSetValues_SeqAIJ_A_Private(row, col, value, addv, orow, ocol) \
  do { \
    if (col <= lastcol1) low1 = 0; \
    else high1 = nrow1; \
    lastcol1 = col; \
    while (high1 - low1 > 5) { \
      t = (low1 + high1) / 2; \
      if (rp1[t] > col) high1 = t; \
      else low1 = t; \
    } \
    for (_i = low1; _i < high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap1[_i] += value; \
          /* Not sure LogFlops will slow down the code or not */ \
          (void)PetscLogFlops(1.0); \
        } else ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries && row != col) { \
      low1  = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    if (nonew == 1) { \
      low1  = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A, am, 1, nrow1, row, col, rmax1, aa, ai, aj, rp1, ap1, aimax, nonew, MatScalar); \
    N = nrow1++ - 1; \
    a->nz++; \
    high1++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp1 + _i + 1, rp1 + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap1 + _i + 1, ap1 + _i, N - _i + 1)); \
    rp1[_i] = col; \
    ap1[_i] = value; \
  a_noinsert:; \
    ailen[row] = nrow1; \
  } while (0)

/*
  Same as MatSetValues_SeqAIJ_A_Private() but for the off-diagonal block B, using the
  caller-scope "2"-suffixed search state (rp2/ap2/nrow2/...) and b*/bimax/bilen arrays.
*/
#define MatSetValues_SeqAIJ_B_Private(row, col, value, addv, orow, ocol) \
  do { \
    if (col <= lastcol2) low2 = 0; \
    else high2 = nrow2; \
    lastcol2 = col; \
    while (high2 - low2 > 5) { \
      t = (low2 + high2) / 2; \
      if (rp2[t] > col) high2 = t; \
      else low2 = t; \
    } \
    for (_i = low2; _i < high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap2[_i] += value; \
          (void)PetscLogFlops(1.0); \
        } else ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) { \
      low2  = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    if (nonew == 1) { \
      low2  = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B, bm, 1, nrow2, row, col, rmax2, ba, bi, bj, rp2, ap2, bimax, nonew, MatScalar); \
    N = nrow2++ - 1; \
    b->nz++; \
    high2++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp2 + _i + 1, rp2 + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap2 + _i + 1, ap2 + _i, N - _i + 1)); \
    rp2[_i] = col; \
    ap2[_i] = value; \
  b_noinsert:; \
    bilen[row] = nrow2; \
  } while (0)

/*
  Overwrites one full (already allocated) local row of the matrix with the values v, which
  are laid out in global column order: [off-diag columns left of the diagonal block |
  diagonal block | off-diag columns right of the diagonal block].
*/
static PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A, PetscInt row, const PetscScalar v[])
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ  *a = (Mat_SeqAIJ *)mat->A->data, *b = (Mat_SeqAIJ *)mat->B->data;
  PetscInt     l, *garray = mat->garray, diag;
  PetscScalar *aa, *ba;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size
     of row to the left of the diagonal part */
  PetscCall(MatGetOwnershipRange(A, &diag, NULL));
  row = row - diag; /* convert to local row number */
  /* l = number of off-diagonal entries whose global column precedes the diagonal block */
  for (l = 0; l < b->i[row + 1] - b->i[row]; l++) {
    if (garray[b->j[b->i[row] + l]] > diag) break;
  }
  if (l) {
    PetscCall(MatSeqAIJGetArray(mat->B, &ba));
    PetscCall(PetscArraycpy(ba + b->i[row], v, l));
    PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
  }

  /* diagonal part */
  if (a->i[row + 1] - a->i[row]) {
    PetscCall(MatSeqAIJGetArray(mat->A, &aa));
    PetscCall(PetscArraycpy(aa + a->i[row], v + l, a->i[row + 1] - a->i[row]));
    PetscCall(MatSeqAIJRestoreArray(mat->A, &aa));
  }

  /* right of diagonal part */
  if (b->i[row + 1] - b->i[row] - l) {
    PetscCall(MatSeqAIJGetArray(mat->B, &ba));
    PetscCall(PetscArraycpy(ba + b->i[row] + l, v + l + a->i[row + 1] - a->i[row], b->i[row + 1] - b->i[row] - l));
    PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatSetValues() for MATMPIAIJ.  Locally owned rows are inserted directly into the
  diagonal (A) or off-diagonal (B) sequential block via the MatSetValues_SeqAIJ_*_Private()
  macros; rows owned by other processes are stashed for communication during assembly.
  If a previously assembled matrix receives a column new to B, B is expanded back to
  global column numbering with MatDisAssemble_MPIAIJ() and the macro state is rebuilt.
*/
PetscErrorCode MatSetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscScalar value = 0.0;
  PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
  PetscBool   roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat         A = aij->A;
  Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
  PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
  PetscBool   ignorezeroentries = a->ignorezeroentries;
  Mat         B = aij->B;
  Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data;
  PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
  MatScalar  *aa, *ba;
  PetscInt   *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
  PetscInt    nonew;
  MatScalar  *ap1, *ap2;

  PetscFunctionBegin;
  PetscCall(MatSeqAIJGetArray(A, &aa));
  PetscCall(MatSeqAIJGetArray(B, &ba));
  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue; /* negative rows are silently ignored */
    PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    if (im[i] >= rstart && im[i] < rend) { /* locally owned row: direct insertion */
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = PetscSafePointerPlusOffset(aj, ai[row]);
      ap1      = PetscSafePointerPlusOffset(aa, ai[row]);
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = PetscSafePointerPlusOffset(bj, bi[row]);
      ap2      = PetscSafePointerPlusOffset(ba, bi[row]);
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j = 0; j < n; j++) {
        if (v) value = roworiented ? v[i * n + j] : v[i + j * m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
        if (in[j] >= cstart && in[j] < cend) { /* column lands in the diagonal block */
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
        } else if (in[j] < 0) {
          continue;
        } else {
          PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          if (mat->was_assembled) {
            if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); /* map global col ids to local ones */
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ *)aij->B->data)->nonew) { /* col < 0 means in[j] is a new col for B */
              PetscCall(MatDisAssemble_MPIAIJ(mat, PETSC_FALSE));  /* Change aij->B from reduced/local format to expanded/global format */
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ *)B->data;
              bimax = b->imax;
              bi    = b->i;
              bilen = b->ilen;
              bj    = b->j;
              ba    = b->a;
              rp2   = PetscSafePointerPlusOffset(bj, bi[row]);
              ap2   = PetscSafePointerPlusOffset(ba, bi[row]);
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
              if (1 == ((Mat_SeqAIJ *)aij->B->data)->nonew) {
                PetscCall(PetscInfo(mat, "Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n", (double)PetscRealPart(value), im[i], in[j]));
              } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
            }
          } else col = in[j]; /* not yet assembled: B still uses global column ids */
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
        }
      }
    } else { /* off-process row: stash for MatAssemblyBegin/End communication */
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i * n), (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i), m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
  }
  PetscCall(MatSeqAIJRestoreArray(A, &aa)); /* aa, bb might have been free'd due to reallocation above. But we don't access them here */
  PetscCall(MatSeqAIJRestoreArray(B, &ba));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
  The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
  No off-processor parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  Mat         A   = aij->A; /* diagonal part of the matrix */
  Mat         B   = aij->B; /* off-diagonal part of the matrix */
  Mat_SeqAIJ *a   = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJ *b   = (Mat_SeqAIJ *)B->data;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, col;
  PetscInt   *ailen = a->ilen, *aj = a->j;
  PetscInt   *bilen = b->ilen, *bj = b->j;
  PetscInt    am = aij->A->rmap->n, j;
  PetscInt    diag_so_far = 0, dnz;
  PetscInt    offd_so_far = 0, onz;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j = 0; j < am; j++) {
    dnz = onz = 0;
    /* Iterate over all non-zero columns of the current row */
    for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[diag_so_far++] = mat_j[col] - cstart;
        dnz++;
      } else { /* off-diagonal entries */
        bj[offd_so_far++] = mat_j[col];
        onz++;
      }
    }
    ailen[j] = dnz;
    bilen[j] = onz;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
  The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
681 No off-processor parts off the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ. 682 Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart; 683 would not be true and the more complex MatSetValues_MPIAIJ has to be used. 684 */ 685 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[], const PetscScalar mat_a[]) 686 { 687 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 688 Mat A = aij->A; /* diagonal part of the matrix */ 689 Mat B = aij->B; /* off-diagonal part of the matrix */ 690 Mat_SeqAIJ *aijd = (Mat_SeqAIJ *)aij->A->data, *aijo = (Mat_SeqAIJ *)aij->B->data; 691 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 692 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 693 PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend; 694 PetscInt *ailen = a->ilen, *aj = a->j; 695 PetscInt *bilen = b->ilen, *bj = b->j; 696 PetscInt am = aij->A->rmap->n, j; 697 PetscInt *full_diag_i = aijd->i, *full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point. 
*/ 698 PetscInt col, dnz_row, onz_row, rowstart_diag, rowstart_offd; 699 PetscScalar *aa = a->a, *ba = b->a; 700 701 PetscFunctionBegin; 702 /* Iterate over all rows of the matrix */ 703 for (j = 0; j < am; j++) { 704 dnz_row = onz_row = 0; 705 rowstart_offd = full_offd_i[j]; 706 rowstart_diag = full_diag_i[j]; 707 /* Iterate over all non-zero columns of the current row */ 708 for (col = mat_i[j]; col < mat_i[j + 1]; col++) { 709 /* If column is in the diagonal */ 710 if (mat_j[col] >= cstart && mat_j[col] < cend) { 711 aj[rowstart_diag + dnz_row] = mat_j[col] - cstart; 712 aa[rowstart_diag + dnz_row] = mat_a[col]; 713 dnz_row++; 714 } else { /* off-diagonal entries */ 715 bj[rowstart_offd + onz_row] = mat_j[col]; 716 ba[rowstart_offd + onz_row] = mat_a[col]; 717 onz_row++; 718 } 719 } 720 ailen[j] = dnz_row; 721 bilen[j] = onz_row; 722 } 723 PetscFunctionReturn(PETSC_SUCCESS); 724 } 725 726 static PetscErrorCode MatGetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[]) 727 { 728 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 729 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend; 730 PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col; 731 732 PetscFunctionBegin; 733 for (i = 0; i < m; i++) { 734 if (idxm[i] < 0) continue; /* negative row */ 735 PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1); 736 PetscCheck(idxm[i] >= rstart && idxm[i] < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported, row requested %" PetscInt_FMT " range [%" PetscInt_FMT " %" PetscInt_FMT ")", idxm[i], rstart, rend); 737 row = idxm[i] - rstart; 738 for (j = 0; j < n; j++) { 739 if (idxn[j] < 0) continue; /* negative column */ 740 PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, 
idxn[j], mat->cmap->N - 1); 741 if (idxn[j] >= cstart && idxn[j] < cend) { 742 col = idxn[j] - cstart; 743 PetscCall(MatGetValues(aij->A, 1, &row, 1, &col, v + i * n + j)); 744 } else { 745 if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 746 #if defined(PETSC_USE_CTABLE) 747 PetscCall(PetscHMapIGetWithDefault(aij->colmap, idxn[j] + 1, 0, &col)); 748 col--; 749 #else 750 col = aij->colmap[idxn[j]] - 1; 751 #endif 752 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v + i * n + j) = 0.0; 753 else PetscCall(MatGetValues(aij->B, 1, &row, 1, &col, v + i * n + j)); 754 } 755 } 756 } 757 PetscFunctionReturn(PETSC_SUCCESS); 758 } 759 760 static PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat, MatAssemblyType mode) 761 { 762 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 763 PetscInt nstash, reallocs; 764 765 PetscFunctionBegin; 766 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS); 767 768 PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range)); 769 PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs)); 770 PetscCall(PetscInfo(aij->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs)); 771 PetscFunctionReturn(PETSC_SUCCESS); 772 } 773 774 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat, MatAssemblyType mode) 775 { 776 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 777 PetscMPIInt n; 778 PetscInt i, j, rstart, ncols, flg; 779 PetscInt *row, *col; 780 PetscBool other_disassembled; 781 PetscScalar *val; 782 783 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 784 785 PetscFunctionBegin; 786 if (!aij->donotstash && !mat->nooffprocentries) { 787 while (1) { 788 PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg)); 789 if (!flg) break; 790 791 for (i = 0; i < n;) { 792 /* Now identify the consecutive vals belonging to the same row */ 793 for (j = i, rstart = row[j]; j < n; j++) { 794 if (row[j] != rstart) 
break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
  /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
  if (mat->boundtocpu) {
    PetscCall(MatBindToCPU(aij->A, PETSC_TRUE));
    PetscCall(MatBindToCPU(aij->B, PETSC_TRUE));
  }
#endif
  PetscCall(MatAssemblyBegin(aij->A, mode));
  PetscCall(MatAssemblyEnd(aij->A, mode));

  /* determine if any processor has disassembled, if so we must
     also disassemble ourself, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ *)aij->B->data)->nonew) {
    /* MPI_LAND of was_assembled: false on any rank means some rank disassembled */
    PetscCallMPI(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globally it does not */
      PetscCall(MatDisAssemble_MPIAIJ(mat, PETSC_FALSE));
    }
  }
  /* First final assembly: build the scatter (Mvctx/lvec) used for matrix-vector products */
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIAIJ(mat));
  PetscCall(MatSetOption(aij->B, MAT_USE_INODES, PETSC_FALSE));
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscCall(MatAssemblyBegin(aij->B, mode));
  PetscCall(MatAssemblyEnd(aij->B, mode));

  /* Row-access work buffers (allocated by MatGetRow) are invalid after assembly */
  PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));

  aij->rowvalues = NULL;

  /* Cached diagonal (if any) is stale once values may have changed */
  PetscCall(VecDestroy(&aij->diag));

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ *)aij->A->data)->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
  }
#if defined(PETSC_HAVE_DEVICE)
  mat->offloadmask = PETSC_OFFLOAD_BOTH;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatZeroEntries_MPIAIJ - Zeroes all stored values by zeroing both sequential blocks; the nonzero pattern is kept. */
static PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatZeroEntries(l->A));
  PetscCall(MatZeroEntries(l->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatZeroRows_MPIAIJ - Zeroes the given global rows, optionally placing `diag` on the
  diagonal and fixing the right-hand side b so that x keeps its value in those rows
  (b[row] = diag * x[row]). Rows are given globally; only locally owned ones are acted on.
*/
static PetscErrorCode MatZeroRows_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data;
  PetscInt   *lrows;
  PetscInt    r, len;
  PetscBool   cong;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows));
  PetscCall(MatHasCongruentLayouts(A, &cong));
  /* fix right-hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArray(b, &bb));
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArray(b, &bb));
  }

  if (diag != 0.0 && cong) {
    /* Congruent layouts: the diagonal entry lives in the local diagonal block A */
    PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  } else if (diag != 0.0) { /* non-square or non congruent layouts -> if
keepnonzeropattern is false, we allow for new insertion */
    Mat_SeqAIJ *aijA = (Mat_SeqAIJ *)mat->A->data;
    Mat_SeqAIJ *aijB = (Mat_SeqAIJ *)mat->B->data;
    PetscInt    nnwA, nnwB; /* saved `nonew` flags, restored after the diagonal insertions below */
    PetscBool   nnzA, nnzB; /* `keepnonzeropattern` flags of the two blocks */

    nnwA = aijA->nonew;
    nnwB = aijB->nonew;
    nnzA = aijA->keepnonzeropattern;
    nnzB = aijB->keepnonzeropattern;
    if (!nnzA) {
      PetscCall(PetscInfo(mat->A, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n"));
      aijA->nonew = 0; /* temporarily permit new nonzero locations */
    }
    if (!nnzB) {
      PetscCall(PetscInfo(mat->B, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n"));
      aijB->nonew = 0;
    }
    /* Must zero here before the next loop */
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
    /* Insert diag at the (global) diagonal position of each zeroed row that has one */
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      if (row >= A->cmap->N) continue; /* row beyond the column range has no diagonal entry */
      PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
    }
    aijA->nonew = nnwA;
    aijB->nonew = nnwB;
  } else {
    /* diag == 0.0: just zero the rows in both blocks */
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  }
  PetscCall(PetscFree(lrows));
  /* Collective: flushes the insertions made above (if any) on all ranks */
  PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ *)mat->A->data)->keepnonzeropattern || !((Mat_SeqAIJ *)mat->A->data)->nonew) {
    PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
    PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatZeroRowsColumns_MPIAIJ - Zeroes the given global rows AND the matching columns,
  optionally placing `diag` on the diagonal and updating b for the eliminated columns.
*/
static PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ *)A->data;
  PetscInt           n = A->rmap->n; /* NOTE: reused later as a per-row entry count in the zeroing loops */
  PetscInt           i, j, r, m, len = 0;
  PetscInt          *lrows, *owners = A->rmap->range;
  PetscMPIInt        p = 0;
  PetscSFNode       *rrows;
  PetscSF            sf;
  const PetscScalar *xx;
  PetscScalar       *bb, *mask, *aij_a;
  Vec                xmask, lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ *)l->B->data;
  const PetscInt    *aj, *ii, *ridx;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  PetscCall(PetscMalloc1(n, &lrows));
  for (r = 0; r < n; ++r) lrows[r] = -1; /* -1 marks "row not requested" */
  PetscCall(PetscMalloc1(N, &rrows));
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N);
    if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */
      PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p));
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
  /* Collect flags for rows to be zeroed */
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFDestroy(&sf));
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r)
    if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b));
  /* handle off-diagonal part of matrix */
  /* Build a mask over the ghost columns: 1 where the owning rank zeroed that row */
  PetscCall(MatCreateVecs(A, &xmask, NULL));
  PetscCall(VecDuplicate(l->lvec, &lmask));
  PetscCall(VecGetArray(xmask, &bb));
  for (i = 0; i < len; i++) bb[lrows[i]] = 1;
  PetscCall(VecRestoreArray(xmask, &bb));
  PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecDestroy(&xmask));
  if (x && b) { /* this code is buggy when the row and column layout don't match */
    PetscBool cong;

    PetscCall(MatHasCongruentLayouts(A, &cong));
    PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
    /* Gather ghost values of x so eliminated columns can be folded into b below */
    PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecGetArrayRead(l->lvec, &xx));
    PetscCall(VecGetArray(b, &bb));
  }
  PetscCall(VecGetArray(lmask, &mask));
  /* remove zeroed rows of off-diagonal matrix */
  PetscCall(MatSeqAIJGetArray(l->B, &aij_a));
  ii = aij->i;
  for (i = 0; i < len; i++) PetscCall(PetscArrayzero(PetscSafePointerPlusOffset(aij_a, ii[lrows[i]]), ii[lrows[i] + 1] - ii[lrows[i]]));
  /* loop over all elements of off process part of matrix zeroing removed columns*/
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex; /* maps compressed row slots back to actual local rows */
    for (i = 0; i < m; i++) {
      n  = ii[i + 1] - ii[i]; /* reuse of n: number of entries in this row */
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];

      for (j = 0; j < n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          /* column was zeroed by its owner: move its contribution to the rhs, then zero it */
          if (b) bb[*ridx] -= *aa * xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i = 0; i < m; i++) {
      n  = ii[i + 1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];
      for (j = 0; j < n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa * xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x && b) {
    PetscCall(VecRestoreArray(b, &bb));
PetscCall(VecRestoreArrayRead(l->lvec, &xx));
  }
  PetscCall(MatSeqAIJRestoreArray(l->B, &aij_a));
  PetscCall(VecRestoreArray(lmask, &mask));
  PetscCall(VecDestroy(&lmask));
  PetscCall(PetscFree(lrows));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ *)l->A->data)->nonew) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatMult_MPIAIJ - y = A*x. Overlaps communication with computation: the scatter of
  ghost values into a->lvec is started, the local diagonal-block product is computed,
  the scatter is completed, and the off-diagonal-block contribution is added.
*/
static PetscErrorCode MatMult_MPIAIJ(Mat A, Vec xx, Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  PetscInt    nt;
  VecScatter  Mvctx = a->Mvctx;

  PetscFunctionBegin;
  PetscCall(VecGetLocalSize(xx, &nt));
  PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")", A->cmap->n, nt);
  PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscUseTypeMethod(a->A, mult, xx, yy); /* local work while the scatter is in flight */
  PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscUseTypeMethod(a->B, multadd, a->lvec, yy, yy); /* yy += B * (ghost values) */
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatMultDiagonalBlock_MPIAIJ - Applies only the local diagonal block: xx = diag-block(A) * bb. */
static PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A, Vec bb, Vec xx)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatMultDiagonalBlock(a->A, bb, xx));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatMultAdd_MPIAIJ - z = y + A*x, with the same communication/computation overlap as MatMult. */
static PetscErrorCode MatMultAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  VecScatter  Mvctx = a->Mvctx;

  PetscFunctionBegin;
  PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
  PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatMultTranspose_MPIAIJ - y = A^T * x. The off-diagonal block's transpose product is
  computed into the ghost vector, then scattered back in REVERSE and added into y.
*/
static PetscErrorCode MatMultTranspose_MPIAIJ(Mat A, Vec xx, Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  /* do nondiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
  /* do local part */
  PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
  /* add partial results together */
  PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
  PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatIsTranspose_MPIAIJ - Tests whether Bmat equals Amat^T to tolerance tol.
  First the diagonal blocks are compared (cheap, collective via MPI_LAND); only if
  they all pass are the off-diagonal parts gathered with MatCreateSubMatrices and
  compared.
*/
static PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat, Mat Bmat, PetscReal tol, PetscBool *f)
{
  MPI_Comm    comm;
  Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)Amat->data, *Bij = (Mat_MPIAIJ *)Bmat->data;
  Mat         Adia = Aij->A, Bdia = Bij->A, Aoff, Boff, *Aoffs, *Boffs;
  IS          Me, Notme;
  PetscInt    M, N, first, last, *notme, i;
  PetscBool   lf;
  PetscMPIInt size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  PetscCall(MatIsTranspose(Adia, Bdia, tol, &lf));
  PetscCallMPI(MPIU_Allreduce(&lf, f, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)Amat)));
  if (!*f) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (size == 1) PetscFunctionReturn(PETSC_SUCCESS);

  /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix.
   */
  PetscCall(MatGetSize(Amat, &M, &N));
  PetscCall(MatGetOwnershipRange(Amat, &first, &last));
  /* notme = all global row/col indices NOT owned by this rank */
  PetscCall(PetscMalloc1(N - last + first, &notme));
  for (i = 0; i < first; i++) notme[i] = i;
  for (i = last; i < M; i++) notme[i - last + first] = i;
  PetscCall(ISCreateGeneral(MPI_COMM_SELF, N - last + first, notme, PETSC_COPY_VALUES, &Notme));
  PetscCall(ISCreateStride(MPI_COMM_SELF, last - first, first, 1, &Me));
  /* A(Me, Notme) must equal B(Notme, Me)^T for the matrices to be transposes */
  PetscCall(MatCreateSubMatrices(Amat, 1, &Me, &Notme, MAT_INITIAL_MATRIX, &Aoffs));
  Aoff = Aoffs[0];
  PetscCall(MatCreateSubMatrices(Bmat, 1, &Notme, &Me, MAT_INITIAL_MATRIX, &Boffs));
  Boff = Boffs[0];
  PetscCall(MatIsTranspose(Aoff, Boff, tol, f));
  PetscCall(MatDestroyMatrices(1, &Aoffs));
  PetscCall(MatDestroyMatrices(1, &Boffs));
  PetscCall(ISDestroy(&Me));
  PetscCall(ISDestroy(&Notme));
  PetscCall(PetscFree(notme));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatMultTransposeAdd_MPIAIJ - z = y + A^T * x; same reverse-scatter pattern as MatMultTranspose. */
static PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  /* do nondiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
  /* do local part */
  PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
  /* add partial results together */
  PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
  PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
static PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A, Vec v)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCheck(A->rmap->N == A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
  PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "row partition must equal col partition");
  PetscCall(MatGetDiagonal(a->A, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatScale_MPIAIJ - A = aa * A, applied to both sequential blocks. */
static PetscErrorCode MatScale_MPIAIJ(Mat A, PetscScalar aa)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatScale(a->A, aa));
  PetscCall(MatScale(a->B, aa));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatView_MPIAIJ_Binary - Writes the parallel matrix in PETSc's binary format:
  a 4-entry header (classid, M, N, total nz), per-row lengths, global column indices,
  then values. Within each row, columns are emitted in globally ascending order by
  interleaving the off-diagonal entries left of the diagonal block, the diagonal
  block, and the remaining off-diagonal entries.
*/
static PetscErrorCode MatView_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)mat->data;
  Mat_SeqAIJ        *A   = (Mat_SeqAIJ *)aij->A->data;
  Mat_SeqAIJ        *B   = (Mat_SeqAIJ *)aij->B->data;
  const PetscInt    *garray = aij->garray; /* local-to-global map for B's columns */
  const PetscScalar *aa, *ba;
  PetscInt           header[4], M, N, m, rs, cs, cnt, i, ja, jb;
  PetscInt64         nz, hnz;
  PetscInt          *rowlens;
  PetscInt          *colidxs;
  PetscScalar       *matvals;
  PetscMPIInt        rank;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  M  = mat->rmap->N;
  N  = mat->cmap->N;
  m  = mat->rmap->n;
  rs = mat->rmap->rstart;
  cs = mat->cmap->rstart;
  nz = A->nz + B->nz;

  /* write matrix header */
  header[0] = MAT_FILE_CLASSID;
  header[1] = M;
  header[2] = N;
  PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_INT64, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
  if (rank == 0) PetscCall(PetscIntCast(hnz, &header[3])); /* only rank 0 writes the header */
  PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));

  /* fill in and store row lengths */
  PetscCall(PetscMalloc1(m, &rowlens));
  for (i = 0; i < m; i++) rowlens[i] = A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i];
  PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
  PetscCall(PetscFree(rowlens));

  /* fill in and store column
     indices */
  PetscCall(PetscMalloc1(nz, &colidxs));
  for (cnt = 0, i = 0; i < m; i++) {
    /* off-diagonal columns to the left of the diagonal block (B's columns are sorted) */
    for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
      if (garray[B->j[jb]] > cs) break;
      colidxs[cnt++] = garray[B->j[jb]];
    }
    /* diagonal-block columns, shifted to global numbering */
    for (ja = A->i[i]; ja < A->i[i + 1]; ja++) colidxs[cnt++] = A->j[ja] + cs;
    /* remaining off-diagonal columns to the right of the diagonal block */
    for (; jb < B->i[i + 1]; jb++) colidxs[cnt++] = garray[B->j[jb]];
  }
  PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
  PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
  PetscCall(PetscFree(colidxs));

  /* fill in and store nonzero values */
  PetscCall(MatSeqAIJGetArrayRead(aij->A, &aa));
  PetscCall(MatSeqAIJGetArrayRead(aij->B, &ba));
  PetscCall(PetscMalloc1(nz, &matvals));
  /* same interleaving order as the column indices above */
  for (cnt = 0, i = 0; i < m; i++) {
    for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
      if (garray[B->j[jb]] > cs) break;
      matvals[cnt++] = ba[jb];
    }
    for (ja = A->i[i]; ja < A->i[i + 1]; ja++) matvals[cnt++] = aa[ja];
    for (; jb < B->i[i + 1]; jb++) matvals[cnt++] = ba[jb];
  }
  PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &aa));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &ba));
  /* NOTE(review): uses PETSC_ERR_LIB here vs PETSC_ERR_PLIB for the identical check above — likely should be PETSC_ERR_PLIB (internal error); confirm before changing */
  PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
  PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
  PetscCall(PetscFree(matvals));

  /* write block size option to the viewer's .info file */
  PetscCall(MatView_Binary_BlockSizes(mat, viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}

#include <petscdraw.h>
/*
  MatView_MPIAIJ_ASCIIorDraworSocket - Viewer dispatch for MPIAIJ. Handles the cheap
  ASCII info formats and the parallel binary path directly; all remaining cases fall
  through to gathering the whole matrix onto rank 0 and viewing it there.
*/
static PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
{
  Mat_MPIAIJ       *aij  = (Mat_MPIAIJ *)mat->data;
  PetscMPIInt       rank = aij->rank, size = aij->size;
  PetscBool         isdraw, iascii, isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  if (iascii) {
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_LOAD_BALANCE) {
      /* report min/avg/max local nonzero counts across ranks */
      PetscInt i, nmax = 0, nmin = PETSC_INT_MAX, navg = 0, *nz, nzlocal = ((Mat_SeqAIJ *)aij->A->data)->nz + ((Mat_SeqAIJ *)aij->B->data)->nz;
      PetscCall(PetscMalloc1(size, &nz));
      PetscCallMPI(MPI_Allgather(&nzlocal, 1, MPIU_INT, nz, 1, MPIU_INT, PetscObjectComm((PetscObject)mat)));
      for (i = 0; i < size; i++) {
        nmax = PetscMax(nmax, nz[i]);
        nmin = PetscMin(nmin, nz[i]);
        navg += nz[i];
      }
      PetscCall(PetscFree(nz));
      navg = navg / size;
      PetscCall(PetscViewerASCIIPrintf(viewer, "Load Balance - Nonzeros: Min %" PetscInt_FMT "  avg %" PetscInt_FMT "  max %" PetscInt_FMT "\n", nmin, navg, nmax));
      PetscFunctionReturn(PETSC_SUCCESS);
    }
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo   info;
      PetscInt *inodes = NULL;

      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(MatInodeGetInodeSizes(aij->A, NULL, &inodes, NULL));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      if (!inodes) {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                     info.memory));
      } else {
        PetscCall(
          PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %"
PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, info.memory));
      }
      PetscCall(MatGetInfo(aij->A, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(MatGetInfo(aij->B, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(aij->Mvctx, viewer));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount, inodelimit, *inodes;
      PetscCall(MatInodeGetInodeSizes(aij->A, &inodecount, &inodes, &inodelimit));
      if (inodes) {
        PetscCall(PetscViewerASCIIPrintf(viewer, "using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n", inodecount, inodelimit));
      } else {
        PetscCall(PetscViewerASCIIPrintf(viewer, "not using I-node (on process 0) routines\n"));
      }
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      /* nothing to print for factor info on an unfactored AIJ matrix */
      PetscFunctionReturn(PETSC_SUCCESS);
    }
  } else if (isbinary) {
    if (size == 1) {
      /* single rank: the diagonal block IS the whole matrix */
      PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
      PetscCall(MatView(aij->A, viewer));
    } else {
      PetscCall(MatView_MPIAIJ_Binary(mat, viewer));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (iascii && size == 1) {
    PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
    PetscCall(MatView(aij->A, viewer));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
  }

  { /* assemble the entire matrix onto first processor */
    Mat A = NULL, Av;
    IS  isrow, iscol;

    /* rank 0 requests every row/column; all other ranks request none */
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->rmap->N : 0, 0, 1, &isrow));
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->cmap->N : 0, 0, 1, &iscol));
    PetscCall(MatCreateSubMatrix(mat, isrow, iscol, MAT_INITIAL_MATRIX, &A));
    PetscCall(MatMPIAIJGetSeqAIJ(A, &Av, NULL, NULL));
    /* The commented code uses MatCreateSubMatrices instead */
    /*
      Mat      *AA, A = NULL, Av;
      IS        isrow,iscol;

      PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow));
      PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ?
mat->cmap->N : 0,0,1,&iscol));
      PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA));
      if (rank == 0) {
        PetscCall(PetscObjectReference((PetscObject)AA[0]));
        A    = AA[0];
        Av   = AA[0];
      }
      PetscCall(MatDestroySubMatrices(1,&AA));
    */
    PetscCall(ISDestroy(&iscol));
    PetscCall(ISDestroy(&isrow));
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    if (rank == 0) {
      if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)Av, ((PetscObject)mat)->name));
      PetscCall(MatView_SeqAIJ(Av, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatView_MPIAIJ - Top-level viewer entry point; only ASCII, draw, binary, and socket viewers are handled. */
PetscErrorCode MatView_MPIAIJ(Mat mat, PetscViewer viewer)
{
  PetscBool iascii, isdraw, issocket, isbinary;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
  if (iascii || isdraw || isbinary || issocket) PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat, viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatSOR_MPIAIJ - Parallel (block Jacobi style) SOR: only the *local* sweep variants are
  supported. Each outer iteration scatters the current x into the ghost vector, folds the
  off-diagonal contribution into a modified right-hand side bb1 = bb - B*x, and then runs
  the requested local sweep on the diagonal block.
*/
static PetscErrorCode MatSOR_MPIAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data;
  Vec         bb1 = NULL;
  PetscBool   hasop;

  PetscFunctionBegin;
  if (flag == SOR_APPLY_UPPER) {
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag,
fshift, lits, 1, xx));
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  /* ~flag & SOR_ZERO_INITIAL_GUESS == (~flag) & SOR_ZERO_INITIAL_GUESS: true when the
     zero-initial-guess bit is NOT set, i.e. bb1 is needed to form bb - B*x */
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) PetscCall(VecDuplicate(bb, &bb1));

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* first iteration needs no ghost values since x starts at zero */
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }

    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_EISENSTAT) {
    Vec xx1;

    PetscCall(VecDuplicate(bb, &xx1));
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx));

    PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
    if (!mat->diag) {
      /* lazily cache the diagonal for the pointwise fallback below */
      PetscCall(MatCreateVecs(matin, &mat->diag, NULL));
      PetscCall(MatGetDiagonal(matin, mat->diag));
    }
    PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop));
    if (hasop) {
      PetscCall(MatMultDiagonalBlock(matin, xx, bb1));
    } else {
      PetscCall(VecPointwiseMult(bb1, mat->diag, xx));
    }
    PetscCall(VecAYPX(bb1, (omega - 2.0) / omega, bb));

    PetscCall(MatMultAdd(mat->B, mat->lvec, bb1, bb1));

    /* local sweep */
    PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1));
    PetscCall(VecAXPY(xx, 1.0, xx1));
    PetscCall(VecDestroy(&xx1));
  } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel SOR not supported");

  PetscCall(VecDestroy(&bb1));

  matin->factorerrortype = mat->A->factorerrortype;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatPermute_MPIAIJ - B = P_row * A * P_col: builds the row/column permuted matrix.
  PetscSF reductions invert the distributed permutations (where do my rows/columns go),
  preallocation counts are computed per destination row, and the permuted entries are
  inserted with MatSetValues.
*/
static PetscErrorCode MatPermute_MPIAIJ(Mat A, IS rowp, IS colp, Mat *B)
{
  Mat             aA, aB, Aperm;
  const PetscInt *rwant, *cwant, *gcols, *ai, *bi, *aj, *bj;
  PetscScalar    *aa, *ba;
  PetscInt        i, j, m, n, ng, anz, bnz, *dnnz, *onnz, *tdnnz, *tonnz, *rdest, *cdest, *work, *gcdest;
PetscSF   rowsf, sf;
  IS        parcolp = NULL;
  PetscBool done;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(A, &m, &n));
  PetscCall(ISGetIndices(rowp, &rwant));
  PetscCall(ISGetIndices(colp, &cwant));
  PetscCall(PetscMalloc3(PetscMax(m, n), &work, m, &rdest, n, &cdest));

  /* Invert row permutation to find out where my rows should go */
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &rowsf));
  PetscCall(PetscSFSetGraphLayout(rowsf, A->rmap, A->rmap->n, NULL, PETSC_OWN_POINTER, rwant));
  PetscCall(PetscSFSetFromOptions(rowsf));
  for (i = 0; i < m; i++) work[i] = A->rmap->rstart + i;
  PetscCall(PetscSFReduceBegin(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));

  /* Invert column permutation to find out where my columns should go */
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraphLayout(sf, A->cmap, A->cmap->n, NULL, PETSC_OWN_POINTER, cwant));
  PetscCall(PetscSFSetFromOptions(sf));
  for (i = 0; i < n; i++) work[i] = A->cmap->rstart + i;
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, work, cdest, MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, work, cdest, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));

  PetscCall(ISRestoreIndices(rowp, &rwant));
  PetscCall(ISRestoreIndices(colp, &cwant));
  PetscCall(MatMPIAIJGetSeqAIJ(A, &aA, &aB, &gcols));

  /* Find out where my gcols should go */
  PetscCall(MatGetSize(aB, NULL, &ng));
  PetscCall(PetscMalloc1(ng, &gcdest));
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraphLayout(sf, A->cmap, ng, NULL, PETSC_OWN_POINTER, gcols));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFBcastBegin(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));

  /* Count diagonal/off-diagonal nonzeros of each permuted row for preallocation */
  PetscCall(PetscCalloc4(m, &dnnz, m, &onnz, m, &tdnnz, m, &tonnz));
  PetscCall(MatGetRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
  PetscCall(MatGetRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
  for (i = 0; i < m; i++) {
    PetscInt    row = rdest[i];
    PetscMPIInt rowner;
    PetscCall(PetscLayoutFindOwner(A->rmap, row, &rowner));
    for (j = ai[i]; j < ai[i + 1]; j++) {
      PetscInt    col = cdest[aj[j]];
      PetscMPIInt cowner;
      PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); /* Could build an index for the columns to eliminate this search */
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
    for (j = bi[i]; j < bi[i + 1]; j++) {
      PetscInt    col = gcdest[bj[j]];
      PetscMPIInt cowner;
      PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner));
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
  }
  /* Ship the counts to the ranks that own the destination rows */
  PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&rowsf));

  PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A), A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N, 0, tdnnz, 0, tonnz, &Aperm));
  PetscCall(MatSeqAIJGetArray(aA, &aa));
  PetscCall(MatSeqAIJGetArray(aB, &ba));
  for (i = 0; i < m; i++) {
    PetscInt *acols = dnnz, *bcols = onnz; /* Repurpose now-unneeded arrays */
    PetscInt  j0, rowlen;
    rowlen = ai[i + 1] - ai[i];
    for (j0 = j = 0; j < rowlen; j0 = j) { /* rowlen could be larger than number of rows m, so sum in batches */
      for (; j < PetscMin(rowlen, j0 + m); j++) acols[j - j0] = cdest[aj[ai[i] + j]];
      PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, acols, aa + ai[i] + j0,
INSERT_VALUES)); 1582 } 1583 rowlen = bi[i + 1] - bi[i]; 1584 for (j0 = j = 0; j < rowlen; j0 = j) { 1585 for (; j < PetscMin(rowlen, j0 + m); j++) bcols[j - j0] = gcdest[bj[bi[i] + j]]; 1586 PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, bcols, ba + bi[i] + j0, INSERT_VALUES)); 1587 } 1588 } 1589 PetscCall(MatAssemblyBegin(Aperm, MAT_FINAL_ASSEMBLY)); 1590 PetscCall(MatAssemblyEnd(Aperm, MAT_FINAL_ASSEMBLY)); 1591 PetscCall(MatRestoreRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done)); 1592 PetscCall(MatRestoreRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done)); 1593 PetscCall(MatSeqAIJRestoreArray(aA, &aa)); 1594 PetscCall(MatSeqAIJRestoreArray(aB, &ba)); 1595 PetscCall(PetscFree4(dnnz, onnz, tdnnz, tonnz)); 1596 PetscCall(PetscFree3(work, rdest, cdest)); 1597 PetscCall(PetscFree(gcdest)); 1598 if (parcolp) PetscCall(ISDestroy(&colp)); 1599 *B = Aperm; 1600 PetscFunctionReturn(PETSC_SUCCESS); 1601 } 1602 1603 static PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[]) 1604 { 1605 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1606 1607 PetscFunctionBegin; 1608 PetscCall(MatGetSize(aij->B, NULL, nghosts)); 1609 if (ghosts) *ghosts = aij->garray; 1610 PetscFunctionReturn(PETSC_SUCCESS); 1611 } 1612 1613 static PetscErrorCode MatGetInfo_MPIAIJ(Mat matin, MatInfoType flag, MatInfo *info) 1614 { 1615 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data; 1616 Mat A = mat->A, B = mat->B; 1617 PetscLogDouble isend[5], irecv[5]; 1618 1619 PetscFunctionBegin; 1620 info->block_size = 1.0; 1621 PetscCall(MatGetInfo(A, MAT_LOCAL, info)); 1622 1623 isend[0] = info->nz_used; 1624 isend[1] = info->nz_allocated; 1625 isend[2] = info->nz_unneeded; 1626 isend[3] = info->memory; 1627 isend[4] = info->mallocs; 1628 1629 PetscCall(MatGetInfo(B, MAT_LOCAL, info)); 1630 1631 isend[0] += info->nz_used; 1632 isend[1] += info->nz_allocated; 1633 isend[2] += info->nz_unneeded; 1634 isend[3] += info->memory; 1635 isend[4] += info->mallocs; 
  /* reduce the five accumulated counters according to the requested scope */
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatSetOption_MPIAIJ(Mat A, MatOption op, PetscBool flg)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  switch (op) {
  /* options forwarded unchanged to both the diagonal and off-diagonal blocks */
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
  case MAT_FORM_EXPLICIT_TRANSPOSE:
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A, 1);
    a->roworiented = flg;

    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  /* Symmetry flags are handled directly by MatSetOption() and they
     don't affect preallocation */
  case MAT_SPD:
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
  case MAT_SPD_ETERNAL:
    /* if the diagonal matrix is square it inherits some of the properties above */
    if (a->A && A->rmap->n == A->cmap->n) PetscCall(MatSetOption(a->A, op, flg));
    break;
  case MAT_SUBMAT_SINGLEIS:
    A->submat_singleis = flg;
    break;
  default:
    break;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Returns one locally owned row of the parallel matrix, merging the diagonal and
   off-diagonal blocks into a single sorted-by-global-column row */
PetscErrorCode MatGetRow_MPIAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)matin->data;
  PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
  PetscInt     i, *cworkA, *cworkB, **pcA, **pcB, cstart = matin->cmap->rstart;
  PetscInt     nztot, nzA, nzB, lrow, rstart = matin->rmap->rstart, rend = matin->rmap->rend;
  PetscInt    *cmap, *idx_p;

  PetscFunctionBegin;
  PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ *)mat->A->data, *Ba = (Mat_SeqAIJ *)mat->B->data;
    PetscInt    max = 1, tmp;
    for (i = 0; i < matin->rmap->n; i++) {
      tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    PetscCall(PetscMalloc2(max, &mat->rowvalues, max, &mat->rowindices));
  }

  PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows");
  lrow = row - rstart;

  /* request columns/values from the blocks only when the caller asked for them */
  pvA = &vworkA;
  pcA = &cworkA;
  pvB = &vworkB;
  pcB = &cworkB;
  if (!v) {
    pvA = NULL;
    pvB = NULL;
  }
  if (!idx) {
    pcA = NULL;
    if (!v) pcB = NULL;
  }
  PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        /* copy B (off-diagonal) entries with global column below the diagonal block ... */
        for (i = 0; i < nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        /* ... then the diagonal-block entries, then the remaining B entries */
        for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
        for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) { /* the split point was already found while copying values */
          for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i]];
        } else {
          for (i = 0; i < nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart + cworkA[i];
        for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = NULL;
      if (v) *v = NULL;
    }
  }
  *nz = nztot;
  PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCheck(aij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first");
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatNorm_MPIAIJ(Mat mat, NormType type, PetscReal *norm)
{
  Mat_MPIAIJ      *aij = (Mat_MPIAIJ *)mat->data;
  Mat_SeqAIJ      *amat = (Mat_SeqAIJ *)aij->A->data, *bmat = (Mat_SeqAIJ *)aij->B->data;
  PetscInt         i, j, cstart = mat->cmap->rstart;
  PetscReal        sum = 0.0;
  const MatScalar *v, *amata, *bmata;

  PetscFunctionBegin;
  if (aij->size == 1) { /* single process: defer to the sequential norm */
    PetscCall(MatNorm(aij->A, type, norm));
  } else {
    PetscCall(MatSeqAIJGetArrayRead(aij->A, &amata));
    PetscCall(MatSeqAIJGetArrayRead(aij->B, &bmata));
    if (type == NORM_FROBENIUS) {
      /* sum |a_ij|^2 over both blocks, then reduce and take the square root */
      v = amata;
      for (i = 0; i < amat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      v = bmata;
      for (i = 0; i < bmat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      PetscCallMPI(MPIU_Allreduce(&sum, norm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      *norm = PetscSqrtReal(*norm);
      PetscCall(PetscLogFlops(2.0 * amat->nz + 2.0 * bmat->nz));
    } else if (type == NORM_1) { /* max column norm */
      PetscReal *tmp;
      PetscInt  *jj, *garray = aij->garray;
      PetscCall(PetscCalloc1(mat->cmap->N + 1, &tmp));
      *norm = 0.0;
      v     = amata;
      jj    = amat->j;
      for (j = 0; j < amat->nz; j++) {
        tmp[cstart + *jj++] += PetscAbsScalar(*v);
        v++;
      }
      v  = bmata;
      jj = bmat->j;
      for (j = 0; j < bmat->nz; j++) {
        tmp[garray[*jj++]] += PetscAbsScalar(*v);
        v++;
      }
      /* sum the per-column absolute sums across processes, then take the maximum */
      PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, tmp, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      for (j = 0; j < mat->cmap->N; j++) {
        if (tmp[j] > *norm) *norm = tmp[j];
      }
      PetscCall(PetscFree(tmp));
      PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      for (j = 0; j < aij->A->rmap->n; j++) {
        v   = PetscSafePointerPlusOffset(amata, amat->i[j]);
        sum = 0.0;
        for (i = 0; i < amat->i[j + 1] - amat->i[j]; i++) {
          sum += PetscAbsScalar(*v);
          v++;
        }
        v = PetscSafePointerPlusOffset(bmata, bmat->i[j]);
        for (i = 0; i < bmat->i[j + 1] - bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v);
          v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      PetscCallMPI(MPIU_Allreduce(&ntemp, norm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat)));
      PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
    } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for two norm");
    PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &amata));
    PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &bmata));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatTranspose_MPIAIJ(Mat A, MatReuse reuse, Mat *matout)
{
  Mat_MPIAIJ      *a = (Mat_MPIAIJ *)A->data, *b;
  Mat_SeqAIJ      *Aloc = (Mat_SeqAIJ *)a->A->data, *Bloc = (Mat_SeqAIJ *)a->B->data, *sub_B_diag;
  PetscInt         M = A->rmap->N, N = A->cmap->N, ma, na, mb, nb, row, *cols, *cols_tmp, *B_diag_ilen, i, ncol, A_diag_ncol;
  const PetscInt  *ai, *aj, *bi, *bj, *B_diag_i;
  Mat              B, A_diag, *B_diag;
  const MatScalar *pbv, *bv;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
  ma = A->rmap->n;
  na = A->cmap->n;
  mb = a->B->rmap->n;
  nb = a->B->cmap->n;
  ai = Aloc->i;
  aj = Aloc->j;
  bi = Bloc->i;
  bj = Bloc->j;
  if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
    /* build the preallocation of the transpose before creating it */
    PetscInt            *d_nnz, *g_nnz, *o_nnz;
    PetscSFNode         *oloc;
    PETSC_UNUSED PetscSF sf;

    PetscCall(PetscMalloc4(na, &d_nnz, na, &o_nnz, nb, &g_nnz, nb, &oloc));
    /* compute d_nnz for preallocation */
    PetscCall(PetscArrayzero(d_nnz, na));
    for (i = 0; i < ai[ma]; i++) d_nnz[aj[i]]++;
    /* compute local off-diagonal contributions */
    PetscCall(PetscArrayzero(g_nnz, nb));
    for (i = 0; i < bi[ma]; i++) g_nnz[bj[i]]++;
    /* map those to global */
    PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
    PetscCall(PetscSFSetGraphLayout(sf, A->cmap, nb, NULL, PETSC_USE_POINTER, a->garray));
    PetscCall(PetscSFSetFromOptions(sf));
    PetscCall(PetscArrayzero(o_nnz, na));
    PetscCall(PetscSFReduceBegin(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
    PetscCall(PetscSFReduceEnd(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
    PetscCall(PetscSFDestroy(&sf));

    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
    PetscCall(MatSetBlockSizes(B, A->cmap->bs, A->rmap->bs));
    PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
    PetscCall(PetscFree4(d_nnz, o_nnz, g_nnz, oloc));
  } else {
    B = *matout;
    PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
  }

  b           = (Mat_MPIAIJ *)B->data;
  A_diag      = a->A;
  B_diag      = &b->A;
  sub_B_diag  = (Mat_SeqAIJ *)(*B_diag)->data;
  A_diag_ncol = A_diag->cmap->N;
  B_diag_ilen = sub_B_diag->ilen;
  B_diag_i    = sub_B_diag->i;

  /* Set ilen for diagonal of B */
  for (i = 0; i < A_diag_ncol; i++) B_diag_ilen[i] = B_diag_i[i + 1] - B_diag_i[i];

  /* Transpose the diagonal part of the matrix.
     In contrast to the off-diagonal part, this can be done
     very quickly (=without using MatSetValues), because all writes are local. */
  PetscCall(MatTransposeSetPrecursor(A_diag, *B_diag));
  PetscCall(MatTranspose(A_diag, MAT_REUSE_MATRIX, B_diag));

  /* copy over the B part */
  PetscCall(PetscMalloc1(bi[mb], &cols));
  PetscCall(MatSeqAIJGetArrayRead(a->B, &bv));
  pbv = bv;
  row = A->rmap->rstart;
  /* translate B's compressed local column numbers to global column numbers */
  for (i = 0; i < bi[mb]; i++) cols[i] = a->garray[bj[i]];
  cols_tmp = cols;
  for (i = 0; i < mb; i++) {
    ncol = bi[i + 1] - bi[i];
    /* each local row of B becomes one global column of the transpose */
    PetscCall(MatSetValues(B, ncol, cols_tmp, 1, &row, pbv, INSERT_VALUES));
    row++;
    if (pbv) pbv += ncol;
    if (cols_tmp) cols_tmp += ncol;
  }
  PetscCall(PetscFree(cols));
  PetscCall(MatSeqAIJRestoreArrayRead(a->B, &bv));

  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    /* neither INITIAL nor REUSE: fold the result back into A itself */
    PetscCall(MatHeaderMerge(A, &B));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat, Vec ll, Vec rr)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  Mat         a = aij->A, b = aij->B;
  PetscInt    s1, s2, s3;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(mat, &s2, &s3));
  if (rr) {
    PetscCall(VecGetLocalSize(rr, &s1));
    PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size");
    /* Overlap communication with computation. */
    PetscCall(VecScatterBegin(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
  }
  if (ll) {
    PetscCall(VecGetLocalSize(ll, &s1));
    PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size");
    /* left-scale the off-diagonal block while the scatter of rr is in flight */
    PetscUseTypeMethod(b, diagonalscale, ll, NULL);
  }
  /* scale the diagonal block */
  PetscUseTypeMethod(a, diagonalscale, ll, rr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    PetscCall(VecScatterEnd(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscUseTypeMethod(b, diagonalscale, NULL, aij->lvec);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatSetUnfactored(a->A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatEqual_MPIAIJ(Mat A, Mat B, PetscBool *flag)
{
  Mat_MPIAIJ *matB = (Mat_MPIAIJ *)B->data, *matA = (Mat_MPIAIJ *)A->data;
  Mat         a, b, c, d;
  PetscBool   flg;

  PetscFunctionBegin;
  a = matA->A;
  b = matA->B;
  c = matB->A;
  d = matB->B;

  /* equal iff both the diagonal and off-diagonal blocks agree on every process */
  PetscCall(MatEqual(a, c, &flg));
  if (flg) PetscCall(MatEqual(b, d, &flg));
  PetscCallMPI(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatCopy_MPIAIJ(Mat A, Mat B, MatStructure str)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;

  PetscFunctionBegin;
  /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy.
   */
  if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
    /* because of the column compression in the off-processor part of the matrix a->B,
       the number of columns in a->B and b->B may be different, hence we cannot call
       the MatCopy() directly on the two parts. If need be, we can provide a more
       efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
       then copying the submatrices */
    PetscCall(MatCopy_Basic(A, B, str));
  } else {
    PetscCall(MatCopy(a->A, b->A, str));
    PetscCall(MatCopy(a->B, b->B, str));
  }
  PetscCall(PetscObjectStateIncrease((PetscObject)B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   Computes the number of nonzeros per row needed for preallocation when X and Y
   have different nonzero structure.
*/
PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m, const PetscInt *xi, const PetscInt *xj, const PetscInt *xltog, const PetscInt *yi, const PetscInt *yj, const PetscInt *yltog, PetscInt *nnz)
{
  PetscInt i, j, k, nzx, nzy;

  PetscFunctionBegin;
  /* Set the number of nonzeros in the new matrix */
  for (i = 0; i < m; i++) {
    /* merge-count row i of X and row i of Y in global column order */
    const PetscInt *xjj = PetscSafePointerPlusOffset(xj, xi[i]), *yjj = PetscSafePointerPlusOffset(yj, yi[i]);
    nzx    = xi[i + 1] - xi[i];
    nzy    = yi[i + 1] - yi[i];
    nnz[i] = 0;
    for (j = 0, k = 0; j < nzx; j++) {                                /* Point in X */
      for (; k < nzy && yltog[yjj[k]] < xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
      if (k < nzy && yltog[yjj[k]] == xltog[xjj[j]]) k++;             /* Skip duplicate */
      nnz[i]++;
    }
    for (; k < nzy; k++) nnz[i]++; /* remaining Y entries past the last X entry */
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz)
{
  PetscInt    m = Y->rmap->N;
  Mat_SeqAIJ *x = (Mat_SeqAIJ *)X->data;
  Mat_SeqAIJ *y = (Mat_SeqAIJ *)Y->data;

  PetscFunctionBegin;
  PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatAXPY_MPIAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
{
  Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data, *yy = (Mat_MPIAIJ *)Y->data;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscCall(MatAXPY(yy->A, a, xx->A, str));
    PetscCall(MatAXPY(yy->B, a, xx->B, str));
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    PetscCall(MatAXPY_Basic(Y, a, X, str));
  } else {
    /* different structure: build a properly preallocated Y+aX and merge it into Y */
    Mat       B;
    PetscInt *nnz_d, *nnz_o;

    PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
    PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
    PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
    PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
    PetscCall(MatSetLayouts(B, Y->rmap, Y->cmap));
    PetscCall(MatSetType(B, ((PetscObject)Y)->type_name));
    PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A, xx->A, nnz_d));
    PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
    PetscCall(MatMPIAIJSetPreallocation(B, 0, nnz_d, 0, nnz_o));
    PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
    PetscCall(MatHeaderMerge(Y, &B));
    PetscCall(PetscFree(nnz_d));
    PetscCall(PetscFree(nnz_o));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat);

static PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
{
  PetscFunctionBegin;
  /* conjugation is a no-op for real scalars */
  if (PetscDefined(USE_COMPLEX)) {
    Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

    PetscCall(MatConjugate_SeqAIJ(aij->A));
    PetscCall(MatConjugate_SeqAIJ(aij->B));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatRealPart_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatRealPart(a->A));
  PetscCall(MatRealPart(a->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatImaginaryPart(a->A));
  PetscCall(MatImaginaryPart(a->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *a = (Mat_MPIAIJ *)A->data;
  PetscInt           i, *idxb = NULL, m = A->rmap->n;
  PetscScalar       *vv;
  Vec                vB, vA;
  const PetscScalar *va, *vb;

  PetscFunctionBegin;
  PetscCall(MatCreateVecs(a->A, NULL, &vA));
  PetscCall(MatGetRowMaxAbs(a->A, vA, idx));

  /* shift the diagonal-block indices to global column numbering */
  PetscCall(VecGetArrayRead(vA, &va));
  if (idx) {
    for (i = 0; i < m; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  PetscCall(MatCreateVecs(a->B, NULL, &vB));
  PetscCall(PetscMalloc1(m, &idxb));
  PetscCall(MatGetRowMaxAbs(a->B, vB, idxb));

  /* merge the two blocks row by row, keeping the larger magnitude;
     on ties, prefer the smaller global column index */
  PetscCall(VecGetArrayWrite(v, &vv));
  PetscCall(VecGetArrayRead(vB, &vb));
  for (i = 0; i < m; i++) {
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
      vv[i] = vb[i];
      if (idx) idx[i] = a->garray[idxb[i]];
    } else {
      vv[i] = va[i];
      if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) idx[i] = a->garray[idxb[i]];
    }
  }
  PetscCall(VecRestoreArrayWrite(v, &vv));
  PetscCall(VecRestoreArrayRead(vA, &va));
  PetscCall(VecRestoreArrayRead(vB, &vb));
  PetscCall(PetscFree(idxb));
  PetscCall(VecDestroy(&vA));
  PetscCall(VecDestroy(&vB));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static
PetscErrorCode MatGetRowSumAbs_MPIAIJ(Mat A, Vec v)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  Vec         vB, vA;

  PetscFunctionBegin;
  /* row sums of absolute values = diagonal-block sums plus off-diagonal-block sums */
  PetscCall(MatCreateVecs(a->A, NULL, &vA));
  PetscCall(MatGetRowSumAbs(a->A, vA));
  PetscCall(MatCreateVecs(a->B, NULL, &vB));
  PetscCall(MatGetRowSumAbs(a->B, vB));
  PetscCall(VecAXPY(vA, 1.0, vB));
  PetscCall(VecDestroy(&vB));
  PetscCall(VecCopy(vA, v));
  PetscCall(VecDestroy(&vA));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
  PetscInt           m = A->rmap->n, n = A->cmap->n;
  PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray;
  PetscInt          *diagIdx, *offdiagIdx;
  Vec                diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt           r, j, col, ncols, *bi, *bj;
  Mat                B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMinAbs(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (n == 0) {
    if (m) {
      /* no local columns: every row minimum is the implicit 0.0, with no column index */
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = 0.0;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r]   = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse, so an implicit 0.0 exists and bounds the minimum in magnitude */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    /* scan the stored entries of this B row for a smaller magnitude */
    for (j = 0; j < ncols; j++) {
      if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {
        offdiagA[r]   = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  /* per row, take the smaller of the diagonal-block and off-diagonal minima;
     on ties, prefer the smaller global column index */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx)
        idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
  PetscInt           m = A->rmap->n, n = A->cmap->n;
  PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray;
  PetscInt          *diagIdx, *offdiagIdx;
  Vec                diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt           r, j, col, ncols, *bi, *bj;
  Mat                B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMin(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (n == 0) {
    if (m) {
      /* no local columns: report an empty-row minimum with no column index */
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = PETSC_MAX_REAL;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscCalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMin(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r]   = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse, so an implicit 0.0 exists and bounds the minimum from above */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    /* scan the stored entries of this B row for a smaller (real-part) value */
    for (j = 0; j < ncols; j++) {
      if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {
        offdiagA[r]   = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  /* per row, take the smaller of the diagonal-block and off-diagonal minima;
     on ties, prefer the smaller global column index */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B,
&bav)); 2441 PetscCall(VecRestoreArrayWrite(v, &a)); 2442 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA)); 2443 PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 2444 PetscCall(VecDestroy(&diagV)); 2445 PetscCall(VecDestroy(&offdiagV)); 2446 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2447 PetscFunctionReturn(PETSC_SUCCESS); 2448 } 2449 2450 static PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2451 { 2452 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 2453 PetscInt m = A->rmap->n, n = A->cmap->n; 2454 PetscInt cstart = A->cmap->rstart, cend = A->cmap->rend; 2455 PetscInt *cmap = mat->garray; 2456 PetscInt *diagIdx, *offdiagIdx; 2457 Vec diagV, offdiagV; 2458 PetscScalar *a, *diagA, *offdiagA; 2459 const PetscScalar *ba, *bav; 2460 PetscInt r, j, col, ncols, *bi, *bj; 2461 Mat B = mat->B; 2462 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 2463 2464 PetscFunctionBegin; 2465 /* When a process holds entire A and other processes have no entry */ 2466 if (A->cmap->N == n) { 2467 PetscCall(VecGetArrayWrite(v, &diagA)); 2468 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV)); 2469 PetscCall(MatGetRowMax(mat->A, diagV, idx)); 2470 PetscCall(VecDestroy(&diagV)); 2471 PetscCall(VecRestoreArrayWrite(v, &diagA)); 2472 PetscFunctionReturn(PETSC_SUCCESS); 2473 } else if (n == 0) { 2474 if (m) { 2475 PetscCall(VecGetArrayWrite(v, &a)); 2476 for (r = 0; r < m; r++) { 2477 a[r] = PETSC_MIN_REAL; 2478 if (idx) idx[r] = -1; 2479 } 2480 PetscCall(VecRestoreArrayWrite(v, &a)); 2481 } 2482 PetscFunctionReturn(PETSC_SUCCESS); 2483 } 2484 2485 PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx)); 2486 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2487 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2488 PetscCall(MatGetRowMax(mat->A, diagV, diagIdx)); 2489 2490 /* Get offdiagIdx[] for implicit 0.0 */ 2491 PetscCall(MatSeqAIJGetArrayRead(B, &bav)); 2492 ba = bav; 2493 bi = b->i; 2494 bj = b->j; 2495 
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r]   = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    /* Scan the explicit entries of this B row; keep the largest value seen */
    for (j = 0; j < ncols; j++) {
      if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {
        offdiagA[r]   = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  /* Merge the diagonal-block and off-diagonal-block maxima; ties go to the
     smaller global column index */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r]; /* diagIdx is local to mat->A */
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Returns a sequential matrix with the nonzero structure (no values) of the whole
   parallel matrix gathered on each rank, via MatCreateSubMatrix_MPIAIJ_All(). */
PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat, Mat *newmat)
{
  Mat *dummy;

  PetscFunctionBegin;
  PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat, MAT_DO_NOT_GET_VALUES, MAT_INITIAL_MATRIX, &dummy));
  *newmat = *dummy;
  PetscCall(PetscFree(dummy)); /* free only the array wrapper; the Mat is handed to the caller */
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Delegates block-diagonal inversion to the local diagonal block (mat->A),
   propagating any factorization error flag back to the parallel matrix. */
static PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A, const PetscScalar **values)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatInvertBlockDiagonal(a->A, values));
  A->factorerrortype = a->A->factorerrortype;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Fills the matrix with random values. For an unassembled matrix the off-diagonal
   block must skip the locally owned column range, which belongs to the diagonal block. */
static PetscErrorCode MatSetRandom_MPIAIJ(Mat x, PetscRandom rctx)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)x->data;

  PetscFunctionBegin;
  PetscCheck(x->assembled || x->preallocated, PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
  PetscCall(MatSetRandom(aij->A, rctx));
  if (x->assembled) {
    PetscCall(MatSetRandom(aij->B, rctx));
  } else {
    PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B, x->cmap->rstart, x->cmap->rend, rctx));
  }
  PetscCall(MatAssemblyBegin(x, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(x, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Implementation behind MatMPIAIJSetUseScalableIncreaseOverlap(): swaps the
   increase-overlap function pointer between the scalable and default algorithms. */
static PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A, PetscBool sc)
{
  PetscFunctionBegin;
  if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
  else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMPIAIJGetNumberNonzeros - gets the number of nonzeros in the matrix on this MPI rank

  Not Collective

  Input Parameter:
. A - the matrix

  Output Parameter:
. nz - the number of nonzeros

  Level: advanced

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`
@*/
PetscErrorCode MatMPIAIJGetNumberNonzeros(Mat A, PetscCount *nz)
{
  Mat_MPIAIJ *maij = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ *aaij = (Mat_SeqAIJ *)maij->A->data, *baij = (Mat_SeqAIJ *)maij->B->data;
  PetscBool   isaij;

  PetscFunctionBegin;
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIAIJ, &isaij));
  PetscCheck(isaij, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)A)->type_name);
  /* total local nonzeros = diagonal block + off-diagonal block (CSR row-pointer tails) */
  *nz = aaij->i[A->rmap->n] + baij->i[A->rmap->n];
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap

  Collective

  Input Parameters:
+ A  - the matrix
- sc - `PETSC_TRUE` indicates use the scalable algorithm (default is not to use the scalable algorithm)

  Level: advanced

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`
@*/
PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A, PetscBool sc)
{
  PetscFunctionBegin;
  /* no-op for matrix types that do not provide the method */
  PetscTryMethod(A, "MatMPIAIJSetUseScalableIncreaseOverlap_C", (Mat, PetscBool), (A, sc));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Processes MPIAIJ-specific runtime options (currently only the scalable
   increase-overlap toggle). */
PetscErrorCode MatSetFromOptions_MPIAIJ(Mat A, PetscOptionItems PetscOptionsObject)
{
  PetscBool sc = PETSC_FALSE, flg;

  PetscFunctionBegin;
  PetscOptionsHeadBegin(PetscOptionsObject, "MPIAIJ options");
  /* default reflects the currently installed overlap algorithm */
  if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
  PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable", "Use a scalable algorithm to compute the overlap", "MatIncreaseOverlap", sc, &sc, &flg));
  if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, sc));
  PetscOptionsHeadEnd();
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Y = Y + a*I. Ensures the diagonal block has at least minimal preallocation
   before delegating to MatShift_Basic(). */
static PetscErrorCode MatShift_MPIAIJ(Mat Y, PetscScalar a)
{
  Mat_MPIAIJ *maij = (Mat_MPIAIJ *)Y->data;
  Mat_SeqAIJ *aij  = (Mat_SeqAIJ *)maij->A->data;

  PetscFunctionBegin;
  if (!Y->preallocated) {
    PetscCall(MatMPIAIJSetPreallocation(Y, 1, NULL, 0, NULL));
  } else if (!aij->nz) { /* It does not matter if diagonals of Y only partially lie in maij->A. We just need an estimated preallocation. */
    PetscInt nonew = aij->nonew; /* preserve the new-nonzero policy across the re-preallocation */
    PetscCall(MatSeqAIJSetPreallocation(maij->A, 1, NULL));
    aij->nonew = nonew;
  }
  PetscCall(MatShift_Basic(Y, a));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Reports whether any locally owned row is missing its diagonal entry; the local
   diagonal block does the work, then the row index is shifted to global numbering. */
static PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A, PetscBool *missing, PetscInt *d)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
  PetscCall(MatMissingDiagonal(a->A, missing, d));
  if (d) {
    PetscInt rstart;
    PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
    *d += rstart; /* convert local row index to global */
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Delegates variable-block-diagonal inversion to the local diagonal block. */
static PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A, PetscInt nblocks, const PetscInt *bsizes, PetscScalar *diag)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatInvertVariableBlockDiagonal(a->A, nblocks, bsizes, diag));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Removes stored zeros from both local blocks; `keep` optionally retains zero
   diagonal coefficients in the diagonal block. */
static PetscErrorCode MatEliminateZeros_MPIAIJ(Mat A, PetscBool keep)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatEliminateZeros_SeqAIJ(a->A, keep)); // possibly keep zero diagonal coefficients
  PetscCall(MatEliminateZeros_SeqAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Function table for MATMPIAIJ; slot numbers follow the _MatOps layout
   (see petsc/private/matimpl.h). NULL slots fall back to default/unsupported. */
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
                                       MatGetRow_MPIAIJ,
                                       MatRestoreRow_MPIAIJ,
                                       MatMult_MPIAIJ,
                                       /* 4*/ MatMultAdd_MPIAIJ,
                                       MatMultTranspose_MPIAIJ,
                                       MatMultTransposeAdd_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*10*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_MPIAIJ,
                                       MatTranspose_MPIAIJ,
                                       /*15*/ MatGetInfo_MPIAIJ,
                                       MatEqual_MPIAIJ,
                                       MatGetDiagonal_MPIAIJ,
                                       MatDiagonalScale_MPIAIJ,
                                       MatNorm_MPIAIJ,
                                       /*20*/ MatAssemblyBegin_MPIAIJ,
                                       MatAssemblyEnd_MPIAIJ,
                                       MatSetOption_MPIAIJ,
                                       MatZeroEntries_MPIAIJ,
                                       /*24*/ MatZeroRows_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*29*/ MatSetUp_MPI_Hash,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIAIJ,
                                       NULL,
                                       /*34*/ MatDuplicate_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*39*/ MatAXPY_MPIAIJ,
                                       MatCreateSubMatrices_MPIAIJ,
                                       MatIncreaseOverlap_MPIAIJ,
                                       MatGetValues_MPIAIJ,
                                       MatCopy_MPIAIJ,
                                       /*44*/ MatGetRowMax_MPIAIJ,
                                       MatScale_MPIAIJ,
                                       MatShift_MPIAIJ,
                                       MatDiagonalSet_MPIAIJ,
                                       MatZeroRowsColumns_MPIAIJ,
                                       /*49*/ MatSetRandom_MPIAIJ,
                                       MatGetRowIJ_MPIAIJ,
                                       MatRestoreRowIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*54*/ MatFDColoringCreate_MPIXAIJ,
                                       NULL,
                                       MatSetUnfactored_MPIAIJ,
                                       MatPermute_MPIAIJ,
                                       NULL,
                                       /*59*/ MatCreateSubMatrix_MPIAIJ,
                                       MatDestroy_MPIAIJ,
                                       MatView_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*64*/ NULL,
                                       MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*69*/ MatGetRowMaxAbs_MPIAIJ,
                                       MatGetRowMinAbs_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*75*/ MatFDColoringApply_AIJ,
                                       MatSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFindZeroDiagonals_MPIAIJ,
                                       /*80*/ NULL,
                                       NULL,
                                       NULL,
                                       /*83*/ MatLoad_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*89*/ NULL,
                                       NULL,
                                       MatMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatBindToCPU_MPIAIJ,
                                       /*99*/ MatProductSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIAIJ,
                                       NULL,
                                       /*104*/ MatSetValuesRow_MPIAIJ,
                                       MatRealPart_MPIAIJ,
                                       MatImaginaryPart_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*109*/ NULL,
                                       NULL,
                                       MatGetRowMin_MPIAIJ,
                                       NULL,
                                       MatMissingDiagonal_MPIAIJ,
                                       /*114*/ MatGetSeqNonzeroStructure_MPIAIJ,
                                       NULL,
                                       MatGetGhosts_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*119*/ MatMultDiagonalBlock_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatGetMultiProcBlock_MPIAIJ,
                                       /*124*/ MatFindNonzeroRows_MPIAIJ,
                                       MatGetColumnReductions_MPIAIJ,
                                       MatInvertBlockDiagonal_MPIAIJ,
                                       MatInvertVariableBlockDiagonal_MPIAIJ,
                                       MatCreateSubMatricesMPI_MPIAIJ,
                                       /*129*/ NULL,
                                       NULL,
                                       NULL,
                                       MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*139*/ MatSetBlockSizes_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFDColoringSetUp_MPIXAIJ,
                                       MatFindOffBlockDiagonalEntries_MPIAIJ,
                                       MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
                                       /*145*/ NULL,
                                       NULL,
                                       NULL,
                                       MatCreateGraph_Simple_AIJ,
                                       NULL,
                                       /*150*/ NULL,
                                       MatEliminateZeros_MPIAIJ,
                                       MatGetRowSumAbs_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*155*/ NULL,
                                       MatCopyHashToXAIJ_MPI_Hash};

/* Saves the numerical values of both local blocks so they can later be restored
   with MatRetrieveValues(). */
static PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatStoreValues(aij->A));
  PetscCall(MatStoreValues(aij->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Restores the numerical values previously saved by MatStoreValues_MPIAIJ(). */
static PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatRetrieveValues(aij->A));
  PetscCall(MatRetrieveValues(aij->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Implementation of MatMPIAIJSetPreallocation() for MATMPIAIJ: (re)creates the
   local diagonal block b->A (local rows x local columns) and off-diagonal block
   b->B (local rows x all columns when running in parallel), and preallocates both.
   Any previous column map, ghost array, and communication scatter are discarded. */
PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
  PetscMPIInt size;

  PetscFunctionBegin;
  if (B->hash_active) {
    /* leave hash-based MatSetValues mode: restore the cached op table */
    B->ops[0]      = b->cops;
    B->hash_active = PETSC_FALSE;
  }
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));

#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&b->colmap));
#else
  PetscCall(PetscFree(b->colmap));
#endif
  PetscCall(PetscFree(b->garray));
  PetscCall(VecDestroy(&b->lvec));
  PetscCall(VecScatterDestroy(&b->Mvctx));

  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));

  /* off-diagonal block: empty on a single rank since all columns are "diagonal" */
  MatSeqXAIJGetOptions_Private(b->B);
  PetscCall(MatDestroy(&b->B));
  PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
  PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
  PetscCall(MatSetBlockSizesFromMats(b->B, B, B));
  PetscCall(MatSetType(b->B, MATSEQAIJ));
  MatSeqXAIJRestoreOptions_Private(b->B);

  /* diagonal block: local rows x local columns */
  MatSeqXAIJGetOptions_Private(b->A);
  PetscCall(MatDestroy(&b->A));
  PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
  PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
  PetscCall(MatSetBlockSizesFromMats(b->A, B, B));
  PetscCall(MatSetType(b->A, MATSEQAIJ));
  MatSeqXAIJRestoreOptions_Private(b->A);

  PetscCall(MatSeqAIJSetPreallocation(b->A, d_nz, d_nnz));
  PetscCall(MatSeqAIJSetPreallocation(b->B, o_nz, o_nnz));
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Resets the preallocation of both local blocks so the matrix can be refilled
   from scratch. Collective decision: if any rank actually released memory, all
   ranks go through the disassemble path so communication data stays consistent. */
static PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
{
  Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
  PetscBool   ondiagreset, offdiagreset, memoryreset;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscCheck(B->insertmode == NOT_SET_VALUES, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot reset preallocation after setting some values but not yet calling MatAssemblyBegin()/MatAssemblyEnd()");
  if (B->num_ass == 0) PetscFunctionReturn(PETSC_SUCCESS); /* never assembled: nothing to reset */

  PetscCall(MatResetPreallocation_SeqAIJ_Private(b->A, &ondiagreset));
  PetscCall(MatResetPreallocation_SeqAIJ_Private(b->B, &offdiagreset));
  memoryreset = (PetscBool)(ondiagreset || offdiagreset);
  PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &memoryreset, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)B)));
  if (!memoryreset) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  PetscCheck(B->assembled || B->was_assembled, PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_WRONGSTATE, "Should not need to reset preallocation if the matrix was never assembled");
  PetscCall(MatDisAssemble_MPIAIJ(B, PETSC_TRUE));
  PetscCall(VecScatterDestroy(&b->Mvctx));

  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  /* Log that the state of this object has changed; this will help guarantee that preconditioners get re-setup */
  PetscCall(PetscObjectStateIncrease((PetscObject)B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Creates a new MATMPIAIJ with the same layout as matin, copying values according
   to cpvalues. Shares the layouts and the communication scatter by reference;
   deep-copies the column map and ghost array. */
PetscErrorCode MatDuplicate_MPIAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
{
  Mat         mat;
  Mat_MPIAIJ *a, *oldmat = (Mat_MPIAIJ *)matin->data;

  PetscFunctionBegin;
  *newmat = NULL;
  PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
  PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
  PetscCall(MatSetBlockSizesFromMats(mat, matin, matin));
  PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
  a = (Mat_MPIAIJ *)mat->data;

  mat->factortype = matin->factortype;
  mat->assembled  = matin->assembled;
  mat->insertmode = NOT_SET_VALUES;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = NULL; /* per-matrix MatGetRow() scratch; not copied */
  a->rowvalues    = NULL;
  a->getrowactive = PETSC_FALSE;

  PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
  PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));
  if (matin->hash_active) {
    PetscCall(MatSetUp(mat));
  } else {
    mat->preallocated = matin->preallocated;
    if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
      PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
#else
      PetscCall(PetscMalloc1(mat->cmap->N, &a->colmap));
      PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, mat->cmap->N));
#endif
    } else a->colmap = NULL;
    if (oldmat->garray) {
      PetscInt len;
      len = oldmat->B->cmap->n;
      PetscCall(PetscMalloc1(len + 1, &a->garray)); /* +1 so len == 0 still yields a valid pointer */
      if (len) PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
    } else a->garray = NULL;

    /* It may happen MatDuplicate is called with a non-assembled matrix
       In fact, MatDuplicate only requires the matrix to be preallocated
       This may happen inside a DMCreateMatrix_Shell */
    if (oldmat->lvec) PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
    if (oldmat->Mvctx) {
      /* scatter is immutable for a fixed layout: share it by reference */
      a->Mvctx = oldmat->Mvctx;
      PetscCall(PetscObjectReference((PetscObject)oldmat->Mvctx));
    }
    PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
    PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
  }
  PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
  *newmat = mat;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Loads a MATMPIAIJ from a viewer; dispatches to the binary or HDF5 reader
   depending on the viewer type. */
PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
{
  PetscBool isbinary, ishdf5;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
  PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
  /* force binary viewer to load .info file if it has not yet done so */
  PetscCall(PetscViewerSetUp(viewer));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
  if (isbinary) {
    PetscCall(MatLoad_MPIAIJ_Binary(newMat, viewer));
  } else if (ishdf5) {
#if defined(PETSC_HAVE_HDF5)
    PetscCall(MatLoad_AIJ_HDF5(newMat, viewer));
#else
    SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
#endif
  } else {
    SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Reads a matrix in PETSc binary format: header (classid, M, N, nz), per-row
   lengths, column indices, then values, and assembles via
   MatMPIAIJSetPreallocationCSR(). Collective over the viewer's communicator. */
PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
{
  PetscInt     header[4], M, N, m, nz, rows, cols, sum, i;
  PetscInt    *rowidxs, *colidxs;
  PetscScalar *matvals;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  /* read in matrix header */
  PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
  PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
  M  = header[1];
  N  = header[2];
  nz = header[3];
  PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
  PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
  PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIAIJ");

  /* set block sizes from the viewer's .info file */
  PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
  /* set global sizes if not set already */
  if (mat->rmap->N < 0) mat->rmap->N = M;
  if (mat->cmap->N < 0) mat->cmap->N = N;
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));

  /* check if the matrix sizes are correct */
  PetscCall(MatGetSize(mat, &rows, &cols));
  PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);

  /* read in row lengths and build row indices (prefix sum -> local CSR row pointer) */
  PetscCall(MatGetLocalSize(mat, &m, NULL));
  PetscCall(PetscMalloc1(m + 1, &rowidxs));
  PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT));
  rowidxs[0] = 0;
  for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
  if (nz != PETSC_INT_MAX) { /* PETSC_INT_MAX marks an unknown nonzero count in the header */
    PetscCallMPI(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer)));
    PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);
  }

  /* read in column indices and matrix values */
  PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals));
  PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
  PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
  /* store matrix indices and values */
  PetscCall(MatMPIAIJSetPreallocationCSR(mat, rowidxs, colidxs, matvals));
  PetscCall(PetscFree(rowidxs));
  PetscCall(PetscFree2(colidxs, matvals));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Not scalable because of ISAllGather() unless getting all columns. */
static PetscErrorCode ISGetSeqIS_Private(Mat mat, IS iscol, IS *isseq)
{
  IS          iscol_local;
  PetscBool   isstride;
  PetscMPIInt gisstride = 0;

  PetscFunctionBegin;
  /* check if we are grabbing all columns*/
  PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &isstride));

  if (isstride) {
    PetscInt start, len, mstart, mlen;
    PetscCall(ISStrideGetInfo(iscol, &start, NULL));
    PetscCall(ISGetLocalSize(iscol, &len));
    PetscCall(MatGetOwnershipRangeColumn(mat, &mstart, &mlen));
    if (mstart == start && mlen - mstart == len) gisstride = 1;
  }

  /* all ranks must agree that the IS covers exactly their local column range */
  PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &gisstride, 1, MPI_INT, MPI_MIN, PetscObjectComm((PetscObject)mat)));
  if (gisstride) {
    PetscInt N;
    PetscCall(MatGetSize(mat, NULL, &N));
    PetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &iscol_local));
    PetscCall(ISSetIdentity(iscol_local));
    PetscCall(PetscInfo(mat, "Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n"));
  } else {
    PetscInt cbs;
    PetscCall(ISGetBlockSize(iscol, &cbs));
    PetscCall(ISAllGather(iscol, &iscol_local));
    PetscCall(ISSetBlockSize(iscol_local, cbs));
  }

  *isseq = iscol_local;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
 (see MatCreateSubMatrix_MPIAIJ_nonscalable)

 Input Parameters:
+ mat - matrix
. isrow - parallel row index set; its local indices are a subset of local columns of `mat`,
          i.e., mat->rstart <= isrow[i] < mat->rend
- iscol - parallel column index set; its local indices are a subset of local columns of `mat`,
          i.e., mat->cstart <= iscol[i] < mat->cend

 Output Parameters:
+ isrow_d - sequential row index set for retrieving mat->A
. iscol_d - sequential column index set for retrieving mat->A
. iscol_o - sequential column index set for retrieving mat->B
- garray - column map; garray[i] indicates global location of iscol_o[i] in `iscol`
 */
static PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat, IS isrow, IS iscol, IS *isrow_d, IS *iscol_d, IS *iscol_o, PetscInt *garray[])
{
  Vec             x, cmap;
  const PetscInt *is_idx;
  PetscScalar    *xarray, *cmaparray;
  PetscInt        ncols, isstart, *idx, m, rstart, *cmap1, count;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ *)mat->data;
  Mat             B = a->B;
  Vec             lvec = a->lvec, lcmap;
  PetscInt        i, cstart, cend, Bn = B->cmap->N;
  MPI_Comm        comm;
  VecScatter      Mvctx = a->Mvctx;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  PetscCall(ISGetLocalSize(iscol, &ncols));

  /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
  PetscCall(MatCreateVecs(mat, &x, NULL));
  PetscCall(VecSet(x, -1.0));
  PetscCall(VecDuplicate(x, &cmap));
  PetscCall(VecSet(cmap, -1.0));

  /* Get start indices */
  PetscCallMPI(MPI_Scan(&ncols, &isstart, 1, MPIU_INT, MPI_SUM, comm));
  isstart -= ncols; /* exclusive prefix: global offset of this rank's iscol entries */
  PetscCall(MatGetOwnershipRangeColumn(mat, &cstart, &cend));

  PetscCall(ISGetIndices(iscol, &is_idx));
  PetscCall(VecGetArray(x, &xarray));
  PetscCall(VecGetArray(cmap, &cmaparray));
  PetscCall(PetscMalloc1(ncols, &idx));
  for (i = 0; i < ncols; i++) {
    xarray[is_idx[i] - cstart]    = (PetscScalar)is_idx[i];
    cmaparray[is_idx[i] - cstart] = i + isstart;        /* global index of iscol[i] */
    idx[i]                        = is_idx[i] - cstart; /* local index of iscol[i]  */
  }
  PetscCall(VecRestoreArray(x, &xarray));
  PetscCall(VecRestoreArray(cmap, &cmaparray));
  PetscCall(ISRestoreIndices(iscol, &is_idx));

  /* Get iscol_d */
  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, iscol_d));
  PetscCall(ISGetBlockSize(iscol, &i));
  PetscCall(ISSetBlockSize(*iscol_d, i));

  /* Get isrow_d */
  PetscCall(ISGetLocalSize(isrow, &m));
  rstart = mat->rmap->rstart;
  PetscCall(PetscMalloc1(m, &idx));
  PetscCall(ISGetIndices(isrow, &is_idx));
  for (i = 0; i < m; i++) idx[i] = is_idx[i] - rstart; /* global -> local row index */
  PetscCall(ISRestoreIndices(isrow, &is_idx));

  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idx, PETSC_OWN_POINTER, isrow_d));
  PetscCall(ISGetBlockSize(isrow, &i));
  PetscCall(ISSetBlockSize(*isrow_d, i));

  /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
  PetscCall(VecScatterBegin(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));

  PetscCall(VecDuplicate(lvec, &lcmap));

  PetscCall(VecScatterBegin(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));

  /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
  /* off-process column indices */
  count = 0;
  PetscCall(PetscMalloc1(Bn, &idx));
  PetscCall(PetscMalloc1(Bn, &cmap1));

  PetscCall(VecGetArray(lvec, &xarray));
  PetscCall(VecGetArray(lcmap, &cmaparray));
  for (i = 0; i < Bn; i++) {
    /* entries still at the -1 padding were not selected by iscol */
    if (PetscRealPart(xarray[i]) > -1.0) {
      idx[count]   = i;                                     /* local column index in off-diagonal part B */
      cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */
      count++;
    }
  }
  PetscCall(VecRestoreArray(lvec, &xarray));
  PetscCall(VecRestoreArray(lcmap, &cmaparray));

  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_COPY_VALUES, iscol_o));
  /* cannot ensure iscol_o has same blocksize as iscol! */

  PetscCall(PetscFree(idx));
  *garray = cmap1; /* ownership of cmap1 transfers to the caller */

  PetscCall(VecDestroy(&x));
  PetscCall(VecDestroy(&cmap));
  PetscCall(VecDestroy(&lcmap));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *submat)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data, *asub;
  Mat         M = NULL;
  MPI_Comm    comm;
  IS          iscol_d, isrow_d, iscol_o;
  Mat         Asub = NULL, Bsub = NULL;
  PetscInt    n, count, M_size, N_size;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));

  if (call == MAT_REUSE_MATRIX) {
    /* Retrieve isrow_d, iscol_d and iscol_o from submat */
    PetscCall(PetscObjectQuery((PetscObject)*submat, "isrow_d", (PetscObject *)&isrow_d));
    PetscCheck(isrow_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "isrow_d passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_d", (PetscObject *)&iscol_d));
    PetscCheck(iscol_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_d passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_o", (PetscObject *)&iscol_o));
    PetscCheck(iscol_o, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_o passed in was not used before, cannot reuse");

    /* Update diagonal and off-diagonal portions of submat */
    asub = (Mat_MPIAIJ *)(*submat)->data;
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->A));
    PetscCall(ISGetLocalSize(iscol_o, &n));
    if (n) PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->B));
    PetscCall(MatAssemblyBegin(*submat, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(*submat, MAT_FINAL_ASSEMBLY));

  } else { /* call == MAT_INITIAL_MATRIX) */
    PetscInt *garray, *garray_compact;
    PetscInt  BsubN;

    /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
    PetscCall(ISGetSeqIS_SameColDist_Private(mat, isrow, iscol, &isrow_d, &iscol_d, &iscol_o, &garray));

    /* Create local submatrices Asub and Bsub */
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Asub));
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Bsub));

    // Compact garray so its not of size Bn
    PetscCall(ISGetSize(iscol_o, &count));
    PetscCall(PetscMalloc1(count, &garray_compact));
    PetscCall(PetscArraycpy(garray_compact, garray, count));

    /* Create submatrix M */
    PetscCall(ISGetSize(isrow, &M_size));
    PetscCall(ISGetSize(iscol, &N_size));
    PetscCall(MatCreateMPIAIJWithSeqAIJ(comm, M_size, N_size, Asub, Bsub, garray_compact, &M));

    /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
    asub = (Mat_MPIAIJ *)M->data;

    PetscCall(ISGetLocalSize(iscol_o, &BsubN));
    n = asub->B->cmap->N;
    if (BsubN > n) {
      /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
      const PetscInt *idx;
      PetscInt        i, j, *idx_new, *subgarray = asub->garray;
      PetscCall(PetscInfo(M, "submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n", n, BsubN));

      PetscCall(PetscMalloc1(n, &idx_new));
      j = 0;
      PetscCall(ISGetIndices(iscol_o, &idx));
      for (i = 0; i < n; i++) {
        if (j >= BsubN) break;
        while (subgarray[i] > garray[j]) j++; /* skip columns of iscol_o that ended up empty in Bsub */

        if (subgarray[i] == garray[j]) {
          idx_new[i] = idx[j++];
        } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT, i, subgarray[i], j, garray[j]);
      }
      PetscCall(ISRestoreIndices(iscol_o, &idx));

      PetscCall(ISDestroy(&iscol_o));
      PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx_new, PETSC_OWN_POINTER, &iscol_o));

    } else if (BsubN < n) {
      SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")", BsubN, asub->B->cmap->N);
    }

    PetscCall(PetscFree(garray));
    *submat = M;

    /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
    PetscCall(PetscObjectCompose((PetscObject)M, "isrow_d", (PetscObject)isrow_d));
    PetscCall(ISDestroy(&isrow_d)); /* compose took a reference; drop ours */

    PetscCall(PetscObjectCompose((PetscObject)M, "iscol_d", (PetscObject)iscol_d));
    PetscCall(ISDestroy(&iscol_d));

    PetscCall(PetscObjectCompose((PetscObject)M, "iscol_o", (PetscObject)iscol_o));
    PetscCall(ISDestroy(&iscol_o));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* General submatrix extraction for MATMPIAIJ; probes the cached index sets on a
   reused submatrix to pick the cheapest extraction path. */
PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
{
  IS        iscol_local = NULL, isrow_d;
  PetscInt  csize;
  PetscInt  n, i, j, start, end;
  PetscBool sameRowDist = PETSC_FALSE, sameDist[2], tsameDist[2];
  MPI_Comm  comm;

  PetscFunctionBegin;
  /* If isrow has same processor distribution as mat,
     call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "isrow_d", (PetscObject *)&isrow_d));
    if (isrow_d) {
      sameRowDist  = PETSC_TRUE;
      tsameDist[1] = PETSC_TRUE; /* sameColDist */
    } else {
      PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_local));
      if (iscol_local) {
        sameRowDist  = PETSC_TRUE;
        tsameDist[1] = PETSC_FALSE; /* !sameColDist */
      }
    }
  }
else { 3395 /* Check if isrow has same processor distribution as mat */ 3396 sameDist[0] = PETSC_FALSE; 3397 PetscCall(ISGetLocalSize(isrow, &n)); 3398 if (!n) { 3399 sameDist[0] = PETSC_TRUE; 3400 } else { 3401 PetscCall(ISGetMinMax(isrow, &i, &j)); 3402 PetscCall(MatGetOwnershipRange(mat, &start, &end)); 3403 if (i >= start && j < end) sameDist[0] = PETSC_TRUE; 3404 } 3405 3406 /* Check if iscol has same processor distribution as mat */ 3407 sameDist[1] = PETSC_FALSE; 3408 PetscCall(ISGetLocalSize(iscol, &n)); 3409 if (!n) { 3410 sameDist[1] = PETSC_TRUE; 3411 } else { 3412 PetscCall(ISGetMinMax(iscol, &i, &j)); 3413 PetscCall(MatGetOwnershipRangeColumn(mat, &start, &end)); 3414 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3415 } 3416 3417 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3418 PetscCallMPI(MPIU_Allreduce(&sameDist, &tsameDist, 2, MPIU_BOOL, MPI_LAND, comm)); 3419 sameRowDist = tsameDist[0]; 3420 } 3421 3422 if (sameRowDist) { 3423 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3424 /* isrow and iscol have same processor distribution as mat */ 3425 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat, isrow, iscol, call, newmat)); 3426 PetscFunctionReturn(PETSC_SUCCESS); 3427 } else { /* sameRowDist */ 3428 /* isrow has same processor distribution as mat */ 3429 if (call == MAT_INITIAL_MATRIX) { 3430 PetscBool sorted; 3431 PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local)); 3432 PetscCall(ISGetLocalSize(iscol_local, &n)); /* local size of iscol_local = global columns of newmat */ 3433 PetscCall(ISGetSize(iscol, &i)); 3434 PetscCheck(n == i, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT, n, i); 3435 3436 PetscCall(ISSorted(iscol_local, &sorted)); 3437 if (sorted) { 3438 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3439 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, iscol_local, MAT_INITIAL_MATRIX, 
newmat)); 3440 PetscFunctionReturn(PETSC_SUCCESS); 3441 } 3442 } else { /* call == MAT_REUSE_MATRIX */ 3443 IS iscol_sub; 3444 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub)); 3445 if (iscol_sub) { 3446 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, NULL, call, newmat)); 3447 PetscFunctionReturn(PETSC_SUCCESS); 3448 } 3449 } 3450 } 3451 } 3452 3453 /* General case: iscol -> iscol_local which has global size of iscol */ 3454 if (call == MAT_REUSE_MATRIX) { 3455 PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local)); 3456 PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 3457 } else { 3458 if (!iscol_local) PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local)); 3459 } 3460 3461 PetscCall(ISGetLocalSize(iscol, &csize)); 3462 PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat, isrow, iscol_local, csize, call, newmat)); 3463 3464 if (call == MAT_INITIAL_MATRIX) { 3465 PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local)); 3466 PetscCall(ISDestroy(&iscol_local)); 3467 } 3468 PetscFunctionReturn(PETSC_SUCCESS); 3469 } 3470 3471 /*@C 3472 MatCreateMPIAIJWithSeqAIJ - creates a `MATMPIAIJ` matrix using `MATSEQAIJ` matrices that contain the "diagonal" 3473 and "off-diagonal" part of the matrix in CSR format. 3474 3475 Collective 3476 3477 Input Parameters: 3478 + comm - MPI communicator 3479 . M - the global row size 3480 . N - the global column size 3481 . A - "diagonal" portion of matrix 3482 . B - if garray is `NULL`, B should be the offdiag matrix using global col ids and of size N - if garray is not `NULL`, B should be the offdiag matrix using local col ids and of size garray 3483 - garray - either `NULL` or the global index of `B` columns. If not `NULL`, it should be allocated by `PetscMalloc1()` and will be owned by `mat` thereafter. 
3484 3485 Output Parameter: 3486 . mat - the matrix, with input `A` as its local diagonal matrix 3487 3488 Level: advanced 3489 3490 Notes: 3491 See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3492 3493 `A` and `B` becomes part of output mat. The user cannot use `A` and `B` anymore. 3494 3495 If `garray` is `NULL`, `B` will be compacted to use local indices. In this sense, `B`'s sparsity pattern (nonzerostate) will be changed. If `B` is a device matrix, we need to somehow also update 3496 `B`'s copy on device. We do so by increasing `B`'s nonzerostate. In use of `B` on device, device matrix types should detect this change (ref. internal routines `MatSeqAIJCUSPARSECopyToGPU()` or 3497 `MatAssemblyEnd_SeqAIJKokkos()`) and will just destroy and then recreate the device copy of `B`. It is not optimal, but is easy to implement and less hacky. To avoid this overhead, try to compute `garray` 3498 yourself, see algorithms in the private function `MatSetUpMultiply_MPIAIJ()`. 3499 3500 The `NULL`-ness of `garray` doesn't need to be collective, in other words, `garray` can be `NULL` on some processes while not on others. 
3501 3502 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateMPIAIJWithSplitArrays()` 3503 @*/ 3504 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm, PetscInt M, PetscInt N, Mat A, Mat B, PetscInt *garray, Mat *mat) 3505 { 3506 PetscInt m, n; 3507 MatType mpi_mat_type; 3508 3509 PetscFunctionBegin; 3510 PetscCall(MatCreate(comm, mat)); 3511 PetscCall(MatGetSize(A, &m, &n)); 3512 PetscCheck(m == B->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Am %" PetscInt_FMT " != Bm %" PetscInt_FMT, m, B->rmap->N); 3513 PetscCheck(A->rmap->bs == B->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT, A->rmap->bs, B->rmap->bs); 3514 3515 PetscCall(MatSetSizes(*mat, m, n, M, N)); 3516 /* Determine the type of MPI matrix that should be created from the type of matrix A, which holds the "diagonal" portion. */ 3517 PetscCall(MatGetMPIMatType_Private(A, &mpi_mat_type)); 3518 PetscCall(MatSetType(*mat, mpi_mat_type)); 3519 3520 PetscCall(MatSetBlockSizes(*mat, A->rmap->bs, A->cmap->bs)); 3521 3522 PetscCall(PetscLayoutSetUp((*mat)->rmap)); 3523 PetscCall(PetscLayoutSetUp((*mat)->cmap)); 3524 if (!garray) { 3525 const PetscScalar *ba; 3526 3527 B->nonzerostate++; 3528 PetscCall(MatSeqAIJGetArrayRead(B, &ba)); /* Since we will destroy B's device copy, we need to make sure the host copy is up to date */ 3529 PetscCall(MatSeqAIJRestoreArrayRead(B, &ba)); 3530 } 3531 PetscCall(MatSetMPIAIJWithSplitSeqAIJ(*mat, A, B, garray)); 3532 PetscFunctionReturn(PETSC_SUCCESS); 3533 } 3534 3535 /* 3536 MatSetMPIAIJWithSplitSeqAIJ - Set the diag and offdiag matrices of a `MATMPIAIJ` matrix. 3537 It is similar to `MatCreateMPIAIJWithSplitArrays()`. 
This routine allows passing in 3538 B with local indices and the correct size, along with the accompanying 3539 garray, hence skipping compactification 3540 3541 Collective 3542 3543 Input Parameters: 3544 + mat - the MATMPIAIJ matrix, which should have its type and layout set, but should not have its diag, offdiag matrices set 3545 . A - the diag matrix using local col ids 3546 . B - if garray is `NULL`, B should be the offdiag matrix using global col ids and of size N - if garray is not `NULL`, B should be the offdiag matrix using local col ids and of size garray 3547 - garray - either `NULL` or the global index of `B` columns 3548 3549 Output Parameter: 3550 . mat - the updated `MATMPIAIJ` matrix 3551 3552 Level: advanced 3553 3554 Notes: 3555 See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3556 3557 `A` and `B` become part of output mat. The user cannot use `A` and `B` anymore. 3558 3559 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateMPIAIJWithSplitArrays()` 3560 */ 3561 PETSC_INTERN PetscErrorCode MatSetMPIAIJWithSplitSeqAIJ(Mat mat, Mat A, Mat B, PetscInt *garray) 3562 { 3563 PetscFunctionBegin; 3564 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)mat->data; 3565 PetscInt m, n, M, N, Am, An, Bm, Bn; 3566 3567 PetscCall(MatGetSize(mat, &M, &N)); 3568 PetscCall(MatGetLocalSize(mat, &m, &n)); 3569 PetscCall(MatGetLocalSize(A, &Am, &An)); 3570 PetscCall(MatGetLocalSize(B, &Bm, &Bn)); 3571 3572 PetscCheck(m == Am && m == Bm, PETSC_COMM_SELF, PETSC_ERR_PLIB, "local number of rows do not match"); 3573 PetscCheck(n == An, PETSC_COMM_SELF, PETSC_ERR_PLIB, "local number of columns do not match"); 3574 PetscCheck(!mpiaij->A && !mpiaij->B, PETSC_COMM_SELF, PETSC_ERR_PLIB, "A, B of the MPIAIJ matrix are not empty"); 3575 mpiaij->A = A; 3576 mpiaij->B = B; 3577 mpiaij->garray = garray; 3578 3579 mat->preallocated = PETSC_TRUE; 3580 mat->nooffprocentries = PETSC_TRUE; /* See MatAssemblyBegin_MPIAIJ. 
In effect, making MatAssemblyBegin a nop */ 3581 3582 PetscCall(MatSetOption(mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 3583 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 3584 /* MatAssemblyEnd is critical here. It sets mat->offloadmask according to A and B's, and 3585 also gets mpiaij->B compacted (if garray is NULL), with its col ids and size reduced 3586 */ 3587 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 3588 PetscCall(MatSetOption(mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE)); 3589 PetscCall(MatSetOption(mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 3590 PetscFunctionReturn(PETSC_SUCCESS); 3591 } 3592 3593 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat, PetscInt, const IS[], const IS[], MatReuse, PetscBool, Mat *); 3594 3595 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat, IS isrow, IS iscol, IS iscol_local, MatReuse call, Mat *newmat) 3596 { 3597 PetscInt i, m, n, rstart, row, rend, nz, j, bs, cbs; 3598 PetscInt *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal; 3599 Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data; 3600 Mat M, Msub, B = a->B; 3601 MatScalar *aa; 3602 Mat_SeqAIJ *aij; 3603 PetscInt *garray = a->garray, *colsub, Ncols; 3604 PetscInt count, Bn = B->cmap->N, cstart = mat->cmap->rstart, cend = mat->cmap->rend; 3605 IS iscol_sub, iscmap; 3606 const PetscInt *is_idx, *cmap; 3607 PetscBool allcolumns = PETSC_FALSE; 3608 MPI_Comm comm; 3609 3610 PetscFunctionBegin; 3611 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3612 if (call == MAT_REUSE_MATRIX) { 3613 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub)); 3614 PetscCheck(iscol_sub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "SubIScol passed in was not used before, cannot reuse"); 3615 PetscCall(ISGetLocalSize(iscol_sub, &count)); 3616 3617 PetscCall(PetscObjectQuery((PetscObject)*newmat, "Subcmap", (PetscObject *)&iscmap)); 3618 PetscCheck(iscmap, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Subcmap 
passed in was not used before, cannot reuse"));

    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Msub));
    PetscCheck(Msub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");

    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_REUSE_MATRIX, PETSC_FALSE, &Msub));
  } else { /* call == MAT_INITIAL_MATRIX) */
    PetscBool flg;

    PetscCall(ISGetLocalSize(iscol, &n));
    PetscCall(ISGetSize(iscol, &Ncols));

    /* (1) iscol -> nonscalable iscol_local */
    /* Check for special case: each processor gets entire matrix columns */
    PetscCall(ISIdentity(iscol_local, &flg));
    if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
    PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (allcolumns) {
      iscol_sub = iscol_local;
      PetscCall(PetscObjectReference((PetscObject)iscol_local));
      PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &iscmap));
    } else {
      /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
      PetscInt *idx, *cmap1, k;
      PetscCall(PetscMalloc1(Ncols, &idx));
      PetscCall(PetscMalloc1(Ncols, &cmap1));
      PetscCall(ISGetIndices(iscol_local, &is_idx));
      count = 0;
      k     = 0; /* merge pointer into the sorted garray of off-diagonal columns */
      for (i = 0; i < Ncols; i++) {
        j = is_idx[i];
        if (j >= cstart && j < cend) {
          /* diagonal part of mat */
          idx[count]     = j;
          cmap1[count++] = i; /* column index in submat */
        } else if (Bn) {
          /* off-diagonal part of mat: keep j only if this process actually stores that column (j is in garray) */
          if (j == garray[k]) {
            idx[count]     = j;
            cmap1[count++] = i; /* column index in submat */
          } else if (j > garray[k]) {
            while (j > garray[k] && k < Bn - 1) k++;
            if (j == garray[k]) {
              idx[count]     = j;
              cmap1[count++] = i; /* column index in submat */
            }
          }
        }
      }
      PetscCall(ISRestoreIndices(iscol_local, &is_idx));

      PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_OWN_POINTER, &iscol_sub));
      PetscCall(ISGetBlockSize(iscol, &cbs));
      PetscCall(ISSetBlockSize(iscol_sub, cbs));

      PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local), count, cmap1, PETSC_OWN_POINTER, &iscmap));
    }

    /* (3) Create sequential Msub */
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_INITIAL_MATRIX, allcolumns, &Msub));
  }

  PetscCall(ISGetLocalSize(iscol_sub, &count));
  aij = (Mat_SeqAIJ *)Msub->data;
  ii  = aij->i;
  PetscCall(ISGetIndices(iscmap, &cmap));

  /*
      m - number of local rows
      Ncols - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetSize(Msub, &m, NULL));

  if (call == MAT_INITIAL_MATRIX) {
    /* (4) Create parallel newmat */
    PetscMPIInt rank, size;
    PetscInt    csize;

    PetscCallMPI(MPI_Comm_size(comm, &size));
    PetscCallMPI(MPI_Comm_rank(comm, &rank));

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    PetscCall(ISGetLocalSize(iscol, &csize));
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow, &mglobal));
      if (mglobal == Ncols) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = Ncols / size + ((Ncols % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    /* prefix-sum the local column counts to get this process's column ownership range */
    PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
    rstart = rend - nlocal;
    PetscCheck(rank != size - 1 || rend == Ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, Ncols);

    /* next, compute all the lengths */
    jj = aij->j;
    PetscCall(PetscMalloc1(2 * m + 1, &dlens));
    olens = dlens + m; /* olens shares the allocation with dlens */
    for (i = 0; i < m; i++) {
      jend = ii[i + 1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j = 0; j < jend; j++) {
        if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }

    PetscCall(ISGetBlockSize(isrow, &bs));
    PetscCall(ISGetBlockSize(iscol, &cbs));

    PetscCall(MatCreate(comm, &M));
    PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, Ncols));
    PetscCall(MatSetBlockSizes(M, bs, cbs));
    PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
    PetscCall(PetscFree(dlens));
  } else { /* call == MAT_REUSE_MATRIX */
    M = *newmat;
    PetscCall(MatGetLocalSize(M, &i, NULL));
    PetscCheck(i == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
        The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
        rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }

  /* (5) Set values of Msub to *newmat */
  PetscCall(PetscMalloc1(count, &colsub));
  PetscCall(MatGetOwnershipRange(M, &rstart, NULL));

  jj = aij->j;
  PetscCall(MatSeqAIJGetArrayRead(Msub, (const PetscScalar **)&aa));
  for (i = 0; i < m; i++) {
    row = rstart + i;
    nz  = ii[i + 1] - ii[i];
    for (j = 0; j < nz; j++) colsub[j] = cmap[jj[j]]; /* map Msub's local column ids to submat's global ids */
    PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, colsub, aa, INSERT_VALUES));
    jj += nz;
    aa += nz;
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Msub, (const PetscScalar **)&aa));
  PetscCall(ISRestoreIndices(iscmap, &cmap));

  PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));

  PetscCall(PetscFree(colsub));

  /* save Msub, iscol_sub and iscmap used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    *newmat = M;
    PetscCall(PetscObjectCompose((PetscObject)*newmat, "SubMatrix", (PetscObject)Msub));
    PetscCall(MatDestroy(&Msub));

    PetscCall(PetscObjectCompose((PetscObject)*newmat, "SubIScol", (PetscObject)iscol_sub));
    PetscCall(ISDestroy(&iscol_sub));

    PetscCall(PetscObjectCompose((PetscObject)*newmat, "Subcmap", (PetscObject)iscmap));
    PetscCall(ISDestroy(&iscmap));

    if (iscol_local) {
      PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
      PetscCall(ISDestroy(&iscol_local));
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
    Not great since it makes two copies of the submatrix, first an SeqAIJ
  in local and then by concatenating the local matrices the end result.
  Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()

    This requires a sequential iscol with all indices.
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat)
{
  PetscMPIInt rank, size;
  PetscInt    i, m, n, rstart, row, rend, nz, *cwork, j, bs, cbs;
  PetscInt   *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
  Mat         M, Mreuse;
  MatScalar  *aa, *vwork;
  MPI_Comm    comm;
  Mat_SeqAIJ *aij;
  PetscBool   colflag, allcolumns = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));

  /* Check for special case: each processor gets entire matrix columns */
  PetscCall(ISIdentity(iscol, &colflag));
  PetscCall(ISGetLocalSize(iscol, &n));
  if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
  PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));

  if (call == MAT_REUSE_MATRIX) {
    /* the sequential submatrix was composed onto *newmat by a previous MAT_INITIAL_MATRIX call */
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse));
    PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_REUSE_MATRIX, allcolumns, &Mreuse));
  } else {
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_INITIAL_MATRIX, allcolumns, &Mreuse));
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetSize(Mreuse, &m, &n));
  PetscCall(MatGetBlockSizes(Mreuse, &bs, &cbs));
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ *)Mreuse->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow, &mglobal));
      if (mglobal == n) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = n / size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    /* prefix-sum the local column counts to get this process's column ownership range */
    PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
    rstart = rend - nlocal;
    PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n);

    /* next, compute all the lengths */
    PetscCall(PetscMalloc1(2 * m + 1, &dlens));
    olens = dlens + m; /* olens shares the allocation with dlens */
    for (i = 0; i < m; i++) {
      jend = ii[i + 1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j = 0; j < jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    PetscCall(MatCreate(comm, &M));
    PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, n));
    PetscCall(MatSetBlockSizes(M, bs, cbs));
    PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
    PetscCall(PetscFree(dlens));
  } else {
    PetscInt ml, nl;

    M = *newmat;
    PetscCall(MatGetLocalSize(M, &ml, &nl));
    PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
        The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
        rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  PetscCall(MatGetOwnershipRange(M, &rstart, &rend));
  aij = (Mat_SeqAIJ *)Mreuse->data;
  ii  = aij->i;
  jj  = aij->j;

  /* trigger copy to CPU if needed */
  PetscCall(MatSeqAIJGetArrayRead(Mreuse, (const PetscScalar **)&aa));
  for (i = 0; i < m; i++) {
    row = rstart + i;
    nz  = ii[i + 1] - ii[i];
    cwork = jj;
    jj    = PetscSafePointerPlusOffset(jj, nz);
    vwork = aa;
    aa    = PetscSafePointerPlusOffset(aa, nz);
    PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES));
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Mreuse, (const PetscScalar **)&aa));

  PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse));
    PetscCall(MatDestroy(&Mreuse));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Preallocate and fill B from local CSR arrays (Ii, J, v); implementation behind
   MatMPIAIJSetPreallocationCSR() for the MPIAIJ type */
static PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[])
{
  PetscInt        m, cstart, cend, j, nnz, i, d, *ld;
  PetscInt       *d_nnz, *o_nnz, nnz_max = 0, rstart, ii, irstart;
  const PetscInt *JJ;
  PetscBool       nooffprocentries;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)B->data;

  PetscFunctionBegin;
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  m       = B->rmap->n;
  cstart  = B->cmap->rstart;
  cend    = B->cmap->rend;
  rstart  = B->rmap->rstart;
  irstart = Ii[0]; /* Ii may start at a nonzero offset; all J/v indexing is relative to it */

  PetscCall(PetscCalloc2(m, &d_nnz, m, &o_nnz));

  if (PetscDefined(USE_DEBUG)) {
    /* validate the CSR input (monotone row offsets, in-range sorted column ids) in debug builds only */
    for (i = 0; i < m; i++) {
      nnz = Ii[i + 1] - Ii[i];
      JJ  = PetscSafePointerPlusOffset(J, Ii[i] - irstart);
      PetscCheck(nnz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns", i, nnz);
      PetscCheck(!nnz || !(JJ[0] < 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT, i, JJ[0]);
      PetscCheck(!nnz || !(JJ[nnz - 1] >= B->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")", i, JJ[nnz - 1], B->cmap->N);
    }
  }

  /* split each row's count into diagonal-block and off-diagonal-block portions for preallocation */
  for (i = 0; i < m; i++) {
    nnz     = Ii[i + 1] - Ii[i];
    JJ      = PetscSafePointerPlusOffset(J, Ii[i] - irstart);
    nnz_max = PetscMax(nnz_max, nnz);
    d       = 0;
    for (j = 0; j < nnz; j++) {
      if (cstart <= JJ[j] && JJ[j] < cend) d++;
    }
    d_nnz[i] = d;
    o_nnz[i] = nnz - d;
  }
  PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
  PetscCall(PetscFree2(d_nnz, o_nnz));

  for (i = 0; i < m; i++) {
    ii = i + rstart;
    PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, Ii[i + 1] - Ii[i], PetscSafePointerPlusOffset(J, Ii[i] - irstart), PetscSafePointerPlusOffset(v, Ii[i] - irstart), INSERT_VALUES));
  }
  nooffprocentries    = B->nooffprocentries;
  B->nooffprocentries = PETSC_TRUE; /* all entries are local by construction; skip the off-process stash */
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  B->nooffprocentries = nooffprocentries;

  /* count number of entries below block diagonal */
  PetscCall(PetscFree(Aij->ld));
  PetscCall(PetscCalloc1(m, &ld));
  Aij->ld = ld;
  for (i = 0; i < m; i++) {
    nnz = Ii[i + 1] - Ii[i];
    j   = 0;
    /* NOTE(review): J[j] is read before the `if (J)` guard below; this looks safe only because
       nnz must be 0 on every row whenever J is NULL — confirm that invariant holds for callers */
    while (j < nnz && J[j] < cstart) j++;
    ld[i] = j;
    if (J) J += nnz;
  }

  PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in
`MATAIJ` format
  (the default parallel PETSc format).

  Collective

  Input Parameters:
+ B - the matrix
. i - the indices into `j` for the start of each local row (indices start with zero)
. j - the column indices for each local row (indices start with zero)
- v - optional values in the matrix

  Level: developer

  Notes:
  The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc;
  thus you CANNOT change the matrix entries by changing the values of `v` after you have
  called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.

  The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array.

  A convenience routine for this functionality is `MatCreateMPIAIJWithArrays()`.

  You can update the matrix with new numerical values using `MatUpdateMPIAIJWithArrays()` after this call if the column indices in `j` are sorted.

  If you do **not** use `MatUpdateMPIAIJWithArrays()`, the column indices in `j` do not need to be sorted. If you will use
  `MatUpdateMPIAIJWithArrays()`, the column indices **must** be sorted.

  The format which is used for the sparse matrix input, is equivalent to a
  row-major ordering, i.e. for the following matrix, the input data expected is
  as shown
.vb
        1 0 0
        2 0 3     P0
       -------
        4 5 6     P1

     Process0 [P0] rows_owned=[0,1]
        i =  {0,1,3}  [size = nrow+1 = 2+1]
        j =  {0,0,2}  [size = 3]
        v =  {1,2,3}  [size = 3]

     Process1 [P1] rows_owned=[2]
        i =  {0,3}    [size = nrow+1 = 1+1]
        j =  {0,1,2}  [size = 3]
        v =  {4,5,6}  [size = 3]
.ve

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`,
          `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()`, `MatCreateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
@*/
PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
{
  PetscFunctionBegin;
  /* dispatch to the type-specific implementation (MatMPIAIJSetPreallocationCSR_MPIAIJ for MPIAIJ);
     PetscTryMethod is a no-op if the type does not provide the method */
  PetscTryMethod(B, "MatMPIAIJSetPreallocationCSR_C", (Mat, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, i, j, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in `MATMPIAIJ` format
  (the default parallel PETSc format). For good matrix assembly performance
  the user should preallocate the matrix storage by setting the parameters
  `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).

  Collective

  Input Parameters:
+ B - the matrix
. d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
  (same value is used for all local rows)
. d_nnz - array containing the number of nonzeros in the various rows of the
  DIAGONAL portion of the local submatrix (possibly different for each row)
  or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `d_nz` is used to specify the nonzero structure.
  The size of this array is equal to the number of local rows, i.e 'm'.
4082 For matrices that will be factored, you must leave room for (and set) 4083 the diagonal entry even if it is zero. 4084 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4085 submatrix (same value is used for all local rows). 4086 - o_nnz - array containing the number of nonzeros in the various rows of the 4087 OFF-DIAGONAL portion of the local submatrix (possibly different for 4088 each row) or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `o_nz` is used to specify the nonzero 4089 structure. The size of this array is equal to the number 4090 of local rows, i.e 'm'. 4091 4092 Example Usage: 4093 Consider the following 8x8 matrix with 34 non-zero values, that is 4094 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4095 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4096 as follows 4097 4098 .vb 4099 1 2 0 | 0 3 0 | 0 4 4100 Proc0 0 5 6 | 7 0 0 | 8 0 4101 9 0 10 | 11 0 0 | 12 0 4102 ------------------------------------- 4103 13 0 14 | 15 16 17 | 0 0 4104 Proc1 0 18 0 | 19 20 21 | 0 0 4105 0 0 0 | 22 23 0 | 24 0 4106 ------------------------------------- 4107 Proc2 25 26 27 | 0 0 28 | 29 0 4108 30 0 0 | 31 32 33 | 0 34 4109 .ve 4110 4111 This can be represented as a collection of submatrices as 4112 .vb 4113 A B C 4114 D E F 4115 G H I 4116 .ve 4117 4118 Where the submatrices A,B,C are owned by proc0, D,E,F are 4119 owned by proc1, G,H,I are owned by proc2. 4120 4121 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4122 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4123 The 'M','N' parameters are 8,8, and have the same values on all procs. 4124 4125 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4126 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4127 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4128 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4129 part as `MATSEQAIJ` matrices. 
For example, proc1 will store [E] as a `MATSEQAIJ`
  matrix, and [DF] as another `MATSEQAIJ` matrix.

  When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
  allocated for every row of the local DIAGONAL submatrix, and `o_nz`
  storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
  One way to choose `d_nz` and `o_nz` is to use the maximum number of nonzeros over
  the local rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
  In this case, the values of `d_nz`, `o_nz` are
.vb
     proc0  d_nz = 2, o_nz = 2
     proc1  d_nz = 3, o_nz = 2
     proc2  d_nz = 1, o_nz = 4
.ve
  We are allocating `m`*(`d_nz`+`o_nz`) storage locations for every proc. This
  translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
  for proc2. i.e we are using 12+15+10=37 storage locations to store
  34 values.

  When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
  for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
  In the above case the values for `d_nnz`, `o_nnz` are
.vb
     proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2 d_nnz = [1,1]   and o_nnz = [4,4]
.ve
  Here the space allocated is sum of all the above values i.e 34, and
  hence pre-allocation is perfect.

  Level: intermediate

  Notes:
  If the *_nnz parameter is given then the *_nz parameter is ignored

  The `MATAIJ` format, also called compressed row storage (CSR), is compatible with standard Fortran
  storage. The stored row and column indices begin with zero.
  See [Sparse Matrices](sec_matsparse) for details.

  The parallel matrix is partitioned such that the first m0 rows belong to
  process 0, the next m1 rows belong to process 1, the next m2 rows belong
  to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.
4171 4172 The DIAGONAL portion of the local submatrix of a processor can be defined 4173 as the submatrix which is obtained by extraction the part corresponding to 4174 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 4175 first row that belongs to the processor, r2 is the last row belonging to 4176 the this processor, and c1-c2 is range of indices of the local part of a 4177 vector suitable for applying the matrix to. This is an mxn matrix. In the 4178 common case of a square matrix, the row and column ranges are the same and 4179 the DIAGONAL part is also square. The remaining portion of the local 4180 submatrix (mxN) constitute the OFF-DIAGONAL portion. 4181 4182 If `o_nnz` and `d_nnz` are specified, then `o_nz` and `d_nz` are ignored. 4183 4184 You can call `MatGetInfo()` to get information on how effective the preallocation was; 4185 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 4186 You can also run with the option `-info` and look for messages with the string 4187 malloc in them to see if additional memory allocation was needed. 4188 4189 .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MATMPIAIJ`, `MATAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`, 4190 `MatGetInfo()`, `PetscSplitOwnership()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4191 @*/ 4192 PetscErrorCode MatMPIAIJSetPreallocation(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[]) 4193 { 4194 PetscFunctionBegin; 4195 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 4196 PetscValidType(B, 1); 4197 PetscTryMethod(B, "MatMPIAIJSetPreallocation_C", (Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, d_nz, d_nnz, o_nz, o_nnz)); 4198 PetscFunctionReturn(PETSC_SUCCESS); 4199 } 4200 4201 /*@ 4202 MatCreateMPIAIJWithArrays - creates a `MATMPIAIJ` matrix using arrays that contain in standard 4203 CSR format for the local rows. 

  Collective

  Input Parameters:
+ comm - MPI communicator
. m    - number of local rows (Cannot be `PETSC_DECIDE`)
. n    - This value should be the same as the local size used in creating the
         x vector for the matrix-vector product $ y = Ax$. (or `PETSC_DECIDE` to have
         calculated if `N` is given) For square matrices n is almost always `m`.
. M    - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
. N    - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
. i    - row indices (of length m+1); that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
. j    - global column indices
- a    - optional matrix values

  Output Parameter:
. mat - the matrix

  Level: intermediate

  Notes:
  The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc;
  thus you CANNOT change the matrix entries by changing the values of `a[]` after you have
  called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.

  The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array.

  Once you have created the matrix you can update it with new numerical values using `MatUpdateMPIAIJWithArray()`

  If you do **not** use `MatUpdateMPIAIJWithArray()`, the column indices in `j` do not need to be sorted. If you will use
  `MatUpdateMPIAIJWithArray()`, the column indices **must** be sorted.
4235 4236 The format which is used for the sparse matrix input, is equivalent to a 4237 row-major ordering, i.e., for the following matrix, the input data expected is 4238 as shown 4239 .vb 4240 1 0 0 4241 2 0 3 P0 4242 ------- 4243 4 5 6 P1 4244 4245 Process0 [P0] rows_owned=[0,1] 4246 i = {0,1,3} [size = nrow+1 = 2+1] 4247 j = {0,0,2} [size = 3] 4248 v = {1,2,3} [size = 3] 4249 4250 Process1 [P1] rows_owned=[2] 4251 i = {0,3} [size = nrow+1 = 1+1] 4252 j = {0,1,2} [size = 3] 4253 v = {4,5,6} [size = 3] 4254 .ve 4255 4256 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4257 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4258 @*/ 4259 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat) 4260 { 4261 PetscFunctionBegin; 4262 PetscCheck(!i || !i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 4263 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4264 PetscCall(MatCreate(comm, mat)); 4265 PetscCall(MatSetSizes(*mat, m, n, M, N)); 4266 /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */ 4267 PetscCall(MatSetType(*mat, MATMPIAIJ)); 4268 PetscCall(MatMPIAIJSetPreallocationCSR(*mat, i, j, a)); 4269 PetscFunctionReturn(PETSC_SUCCESS); 4270 } 4271 4272 /*@ 4273 MatUpdateMPIAIJWithArrays - updates a `MATMPIAIJ` matrix using arrays that contain in standard 4274 CSR format for the local rows. 
Only the numerical values are updated the other arrays must be identical to what was passed 4275 from `MatCreateMPIAIJWithArrays()` 4276 4277 Deprecated: Use `MatUpdateMPIAIJWithArray()` 4278 4279 Collective 4280 4281 Input Parameters: 4282 + mat - the matrix 4283 . m - number of local rows (Cannot be `PETSC_DECIDE`) 4284 . n - This value should be the same as the local size used in creating the 4285 x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have 4286 calculated if N is given) For square matrices n is almost always m. 4287 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 4288 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 4289 . Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix 4290 . J - column indices 4291 - v - matrix values 4292 4293 Level: deprecated 4294 4295 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4296 `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4297 @*/ 4298 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[]) 4299 { 4300 PetscInt nnz, i; 4301 PetscBool nooffprocentries; 4302 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)mat->data; 4303 Mat_SeqAIJ *Ad = (Mat_SeqAIJ *)Aij->A->data; 4304 PetscScalar *ad, *ao; 4305 PetscInt ldi, Iii, md; 4306 const PetscInt *Adi = Ad->i; 4307 PetscInt *ld = Aij->ld; 4308 4309 PetscFunctionBegin; 4310 PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 4311 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4312 PetscCheck(m == mat->rmap->n, 
PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()"); 4313 PetscCheck(n == mat->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()"); 4314 4315 PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad)); 4316 PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao)); 4317 4318 for (i = 0; i < m; i++) { 4319 if (PetscDefined(USE_DEBUG)) { 4320 for (PetscInt j = Ii[i] + 1; j < Ii[i + 1]; ++j) { 4321 PetscCheck(J[j] >= J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is not sorted", j - Ii[i], J[j], i); 4322 PetscCheck(J[j] != J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is identical to previous entry", j - Ii[i], J[j], i); 4323 } 4324 } 4325 nnz = Ii[i + 1] - Ii[i]; 4326 Iii = Ii[i]; 4327 ldi = ld[i]; 4328 md = Adi[i + 1] - Adi[i]; 4329 PetscCall(PetscArraycpy(ao, v + Iii, ldi)); 4330 PetscCall(PetscArraycpy(ad, v + Iii + ldi, md)); 4331 PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md)); 4332 ad += md; 4333 ao += nnz - md; 4334 } 4335 nooffprocentries = mat->nooffprocentries; 4336 mat->nooffprocentries = PETSC_TRUE; 4337 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad)); 4338 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao)); 4339 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A)); 4340 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B)); 4341 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 4342 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 4343 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 4344 mat->nooffprocentries = nooffprocentries; 4345 PetscFunctionReturn(PETSC_SUCCESS); 4346 } 4347 4348 /*@ 4349 MatUpdateMPIAIJWithArray - updates an `MATMPIAIJ` matrix using an array that contains the 
nonzero values 4350 4351 Collective 4352 4353 Input Parameters: 4354 + mat - the matrix 4355 - v - matrix values, stored by row 4356 4357 Level: intermediate 4358 4359 Notes: 4360 The matrix must have been obtained with `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` 4361 4362 The column indices in the call to `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` must have been sorted for this call to work correctly 4363 4364 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4365 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4366 @*/ 4367 PetscErrorCode MatUpdateMPIAIJWithArray(Mat mat, const PetscScalar v[]) 4368 { 4369 PetscInt nnz, i, m; 4370 PetscBool nooffprocentries; 4371 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)mat->data; 4372 Mat_SeqAIJ *Ad = (Mat_SeqAIJ *)Aij->A->data; 4373 Mat_SeqAIJ *Ao = (Mat_SeqAIJ *)Aij->B->data; 4374 PetscScalar *ad, *ao; 4375 const PetscInt *Adi = Ad->i, *Adj = Ao->i; 4376 PetscInt ldi, Iii, md; 4377 PetscInt *ld = Aij->ld; 4378 4379 PetscFunctionBegin; 4380 m = mat->rmap->n; 4381 4382 PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad)); 4383 PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao)); 4384 Iii = 0; 4385 for (i = 0; i < m; i++) { 4386 nnz = Adi[i + 1] - Adi[i] + Adj[i + 1] - Adj[i]; 4387 ldi = ld[i]; 4388 md = Adi[i + 1] - Adi[i]; 4389 PetscCall(PetscArraycpy(ad, v + Iii + ldi, md)); 4390 ad += md; 4391 if (ao) { 4392 PetscCall(PetscArraycpy(ao, v + Iii, ldi)); 4393 PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md)); 4394 ao += nnz - md; 4395 } 4396 Iii += nnz; 4397 } 4398 nooffprocentries = mat->nooffprocentries; 4399 mat->nooffprocentries = PETSC_TRUE; 4400 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad)); 4401 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao)); 4402 
PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A)); 4403 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B)); 4404 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 4405 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 4406 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 4407 mat->nooffprocentries = nooffprocentries; 4408 PetscFunctionReturn(PETSC_SUCCESS); 4409 } 4410 4411 /*@ 4412 MatCreateAIJ - Creates a sparse parallel matrix in `MATAIJ` format 4413 (the default parallel PETSc format). For good matrix assembly performance 4414 the user should preallocate the matrix storage by setting the parameters 4415 `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). 4416 4417 Collective 4418 4419 Input Parameters: 4420 + comm - MPI communicator 4421 . m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given) 4422 This value should be the same as the local size used in creating the 4423 y vector for the matrix-vector product y = Ax. 4424 . n - This value should be the same as the local size used in creating the 4425 x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have 4426 calculated if N is given) For square matrices n is almost always m. 4427 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 4428 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 4429 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4430 (same value is used for all local rows) 4431 . d_nnz - array containing the number of nonzeros in the various rows of the 4432 DIAGONAL portion of the local submatrix (possibly different for each row) 4433 or `NULL`, if `d_nz` is used to specify the nonzero structure. 4434 The size of this array is equal to the number of local rows, i.e 'm'. 4435 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4436 submatrix (same value is used for all local rows). 
- o_nnz - array containing the number of nonzeros in the various rows of the
          OFF-DIAGONAL portion of the local submatrix (possibly different for
          each row) or `NULL`, if `o_nz` is used to specify the nonzero
          structure. The size of this array is equal to the number
          of local rows, i.e 'm'.

  Output Parameter:
. A - the matrix

  Options Database Keys:
+ -mat_no_inode - Do not use inodes
. -mat_inode_limit <limit> - Sets inode limit (max limit=5)
- -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in `MatMult()` of sparse parallel matrices.
        See viewer types in manual of `MatView()`. Of them, ascii_matlab, draw or binary cause the `VecScatter`
        to be viewed as a matrix. Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one `MatMult()` call.

  Level: intermediate

  Notes:
  It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
  MatXXXXSetPreallocation() paradigm instead of this routine directly.
  [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]

  If the *_nnz parameter is given then the *_nz parameter is ignored

  The `m`,`n`,`M`,`N` parameters specify the size of the matrix, and its partitioning across
  processors, while `d_nz`,`d_nnz`,`o_nz`,`o_nnz` parameters specify the approximate
  storage requirements for this matrix.

  If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one
  processor then it must be used on all processors that share the object for
  that argument.

  If `m` and `n` are not `PETSC_DECIDE`, then the values determine the `PetscLayout` of the matrix and the ranges returned by
  `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, and `MatGetOwnershipRangesColumn()`.
4472 4473 The user MUST specify either the local or global matrix dimensions 4474 (possibly both). 4475 4476 The parallel matrix is partitioned across processors such that the 4477 first `m0` rows belong to process 0, the next `m1` rows belong to 4478 process 1, the next `m2` rows belong to process 2, etc., where 4479 `m0`, `m1`, `m2`... are the input parameter `m` on each MPI process. I.e., each MPI process stores 4480 values corresponding to [m x N] submatrix. 4481 4482 The columns are logically partitioned with the n0 columns belonging 4483 to 0th partition, the next n1 columns belonging to the next 4484 partition etc.. where n0,n1,n2... are the input parameter 'n'. 4485 4486 The DIAGONAL portion of the local submatrix on any given processor 4487 is the submatrix corresponding to the rows and columns m,n 4488 corresponding to the given processor. i.e diagonal matrix on 4489 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4490 etc. The remaining portion of the local submatrix [m x (N-n)] 4491 constitute the OFF-DIAGONAL portion. The example below better 4492 illustrates this concept. The two matrices, the DIAGONAL portion and 4493 the OFF-DIAGONAL portion are each stored as `MATSEQAIJ` matrices. 4494 4495 For a square global matrix we define each processor's diagonal portion 4496 to be its local rows and the corresponding columns (a square submatrix); 4497 each processor's off-diagonal portion encompasses the remainder of the 4498 local matrix (a rectangular submatrix). 4499 4500 If `o_nnz`, `d_nnz` are specified, then `o_nz`, and `d_nz` are ignored. 4501 4502 When calling this routine with a single process communicator, a matrix of 4503 type `MATSEQAIJ` is returned. 
If a matrix of type `MATMPIAIJ` is desired for this 4504 type of communicator, use the construction mechanism 4505 .vb 4506 MatCreate(..., &A); 4507 MatSetType(A, MATMPIAIJ); 4508 MatSetSizes(A, m, n, M, N); 4509 MatMPIAIJSetPreallocation(A, ...); 4510 .ve 4511 4512 By default, this format uses inodes (identical nodes) when possible. 4513 We search for consecutive rows with the same nonzero structure, thereby 4514 reusing matrix information to achieve increased efficiency. 4515 4516 Example Usage: 4517 Consider the following 8x8 matrix with 34 non-zero values, that is 4518 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4519 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4520 as follows 4521 4522 .vb 4523 1 2 0 | 0 3 0 | 0 4 4524 Proc0 0 5 6 | 7 0 0 | 8 0 4525 9 0 10 | 11 0 0 | 12 0 4526 ------------------------------------- 4527 13 0 14 | 15 16 17 | 0 0 4528 Proc1 0 18 0 | 19 20 21 | 0 0 4529 0 0 0 | 22 23 0 | 24 0 4530 ------------------------------------- 4531 Proc2 25 26 27 | 0 0 28 | 29 0 4532 30 0 0 | 31 32 33 | 0 34 4533 .ve 4534 4535 This can be represented as a collection of submatrices as 4536 4537 .vb 4538 A B C 4539 D E F 4540 G H I 4541 .ve 4542 4543 Where the submatrices A,B,C are owned by proc0, D,E,F are 4544 owned by proc1, G,H,I are owned by proc2. 4545 4546 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4547 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4548 The 'M','N' parameters are 8,8, and have the same values on all procs. 4549 4550 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4551 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4552 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4553 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4554 part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ` 4555 matrix, and [DF] as another SeqAIJ matrix. 

  When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
  allocated for every row of the local DIAGONAL submatrix, and `o_nz`
  storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
  One way to choose `d_nz` and `o_nz` is to use the maximum number of nonzeros over
  the local rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
  In this case, the values of `d_nz`,`o_nz` are
.vb
     proc0  d_nz = 2, o_nz = 2
     proc1  d_nz = 3, o_nz = 2
     proc2  d_nz = 1, o_nz = 4
.ve
  We are allocating m*(`d_nz`+`o_nz`) storage locations for every proc. This
  translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
  for proc2. i.e we are using 12+15+10=37 storage locations to store
  34 values.

  When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
  for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
  In the above case the values for `d_nnz`,`o_nnz` are
.vb
     proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2 d_nnz = [1,1]   and o_nnz = [4,4]
.ve
  Here the space allocated is sum of all the above values i.e 34, and
  hence pre-allocation is perfect.
4583 4584 .seealso: [](ch_matrices), `Mat`, [Sparse Matrix Creation](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4585 `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()`, `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, 4586 `MatGetOwnershipRangesColumn()`, `PetscLayout` 4587 @*/ 4588 PetscErrorCode MatCreateAIJ(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A) 4589 { 4590 PetscMPIInt size; 4591 4592 PetscFunctionBegin; 4593 PetscCall(MatCreate(comm, A)); 4594 PetscCall(MatSetSizes(*A, m, n, M, N)); 4595 PetscCallMPI(MPI_Comm_size(comm, &size)); 4596 if (size > 1) { 4597 PetscCall(MatSetType(*A, MATMPIAIJ)); 4598 PetscCall(MatMPIAIJSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz)); 4599 } else { 4600 PetscCall(MatSetType(*A, MATSEQAIJ)); 4601 PetscCall(MatSeqAIJSetPreallocation(*A, d_nz, d_nnz)); 4602 } 4603 PetscFunctionReturn(PETSC_SUCCESS); 4604 } 4605 4606 /*@C 4607 MatMPIAIJGetSeqAIJ - Returns the local pieces of this distributed matrix 4608 4609 Not Collective 4610 4611 Input Parameter: 4612 . A - The `MATMPIAIJ` matrix 4613 4614 Output Parameters: 4615 + Ad - The local diagonal block as a `MATSEQAIJ` matrix 4616 . Ao - The local off-diagonal block as a `MATSEQAIJ` matrix 4617 - colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix 4618 4619 Level: intermediate 4620 4621 Note: 4622 The rows in `Ad` and `Ao` are in [0, Nr), where Nr is the number of local rows on this process. The columns 4623 in `Ad` are in [0, Nc) where Nc is the number of local columns. The columns are `Ao` are in [0, Nco), where Nco is 4624 the number of nonzero columns in the local off-diagonal piece of the matrix `A`. The array colmap maps these 4625 local column numbers to global column numbers in the original matrix. 
4626 4627 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATSEQAIJ` 4628 @*/ 4629 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[]) 4630 { 4631 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 4632 PetscBool flg; 4633 4634 PetscFunctionBegin; 4635 PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &flg)); 4636 PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIAIJ matrix as input"); 4637 if (Ad) *Ad = a->A; 4638 if (Ao) *Ao = a->B; 4639 if (colmap) *colmap = a->garray; 4640 PetscFunctionReturn(PETSC_SUCCESS); 4641 } 4642 4643 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 4644 { 4645 PetscInt m, N, i, rstart, nnz, Ii; 4646 PetscInt *indx; 4647 PetscScalar *values; 4648 MatType rootType; 4649 4650 PetscFunctionBegin; 4651 PetscCall(MatGetSize(inmat, &m, &N)); 4652 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4653 PetscInt *dnz, *onz, sum, bs, cbs; 4654 4655 if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N)); 4656 /* Check sum(n) = N */ 4657 PetscCallMPI(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm)); 4658 PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N); 4659 4660 PetscCallMPI(MPI_Scan(&m, &rstart, 1, MPIU_INT, MPI_SUM, comm)); 4661 rstart -= m; 4662 4663 MatPreallocateBegin(comm, m, n, dnz, onz); 4664 for (i = 0; i < m; i++) { 4665 PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, NULL)); 4666 PetscCall(MatPreallocateSet(i + rstart, nnz, indx, dnz, onz)); 4667 PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, NULL)); 4668 } 4669 4670 PetscCall(MatCreate(comm, outmat)); 4671 PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 4672 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 
    PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
    PetscCall(MatGetRootType_Private(inmat, &rootType));
    PetscCall(MatSetType(*outmat, rootType));
    PetscCall(MatSeqAIJSetPreallocation(*outmat, 0, dnz));
    PetscCall(MatMPIAIJSetPreallocation(*outmat, 0, dnz, 0, onz));
    MatPreallocateEnd(dnz, onz);
    /* every row of inmat lands in this process's row block, so no off-process stash traffic is needed */
    PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  }

  /* numeric phase: copy each local row of inmat into the corresponding global row of outmat */
  PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));
  for (i = 0; i < m; i++) {
    PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, &values));
    Ii = i + rstart;
    PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES));
    PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, &values));
  }
  PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Destroy callback for the Mat_Merge_SeqsToMPI support structure that
   MatCreateMPIAIJSumSeqAIJSymbolic() attaches (via a PetscContainer) to the
   parallel matrix it creates. */
static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void **data)
{
  Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)*data;

  PetscFunctionBegin;
  if (!merge) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCall(PetscFree(merge->id_r));
  PetscCall(PetscFree(merge->len_s));
  PetscCall(PetscFree(merge->len_r));
  PetscCall(PetscFree(merge->bi));
  PetscCall(PetscFree(merge->bj));
  /* buf_ri[0]/buf_rj[0] own the contiguous storage for all received structures */
  PetscCall(PetscFree(merge->buf_ri[0]));
  PetscCall(PetscFree(merge->buf_ri));
  PetscCall(PetscFree(merge->buf_rj[0]));
  PetscCall(PetscFree(merge->buf_rj));
  PetscCall(PetscFree(merge->coi));
  PetscCall(PetscFree(merge->coj));
  PetscCall(PetscFree(merge->owners_co));
  PetscCall(PetscLayoutDestroy(&merge->rowmap));
  PetscCall(PetscFree(merge));
  PetscFunctionReturn(PETSC_SUCCESS);
}

#include <../src/mat/utils/freespace.h>
#include <petscbt.h>

/* Numeric phase of MatCreateMPIAIJSumSeqAIJ(): fill the values of mpimat
   (whose structure was built by MatCreateMPIAIJSumSeqAIJSymbolic()) by summing
   the entries of every process's seqmat. Uses the communication plan stored in
   the "MatMergeSeqsToMPI" container attached to mpimat. */
PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat, Mat mpimat)
{
  MPI_Comm             comm;
  Mat_SeqAIJ          *a = (Mat_SeqAIJ *)seqmat->data;
  PetscMPIInt          size, rank, taga, *len_s;
  PetscInt             N = mpimat->cmap->N, i, j, *owners, *ai = a->i, *aj, m;
  PetscMPIInt          proc, k;
  PetscInt           **buf_ri, **buf_rj;
  PetscInt             anzi, *bj_i, *bi, *bj, arow, bnzi, nextaj;
  PetscInt             nrows, **buf_ri_k, **nextrow, **nextai;
  MPI_Request         *s_waits, *r_waits;
  MPI_Status          *status;
  const MatScalar     *aa, *a_a;
  MatScalar          **abuf_r, *ba_i;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer       container;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mpimat, &comm));
  PetscCall(PetscLogEventBegin(MAT_Seqstompinum, seqmat, 0, 0, 0));

  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));

  /* retrieve the communication plan built by the symbolic phase */
  PetscCall(PetscObjectQuery((PetscObject)mpimat, "MatMergeSeqsToMPI", (PetscObject *)&container));
  PetscCheck(container, PetscObjectComm((PetscObject)mpimat), PETSC_ERR_PLIB, "Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic");
  PetscCall(PetscContainerGetPointer(container, (void **)&merge));
  PetscCall(MatSeqAIJGetArrayRead(seqmat, &a_a));
  aa = a_a;

  bi     = merge->bi;
  bj     = merge->bj;
  buf_ri = merge->buf_ri;
  buf_rj = merge->buf_rj;

  PetscCall(PetscMalloc1(size, &status));
  owners = merge->rowmap->range;
  len_s  = merge->len_s;

  /* send and recv matrix values */
  PetscCall(PetscObjectGetNewTag((PetscObject)mpimat, &taga));
  PetscCall(PetscPostIrecvScalar(comm, taga, merge->nrecv, merge->id_r, merge->len_r, &abuf_r, &r_waits));

  PetscCall(PetscMalloc1(merge->nsend + 1, &s_waits));
  for (proc = 0, k = 0; proc < size; proc++) {
    if (!len_s[proc]) continue;
    i = owners[proc];
    /* send the values of all rows owned by [proc] in one contiguous message */
    PetscCallMPI(MPIU_Isend(aa + ai[i], len_s[proc], MPIU_MATSCALAR, proc, taga, comm, s_waits + k));
    k++;
  }

  if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, r_waits, status));
  if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, s_waits, status));
  PetscCall(PetscFree(status));

  PetscCall(PetscFree(s_waits));
  PetscCall(PetscFree(r_waits));

  /* insert mat values of mpimat */
  PetscCall(PetscMalloc1(N, &ba_i));
  PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai));

  for (k = 0; k < merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */
  }

  /* set values of ba */
  m = merge->rowmap->n;
  for (i = 0; i < m; i++) {
    arow = owners[rank] + i;
    bj_i = bj + bi[i]; /* col indices of the i-th row of mpimat */
    bnzi = bi[i + 1] - bi[i];
    PetscCall(PetscArrayzero(ba_i, bnzi));

    /* add local non-zero vals of this proc's seqmat into ba */
    anzi   = ai[arow + 1] - ai[arow];
    aj     = a->j + ai[arow];
    aa     = a_a + ai[arow];
    nextaj = 0;
    /* merge sorted acols into the (superset) sorted bcols */
    for (j = 0; nextaj < anzi; j++) {
      if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
        ba_i[j] += aa[nextaj++];
      }
    }

    /* add received vals into ba */
    for (k = 0; k < merge->nrecv; k++) { /* k-th received message */
      /* i-th row */
      if (i == *nextrow[k]) {
        anzi   = *(nextai[k] + 1) - *nextai[k];
        aj     = buf_rj[k] + *nextai[k];
        aa     = abuf_r[k] + *nextai[k];
        nextaj = 0;
        for (j = 0; nextaj < anzi; j++) {
          if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
            ba_i[j] += aa[nextaj++];
          }
        }
        nextrow[k]++;
        nextai[k]++;
      }
    }
    PetscCall(MatSetValues(mpimat, 1, &arow, bnzi, bj_i, ba_i, INSERT_VALUES));
  }
  PetscCall(MatSeqAIJRestoreArrayRead(seqmat, &a_a));
  PetscCall(MatAssemblyBegin(mpimat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mpimat, MAT_FINAL_ASSEMBLY));

  PetscCall(PetscFree(abuf_r[0]));
  PetscCall(PetscFree(abuf_r));
  PetscCall(PetscFree(ba_i));
  PetscCall(PetscFree3(buf_ri_k, nextrow, nextai));
  PetscCall(PetscLogEventEnd(MAT_Seqstompinum, seqmat, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Symbolic phase of MatCreateMPIAIJSumSeqAIJ(): determine the nonzero structure
   of the merged parallel matrix, exchange i/j structure between processes, and
   attach the resulting communication plan (Mat_Merge_SeqsToMPI) to the new matrix
   for reuse by the numeric phase. */
PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, Mat *mpimat)
{
  Mat                  B_mpi;
  Mat_SeqAIJ          *a = (Mat_SeqAIJ *)seqmat->data;
  PetscMPIInt          size, rank, tagi, tagj, *len_s, *len_si, *len_ri;
  PetscInt           **buf_rj, **buf_ri, **buf_ri_k;
  PetscInt             M = seqmat->rmap->n, N = seqmat->cmap->n, i, *owners, *ai = a->i, *aj = a->j;
  PetscInt             len, *dnz, *onz, bs, cbs;
  PetscInt             k, anzi, *bi, *bj, *lnk, nlnk, arow, bnzi;
  PetscInt             nrows, *buf_s, *buf_si, *buf_si_i, **nextrow, **nextai;
  MPI_Request         *si_waits, *sj_waits, *ri_waits, *rj_waits;
  MPI_Status          *status;
  PetscFreeSpaceList   free_space = NULL, current_space = NULL;
  PetscBT              lnkbt;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer       container;

  PetscFunctionBegin;
  PetscCall(PetscLogEventBegin(MAT_Seqstompisym, seqmat, 0, 0, 0));

  /* make sure it is a PETSc comm */
  PetscCall(PetscCommDuplicate(comm, &comm, NULL));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));

  PetscCall(PetscNew(&merge));
  PetscCall(PetscMalloc1(size, &status));

  /* determine row ownership */
  PetscCall(PetscLayoutCreate(comm, &merge->rowmap));
  PetscCall(PetscLayoutSetLocalSize(merge->rowmap, m));
  PetscCall(PetscLayoutSetSize(merge->rowmap, M));
  PetscCall(PetscLayoutSetBlockSize(merge->rowmap, 1));
  PetscCall(PetscLayoutSetUp(merge->rowmap));
  PetscCall(PetscMalloc1(size, &len_si));
  PetscCall(PetscMalloc1(size, &merge->len_s));

  m      = merge->rowmap->n;
  owners = merge->rowmap->range;

  /* determine the number of messages to send, their lengths */
  len_s = merge->len_s;

  len          = 0; /* length of buf_si[] */
  merge->nsend = 0;
  for (PetscMPIInt proc = 0; proc < size; proc++) {
    len_si[proc] = 0;
    if (proc == rank) {
      len_s[proc] = 0;
    } else {
      PetscCall(PetscMPIIntCast(owners[proc + 1] - owners[proc] + 1, &len_si[proc]));
      PetscCall(PetscMPIIntCast(ai[owners[proc + 1]] - ai[owners[proc]], &len_s[proc])); /* num of rows to be sent to [proc] */
    }
    if (len_s[proc]) {
      merge->nsend++;
      /* only rows with at least one nonzero are transmitted in the i-structure */
      nrows = 0;
      for (i = owners[proc]; i < owners[proc + 1]; i++) {
        if (ai[i + 1] > ai[i]) nrows++;
      }
      PetscCall(PetscMPIIntCast(2 * (nrows + 1), &len_si[proc]));
      len += len_si[proc];
    }
  }

  /* determine the number and length of messages to receive for ij-structure */
  PetscCall(PetscGatherNumberOfMessages(comm, NULL, len_s, &merge->nrecv));
  PetscCall(PetscGatherMessageLengths2(comm, merge->nsend, merge->nrecv, len_s, len_si, &merge->id_r, &merge->len_r, &len_ri));

  /* post the Irecv of j-structure */
  PetscCall(PetscCommGetNewTag(comm, &tagj));
  PetscCall(PetscPostIrecvInt(comm, tagj, merge->nrecv, merge->id_r, merge->len_r, &buf_rj, &rj_waits));

  /* post the Isend of j-structure */
  PetscCall(PetscMalloc2(merge->nsend, &si_waits, merge->nsend, &sj_waits));

  for (PetscMPIInt proc = 0, k = 0; proc < size; proc++) {
    if (!len_s[proc]) continue;
    i = owners[proc];
    PetscCallMPI(MPIU_Isend(aj + ai[i], len_s[proc], MPIU_INT, proc, tagj, comm, sj_waits + k));
    k++;
  }

  /* receives and sends of j-structure are complete */
  if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, rj_waits, status));
  if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, sj_waits, status));

  /* send and recv i-structure */
  PetscCall(PetscCommGetNewTag(comm, &tagi));
  PetscCall(PetscPostIrecvInt(comm, tagi, merge->nrecv, merge->id_r, len_ri, &buf_ri, &ri_waits));

  PetscCall(PetscMalloc1(len + 1, &buf_s));
  buf_si = buf_s; /* points to the beginning of k-th msg to be sent */
  for (PetscMPIInt proc = 0, k = 0; proc < size; proc++) {
    if (!len_s[proc]) continue;
    /* form outgoing message for i-structure:
       buf_si[0]:                 nrows to be sent
             [1:nrows]:           row index (global)
             [nrows+1:2*nrows+1]: i-structure index
    */
    nrows       = len_si[proc] / 2 - 1;
    buf_si_i    = buf_si + nrows + 1;
    buf_si[0]   = nrows;
    buf_si_i[0] = 0;
    nrows       = 0;
    for (i = owners[proc]; i < owners[proc + 1]; i++) {
      anzi = ai[i + 1] - ai[i];
      if (anzi) {
        buf_si_i[nrows + 1] = buf_si_i[nrows] + anzi; /* i-structure */
        buf_si[nrows + 1]   = i - owners[proc];       /* local row index */
        nrows++;
      }
    }
    PetscCallMPI(MPIU_Isend(buf_si, len_si[proc], MPIU_INT, proc, tagi, comm, si_waits + k));
    k++;
    buf_si += len_si[proc];
  }

  if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, ri_waits, status));
  if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, si_waits, status));

  PetscCall(PetscInfo(seqmat, "nsend: %d, nrecv: %d\n", merge->nsend, merge->nrecv));
  for (i = 0; i < merge->nrecv; i++) PetscCall(PetscInfo(seqmat, "recv len_ri=%d, len_rj=%d from [%d]\n", len_ri[i], merge->len_r[i], merge->id_r[i]));

  PetscCall(PetscFree(len_si));
  PetscCall(PetscFree(len_ri));
  PetscCall(PetscFree(rj_waits));
  PetscCall(PetscFree2(si_waits, sj_waits));
  PetscCall(PetscFree(ri_waits));
  PetscCall(PetscFree(buf_s));
  PetscCall(PetscFree(status));

  /* compute a local seq matrix in each processor */
  /* allocate bi array and free space for accumulating nonzero column info */
  PetscCall(PetscMalloc1(m + 1, &bi));
  bi[0] = 0;

  /* create and initialize a linked list */
  nlnk = N + 1;
  PetscCall(PetscLLCreate(N, N, nlnk, lnk, lnkbt));

  /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
  len = ai[owners[rank + 1]] - ai[owners[rank]];
  PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2, len) + 1, &free_space));

  current_space = free_space;

  /* determine symbolic info for each local row */
  PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai));

  for (k = 0; k < merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */
  }

  /* union of local and received column indices per row via the sorted linked list */
  MatPreallocateBegin(comm, m, n, dnz, onz);
  len = 0;
  for (i = 0; i < m; i++) {
    bnzi = 0;
    /* add local non-zero cols of this proc's seqmat into lnk */
    arow = owners[rank] + i;
    anzi = ai[arow + 1] - ai[arow];
    aj   = a->j + ai[arow];
    PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt));
    bnzi += nlnk;
    /* add received col data into lnk */
    for (k = 0; k < merge->nrecv; k++) { /* k-th received message */
      if (i == *nextrow[k]) {            /* i-th row */
        anzi = *(nextai[k] + 1) - *nextai[k];
        aj   = buf_rj[k] + *nextai[k];
        PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt));
        bnzi += nlnk;
        nextrow[k]++;
        nextai[k]++;
      }
    }
    if (len < bnzi) len = bnzi; /* =max(bnzi) */

    /* if free space is not available, make more free space */
    if (current_space->local_remaining < bnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi, current_space->total_array_size), &current_space));
    /* copy data into free space, then initialize lnk */
    PetscCall(PetscLLClean(N, N, bnzi, lnk, current_space->array, lnkbt));
    PetscCall(MatPreallocateSet(i + owners[rank], bnzi, current_space->array, dnz, onz));

    current_space->array += bnzi;
    current_space->local_used += bnzi;
    current_space->local_remaining -= bnzi;

    bi[i + 1] = bi[i] + bnzi;
  }

  PetscCall(PetscFree3(buf_ri_k, nextrow, nextai));

  PetscCall(PetscMalloc1(bi[m] + 1, &bj));
  PetscCall(PetscFreeSpaceContiguous(&free_space, bj));
  PetscCall(PetscLLDestroy(lnk, lnkbt));

  /* create symbolic parallel matrix B_mpi */
  PetscCall(MatGetBlockSizes(seqmat, &bs, &cbs));
  PetscCall(MatCreate(comm, &B_mpi));
  if (n == PETSC_DECIDE) {
    PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, N));
  } else {
    PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
  }
  PetscCall(MatSetBlockSizes(B_mpi, bs, cbs));
  PetscCall(MatSetType(B_mpi, MATMPIAIJ));
  PetscCall(MatMPIAIJSetPreallocation(B_mpi, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);
  PetscCall(MatSetOption(B_mpi, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE));

  /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
  B_mpi->assembled = PETSC_FALSE;
  merge->bi        = bi;
  merge->bj        = bj;
  merge->buf_ri    = buf_ri;
  merge->buf_rj    = buf_rj;
  merge->coi       = NULL;
  merge->coj       = NULL;
  merge->owners_co = NULL;

  PetscCall(PetscCommDestroy(&comm));

  /* attach the supporting struct to B_mpi for reuse */
  PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container));
  PetscCall(PetscContainerSetPointer(container, merge));
  PetscCall(PetscContainerSetCtxDestroy(container, MatDestroy_MPIAIJ_SeqsToMPI));
  PetscCall(PetscObjectCompose((PetscObject)B_mpi, "MatMergeSeqsToMPI", (PetscObject)container));
  PetscCall(PetscContainerDestroy(&container));
  *mpimat = B_mpi;

  PetscCall(PetscLogEventEnd(MAT_Seqstompisym, seqmat, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatCreateMPIAIJSumSeqAIJ - Creates a `MATMPIAIJ` matrix by adding sequential
  matrices from each processor

  Collective

  Input Parameters:
+ comm   - the communicators the parallel matrix will live on
. seqmat - the input sequential matrices
. m      - number of local rows (or `PETSC_DECIDE`)
. n      - number of local columns (or `PETSC_DECIDE`)
- scall  - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`

  Output Parameter:
. mpimat - the parallel matrix generated

  Level: advanced

  Note:
  The dimensions of the sequential matrix in each processor MUST be the same.
  The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be
  destroyed when `mpimat` is destroyed. Call `PetscObjectQuery()` to access `seqmat`.

.seealso: [](ch_matrices), `Mat`, `MatCreateAIJ()`
@*/
PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, MatReuse scall, Mat *mpimat)
{
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (size == 1) {
    /* uniprocessor case: the "sum" is just a copy of the single sequential matrix */
    PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0));
    if (scall == MAT_INITIAL_MATRIX) {
      PetscCall(MatDuplicate(seqmat, MAT_COPY_VALUES, mpimat));
    } else {
      PetscCall(MatCopy(seqmat, *mpimat, SAME_NONZERO_PATTERN));
    }
    PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0));
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  /* parallel case: symbolic phase only on the first call, numeric phase always */
  PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0));
  if (scall == MAT_INITIAL_MATRIX) PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm, seqmat, m, n, mpimat));
  PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat, *mpimat));
  PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATAIJ` matrix.

  Not Collective

  Input Parameter:
. A - the matrix

  Output Parameter:
. A_loc - the local sequential matrix generated

  Level: developer

  Notes:
  The matrix is created by taking `A`'s local rows and putting them into a sequential matrix
  with `mlocal` rows and `n` columns. Where `mlocal` is obtained with `MatGetLocalSize()` and
  `n` is the global column count obtained with `MatGetSize()`

  In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix.

  For parallel matrices this creates an entirely new matrix. If the matrix is sequential it merely increases the reference count.

  Destroy the matrix with `MatDestroy()`

.seealso: [](ch_matrices), `Mat`, `MatMPIAIJGetLocalMat()`
@*/
PetscErrorCode MatAIJGetLocalMat(Mat A, Mat *A_loc)
{
  PetscBool mpi;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &mpi));
  if (mpi) {
    PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, A_loc));
  } else {
    /* already sequential: no copy, just bump the reference count */
    *A_loc = A;
    PetscCall(PetscObjectReference((PetscObject)*A_loc));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMPIAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix.

  Not Collective

  Input Parameters:
+ A     - the matrix
- scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`

  Output Parameter:
. A_loc - the local sequential matrix generated

  Level: developer

  Notes:
  The matrix is created by taking all `A`'s local rows and putting them into a sequential
  matrix with `mlocal` rows and `n` columns.`mlocal` is the row count obtained with
  `MatGetLocalSize()` and `n` is the global column count obtained with `MatGetSize()`.

  In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix.

  When `A` is sequential and `MAT_INITIAL_MATRIX` is requested, the matrix returned is the diagonal part of `A` (which contains the entire matrix),
  with its reference count increased by one. Hence changing values of `A_loc` changes `A`. If `MAT_REUSE_MATRIX` is requested on a sequential matrix
  then `MatCopy`(Adiag,*`A_loc`,`SAME_NONZERO_PATTERN`) is called to fill `A_loc`. Thus one can preallocate the appropriate sequential matrix `A_loc`
  and then call this routine with `MAT_REUSE_MATRIX`. In this case, one can modify the values of `A_loc` without affecting the original sequential matrix.

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()`
@*/
PetscErrorCode MatMPIAIJGetLocalMat(Mat A, MatReuse scall, Mat *A_loc)
{
  Mat_MPIAIJ        *mpimat = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ        *mat, *a, *b;
  PetscInt          *ai, *aj, *bi, *bj, *cmap = mpimat->garray;
  const PetscScalar *aa, *ba, *aav, *bav;
  PetscScalar       *ca, *cam;
  PetscMPIInt        size;
  PetscInt           am = A->rmap->n, i, j, k, cstart = A->cmap->rstart;
  PetscInt          *ci, *cj, col, ncols_d, ncols_o, jo;
  PetscBool          match;

  PetscFunctionBegin;
  PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &match));
  PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    /* single process: the diagonal block already holds the whole matrix */
    if (scall == MAT_INITIAL_MATRIX) {
      PetscCall(PetscObjectReference((PetscObject)mpimat->A));
      *A_loc = mpimat->A;
    } else if (scall == MAT_REUSE_MATRIX) {
      PetscCall(MatCopy(mpimat->A, *A_loc, SAME_NONZERO_PATTERN));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
  a  = (Mat_SeqAIJ *)mpimat->A->data;
  b  = (Mat_SeqAIJ *)mpimat->B->data;
  ai = a->i;
  aj = a->j;
  bi = b->i;
  bj = b->j;
  PetscCall(MatSeqAIJGetArrayRead(mpimat->A, &aav));
  PetscCall(MatSeqAIJGetArrayRead(mpimat->B, &bav));
  aa = aav;
  ba = bav;
  if (scall == MAT_INITIAL_MATRIX) {
    /* build CSR of the merged row: off-diag cols < cstart, then diag cols, then off-diag cols >= cend */
    PetscCall(PetscMalloc1(1 + am, &ci));
    ci[0] = 0;
    for (i = 0; i < am; i++) ci[i + 1] = ci[i] + (ai[i + 1] - ai[i]) + (bi[i + 1] - bi[i]);
    PetscCall(PetscMalloc1(1 + ci[am], &cj));
    PetscCall(PetscMalloc1(1 + ci[am], &ca));
    k = 0;
    for (i = 0; i < am; i++) {
      ncols_o = bi[i + 1] - bi[i];
      ncols_d = ai[i + 1] - ai[i];
      /* off-diagonal portion of A */
      for (jo = 0; jo < ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        cj[k] = col;
        bj++;
        ca[k++] = *ba++;
      }
      /* diagonal portion of A */
      for (j = 0; j < ncols_d; j++) {
        cj[k]   = cstart + *aj++;
        ca[k++] = *aa++;
      }
      /* off-diagonal portion of A */
      for (j = jo; j < ncols_o; j++) {
        cj[k]   = cmap[*bj++];
        ca[k++] = *ba++;
      }
    }
    /* put together the new matrix */
    PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, A->cmap->N, ci, cj, ca, A_loc));
    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    mat          = (Mat_SeqAIJ *)(*A_loc)->data;
    mat->free_a  = PETSC_TRUE;
    mat->free_ij = PETSC_TRUE;
    mat->nonew   = 0;
  } else if (scall == MAT_REUSE_MATRIX) {
    /* structure is unchanged; only refresh the values in the same interleaved order */
    mat = (Mat_SeqAIJ *)(*A_loc)->data;
    ci  = mat->i;
    cj  = mat->j;
    PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &cam));
    for (i = 0; i < am; i++) {
      /* off-diagonal portion of A */
      ncols_o = bi[i + 1] - bi[i];
      for (jo = 0; jo < ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        *cam++ = *ba++;
        bj++;
      }
      /* diagonal portion of A */
      ncols_d = ai[i + 1] - ai[i];
      for (j = 0; j < ncols_d; j++) *cam++ = *aa++;
      /* off-diagonal portion of A */
      for (j = jo; j < ncols_o; j++) {
        *cam++ = *ba++;
        bj++;
      }
    }
    PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &cam));
  } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
  PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A, &aav));
  PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B, &bav));
  PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMPIAIJGetLocalMatMerge - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into a sequential matrix with
  mlocal rows and n columns.
Where n is the sum of the number of columns of the diagonal and off-diagonal part 5301 5302 Not Collective 5303 5304 Input Parameters: 5305 + A - the matrix 5306 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5307 5308 Output Parameters: 5309 + glob - sequential `IS` with global indices associated with the columns of the local sequential matrix generated (can be `NULL`) 5310 - A_loc - the local sequential matrix generated 5311 5312 Level: developer 5313 5314 Note: 5315 This is different from `MatMPIAIJGetLocalMat()` since the first columns in the returning matrix are those associated with the diagonal 5316 part, then those associated with the off-diagonal part (in its local ordering) 5317 5318 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()` 5319 @*/ 5320 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A, MatReuse scall, IS *glob, Mat *A_loc) 5321 { 5322 Mat Ao, Ad; 5323 const PetscInt *cmap; 5324 PetscMPIInt size; 5325 PetscErrorCode (*f)(Mat, MatReuse, IS *, Mat *); 5326 5327 PetscFunctionBegin; 5328 PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &cmap)); 5329 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 5330 if (size == 1) { 5331 if (scall == MAT_INITIAL_MATRIX) { 5332 PetscCall(PetscObjectReference((PetscObject)Ad)); 5333 *A_loc = Ad; 5334 } else if (scall == MAT_REUSE_MATRIX) { 5335 PetscCall(MatCopy(Ad, *A_loc, SAME_NONZERO_PATTERN)); 5336 } 5337 if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad), Ad->cmap->n, Ad->cmap->rstart, 1, glob)); 5338 PetscFunctionReturn(PETSC_SUCCESS); 5339 } 5340 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJGetLocalMatMerge_C", &f)); 5341 PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0)); 5342 if (f) { 5343 PetscCall((*f)(A, scall, glob, A_loc)); 5344 } else { 5345 Mat_SeqAIJ *a = (Mat_SeqAIJ *)Ad->data; 5346 Mat_SeqAIJ *b = (Mat_SeqAIJ *)Ao->data; 5347 Mat_SeqAIJ *c; 5348 
PetscInt *ai = a->i, *aj = a->j; 5349 PetscInt *bi = b->i, *bj = b->j; 5350 PetscInt *ci, *cj; 5351 const PetscScalar *aa, *ba; 5352 PetscScalar *ca; 5353 PetscInt i, j, am, dn, on; 5354 5355 PetscCall(MatGetLocalSize(Ad, &am, &dn)); 5356 PetscCall(MatGetLocalSize(Ao, NULL, &on)); 5357 PetscCall(MatSeqAIJGetArrayRead(Ad, &aa)); 5358 PetscCall(MatSeqAIJGetArrayRead(Ao, &ba)); 5359 if (scall == MAT_INITIAL_MATRIX) { 5360 PetscInt k; 5361 PetscCall(PetscMalloc1(1 + am, &ci)); 5362 PetscCall(PetscMalloc1(ai[am] + bi[am], &cj)); 5363 PetscCall(PetscMalloc1(ai[am] + bi[am], &ca)); 5364 ci[0] = 0; 5365 for (i = 0, k = 0; i < am; i++) { 5366 const PetscInt ncols_o = bi[i + 1] - bi[i]; 5367 const PetscInt ncols_d = ai[i + 1] - ai[i]; 5368 ci[i + 1] = ci[i] + ncols_o + ncols_d; 5369 /* diagonal portion of A */ 5370 for (j = 0; j < ncols_d; j++, k++) { 5371 cj[k] = *aj++; 5372 ca[k] = *aa++; 5373 } 5374 /* off-diagonal portion of A */ 5375 for (j = 0; j < ncols_o; j++, k++) { 5376 cj[k] = dn + *bj++; 5377 ca[k] = *ba++; 5378 } 5379 } 5380 /* put together the new matrix */ 5381 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, dn + on, ci, cj, ca, A_loc)); 5382 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5383 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5384 c = (Mat_SeqAIJ *)(*A_loc)->data; 5385 c->free_a = PETSC_TRUE; 5386 c->free_ij = PETSC_TRUE; 5387 c->nonew = 0; 5388 PetscCall(MatSetType(*A_loc, ((PetscObject)Ad)->type_name)); 5389 } else if (scall == MAT_REUSE_MATRIX) { 5390 PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &ca)); 5391 for (i = 0; i < am; i++) { 5392 const PetscInt ncols_d = ai[i + 1] - ai[i]; 5393 const PetscInt ncols_o = bi[i + 1] - bi[i]; 5394 /* diagonal portion of A */ 5395 for (j = 0; j < ncols_d; j++) *ca++ = *aa++; 5396 /* off-diagonal portion of A */ 5397 for (j = 0; j < ncols_o; j++) *ca++ = *ba++; 5398 } 5399 PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &ca)); 5400 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall); 5401 PetscCall(MatSeqAIJRestoreArrayRead(Ad, &aa)); 5402 PetscCall(MatSeqAIJRestoreArrayRead(Ao, &aa)); 5403 if (glob) { 5404 PetscInt cst, *gidx; 5405 5406 PetscCall(MatGetOwnershipRangeColumn(A, &cst, NULL)); 5407 PetscCall(PetscMalloc1(dn + on, &gidx)); 5408 for (i = 0; i < dn; i++) gidx[i] = cst + i; 5409 for (i = 0; i < on; i++) gidx[i + dn] = cmap[i]; 5410 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad), dn + on, gidx, PETSC_OWN_POINTER, glob)); 5411 } 5412 } 5413 PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0)); 5414 PetscFunctionReturn(PETSC_SUCCESS); 5415 } 5416 5417 /*@C 5418 MatMPIAIJGetLocalMatCondensed - Creates a `MATSEQAIJ` matrix from an `MATMPIAIJ` matrix by taking all its local rows and NON-ZERO columns 5419 5420 Not Collective 5421 5422 Input Parameters: 5423 + A - the matrix 5424 . scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5425 . row - index set of rows to extract (or `NULL`) 5426 - col - index set of columns to extract (or `NULL`) 5427 5428 Output Parameter: 5429 . 
A_loc - the local sequential matrix generated 5430 5431 Level: developer 5432 5433 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()` 5434 @*/ 5435 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A, MatReuse scall, IS *row, IS *col, Mat *A_loc) 5436 { 5437 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5438 PetscInt i, start, end, ncols, nzA, nzB, *cmap, imark, *idx; 5439 IS isrowa, iscola; 5440 Mat *aloc; 5441 PetscBool match; 5442 5443 PetscFunctionBegin; 5444 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &match)); 5445 PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input"); 5446 PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed, A, 0, 0, 0)); 5447 if (!row) { 5448 start = A->rmap->rstart; 5449 end = A->rmap->rend; 5450 PetscCall(ISCreateStride(PETSC_COMM_SELF, end - start, start, 1, &isrowa)); 5451 } else { 5452 isrowa = *row; 5453 } 5454 if (!col) { 5455 start = A->cmap->rstart; 5456 cmap = a->garray; 5457 nzA = a->A->cmap->n; 5458 nzB = a->B->cmap->n; 5459 PetscCall(PetscMalloc1(nzA + nzB, &idx)); 5460 ncols = 0; 5461 for (i = 0; i < nzB; i++) { 5462 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5463 else break; 5464 } 5465 imark = i; 5466 for (i = 0; i < nzA; i++) idx[ncols++] = start + i; 5467 for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; 5468 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &iscola)); 5469 } else { 5470 iscola = *col; 5471 } 5472 if (scall != MAT_INITIAL_MATRIX) { 5473 PetscCall(PetscMalloc1(1, &aloc)); 5474 aloc[0] = *A_loc; 5475 } 5476 PetscCall(MatCreateSubMatrices(A, 1, &isrowa, &iscola, scall, &aloc)); 5477 if (!col) { /* attach global id of condensed columns */ 5478 PetscCall(PetscObjectCompose((PetscObject)aloc[0], "_petsc_GetLocalMatCondensed_iscol", (PetscObject)iscola)); 5479 } 5480 *A_loc = aloc[0]; 5481 PetscCall(PetscFree(aloc)); 5482 if (!row) PetscCall(ISDestroy(&isrowa)); 5483 if (!col) 
PetscCall(ISDestroy(&iscola)); 5484 PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed, A, 0, 0, 0)); 5485 PetscFunctionReturn(PETSC_SUCCESS); 5486 } 5487 5488 /* 5489 * Create a sequential AIJ matrix based on row indices. a whole column is extracted once a row is matched. 5490 * Row could be local or remote.The routine is designed to be scalable in memory so that nothing is based 5491 * on a global size. 5492 * */ 5493 static PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P, IS rows, Mat *P_oth) 5494 { 5495 Mat_MPIAIJ *p = (Mat_MPIAIJ *)P->data; 5496 Mat_SeqAIJ *pd = (Mat_SeqAIJ *)p->A->data, *po = (Mat_SeqAIJ *)p->B->data, *p_oth; 5497 PetscInt plocalsize, nrows, *ilocal, *oilocal, i, lidx, *nrcols, *nlcols, ncol; 5498 PetscMPIInt owner; 5499 PetscSFNode *iremote, *oiremote; 5500 const PetscInt *lrowindices; 5501 PetscSF sf, osf; 5502 PetscInt pcstart, *roffsets, *loffsets, *pnnz, j; 5503 PetscInt ontotalcols, dntotalcols, ntotalcols, nout; 5504 MPI_Comm comm; 5505 ISLocalToGlobalMapping mapping; 5506 const PetscScalar *pd_a, *po_a; 5507 5508 PetscFunctionBegin; 5509 PetscCall(PetscObjectGetComm((PetscObject)P, &comm)); 5510 /* plocalsize is the number of roots 5511 * nrows is the number of leaves 5512 * */ 5513 PetscCall(MatGetLocalSize(P, &plocalsize, NULL)); 5514 PetscCall(ISGetLocalSize(rows, &nrows)); 5515 PetscCall(PetscCalloc1(nrows, &iremote)); 5516 PetscCall(ISGetIndices(rows, &lrowindices)); 5517 for (i = 0; i < nrows; i++) { 5518 /* Find a remote index and an owner for a row 5519 * The row could be local or remote 5520 * */ 5521 owner = 0; 5522 lidx = 0; 5523 PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, &lidx)); 5524 iremote[i].index = lidx; 5525 iremote[i].rank = owner; 5526 } 5527 /* Create SF to communicate how many nonzero columns for each row */ 5528 PetscCall(PetscSFCreate(comm, &sf)); 5529 /* SF will figure out the number of nonzero columns for each row, and their 5530 * offsets 5531 * */ 5532 
PetscCall(PetscSFSetGraph(sf, plocalsize, nrows, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 5533 PetscCall(PetscSFSetFromOptions(sf)); 5534 PetscCall(PetscSFSetUp(sf)); 5535 5536 PetscCall(PetscCalloc1(2 * (plocalsize + 1), &roffsets)); 5537 PetscCall(PetscCalloc1(2 * plocalsize, &nrcols)); 5538 PetscCall(PetscCalloc1(nrows, &pnnz)); 5539 roffsets[0] = 0; 5540 roffsets[1] = 0; 5541 for (i = 0; i < plocalsize; i++) { 5542 /* diagonal */ 5543 nrcols[i * 2 + 0] = pd->i[i + 1] - pd->i[i]; 5544 /* off-diagonal */ 5545 nrcols[i * 2 + 1] = po->i[i + 1] - po->i[i]; 5546 /* compute offsets so that we relative location for each row */ 5547 roffsets[(i + 1) * 2 + 0] = roffsets[i * 2 + 0] + nrcols[i * 2 + 0]; 5548 roffsets[(i + 1) * 2 + 1] = roffsets[i * 2 + 1] + nrcols[i * 2 + 1]; 5549 } 5550 PetscCall(PetscCalloc1(2 * nrows, &nlcols)); 5551 PetscCall(PetscCalloc1(2 * nrows, &loffsets)); 5552 /* 'r' means root, and 'l' means leaf */ 5553 PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE)); 5554 PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE)); 5555 PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE)); 5556 PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE)); 5557 PetscCall(PetscSFDestroy(&sf)); 5558 PetscCall(PetscFree(roffsets)); 5559 PetscCall(PetscFree(nrcols)); 5560 dntotalcols = 0; 5561 ontotalcols = 0; 5562 ncol = 0; 5563 for (i = 0; i < nrows; i++) { 5564 pnnz[i] = nlcols[i * 2 + 0] + nlcols[i * 2 + 1]; 5565 ncol = PetscMax(pnnz[i], ncol); 5566 /* diagonal */ 5567 dntotalcols += nlcols[i * 2 + 0]; 5568 /* off-diagonal */ 5569 ontotalcols += nlcols[i * 2 + 1]; 5570 } 5571 /* We do not need to figure the right number of columns 5572 * since all the calculations will be done by going through the raw data 5573 * */ 5574 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, nrows, ncol, 0, pnnz, P_oth)); 5575 PetscCall(MatSetUp(*P_oth)); 5576 PetscCall(PetscFree(pnnz)); 5577 p_oth = 
(Mat_SeqAIJ *)(*P_oth)->data; 5578 /* diagonal */ 5579 PetscCall(PetscCalloc1(dntotalcols, &iremote)); 5580 /* off-diagonal */ 5581 PetscCall(PetscCalloc1(ontotalcols, &oiremote)); 5582 /* diagonal */ 5583 PetscCall(PetscCalloc1(dntotalcols, &ilocal)); 5584 /* off-diagonal */ 5585 PetscCall(PetscCalloc1(ontotalcols, &oilocal)); 5586 dntotalcols = 0; 5587 ontotalcols = 0; 5588 ntotalcols = 0; 5589 for (i = 0; i < nrows; i++) { 5590 owner = 0; 5591 PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, NULL)); 5592 /* Set iremote for diag matrix */ 5593 for (j = 0; j < nlcols[i * 2 + 0]; j++) { 5594 iremote[dntotalcols].index = loffsets[i * 2 + 0] + j; 5595 iremote[dntotalcols].rank = owner; 5596 /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */ 5597 ilocal[dntotalcols++] = ntotalcols++; 5598 } 5599 /* off-diagonal */ 5600 for (j = 0; j < nlcols[i * 2 + 1]; j++) { 5601 oiremote[ontotalcols].index = loffsets[i * 2 + 1] + j; 5602 oiremote[ontotalcols].rank = owner; 5603 oilocal[ontotalcols++] = ntotalcols++; 5604 } 5605 } 5606 PetscCall(ISRestoreIndices(rows, &lrowindices)); 5607 PetscCall(PetscFree(loffsets)); 5608 PetscCall(PetscFree(nlcols)); 5609 PetscCall(PetscSFCreate(comm, &sf)); 5610 /* P serves as roots and P_oth is leaves 5611 * Diag matrix 5612 * */ 5613 PetscCall(PetscSFSetGraph(sf, pd->i[plocalsize], dntotalcols, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 5614 PetscCall(PetscSFSetFromOptions(sf)); 5615 PetscCall(PetscSFSetUp(sf)); 5616 5617 PetscCall(PetscSFCreate(comm, &osf)); 5618 /* off-diagonal */ 5619 PetscCall(PetscSFSetGraph(osf, po->i[plocalsize], ontotalcols, oilocal, PETSC_OWN_POINTER, oiremote, PETSC_OWN_POINTER)); 5620 PetscCall(PetscSFSetFromOptions(osf)); 5621 PetscCall(PetscSFSetUp(osf)); 5622 PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a)); 5623 PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a)); 5624 /* operate on the matrix internal data to save memory */ 5625 
PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5626 PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5627 PetscCall(MatGetOwnershipRangeColumn(P, &pcstart, NULL)); 5628 /* Convert to global indices for diag matrix */ 5629 for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] += pcstart; 5630 PetscCall(PetscSFBcastBegin(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE)); 5631 /* We want P_oth store global indices */ 5632 PetscCall(ISLocalToGlobalMappingCreate(comm, 1, p->B->cmap->n, p->garray, PETSC_COPY_VALUES, &mapping)); 5633 /* Use memory scalable approach */ 5634 PetscCall(ISLocalToGlobalMappingSetType(mapping, ISLOCALTOGLOBALMAPPINGHASH)); 5635 PetscCall(ISLocalToGlobalMappingApply(mapping, po->i[plocalsize], po->j, po->j)); 5636 PetscCall(PetscSFBcastBegin(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE)); 5637 PetscCall(PetscSFBcastEnd(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE)); 5638 /* Convert back to local indices */ 5639 for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] -= pcstart; 5640 PetscCall(PetscSFBcastEnd(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE)); 5641 nout = 0; 5642 PetscCall(ISGlobalToLocalMappingApply(mapping, IS_GTOLM_DROP, po->i[plocalsize], po->j, &nout, po->j)); 5643 PetscCheck(nout == po->i[plocalsize], comm, PETSC_ERR_ARG_INCOMP, "n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ", po->i[plocalsize], nout); 5644 PetscCall(ISLocalToGlobalMappingDestroy(&mapping)); 5645 /* Exchange values */ 5646 PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5647 PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5648 PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a)); 5649 PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a)); 5650 /* Stop PETSc from shrinking memory */ 5651 for (i = 0; i < nrows; i++) p_oth->ilen[i] = p_oth->imax[i]; 5652 PetscCall(MatAssemblyBegin(*P_oth, MAT_FINAL_ASSEMBLY)); 5653 PetscCall(MatAssemblyEnd(*P_oth, 
MAT_FINAL_ASSEMBLY));
  /* Attach the PetscSF objects to P_oth so that they can be reused on later MAT_REUSE_MATRIX calls */
  PetscCall(PetscObjectCompose((PetscObject)*P_oth, "diagsf", (PetscObject)sf));
  PetscCall(PetscObjectCompose((PetscObject)*P_oth, "offdiagsf", (PetscObject)osf));
  PetscCall(PetscSFDestroy(&sf));
  PetscCall(PetscSFDestroy(&osf));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
 * MatGetBrowsOfAcols_MPIXAIJ - Creates a SeqAIJ matrix (P_oth) by taking rows of P that correspond to
 * nonzero columns of the local off-diagonal block of A. This supports MPIAIJ and MAIJ.
 *
 * dof is the block factor used to map A's off-diagonal columns to P's rows (dof=1 for plain MPIAIJ).
 * On MAT_INITIAL_MATRIX the submatrix is built from scratch; on MAT_REUSE_MATRIX only values are
 * refreshed through PetscSF objects composed on *P_oth by an earlier call.
 * */
PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A, Mat P, PetscInt dof, MatReuse reuse, Mat *P_oth)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *p = (Mat_MPIAIJ *)P->data;
  Mat_SeqAIJ *p_oth;
  IS          rows, map;
  PetscHMapI  hamp;
  PetscInt    i, htsize, *rowindices, off, *mapping, key, count;
  MPI_Comm    comm;
  PetscSF     sf, osf;
  PetscBool   has;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, P, 0, 0));
  /* If it is the first time, create an index set of off-diag nonzero columns of A,
   * and then create a submatrix (that often is an overlapping matrix)
   * */
  if (reuse == MAT_INITIAL_MATRIX) {
    /* Use a hash table to figure out unique keys (duplicate keys arise when dof > 1) */
    PetscCall(PetscHMapICreateWithSize(a->B->cmap->n, &hamp));
    PetscCall(PetscCalloc1(a->B->cmap->n, &mapping));
    count = 0;
    /* Assume that a->garray is sorted, otherwise the following does not make sense */
    for (i = 0; i < a->B->cmap->n; i++) {
      key = a->garray[i] / dof;
      PetscCall(PetscHMapIHas(hamp, key, &has));
      if (!has) {
        mapping[i] = count;
        PetscCall(PetscHMapISet(hamp, key, count++));
      } else {
        /* Current 'i' has the same value the previous step */
        mapping[i] = count - 1;
      }
    }
    PetscCall(ISCreateGeneral(comm, a->B->cmap->n, mapping, PETSC_OWN_POINTER, &map));
    PetscCall(PetscHMapIGetSize(hamp, &htsize));
    PetscCheck(htsize == count, comm, PETSC_ERR_ARG_INCOMP, " Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT, htsize, count);
    PetscCall(PetscCalloc1(htsize, &rowindices));
    off = 0; /* insertion cursor advanced by PetscHMapIGetKeys */
    PetscCall(PetscHMapIGetKeys(hamp, &off, rowindices));
    PetscCall(PetscHMapIDestroy(&hamp));
    /* Hash-table key order is arbitrary, so sort to get the needed rows of P in ascending order */
    PetscCall(PetscSortInt(htsize, rowindices));
    PetscCall(ISCreateGeneral(comm, htsize, rowindices, PETSC_OWN_POINTER, &rows));
    /* In case, the matrix was already created but users want to recreate the matrix */
    PetscCall(MatDestroy(P_oth));
    PetscCall(MatCreateSeqSubMatrixWithRows_Private(P, rows, P_oth));
    PetscCall(PetscObjectCompose((PetscObject)*P_oth, "aoffdiagtopothmapping", (PetscObject)map));
    PetscCall(ISDestroy(&map));
    PetscCall(ISDestroy(&rows));
  } else if (reuse == MAT_REUSE_MATRIX) {
    /* If matrix was already created, we simply update values using SF objects
     * that are attached to the matrix earlier.
     */
    const PetscScalar *pd_a, *po_a;

    PetscCall(PetscObjectQuery((PetscObject)*P_oth, "diagsf", (PetscObject *)&sf));
    PetscCall(PetscObjectQuery((PetscObject)*P_oth, "offdiagsf", (PetscObject *)&osf));
    PetscCheck(sf && osf, comm, PETSC_ERR_ARG_NULL, "Matrix is not initialized yet");
    p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
    /* Update values in place; diag and offdiag SFs scatter into disjoint slots of p_oth->a */
    PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
    PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
    PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
    PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
    PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
    PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
  } else SETERRQ(comm, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown reuse type");
  PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, P, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatGetBrowsOfAcols - Returns `IS` that contain rows of `B` that equal to nonzero columns of local `A`

  Collective

  Input Parameters:
+ A - the first matrix in `MATMPIAIJ` format
. B - the second matrix in `MATMPIAIJ` format
- scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`

  Output Parameters:
+ rowb - On input index sets of rows of B to extract (or `NULL`), modified on output
.
colb - On input index sets of columns of B to extract (or `NULL`), modified on output
- B_seq - the sequential matrix generated

  Level: developer

.seealso: `Mat`, `MATMPIAIJ`, `IS`, `MatReuse`
@*/
PetscErrorCode MatGetBrowsOfAcols(Mat A, Mat B, MatReuse scall, IS *rowb, IS *colb, Mat *B_seq)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  PetscInt   *idx, i, start, ncols, nzA, nzB, *cmap, imark;
  IS          isrowb, iscolb;
  Mat        *bseq = NULL;

  PetscFunctionBegin;
  PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
             A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
  PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, B, 0, 0));

  if (scall == MAT_INITIAL_MATRIX) {
    /* Build the row IS as the merge of A's off-diagonal column map (garray, sorted) and
       A's local columns, keeping global ascending order */
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    PetscCall(PetscMalloc1(nzA + nzB, &idx));
    ncols = 0;
    for (i = 0; i < nzB; i++) { /* row < local row index */
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i; /* first garray entry that lies beyond the local column range */
    for (i = 0; i < nzA; i++) idx[ncols++] = start + i;   /* local rows */
    for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
    PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &isrowb));
    PetscCall(ISCreateStride(PETSC_COMM_SELF, B->cmap->N, 0, 1, &iscolb)); /* all columns of B */
  } else {
    PetscCheck(rowb && colb, PETSC_COMM_SELF, PETSC_ERR_SUP, "IS rowb and colb must be provided for MAT_REUSE_MATRIX");
    isrowb = *rowb;
    iscolb = *colb;
    /* MatCreateSubMatrices() expects an array of Mat on reuse; wrap the existing B_seq in one */
    PetscCall(PetscMalloc1(1, &bseq));
    bseq[0] = *B_seq;
  }
  PetscCall(MatCreateSubMatrices(B, 1, &isrowb, &iscolb, scall, &bseq));
  *B_seq = bseq[0];
  PetscCall(PetscFree(bseq));
  /* Return the index sets to the caller if requested, otherwise destroy them */
  if (!rowb) {
    PetscCall(ISDestroy(&isrowb));
  } else {
    *rowb = isrowb;
  }
  if (!colb) {
    PetscCall(ISDestroy(&iscolb));
  } else {
    *colb = iscolb;
  }
  PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, B, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatGetBrowsOfAoCols_MPIAIJ - Creates a `MATSEQAIJ` matrix by taking rows of B that equal to nonzero columns
  of the OFF-DIAGONAL portion of local A

  Collective

  Input Parameters:
+ A,B - the matrices in `MATMPIAIJ` format
- scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`

  Output Parameter:
+ startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
. startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
. bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
- B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N

  Developer Note:
  This directly accesses information inside the VecScatter associated with the matrix-vector product
  for this matrix. This is not desirable.

  Level: developer

*/

PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A, Mat B, MatReuse scall, PetscInt **startsj_s, PetscInt **startsj_r, MatScalar **bufa_ptr, Mat *B_oth)
{
  Mat_MPIAIJ        *a = (Mat_MPIAIJ *)A->data;
  VecScatter         ctx;
  MPI_Comm           comm;
  const PetscMPIInt *rprocs, *sprocs;
  PetscMPIInt        nrecvs, nsends;
  const PetscInt    *srow, *rstarts, *sstarts;
  PetscInt          *rowlen, *bufj, *bufJ, ncols = 0, aBn = a->B->cmap->n, row, *b_othi, *b_othj, *rvalues = NULL, *svalues = NULL, *cols, sbs, rbs;
  PetscInt           i, j, k = 0, l, ll, nrows, *rstartsj = NULL, *sstartsj, len;
  PetscScalar       *b_otha, *bufa, *bufA, *vals = NULL;
  MPI_Request       *reqs = NULL, *rwaits = NULL, *swaits = NULL;
  PetscMPIInt        size, tag, rank, nreqs;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));

  PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
             A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
  PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, B, 0, 0));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));

  if (size == 1) {
    /* Sequential run: there is no off-diagonal block, so there is nothing to fetch.
       NOTE(review): these first two assignments write the local pointer copies, not *startsj_s / *bufa_ptr;
       presumably intentional since the outputs are unused when size == 1 -- confirm with callers. */
    startsj_s = NULL;
    bufa_ptr  = NULL;
    *B_oth    = NULL;
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  ctx = a->Mvctx;
  tag = ((PetscObject)ctx)->tag;

  PetscCall(VecScatterGetRemote_Private(ctx, PETSC_TRUE /*send*/, &nsends, &sstarts, &srow, &sprocs, &sbs));
  /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
  PetscCall(VecScatterGetRemoteOrdered_Private(ctx, PETSC_FALSE /*recv*/, &nrecvs, &rstarts, NULL /*indices not needed*/, &rprocs, &rbs));
  PetscCall(PetscMPIIntCast(nsends + nrecvs, &nreqs));
  PetscCall(PetscMalloc1(nreqs, &reqs));
  /* One request array shared by the three exchange phases: receives first, then sends */
  rwaits = reqs;
  swaits = PetscSafePointerPlusOffset(reqs, nrecvs);

  if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX;
  if (scall == MAT_INITIAL_MATRIX) {
    /* i-array: exchange row lengths so each rank can size the CSR row pointer of B_oth */
    /* post receives */
    if (nrecvs) PetscCall(PetscMalloc1(rbs * (rstarts[nrecvs] - rstarts[0]), &rvalues)); /* rstarts can be NULL when nrecvs=0 */
    for (i = 0; i < nrecvs; i++) {
      rowlen = rvalues + rstarts[i] * rbs;
      nrows  = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of indices to be received */
      PetscCallMPI(MPIU_Irecv(rowlen, nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i));
    }

    /* pack the outgoing message */
    PetscCall(PetscMalloc2(nsends + 1, &sstartsj, nrecvs + 1, &rstartsj));

    sstartsj[0] = 0;
    rstartsj[0] = 0;
    len         = 0; /* total length of j or a array to be sent */
    if (nsends) {
      k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */
      PetscCall(PetscMalloc1(sbs * (sstarts[nsends] - sstarts[0]), &svalues));
    }
    for (i = 0; i < nsends; i++) {
      rowlen = svalues + (sstarts[i] - sstarts[0]) * sbs;
      nrows  = sstarts[i + 1] - sstarts[i]; /* num of block rows */
      for (j = 0; j < nrows; j++) {
        row = srow[k] + B->rmap->range[rank]; /* global row idx */
        for (l = 0; l < sbs; l++) {
          PetscCall(MatGetRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); /* rowlength */

          rowlen[j * sbs + l] = ncols;

          len += ncols;
          PetscCall(MatRestoreRow_MPIAIJ(B, row + l, &ncols, NULL, NULL));
        }
        k++;
      }
      PetscCallMPI(MPIU_Isend(rowlen, nrows * sbs, MPIU_INT, sprocs[i], tag, comm, swaits + i));

      sstartsj[i + 1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */
    }
    /* recvs and sends of i-array are completed */
    if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
    PetscCall(PetscFree(svalues));

    /* allocate buffers for sending j and a arrays */
    PetscCall(PetscMalloc1(len + 1, &bufj));
    PetscCall(PetscMalloc1(len + 1, &bufa));

    /* create i-array of B_oth */
    PetscCall(PetscMalloc1(aBn + 2, &b_othi));

    b_othi[0] = 0;
    len       = 0; /* total length of j or a array to be received */
    k         = 0;
    for (i = 0; i < nrecvs; i++) {
      rowlen = rvalues + (rstarts[i] - rstarts[0]) * rbs;
      nrows  = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of rows to be received */
      for (j = 0; j < nrows; j++) {
        b_othi[k + 1] = b_othi[k] + rowlen[j];
        PetscCall(PetscIntSumError(rowlen[j], len, &len)); /* overflow-checked accumulation */
        k++;
      }
      rstartsj[i + 1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */
    }
    PetscCall(PetscFree(rvalues));

    /* allocate space for j and a arrays of B_oth */
    PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_othj));
    PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_otha));

    /* j-array: exchange the column indices of the requested rows */
    /* post receives of j-array */
    for (i = 0; i < nrecvs; i++) {
      nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */
      PetscCallMPI(MPIU_Irecv(b_othj + rstartsj[i], nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i));
    }

    /* pack the outgoing message j-array */
    if (nsends) k = sstarts[0];
    for (i = 0; i < nsends; i++) {
      nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */
      bufJ  = bufj + sstartsj[i];
      for (j = 0; j < nrows; j++) {
        row = srow[k++] + B->rmap->range[rank]; /* global row idx */
        for (ll = 0; ll < sbs; ll++) {
          PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL));
          for (l = 0; l < ncols; l++) *bufJ++ = cols[l];
          PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL));
        }
      }
      PetscCallMPI(MPIU_Isend(bufj + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_INT, sprocs[i], tag, comm, swaits + i));
    }

    /* recvs and sends of j-array are completed */
    if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
  } else if (scall == MAT_REUSE_MATRIX) {
    sstartsj = *startsj_s;
    rstartsj = *startsj_r;
    bufa     = *bufa_ptr;
    PetscCall(MatSeqAIJGetArrayWrite(*B_oth, &b_otha));
  } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container"); /* NOTE(review): message looks copy-pasted; this branch really means an invalid MatReuse value */

  /* a-array: exchange the numerical values (done for both INITIAL and REUSE) */
  /* post receives of a-array */
  for (i = 0; i < nrecvs; i++) {
    nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */
    PetscCallMPI(MPIU_Irecv(b_otha + rstartsj[i], nrows, MPIU_SCALAR, rprocs[i], tag, comm, rwaits + i));
  }

  /* pack the outgoing message a-array */
  if (nsends) k = sstarts[0];
  for (i = 0; i < nsends; i++) {
    nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */
    bufA  = bufa + sstartsj[i];
    for (j = 0; j < nrows; j++) {
      row = srow[k++] + B->rmap->range[rank]; /* global row idx */
      for (ll = 0; ll < sbs; ll++) {
        PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals));
        for (l = 0; l < ncols; l++) *bufA++ = vals[l];
        PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals));
      }
    }
    PetscCallMPI(MPIU_Isend(bufa + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_SCALAR, sprocs[i], tag, comm, swaits + i));
  }
  /* recvs and sends of a-array are completed */
  if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE));
  PetscCall(PetscFree(reqs));

  if (scall == MAT_INITIAL_MATRIX) {
    Mat_SeqAIJ *b_oth;

    /* put together the new matrix */
    PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, aBn, B->cmap->N, b_othi, b_othj, b_otha, B_oth));

    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    b_oth          = (Mat_SeqAIJ *)(*B_oth)->data;
    b_oth->free_a  = PETSC_TRUE;
    b_oth->free_ij = PETSC_TRUE;
    b_oth->nonew   = 0;

    PetscCall(PetscFree(bufj));
    if (!startsj_s || !bufa_ptr) {
      PetscCall(PetscFree2(sstartsj, rstartsj));
      PetscCall(PetscFree(bufa_ptr));
    } else {
      /* hand the communication metadata back to the caller for MAT_REUSE_MATRIX */
      *startsj_s = sstartsj;
      *startsj_r = rstartsj;
      *bufa_ptr  = bufa;
    }
  } else if (scall == MAT_REUSE_MATRIX) {
    PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth, &b_otha));
  }

  PetscCall(VecScatterRestoreRemote_Private(ctx, PETSC_TRUE, &nsends, &sstarts, &srow, &sprocs, &sbs));
  PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx, PETSC_FALSE, &nrecvs, &rstarts, NULL, &rprocs, &rbs));
  PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, B, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat, MatType, MatReuse, Mat *);
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat, MatType, MatReuse, Mat *);
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat, MatType, MatReuse, Mat *);
#if defined(PETSC_HAVE_MKL_SPARSE)
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat, MatType, MatReuse, Mat *);
#endif
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat, MatType, MatReuse, Mat *);
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *);
#if defined(PETSC_HAVE_ELEMENTAL)
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat, MatType, MatReuse, Mat *);
#endif
#if defined(PETSC_HAVE_SCALAPACK)
PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *);
#endif
#if defined(PETSC_HAVE_HYPRE)
PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
#endif
#if defined(PETSC_HAVE_CUDA)
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat, MatType,
MatReuse, Mat *);
#endif
#if defined(PETSC_HAVE_HIP)
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJHIPSPARSE(Mat, MatType, MatReuse, Mat *);
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat, MatType, MatReuse, Mat *);
#endif
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat, MatType, MatReuse, Mat *);
PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);

/*
    Computes (B'*A')' since computing B*A directly is untenable

           n                       p                          p
        [     ]           [     ]            [     ]
      m [  A  ]   *   n   [  B  ]   =    m   [  C  ]
        [     ]           [     ]            [     ]

*/
static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A, Mat B, Mat C)
{
  Mat At, Bt, Ct;

  PetscFunctionBegin;
  PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &At));
  PetscCall(MatTranspose(B, MAT_INITIAL_MATRIX, &Bt));
  PetscCall(MatMatMult(Bt, At, MAT_INITIAL_MATRIX, PETSC_CURRENT, &Ct));
  PetscCall(MatDestroy(&At));
  PetscCall(MatDestroy(&Bt));
  /* transpose Ct into the preallocated C without rebuilding its symbolic data */
  PetscCall(MatTransposeSetPrecursor(Ct, C));
  PetscCall(MatTranspose(Ct, MAT_REUSE_MATRIX, &C));
  PetscCall(MatDestroy(&Ct));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Symbolic phase for C = A*B with A MPIDENSE and B MPIAIJ: size/type C and install the numeric routine.
   The 'fill' argument is unused since the dense product needs no fill estimate. */
static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A, Mat B, PetscReal fill, Mat C)
{
  PetscBool cisdense;

  PetscFunctionBegin;
  PetscCheck(A->cmap->n == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT, A->cmap->n, B->rmap->n);
  PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N));
  PetscCall(MatSetBlockSizesFromMats(C, A, B));
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, MATMPIDENSEHIP, ""));
  if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); /* default C to A's (dense) type */
  PetscCall(MatSetUp(C));

  C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Product dispatch for C = A*B (MATPRODUCT_AB) with A MPIDENSE, B MPIAIJ */
static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
{
  Mat_Product *product = C->product;
  Mat          A = product->A, B = product->B;

  PetscFunctionBegin;
  PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
             A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
  C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
  C->ops->productsymbolic = MatProductSymbolic_AB;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Entry point for MatProductSetFromOptions with MPIDense * MPIAIJ; only the AB product is supported */
PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
{
  Mat_Product *product = C->product;

  PetscFunctionBegin;
  if (product->type == MATPRODUCT_AB) PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix

   Input Parameters:

     j1,rowBegin1,rowEnd1,jmap1: describe the first set of nonzeros (Set1)
     j2,rowBegin2,rowEnd2,jmap2: describe the second set of nonzeros (Set2)

     mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat

     For Set1, j1[] contains column indices of the nonzeros.
     For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k
     respectively (note rowEnd1[k] is not necessarily equal to rowBegin1[k+1]). Indices in this range of j1[] are sorted,
     but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1.

     Similar for Set2.

   This routine merges the two sets of nonzeros row by row and removes repeats.

   Output Parameters: (memory is allocated by the caller)

     i[],j[]: the CSR of the merged matrix, which has m rows.
     imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix.
     imap2[]: similar to imap1[], but for Set2.
     Note we order nonzeros row-by-row and from left to right.
*/
static PetscErrorCode MatMergeEntries_Internal(Mat mat, const PetscInt j1[], const PetscInt j2[], const PetscCount rowBegin1[], const PetscCount rowEnd1[], const PetscCount rowBegin2[], const PetscCount rowEnd2[], const PetscCount jmap1[], const PetscCount jmap2[], PetscCount imap1[], PetscCount imap2[], PetscInt i[], PetscInt j[])
{
  PetscInt   r, m; /* Row index of mat */
  PetscCount t, t1, t2, b1, e1, b2, e2;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(mat, &m, NULL));
  t1 = t2 = t = 0; /* Count unique nonzeros in Set1, Set2 and the merged matrix, respectively */
  i[0] = 0;
  for (r = 0; r < m; r++) { /* Do row by row merging */
    b1 = rowBegin1[r];
    e1 = rowEnd1[r];
    b2 = rowBegin2[r];
    e2 = rowEnd2[r];
    /* Classic two-pointer merge of the two sorted (possibly repeating) column lists of row r */
    while (b1 < e1 && b2 < e2) {
      if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */
        j[t]      = j1[b1];
        imap1[t1] = t;
        imap2[t2] = t;
        b1 += jmap1[t1 + 1] - jmap1[t1]; /* Jump over the repeats to the next unique nonzero in Set1 */
        b2 += jmap2[t2 + 1] - jmap2[t2]; /* Jump over the repeats to the next unique nonzero in Set2 */
        t1++;
        t2++;
        t++;
      } else if (j1[b1] < j2[b2]) {
        j[t]      = j1[b1];
        imap1[t1] = t;
        b1 += jmap1[t1 + 1] - jmap1[t1];
        t1++;
        t++;
      } else {
        j[t]      = j2[b2];
        imap2[t2] = t;
        b2 += jmap2[t2 + 1] - jmap2[t2];
        t2++;
        t++;
      }
    }
    /* Merge the remaining in either j1[] or j2[] */
    while (b1 < e1) {
      j[t]      = j1[b1];
      imap1[t1] = t;
      b1 += jmap1[t1 + 1] - jmap1[t1];
      t1++;
      t++;
    }
    while (b2 < e2) {
      j[t]      = j2[b2];
      imap2[t2] = t;
      b2 += jmap2[t2 + 1] - jmap2[t2];
      t2++;
      t++;
    }
    /* Running total t (PetscCount) becomes the CSR row pointer; the cast checks for PetscInt overflow */
    PetscCall(PetscIntCast(t, i + r + 1));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block

  Input Parameters:
    mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m.
    n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[]
      respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n.

      i[] is already sorted, but within a row, j[] is not sorted and might have repeats.
      i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting.

  Output Parameters:
    j[],perm[]: the routine needs to sort j[] within each row along with perm[].
    rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller.
      They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block,
      and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block.

    Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine.
      Atot: number of entries belonging to the diagonal block.
      Annz: number of unique nonzeros belonging to the diagonal block.
      Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count
        repeats (i.e., same 'i,j' pair).
      Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block.
More precisely, Ajmap[t+1] - Ajmap[t]
        is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0.

      Atot: number of entries belonging to the diagonal block
      Annz: number of unique nonzeros belonging to the diagonal block.

    Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block.

    Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1().
*/
static PetscErrorCode MatSplitEntries_Internal(Mat mat, PetscCount n, const PetscInt i[], PetscInt j[], PetscCount perm[], PetscCount rowBegin[], PetscCount rowMid[], PetscCount rowEnd[], PetscCount *Atot_, PetscCount **Aperm_, PetscCount *Annz_, PetscCount **Ajmap_, PetscCount *Btot_, PetscCount **Bperm_, PetscCount *Bnnz_, PetscCount **Bjmap_)
{
  PetscInt    cstart, cend, rstart, rend, row, col;
  PetscCount  Atot = 0, Btot = 0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */
  PetscCount  Annz = 0, Bnnz = 0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */
  PetscCount  k, m, p, q, r, s, mid;
  PetscCount *Aperm, *Bperm, *Ajmap, *Bjmap;

  PetscFunctionBegin;
  PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
  PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
  m = rend - rstart;

  /* Skip negative rows */
  for (k = 0; k < n; k++)
    if (i[k] >= 0) break;

  /* Process [k,n): sort and partition each local row into diag and offdiag portions,
     fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz.
  */
  while (k < n) {
    row = i[k];
    /* Entries in [k,s) are in one row. Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */
    for (s = k; s < n; s++)
      if (i[s] != row) break;

    /* Shift diag columns to range of [-PETSC_INT_MAX, -1] so one sort moves them before the offdiag columns */
    for (p = k; p < s; p++) {
      if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_INT_MAX;
      else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index %" PetscInt_FMT " is out of range", j[p]);
    }
    PetscCall(PetscSortIntWithCountArray(s - k, j + k, perm + k));
    PetscCall(PetscSortedIntUpperBound(j, k, s, -1, &mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */
    rowBegin[row - rstart] = k;
    rowMid[row - rstart]   = mid;
    rowEnd[row - rstart]   = s;

    /* Count nonzeros of this diag/offdiag row, which might have repeats */
    Atot += mid - k;
    Btot += s - mid;

    /* Count unique nonzeros of this diag row */
    for (p = k; p < mid;) {
      col = j[p];
      do {
        j[p] += PETSC_INT_MAX; /* Revert the modified diagonal indices while counting */
        p++;
      } while (p < mid && j[p] == col);
      Annz++;
    }

    /* Count unique nonzeros of this offdiag row */
    for (p = mid; p < s;) {
      col = j[p];
      do {
        p++;
      } while (p < s && j[p] == col);
      Bnnz++;
    }
    k = s;
  }

  /* Allocation according to Atot, Btot, Annz, Bnnz */
  PetscCall(PetscMalloc1(Atot, &Aperm));
  PetscCall(PetscMalloc1(Btot, &Bperm));
  PetscCall(PetscMalloc1(Annz + 1, &Ajmap));
  PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap));

  /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap;
     the counters are reset and re-accumulated as cursors into the freshly allocated arrays */
  Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0;
  for (r = 0; r < m; r++) {
    k   = rowBegin[r];
    mid = rowMid[r];
    s   = rowEnd[r];
    PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Aperm, Atot), PetscSafePointerPlusOffset(perm, k), mid - k));
    PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Bperm, Btot), PetscSafePointerPlusOffset(perm, mid), s - mid));
    Atot += mid - k;
    Btot += s - mid;

    /* Scan column indices in this row and find out how many repeats each unique nonzero has */
    for (p = k; p < mid;) {
      col = j[p];
      q   = p;
      do {
        p++;
      } while (p < mid && j[p] == col);
      Ajmap[Annz + 1] = Ajmap[Annz] + (p - q);
      Annz++;
    }

    for (p = mid; p < s;) {
      col = j[p];
      q   = p;
      do {
        p++;
      } while (p < s && j[p] == col);
      Bjmap[Bnnz + 1] = Bjmap[Bnnz] + (p - q);
      Bnnz++;
    }
  }
  /* Output */
  *Aperm_ = Aperm;
  *Annz_  = Annz;
  *Atot_  = Atot;
  *Ajmap_ = Ajmap;
  *Bperm_ = Bperm;
  *Bnnz_  = Bnnz;
  *Btot_  = Btot;
  *Bjmap_ = Bjmap;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix

  Input Parameters:
    nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[]
    nnz: number of unique nonzeros in the merged matrix
    imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix
    jmap[nnz1+1]: i-th nonzero in the set has jmap[i+1] - jmap[i] repeats in the set

  Output Parameter: (memory is allocated by the caller)
    jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set

  Example:
    nnz1 = 4
    nnz = 6
    imap = [1,3,4,5]
    jmap = [0,3,5,6,7]
  then,
    jmap_new = [0,0,3,3,5,6,7]
*/
static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1, PetscCount nnz, const PetscCount imap[], const PetscCount jmap[], PetscCount jmap_new[])
{
  PetscCount k, p;

  PetscFunctionBegin;
  jmap_new[0] = 0;
  p = nnz;                          /* p loops over jmap_new[] backwards */
  for (k = nnz1 - 1; k >= 0; k--) { /* k loops over imap[]; backward walk keeps the fill O(nnz+nnz1) */
    for (; p >
imap[k]; p--) jmap_new[p] = jmap[k + 1]; 6385 } 6386 for (; p >= 0; p--) jmap_new[p] = jmap[0]; 6387 PetscFunctionReturn(PETSC_SUCCESS); 6388 } 6389 6390 static PetscErrorCode MatCOOStructDestroy_MPIAIJ(void **data) 6391 { 6392 MatCOOStruct_MPIAIJ *coo = (MatCOOStruct_MPIAIJ *)*data; 6393 6394 PetscFunctionBegin; 6395 PetscCall(PetscSFDestroy(&coo->sf)); 6396 PetscCall(PetscFree(coo->Aperm1)); 6397 PetscCall(PetscFree(coo->Bperm1)); 6398 PetscCall(PetscFree(coo->Ajmap1)); 6399 PetscCall(PetscFree(coo->Bjmap1)); 6400 PetscCall(PetscFree(coo->Aimap2)); 6401 PetscCall(PetscFree(coo->Bimap2)); 6402 PetscCall(PetscFree(coo->Aperm2)); 6403 PetscCall(PetscFree(coo->Bperm2)); 6404 PetscCall(PetscFree(coo->Ajmap2)); 6405 PetscCall(PetscFree(coo->Bjmap2)); 6406 PetscCall(PetscFree(coo->Cperm1)); 6407 PetscCall(PetscFree2(coo->sendbuf, coo->recvbuf)); 6408 PetscCall(PetscFree(coo)); 6409 PetscFunctionReturn(PETSC_SUCCESS); 6410 } 6411 6412 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, PetscInt coo_i[], PetscInt coo_j[]) 6413 { 6414 MPI_Comm comm; 6415 PetscMPIInt rank, size; 6416 PetscInt m, n, M, N, rstart, rend, cstart, cend; /* Sizes, indices of row/col, therefore with type PetscInt */ 6417 PetscCount k, p, q, rem; /* Loop variables over coo arrays */ 6418 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)mat->data; 6419 PetscContainer container; 6420 MatCOOStruct_MPIAIJ *coo; 6421 6422 PetscFunctionBegin; 6423 PetscCall(PetscFree(mpiaij->garray)); 6424 PetscCall(VecDestroy(&mpiaij->lvec)); 6425 #if defined(PETSC_USE_CTABLE) 6426 PetscCall(PetscHMapIDestroy(&mpiaij->colmap)); 6427 #else 6428 PetscCall(PetscFree(mpiaij->colmap)); 6429 #endif 6430 PetscCall(VecScatterDestroy(&mpiaij->Mvctx)); 6431 mat->assembled = PETSC_FALSE; 6432 mat->was_assembled = PETSC_FALSE; 6433 6434 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 6435 PetscCallMPI(MPI_Comm_size(comm, &size)); 6436 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 6437 
PetscCall(PetscLayoutSetUp(mat->rmap)); 6438 PetscCall(PetscLayoutSetUp(mat->cmap)); 6439 PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend)); 6440 PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend)); 6441 PetscCall(MatGetLocalSize(mat, &m, &n)); 6442 PetscCall(MatGetSize(mat, &M, &N)); 6443 6444 /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */ 6445 /* entries come first, then local rows, then remote rows. */ 6446 PetscCount n1 = coo_n, *perm1; 6447 PetscInt *i1 = coo_i, *j1 = coo_j; 6448 6449 PetscCall(PetscMalloc1(n1, &perm1)); 6450 for (k = 0; k < n1; k++) perm1[k] = k; 6451 6452 /* Manipulate indices so that entries with negative row or col indices will have smallest 6453 row indices, local entries will have greater but negative row indices, and remote entries 6454 will have positive row indices. 6455 */ 6456 for (k = 0; k < n1; k++) { 6457 if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_INT_MIN; /* e.g., -2^31, minimal to move them ahead */ 6458 else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_INT_MAX; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_INT_MAX, -1] */ 6459 else { 6460 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_USER_INPUT, "MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows"); 6461 if (mpiaij->donotstash) i1[k] = PETSC_INT_MIN; /* Ignore offproc entries as if they had negative indices */ 6462 } 6463 } 6464 6465 /* Sort by row; after that, [0,k) have ignored entries, [k,rem) have local rows and [rem,n1) have remote rows */ 6466 PetscCall(PetscSortIntWithIntCountArrayPair(n1, i1, j1, perm1)); 6467 6468 /* Advance k to the first entry we need to take care of */ 6469 for (k = 0; k < n1; k++) 6470 if (i1[k] > PETSC_INT_MIN) break; 6471 PetscCount i1start = k; 6472 6473 PetscCall(PetscSortedIntUpperBound(i1, k, n1, rend - 1 - PETSC_INT_MAX, &rem)); /* rem is upper bound of the last local row */ 6474 for (; k < rem; k++) i1[k] += PETSC_INT_MAX; /* Revert row indices of 
local rows*/ 6475 6476 /* Send remote rows to their owner */ 6477 /* Find which rows should be sent to which remote ranks*/ 6478 PetscInt nsend = 0; /* Number of MPI ranks to send data to */ 6479 PetscMPIInt *sendto; /* [nsend], storing remote ranks */ 6480 PetscInt *nentries; /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */ 6481 const PetscInt *ranges; 6482 PetscInt maxNsend = size >= 128 ? 128 : size; /* Assume max 128 neighbors; realloc when needed */ 6483 6484 PetscCall(PetscLayoutGetRanges(mat->rmap, &ranges)); 6485 PetscCall(PetscMalloc2(maxNsend, &sendto, maxNsend, &nentries)); 6486 for (k = rem; k < n1;) { 6487 PetscMPIInt owner; 6488 PetscInt firstRow, lastRow; 6489 6490 /* Locate a row range */ 6491 firstRow = i1[k]; /* first row of this owner */ 6492 PetscCall(PetscLayoutFindOwner(mat->rmap, firstRow, &owner)); 6493 lastRow = ranges[owner + 1] - 1; /* last row of this owner */ 6494 6495 /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */ 6496 PetscCall(PetscSortedIntUpperBound(i1, k, n1, lastRow, &p)); 6497 6498 /* All entries in [k,p) belong to this remote owner */ 6499 if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */ 6500 PetscMPIInt *sendto2; 6501 PetscInt *nentries2; 6502 PetscInt maxNsend2 = (maxNsend <= size / 2) ? 
maxNsend * 2 : size; 6503 6504 PetscCall(PetscMalloc2(maxNsend2, &sendto2, maxNsend2, &nentries2)); 6505 PetscCall(PetscArraycpy(sendto2, sendto, maxNsend)); 6506 PetscCall(PetscArraycpy(nentries2, nentries2, maxNsend + 1)); 6507 PetscCall(PetscFree2(sendto, nentries2)); 6508 sendto = sendto2; 6509 nentries = nentries2; 6510 maxNsend = maxNsend2; 6511 } 6512 sendto[nsend] = owner; 6513 PetscCall(PetscIntCast(p - k, &nentries[nsend])); 6514 nsend++; 6515 k = p; 6516 } 6517 6518 /* Build 1st SF to know offsets on remote to send data */ 6519 PetscSF sf1; 6520 PetscInt nroots = 1, nroots2 = 0; 6521 PetscInt nleaves = nsend, nleaves2 = 0; 6522 PetscInt *offsets; 6523 PetscSFNode *iremote; 6524 6525 PetscCall(PetscSFCreate(comm, &sf1)); 6526 PetscCall(PetscMalloc1(nsend, &iremote)); 6527 PetscCall(PetscMalloc1(nsend, &offsets)); 6528 for (k = 0; k < nsend; k++) { 6529 iremote[k].rank = sendto[k]; 6530 iremote[k].index = 0; 6531 nleaves2 += nentries[k]; 6532 PetscCheck(nleaves2 >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF leaves is too large for PetscInt"); 6533 } 6534 PetscCall(PetscSFSetGraph(sf1, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 6535 PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1, MPIU_INT, PETSC_MEMTYPE_HOST, &nroots2 /*rootdata*/, PETSC_MEMTYPE_HOST, nentries /*leafdata*/, PETSC_MEMTYPE_HOST, offsets /*leafupdate*/, MPI_SUM)); 6536 PetscCall(PetscSFFetchAndOpEnd(sf1, MPIU_INT, &nroots2, nentries, offsets, MPI_SUM)); /* Would nroots2 overflow, we check offsets[] below */ 6537 PetscCall(PetscSFDestroy(&sf1)); 6538 PetscAssert(nleaves2 == n1 - rem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT, nleaves2, n1 - rem); 6539 6540 /* Build 2nd SF to send remote COOs to their owner */ 6541 PetscSF sf2; 6542 nroots = nroots2; 6543 nleaves = nleaves2; 6544 PetscCall(PetscSFCreate(comm, &sf2)); 6545 PetscCall(PetscSFSetFromOptions(sf2)); 6546 
PetscCall(PetscMalloc1(nleaves, &iremote)); 6547 p = 0; 6548 for (k = 0; k < nsend; k++) { 6549 PetscCheck(offsets[k] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF roots is too large for PetscInt"); 6550 for (q = 0; q < nentries[k]; q++, p++) { 6551 iremote[p].rank = sendto[k]; 6552 PetscCall(PetscIntCast(offsets[k] + q, &iremote[p].index)); 6553 } 6554 } 6555 PetscCall(PetscSFSetGraph(sf2, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 6556 6557 /* Send the remote COOs to their owner */ 6558 PetscInt n2 = nroots, *i2, *j2; /* Buffers for received COOs from other ranks, along with a permutation array */ 6559 PetscCount *perm2; /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */ 6560 PetscCall(PetscMalloc3(n2, &i2, n2, &j2, n2, &perm2)); 6561 PetscAssert(rem == 0 || i1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null"); 6562 PetscAssert(rem == 0 || j1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null"); 6563 PetscInt *i1prem = PetscSafePointerPlusOffset(i1, rem); 6564 PetscInt *j1prem = PetscSafePointerPlusOffset(j1, rem); 6565 PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, i1prem, PETSC_MEMTYPE_HOST, i2, MPI_REPLACE)); 6566 PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, i1prem, i2, MPI_REPLACE)); 6567 PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, j1prem, PETSC_MEMTYPE_HOST, j2, MPI_REPLACE)); 6568 PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, j1prem, j2, MPI_REPLACE)); 6569 6570 PetscCall(PetscFree(offsets)); 6571 PetscCall(PetscFree2(sendto, nentries)); 6572 6573 /* Sort received COOs by row along with the permutation array */ 6574 for (k = 0; k < n2; k++) perm2[k] = k; 6575 PetscCall(PetscSortIntWithIntCountArrayPair(n2, i2, j2, perm2)); 6576 6577 /* sf2 only sends contiguous leafdata to contiguous rootdata. 
We record the permutation which will be used to fill leafdata */ 6578 PetscCount *Cperm1; 6579 PetscAssert(rem == 0 || perm1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null"); 6580 PetscCount *perm1prem = PetscSafePointerPlusOffset(perm1, rem); 6581 PetscCall(PetscMalloc1(nleaves, &Cperm1)); 6582 PetscCall(PetscArraycpy(Cperm1, perm1prem, nleaves)); 6583 6584 /* Support for HYPRE matrices, kind of a hack. 6585 Swap min column with diagonal so that diagonal values will go first */ 6586 PetscBool hypre; 6587 PetscCall(PetscStrcmp("_internal_COO_mat_for_hypre", ((PetscObject)mat)->name, &hypre)); 6588 if (hypre) { 6589 PetscInt *minj; 6590 PetscBT hasdiag; 6591 6592 PetscCall(PetscBTCreate(m, &hasdiag)); 6593 PetscCall(PetscMalloc1(m, &minj)); 6594 for (k = 0; k < m; k++) minj[k] = PETSC_INT_MAX; 6595 for (k = i1start; k < rem; k++) { 6596 if (j1[k] < cstart || j1[k] >= cend) continue; 6597 const PetscInt rindex = i1[k] - rstart; 6598 if ((j1[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex)); 6599 minj[rindex] = PetscMin(minj[rindex], j1[k]); 6600 } 6601 for (k = 0; k < n2; k++) { 6602 if (j2[k] < cstart || j2[k] >= cend) continue; 6603 const PetscInt rindex = i2[k] - rstart; 6604 if ((j2[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex)); 6605 minj[rindex] = PetscMin(minj[rindex], j2[k]); 6606 } 6607 for (k = i1start; k < rem; k++) { 6608 const PetscInt rindex = i1[k] - rstart; 6609 if (j1[k] < cstart || j1[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue; 6610 if (j1[k] == minj[rindex]) j1[k] = i1[k] + (cstart - rstart); 6611 else if ((j1[k] - cstart) == rindex) j1[k] = minj[rindex]; 6612 } 6613 for (k = 0; k < n2; k++) { 6614 const PetscInt rindex = i2[k] - rstart; 6615 if (j2[k] < cstart || j2[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue; 6616 if (j2[k] == minj[rindex]) j2[k] = i2[k] + (cstart - rstart); 6617 else if ((j2[k] - cstart) == rindex) j2[k] = minj[rindex]; 6618 } 6619 
PetscCall(PetscBTDestroy(&hasdiag)); 6620 PetscCall(PetscFree(minj)); 6621 } 6622 6623 /* Split local COOs and received COOs into diag/offdiag portions */ 6624 PetscCount *rowBegin1, *rowMid1, *rowEnd1; 6625 PetscCount *Ajmap1, *Aperm1, *Bjmap1, *Bperm1; 6626 PetscCount Annz1, Bnnz1, Atot1, Btot1; 6627 PetscCount *rowBegin2, *rowMid2, *rowEnd2; 6628 PetscCount *Ajmap2, *Aperm2, *Bjmap2, *Bperm2; 6629 PetscCount Annz2, Bnnz2, Atot2, Btot2; 6630 6631 PetscCall(PetscCalloc3(m, &rowBegin1, m, &rowMid1, m, &rowEnd1)); 6632 PetscCall(PetscCalloc3(m, &rowBegin2, m, &rowMid2, m, &rowEnd2)); 6633 PetscCall(MatSplitEntries_Internal(mat, rem, i1, j1, perm1, rowBegin1, rowMid1, rowEnd1, &Atot1, &Aperm1, &Annz1, &Ajmap1, &Btot1, &Bperm1, &Bnnz1, &Bjmap1)); 6634 PetscCall(MatSplitEntries_Internal(mat, n2, i2, j2, perm2, rowBegin2, rowMid2, rowEnd2, &Atot2, &Aperm2, &Annz2, &Ajmap2, &Btot2, &Bperm2, &Bnnz2, &Bjmap2)); 6635 6636 /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */ 6637 PetscInt *Ai, *Bi; 6638 PetscInt *Aj, *Bj; 6639 6640 PetscCall(PetscMalloc1(m + 1, &Ai)); 6641 PetscCall(PetscMalloc1(m + 1, &Bi)); 6642 PetscCall(PetscMalloc1(Annz1 + Annz2, &Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */ 6643 PetscCall(PetscMalloc1(Bnnz1 + Bnnz2, &Bj)); 6644 6645 PetscCount *Aimap1, *Bimap1, *Aimap2, *Bimap2; 6646 PetscCall(PetscMalloc1(Annz1, &Aimap1)); 6647 PetscCall(PetscMalloc1(Bnnz1, &Bimap1)); 6648 PetscCall(PetscMalloc1(Annz2, &Aimap2)); 6649 PetscCall(PetscMalloc1(Bnnz2, &Bimap2)); 6650 6651 PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowBegin1, rowMid1, rowBegin2, rowMid2, Ajmap1, Ajmap2, Aimap1, Aimap2, Ai, Aj)); 6652 PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowMid1, rowEnd1, rowMid2, rowEnd2, Bjmap1, Bjmap2, Bimap1, Bimap2, Bi, Bj)); 6653 6654 /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we */ 6655 /* expect nonzeros in A/B most likely have local contributing 
entries */ 6656 PetscInt Annz = Ai[m]; 6657 PetscInt Bnnz = Bi[m]; 6658 PetscCount *Ajmap1_new, *Bjmap1_new; 6659 6660 PetscCall(PetscMalloc1(Annz + 1, &Ajmap1_new)); 6661 PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap1_new)); 6662 6663 PetscCall(ExpandJmap_Internal(Annz1, Annz, Aimap1, Ajmap1, Ajmap1_new)); 6664 PetscCall(ExpandJmap_Internal(Bnnz1, Bnnz, Bimap1, Bjmap1, Bjmap1_new)); 6665 6666 PetscCall(PetscFree(Aimap1)); 6667 PetscCall(PetscFree(Ajmap1)); 6668 PetscCall(PetscFree(Bimap1)); 6669 PetscCall(PetscFree(Bjmap1)); 6670 PetscCall(PetscFree3(rowBegin1, rowMid1, rowEnd1)); 6671 PetscCall(PetscFree3(rowBegin2, rowMid2, rowEnd2)); 6672 PetscCall(PetscFree(perm1)); 6673 PetscCall(PetscFree3(i2, j2, perm2)); 6674 6675 Ajmap1 = Ajmap1_new; 6676 Bjmap1 = Bjmap1_new; 6677 6678 /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */ 6679 if (Annz < Annz1 + Annz2) { 6680 PetscInt *Aj_new; 6681 PetscCall(PetscMalloc1(Annz, &Aj_new)); 6682 PetscCall(PetscArraycpy(Aj_new, Aj, Annz)); 6683 PetscCall(PetscFree(Aj)); 6684 Aj = Aj_new; 6685 } 6686 6687 if (Bnnz < Bnnz1 + Bnnz2) { 6688 PetscInt *Bj_new; 6689 PetscCall(PetscMalloc1(Bnnz, &Bj_new)); 6690 PetscCall(PetscArraycpy(Bj_new, Bj, Bnnz)); 6691 PetscCall(PetscFree(Bj)); 6692 Bj = Bj_new; 6693 } 6694 6695 /* Create new submatrices for on-process and off-process coupling */ 6696 PetscScalar *Aa, *Ba; 6697 MatType rtype; 6698 Mat_SeqAIJ *a, *b; 6699 PetscObjectState state; 6700 PetscCall(PetscCalloc1(Annz, &Aa)); /* Zero matrix on device */ 6701 PetscCall(PetscCalloc1(Bnnz, &Ba)); 6702 /* make Aj[] local, i.e, based off the start column of the diagonal portion */ 6703 if (cstart) { 6704 for (k = 0; k < Annz; k++) Aj[k] -= cstart; 6705 } 6706 6707 PetscCall(MatGetRootType_Private(mat, &rtype)); 6708 6709 MatSeqXAIJGetOptions_Private(mpiaij->A); 6710 PetscCall(MatDestroy(&mpiaij->A)); 6711 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, Ai, Aj, Aa, &mpiaij->A)); 6712 
PetscCall(MatSetBlockSizesFromMats(mpiaij->A, mat, mat)); 6713 MatSeqXAIJRestoreOptions_Private(mpiaij->A); 6714 6715 MatSeqXAIJGetOptions_Private(mpiaij->B); 6716 PetscCall(MatDestroy(&mpiaij->B)); 6717 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, mat->cmap->N, Bi, Bj, Ba, &mpiaij->B)); 6718 PetscCall(MatSetBlockSizesFromMats(mpiaij->B, mat, mat)); 6719 MatSeqXAIJRestoreOptions_Private(mpiaij->B); 6720 6721 PetscCall(MatSetUpMultiply_MPIAIJ(mat)); 6722 mat->was_assembled = PETSC_TRUE; // was_assembled in effect means the Mvctx is built; doing so avoids redundant MatSetUpMultiply_MPIAIJ 6723 state = mpiaij->A->nonzerostate + mpiaij->B->nonzerostate; 6724 PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat))); 6725 6726 a = (Mat_SeqAIJ *)mpiaij->A->data; 6727 b = (Mat_SeqAIJ *)mpiaij->B->data; 6728 a->free_a = PETSC_TRUE; 6729 a->free_ij = PETSC_TRUE; 6730 b->free_a = PETSC_TRUE; 6731 b->free_ij = PETSC_TRUE; 6732 a->maxnz = a->nz; 6733 b->maxnz = b->nz; 6734 6735 /* conversion must happen AFTER multiply setup */ 6736 PetscCall(MatConvert(mpiaij->A, rtype, MAT_INPLACE_MATRIX, &mpiaij->A)); 6737 PetscCall(MatConvert(mpiaij->B, rtype, MAT_INPLACE_MATRIX, &mpiaij->B)); 6738 PetscCall(VecDestroy(&mpiaij->lvec)); 6739 PetscCall(MatCreateVecs(mpiaij->B, &mpiaij->lvec, NULL)); 6740 6741 // Put the COO struct in a container and then attach that to the matrix 6742 PetscCall(PetscMalloc1(1, &coo)); 6743 coo->n = coo_n; 6744 coo->sf = sf2; 6745 coo->sendlen = nleaves; 6746 coo->recvlen = nroots; 6747 coo->Annz = Annz; 6748 coo->Bnnz = Bnnz; 6749 coo->Annz2 = Annz2; 6750 coo->Bnnz2 = Bnnz2; 6751 coo->Atot1 = Atot1; 6752 coo->Atot2 = Atot2; 6753 coo->Btot1 = Btot1; 6754 coo->Btot2 = Btot2; 6755 coo->Ajmap1 = Ajmap1; 6756 coo->Aperm1 = Aperm1; 6757 coo->Bjmap1 = Bjmap1; 6758 coo->Bperm1 = Bperm1; 6759 coo->Aimap2 = Aimap2; 6760 coo->Ajmap2 = Ajmap2; 6761 coo->Aperm2 = Aperm2; 6762 coo->Bimap2 = Bimap2; 6763 
coo->Bjmap2 = Bjmap2; 6764 coo->Bperm2 = Bperm2; 6765 coo->Cperm1 = Cperm1; 6766 // Allocate in preallocation. If not used, it has zero cost on host 6767 PetscCall(PetscMalloc2(coo->sendlen, &coo->sendbuf, coo->recvlen, &coo->recvbuf)); 6768 PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container)); 6769 PetscCall(PetscContainerSetPointer(container, coo)); 6770 PetscCall(PetscContainerSetCtxDestroy(container, MatCOOStructDestroy_MPIAIJ)); 6771 PetscCall(PetscObjectCompose((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject)container)); 6772 PetscCall(PetscContainerDestroy(&container)); 6773 PetscFunctionReturn(PETSC_SUCCESS); 6774 } 6775 6776 static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat, const PetscScalar v[], InsertMode imode) 6777 { 6778 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)mat->data; 6779 Mat A = mpiaij->A, B = mpiaij->B; 6780 PetscScalar *Aa, *Ba; 6781 PetscScalar *sendbuf, *recvbuf; 6782 const PetscCount *Ajmap1, *Ajmap2, *Aimap2; 6783 const PetscCount *Bjmap1, *Bjmap2, *Bimap2; 6784 const PetscCount *Aperm1, *Aperm2, *Bperm1, *Bperm2; 6785 const PetscCount *Cperm1; 6786 PetscContainer container; 6787 MatCOOStruct_MPIAIJ *coo; 6788 6789 PetscFunctionBegin; 6790 PetscCall(PetscObjectQuery((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject *)&container)); 6791 PetscCheck(container, PetscObjectComm((PetscObject)mat), PETSC_ERR_PLIB, "Not found MatCOOStruct on this matrix"); 6792 PetscCall(PetscContainerGetPointer(container, (void **)&coo)); 6793 sendbuf = coo->sendbuf; 6794 recvbuf = coo->recvbuf; 6795 Ajmap1 = coo->Ajmap1; 6796 Ajmap2 = coo->Ajmap2; 6797 Aimap2 = coo->Aimap2; 6798 Bjmap1 = coo->Bjmap1; 6799 Bjmap2 = coo->Bjmap2; 6800 Bimap2 = coo->Bimap2; 6801 Aperm1 = coo->Aperm1; 6802 Aperm2 = coo->Aperm2; 6803 Bperm1 = coo->Bperm1; 6804 Bperm2 = coo->Bperm2; 6805 Cperm1 = coo->Cperm1; 6806 6807 PetscCall(MatSeqAIJGetArray(A, &Aa)); /* Might read and write matrix values */ 6808 PetscCall(MatSeqAIJGetArray(B, &Ba)); 6809 6810 /* Pack 
entries to be sent to remote */ 6811 for (PetscCount i = 0; i < coo->sendlen; i++) sendbuf[i] = v[Cperm1[i]]; 6812 6813 /* Send remote entries to their owner and overlap the communication with local computation */ 6814 PetscCall(PetscSFReduceWithMemTypeBegin(coo->sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, sendbuf, PETSC_MEMTYPE_HOST, recvbuf, MPI_REPLACE)); 6815 /* Add local entries to A and B */ 6816 for (PetscCount i = 0; i < coo->Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */ 6817 PetscScalar sum = 0.0; /* Do partial summation first to improve numerical stability */ 6818 for (PetscCount k = Ajmap1[i]; k < Ajmap1[i + 1]; k++) sum += v[Aperm1[k]]; 6819 Aa[i] = (imode == INSERT_VALUES ? 0.0 : Aa[i]) + sum; 6820 } 6821 for (PetscCount i = 0; i < coo->Bnnz; i++) { 6822 PetscScalar sum = 0.0; 6823 for (PetscCount k = Bjmap1[i]; k < Bjmap1[i + 1]; k++) sum += v[Bperm1[k]]; 6824 Ba[i] = (imode == INSERT_VALUES ? 0.0 : Ba[i]) + sum; 6825 } 6826 PetscCall(PetscSFReduceEnd(coo->sf, MPIU_SCALAR, sendbuf, recvbuf, MPI_REPLACE)); 6827 6828 /* Add received remote entries to A and B */ 6829 for (PetscCount i = 0; i < coo->Annz2; i++) { 6830 for (PetscCount k = Ajmap2[i]; k < Ajmap2[i + 1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]]; 6831 } 6832 for (PetscCount i = 0; i < coo->Bnnz2; i++) { 6833 for (PetscCount k = Bjmap2[i]; k < Bjmap2[i + 1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]]; 6834 } 6835 PetscCall(MatSeqAIJRestoreArray(A, &Aa)); 6836 PetscCall(MatSeqAIJRestoreArray(B, &Ba)); 6837 PetscFunctionReturn(PETSC_SUCCESS); 6838 } 6839 6840 /*MC 6841 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 6842 6843 Options Database Keys: 6844 . 
-mat_type mpiaij - sets the matrix type to `MATMPIAIJ` during a call to `MatSetFromOptions()` 6845 6846 Level: beginner 6847 6848 Notes: 6849 `MatSetValues()` may be called for this matrix type with a `NULL` argument for the numerical values, 6850 in this case the values associated with the rows and columns one passes in are set to zero 6851 in the matrix 6852 6853 `MatSetOptions`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this no 6854 space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored 6855 6856 .seealso: [](ch_matrices), `Mat`, `MATSEQAIJ`, `MATAIJ`, `MatCreateAIJ()` 6857 M*/ 6858 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 6859 { 6860 Mat_MPIAIJ *b; 6861 PetscMPIInt size; 6862 6863 PetscFunctionBegin; 6864 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 6865 6866 PetscCall(PetscNew(&b)); 6867 B->data = (void *)b; 6868 B->ops[0] = MatOps_Values; 6869 B->assembled = PETSC_FALSE; 6870 B->insertmode = NOT_SET_VALUES; 6871 b->size = size; 6872 6873 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank)); 6874 6875 /* build cache for off array entries formed */ 6876 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash)); 6877 6878 b->donotstash = PETSC_FALSE; 6879 b->colmap = NULL; 6880 b->garray = NULL; 6881 b->roworiented = PETSC_TRUE; 6882 6883 /* stuff used for matrix vector multiply */ 6884 b->lvec = NULL; 6885 b->Mvctx = NULL; 6886 6887 /* stuff for MatGetRow() */ 6888 b->rowindices = NULL; 6889 b->rowvalues = NULL; 6890 b->getrowactive = PETSC_FALSE; 6891 6892 /* flexible pointer used in CUSPARSE classes */ 6893 b->spptr = NULL; 6894 6895 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetUseScalableIncreaseOverlap_C", MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ)); 6896 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIAIJ)); 6897 
PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIAIJ)); 6898 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_MPIAIJ)); 6899 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocation_C", MatMPIAIJSetPreallocation_MPIAIJ)); 6900 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetPreallocation_C", MatResetPreallocation_MPIAIJ)); 6901 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetHash_C", MatResetHash_MPIAIJ)); 6902 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocationCSR_C", MatMPIAIJSetPreallocationCSR_MPIAIJ)); 6903 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIAIJ)); 6904 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijperm_C", MatConvert_MPIAIJ_MPIAIJPERM)); 6905 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijsell_C", MatConvert_MPIAIJ_MPIAIJSELL)); 6906 #if defined(PETSC_HAVE_CUDA) 6907 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcusparse_C", MatConvert_MPIAIJ_MPIAIJCUSPARSE)); 6908 #endif 6909 #if defined(PETSC_HAVE_HIP) 6910 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijhipsparse_C", MatConvert_MPIAIJ_MPIAIJHIPSPARSE)); 6911 #endif 6912 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 6913 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijkokkos_C", MatConvert_MPIAIJ_MPIAIJKokkos)); 6914 #endif 6915 #if defined(PETSC_HAVE_MKL_SPARSE) 6916 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijmkl_C", MatConvert_MPIAIJ_MPIAIJMKL)); 6917 #endif 6918 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcrl_C", MatConvert_MPIAIJ_MPIAIJCRL)); 6919 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpibaij_C", MatConvert_MPIAIJ_MPIBAIJ)); 
6920 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisbaij_C", MatConvert_MPIAIJ_MPISBAIJ)); 6921 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense)); 6922 #if defined(PETSC_HAVE_ELEMENTAL) 6923 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_elemental_C", MatConvert_MPIAIJ_Elemental)); 6924 #endif 6925 #if defined(PETSC_HAVE_SCALAPACK) 6926 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_scalapack_C", MatConvert_AIJ_ScaLAPACK)); 6927 #endif 6928 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_is_C", MatConvert_XAIJ_IS)); 6929 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisell_C", MatConvert_MPIAIJ_MPISELL)); 6930 #if defined(PETSC_HAVE_HYPRE) 6931 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_hypre_C", MatConvert_AIJ_HYPRE)); 6932 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", MatProductSetFromOptions_Transpose_AIJ_AIJ)); 6933 #endif 6934 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_is_mpiaij_C", MatProductSetFromOptions_IS_XAIJ)); 6935 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpiaij_C", MatProductSetFromOptions_MPIAIJ)); 6936 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetPreallocationCOO_C", MatSetPreallocationCOO_MPIAIJ)); 6937 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetValuesCOO_C", MatSetValuesCOO_MPIAIJ)); 6938 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIAIJ)); 6939 PetscFunctionReturn(PETSC_SUCCESS); 6940 } 6941 6942 /*@ 6943 MatCreateMPIAIJWithSplitArrays - creates a `MATMPIAIJ` matrix using arrays that contain the "diagonal" 6944 and "off-diagonal" part of the matrix in CSR format. 
6945 6946 Collective 6947 6948 Input Parameters: 6949 + comm - MPI communicator 6950 . m - number of local rows (Cannot be `PETSC_DECIDE`) 6951 . n - This value should be the same as the local size used in creating the 6952 x vector for the matrix-vector product $y = Ax$. (or `PETSC_DECIDE` to have 6953 calculated if `N` is given) For square matrices `n` is almost always `m`. 6954 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given) 6955 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given) 6956 . i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 6957 . j - column indices, which must be local, i.e., based off the start column of the diagonal portion 6958 . a - matrix values 6959 . oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix 6960 . oj - column indices, which must be global, representing global columns in the `MATMPIAIJ` matrix 6961 - oa - matrix values 6962 6963 Output Parameter: 6964 . mat - the matrix 6965 6966 Level: advanced 6967 6968 Notes: 6969 The `i`, `j`, and `a` arrays ARE NOT copied by this routine into the internal format used by PETSc (even in Fortran). The user 6970 must free the arrays once the matrix has been destroyed and not before. 6971 6972 The `i` and `j` indices are 0 based 6973 6974 See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix 6975 6976 This sets local rows and cannot be used to set off-processor values. 6977 6978 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 6979 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 6980 not easily support in-place reassembly. 
It is recommended to use MatSetValues() (or a variant thereof) because 6981 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 6982 keep track of the underlying array. Use `MatSetOption`(A,`MAT_NO_OFF_PROC_ENTRIES`,`PETSC_TRUE`) to disable all 6983 communication if it is known that only local entries will be set. 6984 6985 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 6986 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()` 6987 @*/ 6988 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt i[], PetscInt j[], PetscScalar a[], PetscInt oi[], PetscInt oj[], PetscScalar oa[], Mat *mat) 6989 { 6990 Mat_MPIAIJ *maij; 6991 6992 PetscFunctionBegin; 6993 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 6994 PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 6995 PetscCheck(oi[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "oi (row indices) must start with 0"); 6996 PetscCall(MatCreate(comm, mat)); 6997 PetscCall(MatSetSizes(*mat, m, n, M, N)); 6998 PetscCall(MatSetType(*mat, MATMPIAIJ)); 6999 maij = (Mat_MPIAIJ *)(*mat)->data; 7000 7001 (*mat)->preallocated = PETSC_TRUE; 7002 7003 PetscCall(PetscLayoutSetUp((*mat)->rmap)); 7004 PetscCall(PetscLayoutSetUp((*mat)->cmap)); 7005 7006 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, i, j, a, &maij->A)); 7007 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, (*mat)->cmap->N, oi, oj, oa, &maij->B)); 7008 7009 PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 7010 PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY)); 7011 PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY)); 7012 PetscCall(MatSetOption(*mat, 
MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE)); 7013 PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 7014 PetscFunctionReturn(PETSC_SUCCESS); 7015 } 7016 7017 typedef struct { 7018 Mat *mp; /* intermediate products */ 7019 PetscBool *mptmp; /* is the intermediate product temporary ? */ 7020 PetscInt cp; /* number of intermediate products */ 7021 7022 /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */ 7023 PetscInt *startsj_s, *startsj_r; 7024 PetscScalar *bufa; 7025 Mat P_oth; 7026 7027 /* may take advantage of merging product->B */ 7028 Mat Bloc; /* B-local by merging diag and off-diag */ 7029 7030 /* cusparse does not have support to split between symbolic and numeric phases. 7031 When api_user is true, we don't need to update the numerical values 7032 of the temporary storage */ 7033 PetscBool reusesym; 7034 7035 /* support for COO values insertion */ 7036 PetscScalar *coo_v, *coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */ 7037 PetscInt **own; /* own[i] points to address of on-process COO indices for Mat mp[i] */ 7038 PetscInt **off; /* off[i] points to address of off-process COO indices for Mat mp[i] */ 7039 PetscBool hasoffproc; /* if true, have off-process values insertion (i.e. 
                                 AtB or PtAP) */
  PetscSF      sf; /* used for non-local values insertion and memory malloc */
  PetscMemType mtype;

  /* customization */
  PetscBool abmerge;
  PetscBool P_oth_bind;
} MatMatMPIAIJBACKEND;

/* Destroy callback for C->product->data: frees all scratch storage owned by the backend product data */
static PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data)
{
  MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND *)data;
  PetscInt             i;

  PetscFunctionBegin;
  PetscCall(PetscFree2(mmdata->startsj_s, mmdata->startsj_r));
  PetscCall(PetscFree(mmdata->bufa));
  /* coo_v/coo_w were allocated with PetscSFMalloc, so they must be released with the matching memtype */
  PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_v));
  PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_w));
  PetscCall(MatDestroy(&mmdata->P_oth));
  PetscCall(MatDestroy(&mmdata->Bloc));
  PetscCall(PetscSFDestroy(&mmdata->sf));
  for (i = 0; i < mmdata->cp; i++) PetscCall(MatDestroy(&mmdata->mp[i]));
  PetscCall(PetscFree2(mmdata->mp, mmdata->mptmp));
  /* own[0]/off[0] each hold one shared index array that own[i]/off[i] merely point into */
  PetscCall(PetscFree(mmdata->own[0]));
  PetscCall(PetscFree(mmdata->own));
  PetscCall(PetscFree(mmdata->off[0]));
  PetscCall(PetscFree(mmdata->off));
  PetscCall(PetscFree(mmdata));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Copy selected n entries with indices in idx[] of A to v[].
   If idx is NULL, copy the whole data array of A to v[]
*/
static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[])
{
  PetscErrorCode (*f)(Mat, PetscInt, const PetscInt[], PetscScalar[]);

  PetscFunctionBegin;
  /* Prefer a type-specific implementation when the Mat registers one via its function table */
  PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJCopySubArray_C", &f));
  if (f) {
    PetscCall((*f)(A, n, idx, v));
  } else {
    const PetscScalar *vv;

    PetscCall(MatSeqAIJGetArrayRead(A, &vv));
    if (n && idx) {
      /* gather the selected entries */
      PetscScalar    *w  = v;
      const PetscInt *oi = idx;
      PetscInt        j;

      for (j = 0; j < n; j++) *w++ = vv[*oi++];
    } else {
      /* idx == NULL (or n == 0): contiguous copy of the first n values */
      PetscCall(PetscArraycpy(v, vv, n));
    }
    PetscCall(MatSeqAIJRestoreArrayRead(A, &vv));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Numeric phase: recompute the intermediate products and insert their values into C via COO */
static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C)
{
  MatMatMPIAIJBACKEND *mmdata;
  PetscInt             i, n_d, n_o;

  PetscFunctionBegin;
  MatCheckProduct(C, 1);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  mmdata = (MatMatMPIAIJBACKEND *)C->product->data;
  if (!mmdata->reusesym) { /* update temporary matrices */
    if (mmdata->P_oth) PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A, C->product->B, MAT_REUSE_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
    if (mmdata->Bloc) PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B, MAT_REUSE_MATRIX, NULL, &mmdata->Bloc));
  }
  /* reusesym is only honored for the first numeric call right after the symbolic phase */
  mmdata->reusesym = PETSC_FALSE;

  for (i = 0; i < mmdata->cp; i++) {
    PetscCheck(mmdata->mp[i]->ops->productnumeric, PetscObjectComm((PetscObject)mmdata->mp[i]), PETSC_ERR_PLIB, "Missing numeric op for %s", MatProductTypes[mmdata->mp[i]->product->type]);
    PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]));
  }
  /* n_d/n_o track the running offsets into the on-process (coo_v) and off-process (coo_w) buffers */
  for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) {
    PetscInt noff;

    /* number of entries of product i destined for other processes (pointer difference into off[]) */
    PetscCall(PetscIntCast(mmdata->off[i + 1] - mmdata->off[i], &noff));
    if (mmdata->mptmp[i]) continue; /* temporary products feed later products; their values are not inserted */
    if (noff) {
      PetscInt nown;

      PetscCall(PetscIntCast(mmdata->own[i + 1] - mmdata->own[i], &nown));
      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], noff, mmdata->off[i], mmdata->coo_w + n_o));
      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], nown, mmdata->own[i], mmdata->coo_v + n_d));
      n_o += noff;
      n_d += nown;
    } else {
      Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mmdata->mp[i]->data;

      /* no off-process entries: copy the whole nonzero array of this product */
      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], mm->nz, NULL, mmdata->coo_v + n_d));
      n_d += mm->nz;
    }
  }
  if (mmdata->hasoffproc) { /* offprocess insertion */
    /* gather the send buffer coo_w onto the tail of coo_v, matching the (i,j) layout set up in the symbolic phase */
    PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
    PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
  }
  PetscCall(MatSetValuesCOO(C, mmdata->coo_v, INSERT_VALUES));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Support for Pt * A, A * P, or Pt * A * P */
#define MAX_NUMBER_INTERMEDIATE 4
PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C)
{
  Mat_Product           *product = C->product;
  Mat                    A, P, mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */
  Mat_MPIAIJ            *a, *p;
  MatMatMPIAIJBACKEND   *mmdata;
  ISLocalToGlobalMapping P_oth_l2g = NULL;
  IS                     glob      = NULL;
  const char            *prefix;
  char                   pprefix[256];
  const PetscInt        *globidx, *P_oth_idx;
  PetscInt               i, j, cp, m, n, M, N, *coo_i, *coo_j;
  PetscCount             ncoo, ncoo_d, ncoo_o, ncoo_oown;
  PetscInt               cmapt[MAX_NUMBER_INTERMEDIATE], rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[].
                                                                                         */
  /* type-0: consecutive, start from 0; type-1: consecutive with */
  /* a base offset; type-2: sparse with a local to global map table */
  const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE], *rmapa[MAX_NUMBER_INTERMEDIATE]; /* col/row local to global map array (table) for type-2 map type */

  MatProductType ptype;
  PetscBool      mptmp[MAX_NUMBER_INTERMEDIATE], hasoffproc = PETSC_FALSE, iscuda, iship, iskokk;
  PetscMPIInt    size;

  PetscFunctionBegin;
  MatCheckProduct(C, 1);
  PetscCheck(!product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
  ptype = product->type;
  /* Symmetric A lets AtB be computed as the cheaper AB; record that the shortcut was taken */
  if (product->A->symmetric == PETSC_BOOL3_TRUE && ptype == MATPRODUCT_AtB) {
    ptype                                          = MATPRODUCT_AB;
    product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE;
  }
  /* Pick sizes of C and whether product values must be scattered to other ranks */
  switch (ptype) {
  case MATPRODUCT_AB:
    A          = product->A;
    P          = product->B;
    m          = A->rmap->n;
    n          = P->cmap->n;
    M          = A->rmap->N;
    N          = P->cmap->N;
    hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */
    break;
  case MATPRODUCT_AtB:
    P          = product->A;
    A          = product->B;
    m          = P->cmap->n;
    n          = A->cmap->n;
    M          = P->cmap->N;
    N          = A->cmap->N;
    hasoffproc = PETSC_TRUE;
    break;
  case MATPRODUCT_PtAP:
    A          = product->A;
    P          = product->B;
    m          = P->cmap->n;
    n          = P->cmap->n;
    M          = P->cmap->N;
    N          = P->cmap->N;
    hasoffproc = PETSC_TRUE;
    break;
  default:
    SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]);
  }
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C), &size));
  if (size == 1) hasoffproc = PETSC_FALSE; /* sequential run: everything is local */

  /* defaults */
  for (i = 0; i < MAX_NUMBER_INTERMEDIATE; i++) {
    mp[i]    = NULL;
    mptmp[i] = PETSC_FALSE;
    rmapt[i] = -1;
    cmapt[i] = -1;
    rmapa[i] = NULL;
    cmapa[i] = NULL;
  }

  /* customization */
  PetscCall(PetscNew(&mmdata));
  /* when called through the user-facing API (MatMatMult etc.), keep the symbolic-phase values for the first numeric call */
  mmdata->reusesym = product->api_user;
  /* option names differ between the user API (-matmatmult_*, -matptap_*) and the MatProduct API (-mat_product_algorithm_*) */
  if (ptype == MATPRODUCT_AB) {
    if (product->api_user) {
      PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatMult", "Mat");
      PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL));
      PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
      PetscOptionsEnd();
    } else {
      PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat");
      PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL));
      PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
      PetscOptionsEnd();
    }
  } else if (ptype == MATPRODUCT_PtAP) {
    if (product->api_user) {
      PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatPtAP", "Mat");
      PetscCall(PetscOptionsBool("-matptap_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
      PetscOptionsEnd();
    } else {
      PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat");
      PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
      PetscOptionsEnd();
    }
  }
  a = (Mat_MPIAIJ *)A->data;
  p = (Mat_MPIAIJ *)P->data;
  PetscCall(MatSetSizes(C, m, n, M, N));
  PetscCall(PetscLayoutSetUp(C->rmap));
  PetscCall(PetscLayoutSetUp(C->cmap));
  /* C inherits the (possibly device) matrix type of A */
  PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
  PetscCall(MatGetOptionsPrefix(C, &prefix));

  /* Build the list of sequential intermediate products mp[0..cp), recording for each one how its
     local row/column indices map to global indices of C (rmapt/cmapt, see table above) */
  cp = 0;
  switch (ptype) {
  case MATPRODUCT_AB: /* A * P */
    PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));

    /* A_diag * P_local (merged or not) */
    if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */
      /* P is product->B */
      PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
      PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      PetscCall(ISGetIndices(glob, &globidx));
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    } else { /* A_diag * P_diag and A_diag * P_off */
      PetscCall(MatProductCreate(a->A, p->A, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 1;
      cmapt[cp] = 1;
      mptmp[cp] = PETSC_FALSE;
      cp++;
      PetscCall(MatProductCreate(a->A, p->B, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = p->garray; /* columns of P_off map to global ids through P's garray */
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }

    /* A_off * P_other */
    if (mmdata->P_oth) {
      PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); /* make P_oth use local col ids */
      PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx));
      PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)a->B)->type_name));
      PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind));
      PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = P_oth_idx; /* columns map through the compaction's local-to-global table */
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }
    break;

  case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */
    /* A is product->B */
    PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
    if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */
      PetscCall(MatProductCreate(mmdata->Bloc, mmdata->Bloc, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      PetscCall(ISGetIndices(glob, &globidx));
      rmapt[cp] = 2;
      rmapa[cp] = globidx;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    } else {
      PetscCall(MatProductCreate(p->A, mmdata->Bloc, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      PetscCall(ISGetIndices(glob, &globidx));
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
      PetscCall(MatProductCreate(p->B, mmdata->Bloc, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 2;
      rmapa[cp] = p->garray; /* rows of P_off^t map through P's garray; off-process rows trigger the scatter */
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }
    break;
  case MATPRODUCT_PtAP:
    PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
    /* P is product->B */
    PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
    PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp]));
    PetscCall(MatProductSetType(mp[cp], MATPRODUCT_PtAP));
    PetscCall(MatProductSetFill(mp[cp], product->fill));
    PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
    PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
    PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
    mp[cp]->product->api_user = product->api_user;
    PetscCall(MatProductSetFromOptions(mp[cp]));
    PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
    PetscCall(ISGetIndices(glob, &globidx));
    rmapt[cp] = 2;
    rmapa[cp] = globidx;
    cmapt[cp] = 2;
    cmapa[cp] = globidx;
    mptmp[cp] = PETSC_FALSE;
    cp++;
    if (mmdata->P_oth) {
      PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g));
      PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx));
      PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)a->B)->type_name));
      PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind));
      PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      mptmp[cp] = PETSC_TRUE; /* A_off * P_oth is only an input to the next product; its values are not inserted directly */
      cp++;
      /* P_loc^t * (A_off * P_oth), completing PtAP from the off-diagonal contribution; mp[1] is the previous temporary */
      PetscCall(MatProductCreate(mmdata->Bloc, mp[1], NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 2;
      rmapa[cp] = globidx;
      cmapt[cp] = 2;
      cmapa[cp] = P_oth_idx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }
    break;
  default:
    SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]);
  }
  /* sanity check */
  if (size > 1)
    for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected offproc map type for product %" PetscInt_FMT, i);

  /* move the intermediate products into the persistent product data */
  PetscCall(PetscMalloc2(cp, &mmdata->mp, cp, &mmdata->mptmp));
  for (i = 0; i < cp; i++) {
    mmdata->mp[i]    = mp[i];
    mmdata->mptmp[i] = mptmp[i];
  }
  mmdata->cp             = cp;
  C->product->data       = mmdata;
  C->product->destroy    = MatDestroy_MatMatMPIAIJBACKEND;
  C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND;

  /* memory type */
  mmdata->mtype = PETSC_MEMTYPE_HOST;
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iscuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, ""));
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iship, MATSEQAIJHIPSPARSE, MATMPIAIJHIPSPARSE, ""));
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iskokk, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, ""));
  if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA;
  else if (iship) mmdata->mtype = PETSC_MEMTYPE_HIP;
  else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS;

  /* prepare coo coordinates for values insertion */

  /* count total nonzeros of those intermediate seqaij Mats
      ncoo_d:    # of nonzeros of matrices that do not have offproc entries
      ncoo_o:    # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs
      ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally
  */
  for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) {
    Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data;
    if (mptmp[cp]) continue;
    if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scatter to all processes (might include self) */
      const PetscInt *rmap = rmapa[cp];
      const PetscInt  mr   = mp[cp]->rmap->n;
      const PetscInt  rs   = C->rmap->rstart;
      const PetscInt  re   = C->rmap->rend;
      const PetscInt *ii   = mm->i;
      for (i = 0; i < mr; i++) {
        const PetscInt gr = rmap[i];
        const PetscInt nz = ii[i + 1] - ii[i];
        if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */
        else ncoo_oown += nz;                  /* this row is local */
      }
    } else ncoo_d += mm->nz;
  }

  /*
    ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc

    ncoo = ncoo_d + ncoo_oown + ncoo2, which ncoo2 is number of nonzeros inserted to me by other procs.

    off[0] points to a big index array, which is shared by off[1,2,...]. Similarly, for own[0].

    off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others
    own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally
    so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others.

    coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc.
    Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaining part stores i of nonzeros I will receive.
  */
  PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->off)); /* +1 to make a csr-like data structure */
  PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->own));

  /* gather (i,j) of nonzeros inserted by remote procs */
  if (hasoffproc) {
    PetscSF  msf;
    PetscInt ncoo2, *coo_i2, *coo_j2;

    PetscCall(PetscMalloc1(ncoo_o, &mmdata->off[0]));
    PetscCall(PetscMalloc1(ncoo_oown, &mmdata->own[0]));
    PetscCall(PetscMalloc2(ncoo_o, &coo_i, ncoo_o, &coo_j)); /* to collect (i,j) of entries to be sent to others */

    for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) {
      Mat_SeqAIJ *mm     = (Mat_SeqAIJ *)mp[cp]->data;
      PetscInt   *idxoff = mmdata->off[cp];
      PetscInt   *idxown = mmdata->own[cp];
      if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */
        const PetscInt *rmap = rmapa[cp];
        const PetscInt *cmap = cmapa[cp];
        const PetscInt *ii   = mm->i;
        PetscInt       *coi  = coo_i + ncoo_o;
        PetscInt       *coj  = coo_j + ncoo_o;
        const PetscInt  mr   = mp[cp]->rmap->n;
        const PetscInt  rs   = C->rmap->rstart;
        const PetscInt  re   = C->rmap->rend;
        const PetscInt  cs   = C->cmap->rstart;
        for (i = 0; i < mr; i++) {
          const PetscInt *jj = mm->j + ii[i];
          const PetscInt  gr = rmap[i];
          const PetscInt  nz = ii[i + 1] - ii[i];
          if (gr < rs || gr >= re) { /* this is an offproc row */
            for (j = ii[i]; j < ii[i + 1]; j++) {
              *coi++    = gr;
              *idxoff++ = j;
            }
            if (!cmapt[cp]) { /* already global */
              for (j = 0; j < nz; j++) *coj++ = jj[j];
            } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
              for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
            } else { /* offdiag */
              for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
            }
            ncoo_o += nz;
          } else { /* this is a local row */
            for (j = ii[i]; j <
                 ii[i + 1]; j++) *idxown++ = j;
          }
        }
      }
      /* record segment ends so off[p+1]-off[p] / own[p+1]-own[p] give per-product counts */
      mmdata->off[cp + 1] = idxoff;
      mmdata->own[cp + 1] = idxown;
    }

    /* build the SF that routes the ncoo_o off-process entries to their owning ranks (keyed by global row id) */
    PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf));
    PetscInt incoo_o;
    PetscCall(PetscIntCast(ncoo_o, &incoo_o));
    PetscCall(PetscSFSetGraphLayout(mmdata->sf, C->rmap, incoo_o /*nleaves*/, NULL /*ilocal*/, PETSC_OWN_POINTER, coo_i));
    PetscCall(PetscSFGetMultiSF(mmdata->sf, &msf));
    PetscCall(PetscSFGetGraph(msf, &ncoo2 /*nroots*/, NULL, NULL, NULL));
    ncoo = ncoo_d + ncoo_oown + ncoo2;
    PetscCall(PetscMalloc2(ncoo, &coo_i2, ncoo, &coo_j2));
    PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */
    PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown));
    PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown));
    PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown));
    PetscCall(PetscFree2(coo_i, coo_j));
    /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */
    PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo_o * sizeof(PetscScalar), (void **)&mmdata->coo_w));
    coo_i = coo_i2;
    coo_j = coo_j2;
  } else { /* no offproc values insertion */
    ncoo = ncoo_d;
    PetscCall(PetscMalloc2(ncoo, &coo_i, ncoo, &coo_j));

    /* an empty SF keeps the numeric phase uniform */
    PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf));
    PetscCall(PetscSFSetGraph(mmdata->sf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER));
    PetscCall(PetscSFSetUp(mmdata->sf));
  }
  mmdata->hasoffproc = hasoffproc;

  /* gather (i,j) of nonzeros inserted locally */
  for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) {
    Mat_SeqAIJ     *mm   = (Mat_SeqAIJ *)mp[cp]->data;
    PetscInt       *coi  = coo_i + ncoo_d;
    PetscInt       *coj  = coo_j + ncoo_d;
    const PetscInt *jj   = mm->j;
    const PetscInt *ii   = mm->i;
    const PetscInt *cmap = cmapa[cp];
    const PetscInt *rmap = rmapa[cp];
    const PetscInt  mr   = mp[cp]->rmap->n;
    const PetscInt  rs   = C->rmap->rstart;
    const PetscInt  re   = C->rmap->rend;
    const PetscInt  cs   = C->cmap->rstart;

    if (mptmp[cp]) continue;
    if (rmapt[cp] == 1) { /* consecutive rows */
      /* fill coo_i */
      for (i = 0; i < mr; i++) {
        const PetscInt gr = i + rs;
        for (j = ii[i]; j < ii[i + 1]; j++) coi[j] = gr;
      }
      /* fill coo_j */
      if (!cmapt[cp]) { /* type-0, already global */
        PetscCall(PetscArraycpy(coj, jj, mm->nz));
      } else if (cmapt[cp] == 1) { /* type-1, local to global for consecutive columns of C */
        for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */
      } else { /* type-2, local to global for sparse columns */
        for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]];
      }
      ncoo_d += mm->nz;
    } else if (rmapt[cp] == 2) { /* sparse rows */
      for (i = 0; i < mr; i++) {
        const PetscInt *jj = mm->j + ii[i];
        const PetscInt  gr = rmap[i];
        const PetscInt  nz = ii[i + 1] - ii[i];
        if (gr >= rs && gr < re) { /* local rows */
          for (j = ii[i]; j < ii[i + 1]; j++) *coi++ = gr;
          if (!cmapt[cp]) { /* type-0, already global */
            for (j = 0; j < nz; j++) *coj++ = jj[j];
          } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
            for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
          } else { /* type-2, local to global for sparse columns */
            for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
          }
          ncoo_d += nz;
        }
      }
    }
  }
  /* release the index tables borrowed for the maps above */
  if (glob) PetscCall(ISRestoreIndices(glob, &globidx));
  PetscCall(ISDestroy(&glob));
  if (P_oth_l2g) PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g, &P_oth_idx));
  PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g));
  /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */
  PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo * sizeof(PetscScalar), (void **)&mmdata->coo_v));

  /* set block sizes */
  A = product->A;
  P = product->B;
  switch (ptype) {
  case MATPRODUCT_PtAP:
    PetscCall(MatSetBlockSizes(C, P->cmap->bs, P->cmap->bs));
    break;
  case MATPRODUCT_RARt:
    PetscCall(MatSetBlockSizes(C, P->rmap->bs, P->rmap->bs));
    break;
  case MATPRODUCT_ABC:
    PetscCall(MatSetBlockSizesFromMats(C, A, product->C));
    break;
  case MATPRODUCT_AB:
    PetscCall(MatSetBlockSizesFromMats(C, A, P));
    break;
  case MATPRODUCT_AtB:
    PetscCall(MatSetBlockSizes(C, A->cmap->bs, P->cmap->bs));
    break;
  case MATPRODUCT_ABt:
    PetscCall(MatSetBlockSizes(C, A->rmap->bs, P->rmap->bs));
    break;
  default:
    SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for ProductType %s", MatProductTypes[ptype]);
  }

  /* preallocate with COO data */
  PetscCall(MatSetPreallocationCOO(C, ncoo, coo_i, coo_j));
  PetscCall(PetscFree2(coo_i, coo_j));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Select the backend symbolic implementation for AB/AtB/PtAP when A and B have matching (device) types,
   unless the user requests the CPU fallback via options */
PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat)
{
  Mat_Product *product = mat->product;
#if defined(PETSC_HAVE_DEVICE)
  PetscBool match  = PETSC_FALSE;
  PetscBool usecpu = PETSC_FALSE;
#else
  PetscBool match = PETSC_TRUE;
#endif

  PetscFunctionBegin;
  MatCheckProduct(mat, 1);
#if defined(PETSC_HAVE_DEVICE)
  if (!product->A->boundtocpu && !product->B->boundtocpu) PetscCall(PetscObjectTypeCompare((PetscObject)product->B, ((PetscObject)product->A)->type_name, &match));
  if (match) { /* we can always fallback to the CPU if requested */
    switch (product->type) {
    case MATPRODUCT_AB:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatMatMult", "Mat");
        PetscCall(PetscOptionsBool("-matmatmult_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AB", "Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      }
      break;
    case MATPRODUCT_AtB:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatTransposeMatMult", "Mat");
        PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AtB", "Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      }
      break;
    case MATPRODUCT_PtAP:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatPtAP", "Mat");
        PetscCall(PetscOptionsBool("-matptap_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_PtAP", "Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      }
      break;
    default:
      break;
    }
    match = (PetscBool)!usecpu;
  }
#endif
  if (match) {
    switch (product->type) {
    case MATPRODUCT_AB:
    case MATPRODUCT_AtB:
    case MATPRODUCT_PtAP:
      mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND;
      break;
    default:
      break;
    }
  }
  /* fallback to MPIAIJ ops */
  if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Produces a set of block column indices of the matrix row, one for each block represented in the original row

  n  - the number of block indices in cc[]
  cc - the block indices (must be large enough to contain the indices)
*/
static inline PetscErrorCode MatCollapseRow(Mat Amat, PetscInt row, PetscInt bs, PetscInt *n, PetscInt *cc)
{
  PetscInt        cnt = -1, nidx, j;
  const PetscInt *idx;

  PetscFunctionBegin;
  PetscCall(MatGetRow(Amat, row, &nidx, &idx, NULL));
  if (nidx) {
    /* idx[] is ascending, so dividing by bs and skipping repeats yields the sorted unique block ids */
    cnt     = 0;
    cc[cnt] = idx[0] / bs;
    for (j = 1; j < nidx; j++) {
      if (cc[cnt] < idx[j] / bs) cc[++cnt] = idx[j] / bs;
    }
  }
  PetscCall(MatRestoreRow(Amat, row, &nidx, &idx, NULL));
  *n = cnt + 1; /* 0 when the row is empty (cnt stays -1) */
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows

  ncollapsed - the number of block indices
  collapsed  - the block indices (must be large enough to contain the indices)
*/
static inline PetscErrorCode MatCollapseRows(Mat Amat, PetscInt start, PetscInt bs, PetscInt *w0, PetscInt *w1, PetscInt *w2, PetscInt *ncollapsed, PetscInt **collapsed)
{
  PetscInt i, nprev, *cprev = w0, ncur = 0, *ccur = w1, *merged = w2, *cprevtmp;

  PetscFunctionBegin;
  /* merge the collapsed indices of the bs rows of the block row, two work arrays ping-ponging as accumulator */
  PetscCall(MatCollapseRow(Amat, start, bs, &nprev, cprev));
  for (i = start + 1; i < start + bs; i++) {
    PetscCall(MatCollapseRow(Amat, i, bs, &ncur, ccur));
    PetscCall(PetscMergeIntArray(nprev, cprev, ncur, ccur, &nprev, &merged));
    cprevtmp = cprev;
    cprev    = merged;
    merged   = cprevtmp;
  }
  *ncollapsed = nprev;
  if (collapsed) *collapsed = cprev;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
 MatCreateGraph_Simple_AIJ - create a simple scalar matrix (graph) from a potentially blocked matrix

 Input Parameters:
+ Amat       - the matrix
. symmetrize - make the result symmetric
. scale      - symmetrically scale with the diagonal
. filter     - filter tolerance; a negative value skips filtering
. index_size - number of entries in index[]; 0 means use every row/column of each block
- index      - rows/columns within each bs x bs block used to accumulate the block value

 Output Parameter:
. a_Gmat - output scalar graph >= 0

*/
PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, PetscReal filter, PetscInt index_size, PetscInt index[], Mat *a_Gmat)
{
  PetscInt  Istart, Iend, Ii, jj, kk, ncols, nloc, NN, MM, bs;
  MPI_Comm  comm;
  Mat       Gmat;
  PetscBool ismpiaij, isseqaij;
  Mat       a, b, c;
  MatType   jtype;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
  PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend));
  PetscCall(MatGetSize(Amat, &MM, &NN));
  PetscCall(MatGetBlockSize(Amat, &bs));
  nloc = (Iend - Istart) / bs; /* number of local block rows = rows of the output graph */

  PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATSEQAIJ, &isseqaij));
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATMPIAIJ, &ismpiaij));
  PetscCheck(isseqaij || ismpiaij, comm, PETSC_ERR_USER, "Require (MPI)AIJ matrix type");

  /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */
  /* A solution consists in providing a new API, MatAIJGetCollapsedAIJ, and each class can provide a fast
     implementation */
  if (bs > 1) {
    PetscCall(MatGetType(Amat, &jtype));
    PetscCall(MatCreate(comm, &Gmat));
    PetscCall(MatSetType(Gmat, jtype));
    PetscCall(MatSetSizes(Gmat, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE));
    PetscCall(MatSetBlockSizes(Gmat, 1, 1));
    /* Fast path: assumes every nonzero block is a fully dense bs x bs block; verified below,
       with a fallback (goto old_bs) to the general row-collapsing path when that fails */
    if (isseqaij || ((Mat_MPIAIJ *)Amat->data)->garray) {
      PetscInt  *d_nnz, *o_nnz;
      MatScalar *aa, val, *AA;
      PetscInt  *aj, *ai, *AJ, nc, nmax = 0;

      if (isseqaij) {
        a = Amat;
        b = NULL;
      } else {
        Mat_MPIAIJ *d = (Mat_MPIAIJ *)Amat->data;

        a = d->A;
        b = d->B;
      }
      PetscCall(PetscInfo(Amat, "New bs>1 Graph. nloc=%" PetscInt_FMT "\n", nloc));
      PetscCall(PetscMalloc2(nloc, &d_nnz, (isseqaij ? 0 : nloc), &o_nnz));
      /* count block nonzeros per block row (d_nnz/o_nnz) and verify the dense-block assumption */
      for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
        PetscInt       *nnz = (c == a) ? d_nnz : o_nnz;
        const PetscInt *cols1, *cols2;

        for (PetscInt brow = 0, nc1, nc2, ok = 1; brow < nloc * bs; brow += bs) { // block rows
          PetscCall(MatGetRow(c, brow, &nc2, &cols2, NULL));
          nnz[brow / bs] = nc2 / bs;
          if (nc2 % bs) ok = 0; /* row length not a multiple of bs -> blocks cannot all be dense */
          if (nnz[brow / bs] > nmax) nmax = nnz[brow / bs];
          for (PetscInt ii = 1; ii < bs; ii++) { // check for non-dense blocks
            PetscCall(MatGetRow(c, brow + ii, &nc1, &cols1, NULL));
            if (nc1 != nc2) ok = 0;
            else {
              for (PetscInt jj = 0; jj < nc1 && ok == 1; jj++) {
                if (cols1[jj] != cols2[jj]) ok = 0; /* all rows of the block must share the same sparsity */
                if (cols1[jj] % bs != jj % bs) ok = 0;
              }
            }
            PetscCall(MatRestoreRow(c, brow + ii, &nc1, &cols1, NULL));
          }
          PetscCall(MatRestoreRow(c, brow, &nc2, &cols2, NULL));
          if (!ok) {
            PetscCall(PetscFree2(d_nnz, o_nnz));
            PetscCall(PetscInfo(Amat, "Found sparse blocks - revert to slow method\n"));
            goto old_bs;
          }
        }
      }
      PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz));
      PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz));
      PetscCall(PetscFree2(d_nnz, o_nnz));
      PetscCall(PetscMalloc2(nmax, &AA, nmax, &AJ));
      // diag
      for (PetscInt brow = 0, n, grow; brow < nloc * bs; brow += bs) { // block rows
        Mat_SeqAIJ *aseq = (Mat_SeqAIJ *)a->data;

        ai = aseq->i;
        n  = ai[brow + 1] - ai[brow];
        aj = aseq->j + ai[brow];
        for (PetscInt k = 0; k < n; k += bs) {   // block columns
          AJ[k / bs] = aj[k] / bs + Istart / bs; // diag starts at (Istart,Istart)
          val        = 0;
          if (index_size == 0) {
            for (PetscInt ii = 0; ii < bs; ii++) { // rows in block
              aa = aseq->a + ai[brow + ii] + k;
              for (PetscInt jj = 0; jj < bs; jj++) {     // columns in block
                val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm
              }
            }
          } else { // use (index,index) value if provided
            for (PetscInt iii = 0; iii < index_size; iii++) { // rows in block
              PetscInt ii = index[iii];

              aa = aseq->a + ai[brow + ii] + k;
              for (PetscInt jjj = 0; jjj < index_size; jjj++) { // columns in block
                PetscInt jj = index[jjj];

                val += PetscAbs(PetscRealPart(aa[jj]));
              }
            }
          }
          PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%" PetscInt_FMT ") >= nmax (%" PetscInt_FMT ")", k / bs, nmax);
          AA[k / bs] = val;
        }
        grow = Istart / bs + brow / bs;
        PetscCall(MatSetValues(Gmat, 1, &grow, n / bs, AJ, AA, ADD_VALUES));
      }
      // off-diag
      if (ismpiaij) {
        Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)Amat->data;
        const PetscScalar *vals;
        const PetscInt    *cols, *garray = aij->garray;

        PetscCheck(garray, PETSC_COMM_SELF, PETSC_ERR_USER, "No garray ?");
        for (PetscInt brow = 0, grow; brow < nloc * bs; brow += bs) { // block rows
          PetscCall(MatGetRow(b, brow, &ncols, &cols, NULL));
          for (PetscInt k = 0, cidx = 0; k < ncols; k += bs, cidx++) {
            PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs >= nmax");
            AA[k / bs] = 0;
            AJ[cidx]   = garray[cols[k]] / bs; /* map local off-diagonal column to global block column */
          }
          nc = ncols / bs;
          PetscCall(MatRestoreRow(b, brow, &ncols, &cols, NULL));
          if (index_size == 0) {
            for (PetscInt ii = 0; ii < bs; ii++) { // rows in block
              PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals));
              for (PetscInt k = 0; k < ncols; k += bs) {
                for (PetscInt jj = 0; jj < bs; jj++) { // cols in block
                  PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%" PetscInt_FMT ") >= nmax (%" PetscInt_FMT ")", k / bs, nmax);
                  AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj]));
                }
              }
              PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals));
            }
          } else { // use (index,index) value if provided
            for (PetscInt iii = 0; iii < index_size; iii++) { // rows in block
              PetscInt ii = index[iii];

              PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals));
              for (PetscInt k = 0; k < ncols; k += bs) {
                for (PetscInt jjj = 0; jjj < index_size; jjj++) { // cols in block
                  PetscInt jj = index[jjj];

                  AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj]));
                }
              }
              PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals));
            }
          }
          grow = Istart / bs + brow / bs;
          PetscCall(MatSetValues(Gmat, 1, &grow, nc, AJ, AA, ADD_VALUES));
        }
      }
      PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY));
      PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY));
      PetscCall(PetscFree2(AA, AJ));
    } else {
      const PetscScalar *vals;
      const PetscInt    *idx;
      PetscInt          *d_nnz, *o_nnz, *w0, *w1, *w2;

    old_bs:
      /*
        Determine the preallocation needed for the scalar matrix derived from the vector matrix.
      */
      PetscCall(PetscInfo(Amat, "OLD bs>1 CreateGraph\n"));
      PetscCall(PetscMalloc2(nloc, &d_nnz, (isseqaij ? 0 : nloc), &o_nnz));
      if (isseqaij) {
        PetscInt max_d_nnz;

        /*
          Determine exact preallocation count for (sequential) scalar matrix
        */
        PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat, &max_d_nnz));
        max_d_nnz = PetscMin(nloc, bs * max_d_nnz);
        PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2));
        for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) PetscCall(MatCollapseRows(Amat, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL));
        PetscCall(PetscFree3(w0, w1, w2));
      } else if (ismpiaij) {
        Mat             Daij, Oaij;
        const PetscInt *garray;
        PetscInt        max_d_nnz;

        PetscCall(MatMPIAIJGetSeqAIJ(Amat, &Daij, &Oaij, &garray));
        /*
          Determine exact preallocation count for diagonal block portion of scalar matrix
        */
        PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij, &max_d_nnz));
        max_d_nnz = PetscMin(nloc, bs * max_d_nnz);
        PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2));
        for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) PetscCall(MatCollapseRows(Daij, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL));
        PetscCall(PetscFree3(w0, w1, w2));
        /*
          Over estimate (usually grossly over), preallocation count for off-diagonal portion of scalar matrix
        */
        for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) {
          o_nnz[jj] = 0;
          for (kk = 0; kk < bs; kk++) { /* rows that get collapsed to a single row */
            PetscCall(MatGetRow(Oaij, Ii + kk, &ncols, NULL, NULL));
            o_nnz[jj] += ncols;
            PetscCall(MatRestoreRow(Oaij, Ii + kk, &ncols, NULL, NULL));
          }
          /* clamp to the number of off-process block columns */
          if (o_nnz[jj] > (NN / bs - nloc)) o_nnz[jj] = NN / bs - nloc;
        }
      } else SETERRQ(comm, PETSC_ERR_USER, "Require AIJ matrix type");
      /* get scalar copy (norms) of matrix */
      PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz));
      PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz));
      PetscCall(PetscFree2(d_nnz, o_nnz));
      for (Ii = Istart; Ii < Iend; Ii++) {
        PetscInt dest_row = Ii / bs;

        PetscCall(MatGetRow(Amat, Ii, &ncols, &idx, &vals));
        for (jj = 0; jj < ncols; jj++) {
          PetscInt    dest_col = idx[jj] / bs;
          PetscScalar sv       = PetscAbs(PetscRealPart(vals[jj]));

          PetscCall(MatSetValues(Gmat, 1, &dest_row, 1, &dest_col, &sv, ADD_VALUES));
        }
        PetscCall(MatRestoreRow(Amat, Ii, &ncols, &idx, &vals));
      }
      PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY));
      PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY));
    }
  } else {
    /* bs == 1: the graph is the matrix itself (copied only when it must be modified) */
    if (symmetrize || filter >= 0 || scale) PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat));
    else {
      Gmat = Amat;
      PetscCall(PetscObjectReference((PetscObject)Gmat));
    }
    if (isseqaij) {
      a = Gmat;
      b = NULL;
    } else {
      Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data;

      a = d->A;
      b = d->B;
    }
    if (filter >= 0 || scale) {
      /* take absolute value of each entry */
      for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
        MatInfo      info;
        PetscScalar *avals;

        PetscCall(MatGetInfo(c, MAT_LOCAL, &info));
        PetscCall(MatSeqAIJGetArray(c, &avals));
        /* NOTE(review): loop counter is int compared against info.nz_used (a floating-point count);
           presumably safe for < 2^31 local nonzeros — confirm for very large local matrices */
        for (int jj = 0; jj < info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]);
        PetscCall(MatSeqAIJRestoreArray(c, &avals));
      }
    }
  }
  if (symmetrize) {
    PetscBool isset, issym;

    PetscCall(MatIsSymmetricKnown(Amat, &isset, &issym));
    if (!isset || !issym) {
      Mat matTrans;

      PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans));
      PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric == PETSC_BOOL3_TRUE ? SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN));
      PetscCall(MatDestroy(&matTrans));
    }
    PetscCall(MatSetOption(Gmat, MAT_SYMMETRIC, PETSC_TRUE));
  } else if (Amat != Gmat) PetscCall(MatPropagateSymmetryOptions(Amat, Gmat));
  if (scale) {
    /* scale c for all diagonal values = 1 or -1 */
    Vec diag;

    PetscCall(MatCreateVecs(Gmat, &diag, NULL));
    PetscCall(MatGetDiagonal(Gmat, diag));
    PetscCall(VecReciprocal(diag));
    PetscCall(VecSqrtAbs(diag));
    PetscCall(MatDiagonalScale(Gmat, diag, diag));
    PetscCall(VecDestroy(&diag));
  }
  PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view"));
  if (filter >= 0) {
    PetscCall(MatFilter(Gmat, filter, PETSC_TRUE, PETSC_TRUE));
    PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_filter_graph_view"));
  }
  *a_Gmat = Gmat;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
    Special version for direct calls from Fortran
*/

/* Change these macros so can be used in void function */
/* Identical to PetscCallVoid, except it assigns to *_ierr */
#undef PetscCall
#define PetscCall(...) \
  do { \
    PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \
    if (PetscUnlikely(ierr_msv_mpiaij)) { \
      *_ierr = PetscError(PETSC_COMM_SELF, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr_msv_mpiaij, PETSC_ERROR_REPEAT, " "); \
      return; \
    } \
  } while (0)

#undef SETERRQ
#define SETERRQ(comm, ierr, ...) \
\
  do { \
    *_ierr = PetscError(comm, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr, PETSC_ERROR_INITIAL, __VA_ARGS__); \
    return; \
  } while (0)

/* Fortran name mangling for the symbol below */
#if defined(PETSC_HAVE_FORTRAN_CAPS)
  #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
  #define matsetvaluesmpiaij_ matsetvaluesmpiaij
#else
#endif
/*
  matsetvaluesmpiaij_ - Fortran-callable MatSetValues() fast path for MPIAIJ matrices.

  All arguments are passed by reference (Fortran convention); errors are reported through
  *_ierr via the redefined PetscCall()/SETERRQ() macros above rather than a return value.
*/
PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat, PetscInt *mm, const PetscInt im[], PetscInt *mn, const PetscInt in[], const PetscScalar v[], InsertMode *maddv, PetscErrorCode *_ierr)
{
  Mat         mat  = *mmat;
  PetscInt    m = *mm, n = *mn;
  InsertMode  addv = *maddv;
  Mat_MPIAIJ *aij  = (Mat_MPIAIJ *)mat->data;
  PetscScalar value;

  MatCheckPreallocated(mat, 1);
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
  else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values");
  {
    /* The local variables below are read and written by the MatSetValues_SeqAIJ_A_Private()
       and MatSetValues_SeqAIJ_B_Private() macros; their names must not be changed. */
    PetscInt  i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
    PetscInt  cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
    PetscBool roworiented = aij->roworiented;

    /* Some Variables required in the macro */
    Mat         A     = aij->A;
    Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
    PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
    MatScalar  *aa;
    PetscBool   ignorezeroentries = ((a->ignorezeroentries && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
    Mat         B     = aij->B;
    Mat_SeqAIJ *b     = (Mat_SeqAIJ *)B->data;
    PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
    MatScalar  *ba;
    /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
     * cannot use "#if defined" inside a macro. */
    PETSC_UNUSED PetscBool inserted = PETSC_FALSE;

    PetscInt  *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
    PetscInt   nonew = a->nonew;
    MatScalar *ap1, *ap2;

    PetscFunctionBegin;
    PetscCall(MatSeqAIJGetArray(A, &aa));
    PetscCall(MatSeqAIJGetArray(B, &ba));
    for (i = 0; i < m; i++) {
      if (im[i] < 0) continue; /* negative row index means "skip" */
      PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
      if (im[i] >= rstart && im[i] < rend) {
        /* locally owned row: set up binary-search state for the diagonal (1) and off-diagonal (2) parts */
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j = 0; j < n; j++) {
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
          if (in[j] >= cstart && in[j] < cend) {
            /* column in the diagonal block */
            col = in[j] - cstart;
            MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
          } else if (in[j] < 0) continue;
          else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
            SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          } else {
            /* column in the off-diagonal block: translate global column to local via colmap */
            if (mat->was_assembled) {
              if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
              PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col));
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              if (col < 0 && !((Mat_SeqAIJ *)aij->A->data)->nonew) {
                /* column not present in the assembled pattern: disassemble and insert with global index */
                PetscCall(MatDisAssemble_MPIAIJ(mat, PETSC_FALSE));
                col = in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
                B        = aij->B;
                b        = (Mat_SeqAIJ *)B->data;
                bimax    = b->imax;
                bi       = b->i;
                bilen    = b->ilen;
                bj       = b->j;
                rp2      = bj + bi[row];
                ap2      = ba + bi[row];
                rmax2    = bimax[row];
                nrow2    = bilen[row];
                low2     = 0;
                high2    = nrow2;
                bm       = aij->B->rmap->n;
                ba       = b->a;
                inserted = PETSC_FALSE;
              }
            } else col = in[j];
            MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
          }
        }
      } else if (!aij->donotstash) {
        /* off-process row: stash for communication at assembly time */
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
    PetscCall(MatSeqAIJRestoreArray(A, &aa));
    PetscCall(MatSeqAIJRestoreArray(B, &ba));
  }
  PetscFunctionReturnVoid();
}

/* Undefining these here since they were redefined from their original definition above! No
 * other PETSc functions should be defined past this point, as it is impossible to recover the
 * original definitions */
#undef PetscCall
#undef SETERRQ