#include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
#include <petsc/private/vecimpl.h>
#include <petsc/private/sfimpl.h>
#include <petsc/private/isimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>
#include <petsc/private/hashmapi.h>

/* Instantiates the hash-based (unpreallocated) assembly path for the AIJ type:
   defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and MatAssemblyEnd_MPI_Hash() */
#define TYPE AIJ
#define TYPE_AIJ
#include "../src/mat/impls/aij/mpi/mpihashmat.h"
#undef TYPE
#undef TYPE_AIJ

/* Release all internal storage of an MPIAIJ matrix (diagonal block A, off-diagonal block B,
   column map, garray, communication vector/scatter, row work arrays) WITHOUT freeing the
   Mat_MPIAIJ struct itself; shared by MatDestroy_MPIAIJ() and MatResetHash_MPIAIJ(). */
static PetscErrorCode MatReset_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
  PetscCall(MatStashDestroy_Private(&mat->stash));
  PetscCall(VecDestroy(&aij->diag));
  PetscCall(MatDestroy(&aij->A));
  PetscCall(MatDestroy(&aij->B));
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&aij->colmap)); /* scalable hash-table global->local column map */
#else
  PetscCall(PetscFree(aij->colmap)); /* non-scalable array map of length cmap->N */
#endif
  PetscCall(PetscFree(aij->garray));
  PetscCall(VecDestroy(&aij->lvec));
  PetscCall(VecScatterDestroy(&aij->Mvctx));
  PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));
  PetscCall(PetscFree(aij->ld));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Throw away the current storage and return the matrix to the unassembled hash-based insertion
   state, while advancing the nonzero states of A and B so the aggregate nonzero state of mat
   is seen to change. */
static PetscErrorCode MatResetHash_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  /* Save the nonzero states of the component matrices because those are what are used to determine
     the nonzero state of mat */
  PetscObjectState Astate = aij->A->nonzerostate, Bstate = aij->B->nonzerostate;

  PetscFunctionBegin;
  PetscCall(MatReset_MPIAIJ(mat));
  PetscCall(MatSetUp_MPI_Hash(mat));
  /* A and B were destroyed above; MatSetUp_MPI_Hash() evidently recreates them, and we carry
     over the saved states, incremented, onto the fresh blocks */
  aij->A->nonzerostate = ++Astate, aij->B->nonzerostate = ++Bstate;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Destroy an MPIAIJ matrix: free the internal storage and the Mat_MPIAIJ struct, then detach
   every composed object and composed function pointer from the base Mat. */
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  PetscFunctionBegin;
  PetscCall(MatReset_MPIAIJ(mat));

  PetscCall(PetscFree(mat->data));

  /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
  PetscCall(PetscObjectCompose((PetscObject)mat, "MatMergeSeqsToMPI", NULL));

  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatIsTranspose_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetHash_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocationCSR_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpibaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisbaij_C", NULL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcusparse_C", NULL));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijhipsparse_C", NULL));
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijkokkos_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_elemental_C", NULL));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_scalapack_C", NULL));
#endif
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_hypre_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_is_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetUseScalableIncreaseOverlap_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijperm_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijsell_C", NULL));
#if defined(PETSC_HAVE_MKL_SPARSE)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijmkl_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcrl_C", NULL));
  /* NOTE(review): "MatConvert_mpiaij_is_C" is also cleared above -- this line is redundant */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisell_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetPreallocationCOO_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetValuesCOO_C", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Provide the classic GetRowIJ interface by first merging the distributed matrix into a
   sequential local matrix B; B is composed onto A (which keeps it alive after the local
   MatDestroy() below) so MatRestoreRowIJ_MPIAIJ() can find and release it. */
static PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
{
  Mat B;

  PetscFunctionBegin;
  PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &B));
  PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject)B));
  PetscCall(MatGetRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
  PetscCall(MatDestroy(&B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Counterpart of MatGetRowIJ_MPIAIJ(): retrieves the composed sequential matrix, restores its
   ia/ja arrays, and removes the composition (dropping the last reference to B). */
static PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
{
  Mat B;

  PetscFunctionBegin;
  PetscCall(PetscObjectQuery((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject *)&B));
  PetscCall(MatRestoreRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
  PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to `MATSEQAIJ` when constructed with a single process communicator,
   and `MATMPIAIJ` otherwise. As a result, for single process communicators,
   `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Key:
.  -mat_type aij - sets the matrix type to `MATAIJ` during a call to `MatSetFromOptions()`

   Level: beginner

   Developer Note:
   Subclasses include `MATAIJCUSPARSE`, `MATAIJPERM`, `MATAIJSELL`, `MATAIJMKL`, `MATAIJCRL`, `MATAIJKOKKOS`, and also automatically switches over to use inodes when
   enough exist.

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateAIJ()`, `MatCreateSeqAIJ()`, `MATSEQAIJ`, `MATMPIAIJ`
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to `MATSEQAIJCRL` when constructed with a single process communicator,
   and `MATMPIAIJCRL` otherwise.
As a result, for single process communicators,
   `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Key:
.  -mat_type aijcrl - sets the matrix type to `MATMPIAIJCRL` during a call to `MatSetFromOptions()`

   Level: beginner

.seealso: [](ch_matrices), `Mat`, `MatCreateMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL`
M*/

/* Propagate a CPU-binding request to both sequential blocks and to the work vectors used in
   matrix-vector products, so the whole object runs on the same memory space. */
static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A, PetscBool flg)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) || defined(PETSC_HAVE_VIENNACL)
  A->boundtocpu = flg;
#endif
  if (a->A) PetscCall(MatBindToCPU(a->A, flg));
  if (a->B) PetscCall(MatBindToCPU(a->B, flg));

  /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
   * This maybe seems a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
   * to differ from the parent matrix.
   */
  if (a->lvec) PetscCall(VecBindToCPU(a->lvec, flg));
  if (a->diag) PetscCall(VecBindToCPU(a->diag, flg));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Forward block-size changes to the sequential blocks; the off-diagonal block B keeps column
   blocksize 1 (its columns are the scattered garray columns, not contiguous blocks). */
static PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data;

  PetscFunctionBegin;
  if (mat->A) {
    PetscCall(MatSetBlockSizes(mat->A, rbs, cbs));
    PetscCall(MatSetBlockSizes(mat->B, rbs, 1));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Build an IS of the locally owned rows that contain at least one stored nonzero VALUE
   (rows that are structurally empty or hold only explicit zeros are dropped). Returns
   *keptrows = NULL when, globally, every row is kept. */
static PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M, IS *keptrows)
{
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ *)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ *)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ *)mat->B->data;
  const PetscInt  *ia, *ib;
  const MatScalar *aa, *bb, *aav, *bav;
  PetscInt         na, nb, i, j, *rows, cnt = 0, n0rows;
  PetscInt         m = M->rmap->n, rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = NULL;

  ia = a->i;
  ib = b->i;
  PetscCall(MatSeqAIJGetArrayRead(mat->A, &aav));
  PetscCall(MatSeqAIJGetArrayRead(mat->B, &bav));
  /* first pass: count local rows with no stored entry, or only zero-valued entries, in both blocks */
  for (i = 0; i < m; i++) {
    na = ia[i + 1] - ia[i];
    nb = ib[i + 1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = aav + ia[i];
    for (j = 0; j < na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = PetscSafePointerPlusOffset(bav, ib[i]);
    for (j = 0; j < nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
  ok1:;
  }
  /* if no rank dropped a row, every row is kept and NULL is returned */
  PetscCallMPI(MPIU_Allreduce(&cnt, &n0rows, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)M)));
  if (!n0rows) {
    PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
    PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  PetscCall(PetscMalloc1(M->rmap->n - cnt, &rows));
  cnt = 0;
  /* second pass: collect the global indices of the kept rows */
  for (i = 0; i < m; i++) {
    na = ia[i + 1] - ia[i];
    nb = ib[i + 1] - ib[i];
    if (!na && !nb) continue;
    aa = aav + ia[i];
    for (j = 0; j < na; j++) {
      if (aa[j] !=
0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = PetscSafePointerPlusOffset(bav, ib[i]);
    for (j = 0; j < nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
  ok2:;
  }
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), cnt, rows, PETSC_OWN_POINTER, keptrows));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Insert/add D into the diagonal of Y; fast path delegates to the diagonal block when the
   matrix is assembled and the row/column layouts are congruent. */
static PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y, Vec D, InsertMode is)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Y->data;
  PetscBool   cong;

  PetscFunctionBegin;
  PetscCall(MatHasCongruentLayouts(Y, &cong));
  if (Y->assembled && cong) {
    PetscCall(MatDiagonalSet(aij->A, D, is));
  } else {
    PetscCall(MatDiagonalSet_Default(Y, D, is));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Return an IS (global numbering) of the locally owned rows whose diagonal entry is zero or
   missing; the search only needs the diagonal block A. */
static PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M, IS *zrows)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)M->data;
  PetscInt    i, rstart, nrows, *rows;

  PetscFunctionBegin;
  *zrows = NULL;
  PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A, &nrows, &rows));
  PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
  for (i = 0; i < nrows; i++) rows[i] += rstart; /* local row -> global row */
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), nrows, rows, PETSC_OWN_POINTER, zrows));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Compute a per-column reduction (norms, sums, or means of real/imaginary parts) over the whole
   matrix into reductions[] (length = global number of columns, valid on all ranks). */
static PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A, PetscInt type, PetscReal *reductions)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)A->data;
  PetscInt           i, m, n, *garray = aij->garray;
  Mat_SeqAIJ        *a_aij = (Mat_SeqAIJ *)aij->A->data;
  Mat_SeqAIJ        *b_aij = (Mat_SeqAIJ *)aij->B->data;
  PetscReal         *work;
  const PetscScalar *dummy;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A, &m, &n));
  PetscCall(PetscCalloc1(n, &work)); /* one slot per GLOBAL column, zero-initialized */
  /* Get/Restore pairs with unused results: presumably done to make sure the raw a->a/b->a
     arrays read below hold up-to-date values (e.g. after device computation) -- TODO confirm */
  PetscCall(MatSeqAIJGetArrayRead(aij->A, &dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &dummy));
  PetscCall(MatSeqAIJGetArrayRead(aij->B, &dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &dummy));
  /* A-block columns map to global via cmap->rstart offset; B-block columns map via garray[] */
  if (type == NORM_2) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i] * a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i] * b_aij->a[i]);
  } else if (type == NORM_1) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
  } else if (type == NORM_INFINITY) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]), work[garray[b_aij->j[i]]]);
  } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]);
  } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]);
  } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
  /* combine the per-rank partial results: max for the infinity norm, sum for everything else */
  if (type == NORM_INFINITY) {
    PetscCallMPI(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
  } else {
    PetscCallMPI(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscCall(PetscFree(work));
  if (type == NORM_2) {
    for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < n; i++) reductions[i] /= m; /* mean over the global number of rows */
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Return an IS (global numbering) of the local rows that have an entry outside the diagonal
   block structure: rows flagged by the diagonal block itself, plus any row with a nonzero in
   the off-diagonal block B. */
static PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A, IS *is)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
  IS              sis, gis;
  const PetscInt *isis, *igis;
  PetscInt        n, *iis, nsis, ngis, rstart, i;

  PetscFunctionBegin;
  PetscCall(MatFindOffBlockDiagonalEntries(a->A, &sis));
  PetscCall(MatFindNonzeroRows(a->B, &gis));
  PetscCall(ISGetSize(gis, &ngis));
  PetscCall(ISGetSize(sis, &nsis));
  PetscCall(ISGetIndices(sis, &isis));
  PetscCall(ISGetIndices(gis, &igis));

  /* merge the two (local-numbered) index lists, then sort and remove duplicates */
  PetscCall(PetscMalloc1(ngis + nsis, &iis));
  PetscCall(PetscArraycpy(iis, igis, ngis));
  PetscCall(PetscArraycpy(iis + ngis, isis, nsis));
  n = ngis + nsis;
  PetscCall(PetscSortRemoveDupsInt(&n, iis));
  PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
  for (i = 0; i < n; i++) iis[i] += rstart; /* local row -> global row */
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A), n, iis, PETSC_OWN_POINTER, is));

  PetscCall(ISRestoreIndices(sis, &isis));
  PetscCall(ISRestoreIndices(gis, &igis));
  PetscCall(ISDestroy(&sis));
  PetscCall(ISDestroy(&gis));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
    Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it it is not scalable (each processor
  has an order-N integer array, but it is fast to access).
*/
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt    n   = aij->B->cmap->n, i;

  PetscFunctionBegin;
  PetscCheck(!n || aij->garray, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  /* hash map stores (global column + 1) -> (local column + 1); the +1 shift lets 0 mean "absent" */
  PetscCall(PetscHMapICreateWithSize(n, &aij->colmap));
  for (i = 0; i < n; i++) PetscCall(PetscHMapISet(aij->colmap, aij->garray[i] + 1, i + 1));
#else
  /* dense array over ALL global columns (not scalable); entry 0 means "absent" */
  PetscCall(PetscCalloc1(mat->cmap->N + 1, &aij->colmap));
  for (i = 0; i < n; i++) aij->colmap[aij->garray[i]] = i + 1;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Insert (row,col,value) into the diagonal block A: binary search narrows the range, the linear
   scan finds the slot; new entries trigger MatSeqXAIJReallocateAIJ() and a shift of the tail of
   the row. orow/ocol are the global indices, used only in error messages. Relies on the caller
   providing rp1/ap1/nrow1/low1/high1/lastcol1/nonew/ignorezeroentries/... in scope. */
#define MatSetValues_SeqAIJ_A_Private(row, col, value, addv, orow, ocol) \
  do { \
    if (col <= lastcol1) low1 = 0; \
    else high1 = nrow1; \
    lastcol1 = col; \
    while (high1 - low1 > 5) { \
      t = (low1 + high1) / 2; \
      if (rp1[t] > col) high1 = t; \
      else low1 = t; \
    } \
    for (_i = low1; _i < high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap1[_i] += value; \
          /* Not sure LogFlops will slow down the code or not */ \
          (void)PetscLogFlops(1.0); \
        } else ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries && row != col) { \
      low1  = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    if (nonew == 1) { \
      low1  = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A, am, 1, nrow1, row, col, rmax1, aa, ai, aj, rp1, ap1, aimax, nonew, MatScalar); \
    N = nrow1++ - 1; \
    a->nz++; \
    high1++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp1 + _i + 1, rp1 + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap1 + _i + 1, ap1 + _i, N - _i + 1)); \
    rp1[_i] = col; \
    ap1[_i] = value; \
  a_noinsert:; \
    ailen[row] = nrow1; \
  } while (0)

/* Same as MatSetValues_SeqAIJ_A_Private() but for the off-diagonal block B (rp2/ap2/... state);
   note the zero-value skip has no row != col test here since B never holds diagonal entries. */
#define MatSetValues_SeqAIJ_B_Private(row, col, value, addv, orow, ocol) \
  do { \
    if (col <= lastcol2) low2 = 0; \
    else high2 = nrow2; \
    lastcol2 = col; \
    while (high2 - low2 > 5) { \
      t = (low2 + high2) / 2; \
      if (rp2[t] > col) high2 = t; \
      else low2 = t; \
    } \
    for (_i = low2; _i < high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap2[_i] += value; \
          (void)PetscLogFlops(1.0); \
        } else ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) { \
      low2  = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    if (nonew == 1) { \
      low2  = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B, bm, 1, nrow2, row, col, rmax2, ba, bi, bj, rp2, ap2, bimax, nonew, MatScalar); \
    N = nrow2++ - 1; \
    b->nz++; \
    high2++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp2 + _i + 1, rp2 + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap2 + _i + 1, ap2 + _i, N - _i + 1)); \
    rp2[_i] = col; \
    ap2[_i] = value; \
  b_noinsert:; \
    bilen[row] = nrow2; \
  } while (0)

/* Overwrite all stored values of one locally owned row. The caller supplies v[] ordered by
   global column, which the code splits as: B-entries left of the diagonal block, then all
   A-entries, then B-entries right of the diagonal block. */
static PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A, PetscInt row, const PetscScalar v[])
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ  *a = (Mat_SeqAIJ *)mat->A->data, *b = (Mat_SeqAIJ *)mat->B->data;
  PetscInt     l, *garray = mat->garray, diag;
  PetscScalar *aa, *ba;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size
of row to the left of the diagonal part */
  PetscCall(MatGetOwnershipRange(A, &diag, NULL)); /* for square matrices, row start == first diagonal column */
  row = row - diag;                                /* global row -> local row */
  for (l = 0; l < b->i[row + 1] - b->i[row]; l++) {
    if (garray[b->j[b->i[row] + l]] > diag) break; /* l = number of B entries left of the diagonal block */
  }
  if (l) {
    PetscCall(MatSeqAIJGetArray(mat->B, &ba));
    PetscCall(PetscArraycpy(ba + b->i[row], v, l));
    PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
  }

  /* diagonal part */
  if (a->i[row + 1] - a->i[row]) {
    PetscCall(MatSeqAIJGetArray(mat->A, &aa));
    PetscCall(PetscArraycpy(aa + a->i[row], v + l, a->i[row + 1] - a->i[row]));
    PetscCall(MatSeqAIJRestoreArray(mat->A, &aa));
  }

  /* right of diagonal part */
  if (b->i[row + 1] - b->i[row] - l) {
    PetscCall(MatSeqAIJGetArray(mat->B, &ba));
    PetscCall(PetscArraycpy(ba + b->i[row] + l, v + l + a->i[row + 1] - a->i[row], b->i[row + 1] - b->i[row] - l));
    PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Insert or add an m x n dense array of values at global rows im[] / columns in[]. Locally owned
   rows go straight into the A (diagonal) or B (off-diagonal) block via the macros above;
   off-process rows are queued in the stash for communication during assembly. Negative row or
   column indices are ignored. */
PetscErrorCode MatSetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscScalar value = 0.0;
  PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
  PetscBool   roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat         A = aij->A;
  Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
  PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
  PetscBool   ignorezeroentries = a->ignorezeroentries;
  Mat         B = aij->B;
  Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data;
  PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
  MatScalar  *aa, *ba;
  PetscInt   *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
  PetscInt    nonew;
  MatScalar  *ap1, *ap2;

  PetscFunctionBegin;
  PetscCall(MatSeqAIJGetArray(A, &aa));
  PetscCall(MatSeqAIJGetArray(B, &ba));
  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue;
    PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    if (im[i] >= rstart && im[i] < rend) {
      /* locally owned row: cache the per-row search state used by the insertion macros */
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = PetscSafePointerPlusOffset(aj, ai[row]);
      ap1      = PetscSafePointerPlusOffset(aa, ai[row]);
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = PetscSafePointerPlusOffset(bj, bi[row]);
      ap2      = PetscSafePointerPlusOffset(ba, bi[row]);
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j = 0; j < n; j++) {
        if (v) value = roworiented ? v[i * n + j] : v[i + j * m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
        if (in[j] >= cstart && in[j] < cend) {
          /* column falls in the diagonal block A */
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
        } else if (in[j] < 0) {
          continue;
        } else {
          PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          if (mat->was_assembled) {
            if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); /* map global col ids to local ones */
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ *)aij->B->data)->nonew) { /* col < 0 means in[j] is a new col for B */
              PetscCall(MatDisAssemble_MPIAIJ(mat, PETSC_FALSE));  /* Change aij->B from reduced/local format to expanded/global format */
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ *)B->data;
              bimax = b->imax;
              bi    = b->i;
              bilen = b->ilen;
              bj    = b->j;
              ba    = b->a;
              rp2   = PetscSafePointerPlusOffset(bj, bi[row]);
              ap2   = PetscSafePointerPlusOffset(ba, bi[row]);
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
              /* new off-diagonal location but insertion is disallowed: warn (nonew==1) or error */
              if (1 == ((Mat_SeqAIJ *)aij->B->data)->nonew) {
                PetscCall(PetscInfo(mat, "Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n", (double)PetscRealPart(value), im[i], in[j]));
              } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
            }
          } else col = in[j]; /* not yet assembled: B still uses global column indices */
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
        }
      }
    } else {
      /* off-process row: stash it for MatAssemblyBegin/End communication */
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i * n), (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i), m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
  }
  PetscCall(MatSeqAIJRestoreArray(A, &aa)); /* aa, bb might have been free'd due to reallocation above. But we don't access them here */
  PetscCall(MatSeqAIJRestoreArray(B, &ba));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
  The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
  No off-processor parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  Mat         A   = aij->A; /* diagonal part of the matrix */
  Mat         B   = aij->B; /* off-diagonal part of the matrix */
  Mat_SeqAIJ *a   = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJ *b   = (Mat_SeqAIJ *)B->data;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, col;
  PetscInt   *ailen = a->ilen, *aj = a->j;
  PetscInt   *bilen = b->ilen, *bj = b->j;
  PetscInt    am = aij->A->rmap->n, j;
  PetscInt    diag_so_far = 0, dnz;
  PetscInt    offd_so_far = 0, onz;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j = 0; j < am; j++) {
    dnz = onz = 0;
    /* Iterate over all non-zero columns of the current row */
    for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[diag_so_far++] = mat_j[col] - cstart; /* A stores local column indices */
        dnz++;
      } else { /* off-diagonal entries */
        bj[offd_so_far++] = mat_j[col]; /* B keeps global indices until assembly */
        onz++;
      }
    }
    ailen[j] = dnz;
    bilen[j] = onz;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
  The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
  No off-processor parts of the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ.
  Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
  would not be true and the more complex MatSetValues_MPIAIJ has to be used.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[], const PetscScalar mat_a[])
{
  Mat_MPIAIJ *aij  = (Mat_MPIAIJ *)mat->data;
  Mat         A    = aij->A; /* diagonal part of the matrix */
  Mat         B    = aij->B; /* off-diagonal part of the matrix */
  Mat_SeqAIJ *aijd = (Mat_SeqAIJ *)aij->A->data, *aijo = (Mat_SeqAIJ *)aij->B->data;
  Mat_SeqAIJ *a    = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJ *b    = (Mat_SeqAIJ *)B->data;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend;
  PetscInt   *ailen = a->ilen, *aj = a->j;
  PetscInt   *bilen = b->ilen, *bj = b->j;
  PetscInt    am = aij->A->rmap->n, j;
  PetscInt   *full_diag_i = aijd->i, *full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
  PetscInt    col, dnz_row, onz_row, rowstart_diag, rowstart_offd;
  PetscScalar *aa = a->a, *ba = b->a;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j = 0; j < am; j++) {
    dnz_row = onz_row = 0;
    rowstart_offd = full_offd_i[j];
    rowstart_diag = full_diag_i[j];
    /* Iterate over all non-zero columns of the current row */
    for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[rowstart_diag + dnz_row] = mat_j[col] - cstart; /* A stores local column indices */
        aa[rowstart_diag + dnz_row] = mat_a[col];
        dnz_row++;
      } else { /* off-diagonal entries */
        bj[rowstart_offd + onz_row] = mat_j[col]; /* B keeps global indices until assembly */
        ba[rowstart_offd + onz_row] = mat_a[col];
        onz_row++;
      }
    }
    ailen[j] = dnz_row;
    bilen[j] = onz_row;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Retrieve an m x n dense array of values at global rows idxm[] / columns idxn[]; only rows owned
   by this process may be requested. Columns not present in the off-diagonal block read as 0.0.
   Negative row/column indices are skipped. */
static PetscErrorCode MatGetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
    PetscCheck(idxm[i] >= rstart && idxm[i] < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported, row requested %" PetscInt_FMT " range [%" PetscInt_FMT " %" PetscInt_FMT ")", idxm[i], rstart, rend);
    row = idxm[i] - rstart;
    for (j = 0; j < n; j++) {
      if (idxn[j] < 0) continue; /* negative column */
      PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT,
idxn[j], mat->cmap->N - 1);
      if (idxn[j] >= cstart && idxn[j] < cend) {
        /* column lies in this rank's diagonal block: shift to local numbering */
        col = idxn[j] - cstart;
        PetscCall(MatGetValues(aij->A, 1, &row, 1, &col, v + i * n + j));
      } else {
        /* off-diagonal block: translate the global column to B's local column via colmap */
        if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
        PetscCall(PetscHMapIGetWithDefault(aij->colmap, idxn[j] + 1, 0, &col));
        col--;
#else
        col = aij->colmap[idxn[j]] - 1;
#endif
        /* colmap is 1-based with 0 meaning "not present"; garray check guards against a stale slot */
        if ((col < 0) || (aij->garray[col] != idxn[j])) *(v + i * n + j) = 0.0;
        else PetscCall(MatGetValues(aij->B, 1, &row, 1, &col, v + i * n + j));
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Start communicating the stashed off-process entries (no-op when stashing is disabled) */
static PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt    nstash, reallocs;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
  PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
  PetscCall(PetscInfo(aij->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Finish assembly: drain the stash into local blocks, assemble A and B, and handle
   collective disassembly/multiply setup. Collective over the matrix's communicator. */
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *)mat->data;
  PetscMPIInt  n;
  PetscInt     i, j, rstart, ncols, flg;
  PetscInt    *row, *col;
  PetscBool    other_disassembled;
  PetscScalar *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    /* receive every message of entries stashed for rows owned by this rank */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
  /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
  if (mat->boundtocpu) {
    PetscCall(MatBindToCPU(aij->A, PETSC_TRUE));
    PetscCall(MatBindToCPU(aij->B, PETSC_TRUE));
  }
#endif
  PetscCall(MatAssemblyBegin(aij->A, mode));
  PetscCall(MatAssemblyEnd(aij->A, mode));

  /* determine if any processor has disassembled, if so we must
     also disassemble ourself, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ *)aij->B->data)->nonew) {
    PetscCallMPI(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globally it does not */
      PetscCall(MatDisAssemble_MPIAIJ(mat, PETSC_FALSE));
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIAIJ(mat));
  PetscCall(MatSetOption(aij->B, MAT_USE_INODES, PETSC_FALSE));
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscCall(MatAssemblyBegin(aij->B, mode));
  PetscCall(MatAssemblyEnd(aij->B, mode));

  /* cached row workspace from MatGetRow is invalidated by assembly */
  PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));
aij->rowvalues = NULL;

  /* cached diagonal is stale after assembly */
  PetscCall(VecDestroy(&aij->diag));

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ *)aij->A->data)->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
  }
#if defined(PETSC_HAVE_DEVICE)
  mat->offloadmask = PETSC_OFFLOAD_BOTH;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Zero all stored values of both the diagonal (A) and off-diagonal (B) blocks; the nonzero pattern is preserved */
static PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatZeroEntries(l->A));
  PetscCall(MatZeroEntries(l->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Zero the (globally numbered) rows[] of the parallel matrix, optionally placing diag on the
   diagonal and fixing up b so that x stays a solution for the zeroed rows. */
static PetscErrorCode MatZeroRows_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data;
  PetscInt   *lrows;
  PetscInt    r, len;
  PetscBool   cong;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows));
  PetscCall(MatHasCongruentLayouts(A, &cong));
  /* fix right-hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArray(b, &bb));
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArray(b, &bb));
  }

  if (diag != 0.0 && cong) {
    /* congruent layouts: the diagonal entry lives in the local diagonal block A */
    PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
    Mat_SeqAIJ *aijA = (Mat_SeqAIJ *)mat->A->data;
    Mat_SeqAIJ *aijB = (Mat_SeqAIJ *)mat->B->data;
    PetscInt    nnwA, nnwB; /* saved 'nonew' flags, restored below */
    PetscBool   nnzA, nnzB; /* keepnonzeropattern flags of each block */

    nnwA = aijA->nonew;
    nnwB = aijB->nonew;
    nnzA = aijA->keepnonzeropattern;
    nnzB = aijB->keepnonzeropattern;
    if (!nnzA) {
      PetscCall(PetscInfo(mat->A, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n"));
      aijA->nonew = 0;
    }
    if (!nnzB) {
      PetscCall(PetscInfo(mat->B, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n"));
      aijB->nonew = 0;
    }
    /* Must zero here before the next loop */
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      if (row >= A->cmap->N) continue; /* rows past the last column have no diagonal entry */
      PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
    }
    aijA->nonew = nnwA;
    aijB->nonew = nnwB;
  } else {
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  }
  PetscCall(PetscFree(lrows));
  PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ *)mat->A->data)->keepnonzeropattern || !((Mat_SeqAIJ *)mat->A->data)->nonew) {
    PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
    PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Zero the given global rows AND the matching columns, optionally placing diag on the
   diagonal and updating b for the eliminated column contributions. */
static PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ *)A->data;
  PetscInt           n = A->rmap->n;
  PetscInt           i, j, r, m, len = 0;
  PetscInt          *lrows, *owners = A->rmap->range;
  PetscMPIInt        p = 0;
  PetscSFNode       *rrows;
  PetscSF            sf;
  const PetscScalar *xx;
  PetscScalar       *bb, *mask, *aij_a;
  Vec                xmask, lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ *)l->B->data;
  const PetscInt    *aj, *ii, *ridx;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  PetscCall(PetscMalloc1(n, &lrows));
  for (r = 0; r < n; ++r) lrows[r] = -1; /* -1 == "row not requested" */
  PetscCall(PetscMalloc1(N, &rrows));
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N);
    if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */
      PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p));
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
  /* Collect flags for rows to be zeroed */
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFDestroy(&sf));
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r)
    if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b));
  /* handle off-diagonal part of matrix */
  PetscCall(MatCreateVecs(A, &xmask, NULL));
  PetscCall(VecDuplicate(l->lvec, &lmask));
  /* build a 0/1 mask over owned columns and scatter it to the ghost layout of B */
  PetscCall(VecGetArray(xmask, &bb));
  for (i = 0; i < len; i++) bb[lrows[i]] = 1;
  PetscCall(VecRestoreArray(xmask, &bb));
  PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecDestroy(&xmask));
  if (x && b) { /* this code is buggy when the row and column layout don't match */
    PetscBool cong;

    PetscCall(MatHasCongruentLayouts(A, &cong));
    PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
    PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecGetArrayRead(l->lvec, &xx));
    PetscCall(VecGetArray(b, &bb));
  }
  PetscCall(VecGetArray(lmask, &mask));
  /* remove zeroed rows of off-diagonal matrix */
  PetscCall(MatSeqAIJGetArray(l->B, &aij_a));
  ii = aij->i;
  for (i = 0; i < len; i++) PetscCall(PetscArrayzero(PetscSafePointerPlusOffset(aij_a, ii[lrows[i]]), ii[lrows[i] + 1] - ii[lrows[i]]));
  /* loop over all elements of off process part of matrix zeroing removed columns*/
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i = 0; i < m; i++) {
      n  = ii[i + 1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];

      for (j = 0; j < n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          /* eliminated column: move its contribution to the rhs, then zero the entry */
          if (b) bb[*ridx] -= *aa * xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i = 0; i < m; i++) {
      n  = ii[i + 1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];
      for (j = 0; j < n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa * xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x && b) {
    PetscCall(VecRestoreArray(b, &bb));
PetscCall(VecRestoreArrayRead(l->lvec, &xx));
  }
  PetscCall(MatSeqAIJRestoreArray(l->B, &aij_a));
  PetscCall(VecRestoreArray(lmask, &mask));
  PetscCall(VecDestroy(&lmask));
  PetscCall(PetscFree(lrows));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ *)l->A->data)->nonew) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* yy = A*xx: overlap the ghost-value scatter with the diagonal-block multiply,
   then add the off-diagonal contribution from the ghost vector. */
static PetscErrorCode MatMult_MPIAIJ(Mat A, Vec xx, Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  PetscInt    nt;
  VecScatter  Mvctx = a->Mvctx;

  PetscFunctionBegin;
  PetscCall(VecGetLocalSize(xx, &nt));
  PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")", A->cmap->n, nt);
  PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscUseTypeMethod(a->A, mult, xx, yy); /* local work hides the communication above */
  PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscUseTypeMethod(a->B, multadd, a->lvec, yy, yy);
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Apply only the local diagonal block: xx = A_diag * bb */
static PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A, Vec bb, Vec xx)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatMultDiagonalBlock(a->A, bb, xx));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* zz = yy + A*xx with the same scatter/compute overlap as MatMult_MPIAIJ */
static PetscErrorCode MatMultAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  VecScatter  Mvctx = a->Mvctx;

  PetscFunctionBegin;
  PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
  PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* yy = A^T*xx: B^T produces ghost contributions that are added back with a reverse scatter */
static PetscErrorCode MatMultTranspose_MPIAIJ(Mat A, Vec xx, Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  /* do nondiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
  /* do local part */
  PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
  /* add partial results together */
  PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
  PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Collective test whether Bmat == Amat^T (within tol); result returned in *f on all ranks */
static PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat, Mat Bmat, PetscReal tol, PetscBool *f)
{
  MPI_Comm    comm;
  Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)Amat->data, *Bij = (Mat_MPIAIJ *)Bmat->data;
  Mat         Adia = Aij->A, Bdia = Bij->A, Aoff, Boff, *Aoffs, *Boffs;
  IS          Me, Notme;
  PetscInt    M, N, first, last, *notme, i;
  PetscBool   lf;
  PetscMPIInt size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  PetscCall(MatIsTranspose(Adia, Bdia, tol, &lf));
  PetscCallMPI(MPIU_Allreduce(&lf, f, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)Amat)));
  if (!*f) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (size == 1) PetscFunctionReturn(PETSC_SUCCESS);

  /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix.
*/
  PetscCall(MatGetSize(Amat, &M, &N));
  PetscCall(MatGetOwnershipRange(Amat, &first, &last));
  /* columns NOT owned by this rank: [0,first) and [last,M) */
  PetscCall(PetscMalloc1(N - last + first, &notme));
  for (i = 0; i < first; i++) notme[i] = i;
  for (i = last; i < M; i++) notme[i - last + first] = i;
  PetscCall(ISCreateGeneral(MPI_COMM_SELF, N - last + first, notme, PETSC_COPY_VALUES, &Notme));
  PetscCall(ISCreateStride(MPI_COMM_SELF, last - first, first, 1, &Me));
  PetscCall(MatCreateSubMatrices(Amat, 1, &Me, &Notme, MAT_INITIAL_MATRIX, &Aoffs));
  Aoff = Aoffs[0];
  /* compare A(me, notme) against B(notme, me) -- note the swapped index sets */
  PetscCall(MatCreateSubMatrices(Bmat, 1, &Notme, &Me, MAT_INITIAL_MATRIX, &Boffs));
  Boff = Boffs[0];
  PetscCall(MatIsTranspose(Aoff, Boff, tol, f));
  PetscCall(MatDestroyMatrices(1, &Aoffs));
  PetscCall(MatDestroyMatrices(1, &Boffs));
  PetscCall(ISDestroy(&Me));
  PetscCall(ISDestroy(&Notme));
  PetscCall(PetscFree(notme));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* zz = yy + A^T*xx, structured like MatMultTranspose_MPIAIJ */
static PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  /* do nondiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
  /* do local part */
  PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
  /* add partial results together */
  PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
  PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
static PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A, Vec v)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCheck(A->rmap->N == A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
  PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "row partition must equal col partition");
  PetscCall(MatGetDiagonal(a->A, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Scale every stored entry of both blocks by aa */
static PetscErrorCode MatScale_MPIAIJ(Mat A, PetscScalar aa)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatScale(a->A, aa));
  PetscCall(MatScale(a->B, aa));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Write the parallel matrix to a binary viewer: header, per-row lengths, global column
   indices, then values, each with a collective PetscViewerBinaryWriteAll(). */
static PetscErrorCode MatView_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
{
  Mat_MPIAIJ        *aij    = (Mat_MPIAIJ *)mat->data;
  Mat_SeqAIJ        *A      = (Mat_SeqAIJ *)aij->A->data;
  Mat_SeqAIJ        *B      = (Mat_SeqAIJ *)aij->B->data;
  const PetscInt    *garray = aij->garray;
  const PetscScalar *aa, *ba;
  PetscInt           header[4], M, N, m, rs, cs, cnt, i, ja, jb;
  PetscInt64         nz, hnz;
  PetscInt          *rowlens;
  PetscInt          *colidxs;
  PetscScalar       *matvals;
  PetscMPIInt        rank;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  M  = mat->rmap->N;
  N  = mat->cmap->N;
  m  = mat->rmap->n;
  rs = mat->rmap->rstart;
  cs = mat->cmap->rstart;
  nz = A->nz + B->nz;

  /* write matrix header */
  header[0] = MAT_FILE_CLASSID;
  header[1] = M;
  header[2] = N;
  PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_INT64, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
  if (rank == 0) PetscCall(PetscIntCast(hnz, &header[3]));
  PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));

  /* fill in and store row lengths */
  PetscCall(PetscMalloc1(m, &rowlens));
  for (i = 0; i < m; i++) rowlens[i] = A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i];
  PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
  PetscCall(PetscFree(rowlens));

  /* fill in and store column
indices */ 1225 PetscCall(PetscMalloc1(nz, &colidxs)); 1226 for (cnt = 0, i = 0; i < m; i++) { 1227 for (jb = B->i[i]; jb < B->i[i + 1]; jb++) { 1228 if (garray[B->j[jb]] > cs) break; 1229 colidxs[cnt++] = garray[B->j[jb]]; 1230 } 1231 for (ja = A->i[i]; ja < A->i[i + 1]; ja++) colidxs[cnt++] = A->j[ja] + cs; 1232 for (; jb < B->i[i + 1]; jb++) colidxs[cnt++] = garray[B->j[jb]]; 1233 } 1234 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz); 1235 PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 1236 PetscCall(PetscFree(colidxs)); 1237 1238 /* fill in and store nonzero values */ 1239 PetscCall(MatSeqAIJGetArrayRead(aij->A, &aa)); 1240 PetscCall(MatSeqAIJGetArrayRead(aij->B, &ba)); 1241 PetscCall(PetscMalloc1(nz, &matvals)); 1242 for (cnt = 0, i = 0; i < m; i++) { 1243 for (jb = B->i[i]; jb < B->i[i + 1]; jb++) { 1244 if (garray[B->j[jb]] > cs) break; 1245 matvals[cnt++] = ba[jb]; 1246 } 1247 for (ja = A->i[i]; ja < A->i[i + 1]; ja++) matvals[cnt++] = aa[ja]; 1248 for (; jb < B->i[i + 1]; jb++) matvals[cnt++] = ba[jb]; 1249 } 1250 PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &aa)); 1251 PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &ba)); 1252 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz); 1253 PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 1254 PetscCall(PetscFree(matvals)); 1255 1256 /* write block size option to the viewer's .info file */ 1257 PetscCall(MatView_Binary_BlockSizes(mat, viewer)); 1258 PetscFunctionReturn(PETSC_SUCCESS); 1259 } 1260 1261 #include <petscdraw.h> 1262 static PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer) 1263 { 1264 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1265 PetscMPIInt rank = aij->rank, size = aij->size; 1266 PetscBool 
isdraw, iascii, isbinary; 1267 PetscViewer sviewer; 1268 PetscViewerFormat format; 1269 1270 PetscFunctionBegin; 1271 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1272 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 1273 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1274 if (iascii) { 1275 PetscCall(PetscViewerGetFormat(viewer, &format)); 1276 if (format == PETSC_VIEWER_LOAD_BALANCE) { 1277 PetscInt i, nmax = 0, nmin = PETSC_INT_MAX, navg = 0, *nz, nzlocal = ((Mat_SeqAIJ *)aij->A->data)->nz + ((Mat_SeqAIJ *)aij->B->data)->nz; 1278 PetscCall(PetscMalloc1(size, &nz)); 1279 PetscCallMPI(MPI_Allgather(&nzlocal, 1, MPIU_INT, nz, 1, MPIU_INT, PetscObjectComm((PetscObject)mat))); 1280 for (i = 0; i < size; i++) { 1281 nmax = PetscMax(nmax, nz[i]); 1282 nmin = PetscMin(nmin, nz[i]); 1283 navg += nz[i]; 1284 } 1285 PetscCall(PetscFree(nz)); 1286 navg = navg / size; 1287 PetscCall(PetscViewerASCIIPrintf(viewer, "Load Balance - Nonzeros: Min %" PetscInt_FMT " avg %" PetscInt_FMT " max %" PetscInt_FMT "\n", nmin, navg, nmax)); 1288 PetscFunctionReturn(PETSC_SUCCESS); 1289 } 1290 PetscCall(PetscViewerGetFormat(viewer, &format)); 1291 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1292 MatInfo info; 1293 PetscInt *inodes = NULL; 1294 1295 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 1296 PetscCall(MatGetInfo(mat, MAT_LOCAL, &info)); 1297 PetscCall(MatInodeGetInodeSizes(aij->A, NULL, &inodes, NULL)); 1298 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 1299 if (!inodes) { 1300 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, 1301 info.memory)); 1302 } else { 1303 PetscCall( 1304 PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" 
PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, info.memory));
      }
      PetscCall(MatGetInfo(aij->A, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(MatGetInfo(aij->B, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(aij->Mvctx, viewer));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount, inodelimit, *inodes;
      PetscCall(MatInodeGetInodeSizes(aij->A, &inodecount, &inodes, &inodelimit));
      if (inodes) {
        PetscCall(PetscViewerASCIIPrintf(viewer, "using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n", inodecount, inodelimit));
      } else {
        PetscCall(PetscViewerASCIIPrintf(viewer, "not using I-node (on process 0) routines\n"));
      }
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      /* nothing to print for a non-factored matrix */
      PetscFunctionReturn(PETSC_SUCCESS);
    }
  } else if (isbinary) {
    if (size == 1) {
      /* sequential case: the diagonal block IS the matrix */
      PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
      PetscCall(MatView(aij->A, viewer));
    } else {
      PetscCall(MatView_MPIAIJ_Binary(mat, viewer));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (iascii && size == 1) {
    PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
    PetscCall(MatView(aij->A, viewer));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
  }

  { /* assemble the entire matrix onto first processor */
    Mat A = NULL, Av;
    IS  isrow, iscol;

    /* rank 0 requests all rows/columns; every other rank requests none */
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->rmap->N : 0, 0, 1, &isrow));
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->cmap->N : 0, 0, 1, &iscol));
    PetscCall(MatCreateSubMatrix(mat, isrow, iscol, MAT_INITIAL_MATRIX, &A));
    PetscCall(MatMPIAIJGetSeqAIJ(A, &Av, NULL, NULL));
    /* The commented code uses MatCreateSubMatrices instead */
    /*
      Mat *AA, A = NULL, Av;
      IS  isrow,iscol;

      PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow));
      PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ?
mat->cmap->N : 0,0,1,&iscol));
      PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA));
      if (rank == 0) {
        PetscCall(PetscObjectReference((PetscObject)AA[0]));
        A  = AA[0];
        Av = AA[0];
      }
      PetscCall(MatDestroySubMatrices(1,&AA));
    */
    PetscCall(ISDestroy(&iscol));
    PetscCall(ISDestroy(&isrow));
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    if (rank == 0) {
      if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)Av, ((PetscObject)mat)->name));
      PetscCall(MatView_SeqAIJ(Av, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Public view entry point: route supported viewer types to the common implementation */
PetscErrorCode MatView_MPIAIJ(Mat mat, PetscViewer viewer)
{
  PetscBool iascii, isdraw, issocket, isbinary;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
  if (iascii || isdraw || isbinary || issocket) PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat, viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Block Jacobi / local SOR relaxation: each outer iteration updates the rhs with the
   off-process coupling (bb1 = bb - B*x) and then sweeps the local diagonal block. */
static PetscErrorCode MatSOR_MPIAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data;
  Vec         bb1 = NULL;
  PetscBool   hasop;

  PetscFunctionBegin;
  if (flag == SOR_APPLY_UPPER) {
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  /* bb1 workspace is only needed when x is actually read (non-zero initial guess, extra its, or Eisenstat) */
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) PetscCall(VecDuplicate(bb, &bb1));

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }

    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
/* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_EISENSTAT) {
    Vec xx1;

    PetscCall(VecDuplicate(bb, &xx1));
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx));

    PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
    /* lazily build and cache the diagonal needed by the Eisenstat trick */
    if (!mat->diag) {
      PetscCall(MatCreateVecs(matin, &mat->diag, NULL));
      PetscCall(MatGetDiagonal(matin, mat->diag));
    }
    PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop));
    if (hasop) {
      PetscCall(MatMultDiagonalBlock(matin, xx, bb1));
    } else {
      PetscCall(VecPointwiseMult(bb1, mat->diag, xx));
    }
    PetscCall(VecAYPX(bb1, (omega - 2.0) / omega, bb));

    PetscCall(MatMultAdd(mat->B, mat->lvec, bb1, bb1));

    /* local sweep */
    PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1));
    PetscCall(VecAXPY(xx, 1.0, xx1));
    PetscCall(VecDestroy(&xx1));
  } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel SOR not supported");

  PetscCall(VecDestroy(&bb1));

  matin->factorerrortype = mat->A->factorerrortype;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* *B = A permuted by rowp (rows) and colp (columns). The permutations are inverted via
   PetscSF reductions so each rank learns the destination of its rows/columns, then the
   permuted matrix is preallocated exactly and filled with MatSetValues. */
static PetscErrorCode MatPermute_MPIAIJ(Mat A, IS rowp, IS colp, Mat *B)
{
  Mat             aA, aB, Aperm;
  const PetscInt *rwant, *cwant, *gcols, *ai, *bi, *aj, *bj;
  PetscScalar    *aa, *ba;
  PetscInt        i, j, m, n, ng, anz, bnz, *dnnz, *onnz, *tdnnz, *tonnz, *rdest, *cdest, *work, *gcdest;
  PetscSF         rowsf, sf;
  IS              parcolp = NULL;
  PetscBool       done;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(A, &m, &n));
  PetscCall(ISGetIndices(rowp, &rwant));
  PetscCall(ISGetIndices(colp, &cwant));
  PetscCall(PetscMalloc3(PetscMax(m, n), &work, m, &rdest, n, &cdest));

  /* Invert row permutation to find out where my rows should go */
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &rowsf));
  PetscCall(PetscSFSetGraphLayout(rowsf, A->rmap, A->rmap->n, NULL, PETSC_OWN_POINTER, rwant));
  PetscCall(PetscSFSetFromOptions(rowsf));
  for (i = 0; i < m; i++) work[i] = A->rmap->rstart + i;
  PetscCall(PetscSFReduceBegin(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));

  /* Invert column permutation to find out where my columns should go */
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraphLayout(sf, A->cmap, A->cmap->n, NULL, PETSC_OWN_POINTER, cwant));
  PetscCall(PetscSFSetFromOptions(sf));
  for (i = 0; i < n; i++) work[i] = A->cmap->rstart + i;
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, work, cdest, MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, work, cdest, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));

  PetscCall(ISRestoreIndices(rowp, &rwant));
  PetscCall(ISRestoreIndices(colp, &cwant));
  PetscCall(MatMPIAIJGetSeqAIJ(A, &aA, &aB, &gcols));

  /* Find out where my gcols should go */
  PetscCall(MatGetSize(aB, NULL, &ng));
  PetscCall(PetscMalloc1(ng, &gcdest));
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraphLayout(sf, A->cmap, ng, NULL, PETSC_OWN_POINTER, gcols));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFBcastBegin(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
PetscCall(PetscSFDestroy(&sf)); 1543 1544 PetscCall(PetscCalloc4(m, &dnnz, m, &onnz, m, &tdnnz, m, &tonnz)); 1545 PetscCall(MatGetRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done)); 1546 PetscCall(MatGetRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done)); 1547 for (i = 0; i < m; i++) { 1548 PetscInt row = rdest[i]; 1549 PetscMPIInt rowner; 1550 PetscCall(PetscLayoutFindOwner(A->rmap, row, &rowner)); 1551 for (j = ai[i]; j < ai[i + 1]; j++) { 1552 PetscInt col = cdest[aj[j]]; 1553 PetscMPIInt cowner; 1554 PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); /* Could build an index for the columns to eliminate this search */ 1555 if (rowner == cowner) dnnz[i]++; 1556 else onnz[i]++; 1557 } 1558 for (j = bi[i]; j < bi[i + 1]; j++) { 1559 PetscInt col = gcdest[bj[j]]; 1560 PetscMPIInt cowner; 1561 PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); 1562 if (rowner == cowner) dnnz[i]++; 1563 else onnz[i]++; 1564 } 1565 } 1566 PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE)); 1567 PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE)); 1568 PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE)); 1569 PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE)); 1570 PetscCall(PetscSFDestroy(&rowsf)); 1571 1572 PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A), A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N, 0, tdnnz, 0, tonnz, &Aperm)); 1573 PetscCall(MatSeqAIJGetArray(aA, &aa)); 1574 PetscCall(MatSeqAIJGetArray(aB, &ba)); 1575 for (i = 0; i < m; i++) { 1576 PetscInt *acols = dnnz, *bcols = onnz; /* Repurpose now-unneeded arrays */ 1577 PetscInt j0, rowlen; 1578 rowlen = ai[i + 1] - ai[i]; 1579 for (j0 = j = 0; j < rowlen; j0 = j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1580 for (; j < PetscMin(rowlen, j0 + m); j++) acols[j - j0] = cdest[aj[ai[i] + j]]; 1581 PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, acols, aa + ai[i] + j0, 
INSERT_VALUES)); 1582 } 1583 rowlen = bi[i + 1] - bi[i]; 1584 for (j0 = j = 0; j < rowlen; j0 = j) { 1585 for (; j < PetscMin(rowlen, j0 + m); j++) bcols[j - j0] = gcdest[bj[bi[i] + j]]; 1586 PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, bcols, ba + bi[i] + j0, INSERT_VALUES)); 1587 } 1588 } 1589 PetscCall(MatAssemblyBegin(Aperm, MAT_FINAL_ASSEMBLY)); 1590 PetscCall(MatAssemblyEnd(Aperm, MAT_FINAL_ASSEMBLY)); 1591 PetscCall(MatRestoreRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done)); 1592 PetscCall(MatRestoreRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done)); 1593 PetscCall(MatSeqAIJRestoreArray(aA, &aa)); 1594 PetscCall(MatSeqAIJRestoreArray(aB, &ba)); 1595 PetscCall(PetscFree4(dnnz, onnz, tdnnz, tonnz)); 1596 PetscCall(PetscFree3(work, rdest, cdest)); 1597 PetscCall(PetscFree(gcdest)); 1598 if (parcolp) PetscCall(ISDestroy(&colp)); 1599 *B = Aperm; 1600 PetscFunctionReturn(PETSC_SUCCESS); 1601 } 1602 1603 static PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[]) 1604 { 1605 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1606 1607 PetscFunctionBegin; 1608 PetscCall(MatGetSize(aij->B, NULL, nghosts)); 1609 if (ghosts) *ghosts = aij->garray; 1610 PetscFunctionReturn(PETSC_SUCCESS); 1611 } 1612 1613 static PetscErrorCode MatGetInfo_MPIAIJ(Mat matin, MatInfoType flag, MatInfo *info) 1614 { 1615 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data; 1616 Mat A = mat->A, B = mat->B; 1617 PetscLogDouble isend[5], irecv[5]; 1618 1619 PetscFunctionBegin; 1620 info->block_size = 1.0; 1621 PetscCall(MatGetInfo(A, MAT_LOCAL, info)); 1622 1623 isend[0] = info->nz_used; 1624 isend[1] = info->nz_allocated; 1625 isend[2] = info->nz_unneeded; 1626 isend[3] = info->memory; 1627 isend[4] = info->mallocs; 1628 1629 PetscCall(MatGetInfo(B, MAT_LOCAL, info)); 1630 1631 isend[0] += info->nz_used; 1632 isend[1] += info->nz_allocated; 1633 isend[2] += info->nz_unneeded; 1634 isend[3] += info->memory; 1635 isend[4] += info->mallocs; 
1636 if (flag == MAT_LOCAL) { 1637 info->nz_used = isend[0]; 1638 info->nz_allocated = isend[1]; 1639 info->nz_unneeded = isend[2]; 1640 info->memory = isend[3]; 1641 info->mallocs = isend[4]; 1642 } else if (flag == MAT_GLOBAL_MAX) { 1643 PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin))); 1644 1645 info->nz_used = irecv[0]; 1646 info->nz_allocated = irecv[1]; 1647 info->nz_unneeded = irecv[2]; 1648 info->memory = irecv[3]; 1649 info->mallocs = irecv[4]; 1650 } else if (flag == MAT_GLOBAL_SUM) { 1651 PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin))); 1652 1653 info->nz_used = irecv[0]; 1654 info->nz_allocated = irecv[1]; 1655 info->nz_unneeded = irecv[2]; 1656 info->memory = irecv[3]; 1657 info->mallocs = irecv[4]; 1658 } 1659 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1660 info->fill_ratio_needed = 0; 1661 info->factor_mallocs = 0; 1662 PetscFunctionReturn(PETSC_SUCCESS); 1663 } 1664 1665 PetscErrorCode MatSetOption_MPIAIJ(Mat A, MatOption op, PetscBool flg) 1666 { 1667 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1668 1669 PetscFunctionBegin; 1670 switch (op) { 1671 case MAT_NEW_NONZERO_LOCATIONS: 1672 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1673 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1674 case MAT_KEEP_NONZERO_PATTERN: 1675 case MAT_NEW_NONZERO_LOCATION_ERR: 1676 case MAT_USE_INODES: 1677 case MAT_IGNORE_ZERO_ENTRIES: 1678 case MAT_FORM_EXPLICIT_TRANSPOSE: 1679 MatCheckPreallocated(A, 1); 1680 PetscCall(MatSetOption(a->A, op, flg)); 1681 PetscCall(MatSetOption(a->B, op, flg)); 1682 break; 1683 case MAT_ROW_ORIENTED: 1684 MatCheckPreallocated(A, 1); 1685 a->roworiented = flg; 1686 1687 PetscCall(MatSetOption(a->A, op, flg)); 1688 PetscCall(MatSetOption(a->B, op, flg)); 1689 break; 1690 case MAT_IGNORE_OFF_PROC_ENTRIES: 1691 a->donotstash = flg; 1692 break; 1693 /* Symmetry flags are handled directly by MatSetOption() and they 
don't affect preallocation */ 1694 case MAT_SPD: 1695 case MAT_SYMMETRIC: 1696 case MAT_STRUCTURALLY_SYMMETRIC: 1697 case MAT_HERMITIAN: 1698 case MAT_SYMMETRY_ETERNAL: 1699 case MAT_STRUCTURAL_SYMMETRY_ETERNAL: 1700 case MAT_SPD_ETERNAL: 1701 /* if the diagonal matrix is square it inherits some of the properties above */ 1702 if (a->A && A->rmap->n == A->cmap->n) PetscCall(MatSetOption(a->A, op, flg)); 1703 break; 1704 case MAT_SUBMAT_SINGLEIS: 1705 A->submat_singleis = flg; 1706 break; 1707 default: 1708 break; 1709 } 1710 PetscFunctionReturn(PETSC_SUCCESS); 1711 } 1712 1713 PetscErrorCode MatGetRow_MPIAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1714 { 1715 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data; 1716 PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p; 1717 PetscInt i, *cworkA, *cworkB, **pcA, **pcB, cstart = matin->cmap->rstart; 1718 PetscInt nztot, nzA, nzB, lrow, rstart = matin->rmap->rstart, rend = matin->rmap->rend; 1719 PetscInt *cmap, *idx_p; 1720 1721 PetscFunctionBegin; 1722 PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active"); 1723 mat->getrowactive = PETSC_TRUE; 1724 1725 if (!mat->rowvalues && (idx || v)) { 1726 /* 1727 allocate enough space to hold information from the longest row. 
1728 */ 1729 Mat_SeqAIJ *Aa = (Mat_SeqAIJ *)mat->A->data, *Ba = (Mat_SeqAIJ *)mat->B->data; 1730 PetscInt max = 1, tmp; 1731 for (i = 0; i < matin->rmap->n; i++) { 1732 tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i]; 1733 if (max < tmp) max = tmp; 1734 } 1735 PetscCall(PetscMalloc2(max, &mat->rowvalues, max, &mat->rowindices)); 1736 } 1737 1738 PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows"); 1739 lrow = row - rstart; 1740 1741 pvA = &vworkA; 1742 pcA = &cworkA; 1743 pvB = &vworkB; 1744 pcB = &cworkB; 1745 if (!v) { 1746 pvA = NULL; 1747 pvB = NULL; 1748 } 1749 if (!idx) { 1750 pcA = NULL; 1751 if (!v) pcB = NULL; 1752 } 1753 PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA)); 1754 PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB)); 1755 nztot = nzA + nzB; 1756 1757 cmap = mat->garray; 1758 if (v || idx) { 1759 if (nztot) { 1760 /* Sort by increasing column numbers, assuming A and B already sorted */ 1761 PetscInt imark = -1; 1762 if (v) { 1763 *v = v_p = mat->rowvalues; 1764 for (i = 0; i < nzB; i++) { 1765 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1766 else break; 1767 } 1768 imark = i; 1769 for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i]; 1770 for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i]; 1771 } 1772 if (idx) { 1773 *idx = idx_p = mat->rowindices; 1774 if (imark > -1) { 1775 for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i]]; 1776 } else { 1777 for (i = 0; i < nzB; i++) { 1778 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1779 else break; 1780 } 1781 imark = i; 1782 } 1783 for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart + cworkA[i]; 1784 for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i]]; 1785 } 1786 } else { 1787 if (idx) *idx = NULL; 1788 if (v) *v = NULL; 1789 } 1790 } 1791 *nz = nztot; 1792 PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA)); 1793 PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, 
&nzB, pcB, pvB)); 1794 PetscFunctionReturn(PETSC_SUCCESS); 1795 } 1796 1797 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1798 { 1799 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1800 1801 PetscFunctionBegin; 1802 PetscCheck(aij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first"); 1803 aij->getrowactive = PETSC_FALSE; 1804 PetscFunctionReturn(PETSC_SUCCESS); 1805 } 1806 1807 static PetscErrorCode MatNorm_MPIAIJ(Mat mat, NormType type, PetscReal *norm) 1808 { 1809 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1810 Mat_SeqAIJ *amat = (Mat_SeqAIJ *)aij->A->data, *bmat = (Mat_SeqAIJ *)aij->B->data; 1811 PetscInt i, j, cstart = mat->cmap->rstart; 1812 PetscReal sum = 0.0; 1813 const MatScalar *v, *amata, *bmata; 1814 1815 PetscFunctionBegin; 1816 if (aij->size == 1) { 1817 PetscCall(MatNorm(aij->A, type, norm)); 1818 } else { 1819 PetscCall(MatSeqAIJGetArrayRead(aij->A, &amata)); 1820 PetscCall(MatSeqAIJGetArrayRead(aij->B, &bmata)); 1821 if (type == NORM_FROBENIUS) { 1822 v = amata; 1823 for (i = 0; i < amat->nz; i++) { 1824 sum += PetscRealPart(PetscConj(*v) * (*v)); 1825 v++; 1826 } 1827 v = bmata; 1828 for (i = 0; i < bmat->nz; i++) { 1829 sum += PetscRealPart(PetscConj(*v) * (*v)); 1830 v++; 1831 } 1832 PetscCallMPI(MPIU_Allreduce(&sum, norm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 1833 *norm = PetscSqrtReal(*norm); 1834 PetscCall(PetscLogFlops(2.0 * amat->nz + 2.0 * bmat->nz)); 1835 } else if (type == NORM_1) { /* max column norm */ 1836 PetscReal *tmp; 1837 PetscInt *jj, *garray = aij->garray; 1838 PetscCall(PetscCalloc1(mat->cmap->N + 1, &tmp)); 1839 *norm = 0.0; 1840 v = amata; 1841 jj = amat->j; 1842 for (j = 0; j < amat->nz; j++) { 1843 tmp[cstart + *jj++] += PetscAbsScalar(*v); 1844 v++; 1845 } 1846 v = bmata; 1847 jj = bmat->j; 1848 for (j = 0; j < bmat->nz; j++) { 1849 tmp[garray[*jj++]] += PetscAbsScalar(*v); 1850 v++; 1851 } 1852 
PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, tmp, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 1853 for (j = 0; j < mat->cmap->N; j++) { 1854 if (tmp[j] > *norm) *norm = tmp[j]; 1855 } 1856 PetscCall(PetscFree(tmp)); 1857 PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0))); 1858 } else if (type == NORM_INFINITY) { /* max row norm */ 1859 PetscReal ntemp = 0.0; 1860 for (j = 0; j < aij->A->rmap->n; j++) { 1861 v = PetscSafePointerPlusOffset(amata, amat->i[j]); 1862 sum = 0.0; 1863 for (i = 0; i < amat->i[j + 1] - amat->i[j]; i++) { 1864 sum += PetscAbsScalar(*v); 1865 v++; 1866 } 1867 v = PetscSafePointerPlusOffset(bmata, bmat->i[j]); 1868 for (i = 0; i < bmat->i[j + 1] - bmat->i[j]; i++) { 1869 sum += PetscAbsScalar(*v); 1870 v++; 1871 } 1872 if (sum > ntemp) ntemp = sum; 1873 } 1874 PetscCallMPI(MPIU_Allreduce(&ntemp, norm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat))); 1875 PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0))); 1876 } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for two norm"); 1877 PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &amata)); 1878 PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &bmata)); 1879 } 1880 PetscFunctionReturn(PETSC_SUCCESS); 1881 } 1882 1883 static PetscErrorCode MatTranspose_MPIAIJ(Mat A, MatReuse reuse, Mat *matout) 1884 { 1885 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *b; 1886 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *)a->A->data, *Bloc = (Mat_SeqAIJ *)a->B->data, *sub_B_diag; 1887 PetscInt M = A->rmap->N, N = A->cmap->N, ma, na, mb, nb, row, *cols, *cols_tmp, *B_diag_ilen, i, ncol, A_diag_ncol; 1888 const PetscInt *ai, *aj, *bi, *bj, *B_diag_i; 1889 Mat B, A_diag, *B_diag; 1890 const MatScalar *pbv, *bv; 1891 1892 PetscFunctionBegin; 1893 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout)); 1894 ma = A->rmap->n; 1895 na = A->cmap->n; 1896 mb = a->B->rmap->n; 1897 nb = a->B->cmap->n; 1898 ai = Aloc->i; 1899 aj = 
Aloc->j; 1900 bi = Bloc->i; 1901 bj = Bloc->j; 1902 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1903 PetscInt *d_nnz, *g_nnz, *o_nnz; 1904 PetscSFNode *oloc; 1905 PETSC_UNUSED PetscSF sf; 1906 1907 PetscCall(PetscMalloc4(na, &d_nnz, na, &o_nnz, nb, &g_nnz, nb, &oloc)); 1908 /* compute d_nnz for preallocation */ 1909 PetscCall(PetscArrayzero(d_nnz, na)); 1910 for (i = 0; i < ai[ma]; i++) d_nnz[aj[i]]++; 1911 /* compute local off-diagonal contributions */ 1912 PetscCall(PetscArrayzero(g_nnz, nb)); 1913 for (i = 0; i < bi[ma]; i++) g_nnz[bj[i]]++; 1914 /* map those to global */ 1915 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1916 PetscCall(PetscSFSetGraphLayout(sf, A->cmap, nb, NULL, PETSC_USE_POINTER, a->garray)); 1917 PetscCall(PetscSFSetFromOptions(sf)); 1918 PetscCall(PetscArrayzero(o_nnz, na)); 1919 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM)); 1920 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM)); 1921 PetscCall(PetscSFDestroy(&sf)); 1922 1923 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 1924 PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M)); 1925 PetscCall(MatSetBlockSizes(B, A->cmap->bs, A->rmap->bs)); 1926 PetscCall(MatSetType(B, ((PetscObject)A)->type_name)); 1927 PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz)); 1928 PetscCall(PetscFree4(d_nnz, o_nnz, g_nnz, oloc)); 1929 } else { 1930 B = *matout; 1931 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 1932 } 1933 1934 b = (Mat_MPIAIJ *)B->data; 1935 A_diag = a->A; 1936 B_diag = &b->A; 1937 sub_B_diag = (Mat_SeqAIJ *)(*B_diag)->data; 1938 A_diag_ncol = A_diag->cmap->N; 1939 B_diag_ilen = sub_B_diag->ilen; 1940 B_diag_i = sub_B_diag->i; 1941 1942 /* Set ilen for diagonal of B */ 1943 for (i = 0; i < A_diag_ncol; i++) B_diag_ilen[i] = B_diag_i[i + 1] - B_diag_i[i]; 1944 1945 /* Transpose the diagonal part of the matrix. 
In contrast to the off-diagonal part, this can be done 1946 very quickly (=without using MatSetValues), because all writes are local. */ 1947 PetscCall(MatTransposeSetPrecursor(A_diag, *B_diag)); 1948 PetscCall(MatTranspose(A_diag, MAT_REUSE_MATRIX, B_diag)); 1949 1950 /* copy over the B part */ 1951 PetscCall(PetscMalloc1(bi[mb], &cols)); 1952 PetscCall(MatSeqAIJGetArrayRead(a->B, &bv)); 1953 pbv = bv; 1954 row = A->rmap->rstart; 1955 for (i = 0; i < bi[mb]; i++) cols[i] = a->garray[bj[i]]; 1956 cols_tmp = cols; 1957 for (i = 0; i < mb; i++) { 1958 ncol = bi[i + 1] - bi[i]; 1959 PetscCall(MatSetValues(B, ncol, cols_tmp, 1, &row, pbv, INSERT_VALUES)); 1960 row++; 1961 if (pbv) pbv += ncol; 1962 if (cols_tmp) cols_tmp += ncol; 1963 } 1964 PetscCall(PetscFree(cols)); 1965 PetscCall(MatSeqAIJRestoreArrayRead(a->B, &bv)); 1966 1967 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 1968 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 1969 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1970 *matout = B; 1971 } else { 1972 PetscCall(MatHeaderMerge(A, &B)); 1973 } 1974 PetscFunctionReturn(PETSC_SUCCESS); 1975 } 1976 1977 static PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat, Vec ll, Vec rr) 1978 { 1979 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1980 Mat a = aij->A, b = aij->B; 1981 PetscInt s1, s2, s3; 1982 1983 PetscFunctionBegin; 1984 PetscCall(MatGetLocalSize(mat, &s2, &s3)); 1985 if (rr) { 1986 PetscCall(VecGetLocalSize(rr, &s1)); 1987 PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size"); 1988 /* Overlap communication with computation. 
*/
    PetscCall(VecScatterBegin(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
  }
  if (ll) {
    PetscCall(VecGetLocalSize(ll, &s1));
    PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size");
    /* left-scale the off-diagonal block with the locally owned part of ll */
    PetscUseTypeMethod(b, diagonalscale, ll, NULL);
  }
  /* scale the diagonal block */
  PetscUseTypeMethod(a, diagonalscale, ll, rr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    PetscCall(VecScatterEnd(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscUseTypeMethod(b, diagonalscale, NULL, aij->lvec);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Clear the factored state of the matrix by delegating to the sequential diagonal block.
   NOTE(review): only a->A is reset here; presumably the off-diagonal block carries no
   factorization state of its own — confirm against the SeqAIJ factorization code. */
static PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatSetUnfactored(a->A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Test whether two MPIAIJ matrices are equal: each process compares its diagonal (A vs C)
   and off-diagonal (B vs D) sequential blocks, then the per-process results are combined
   with a logical-AND reduction so every rank returns the same global answer in *flag. */
static PetscErrorCode MatEqual_MPIAIJ(Mat A, Mat B, PetscBool *flag)
{
  Mat_MPIAIJ *matB = (Mat_MPIAIJ *)B->data, *matA = (Mat_MPIAIJ *)A->data;
  Mat         a, b, c, d;
  PetscBool   flg;

  PetscFunctionBegin;
  a = matA->A;
  b = matA->B;
  c = matB->A;
  d = matB->B;

  /* compare diagonal blocks first; skip the off-diagonal comparison if they already differ */
  PetscCall(MatEqual(a, c, &flg));
  if (flg) PetscCall(MatEqual(b, d, &flg));
  /* collective: all ranks must agree for the matrices to be equal */
  PetscCallMPI(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Copy the values of A into B, using the fast block-wise path only when the
   nonzero patterns match and both matrices share the same copy implementation. */
static PetscErrorCode MatCopy_MPIAIJ(Mat A, Mat B, MatStructure str)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;

  PetscFunctionBegin;
  /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy.
*/
  if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
    /* because of the column compression in the off-processor part of the matrix a->B,
       the number of columns in a->B and b->B may be different, hence we cannot call
       the MatCopy() directly on the two parts. If need be, we can provide a more
       efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
       then copying the submatrices */
    PetscCall(MatCopy_Basic(A, B, str));
  } else {
    /* same pattern and same implementation: copy the two sequential blocks directly */
    PetscCall(MatCopy(a->A, b->A, str));
    PetscCall(MatCopy(a->B, b->B, str));
  }
  PetscCall(PetscObjectStateIncrease((PetscObject)B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   Computes the number of nonzeros per row needed for preallocation when X and Y
   have different nonzero structure.

   Input:
     m          - number of local rows
     xi, xj     - CSR row offsets and column indices of X's block
     xltog      - maps X's (compressed) local column indices to global column numbers
     yi, yj     - CSR row offsets and column indices of Y's block
     yltog      - maps Y's local column indices to global column numbers
   Output:
     nnz        - nnz[i] is the number of distinct global columns in row i of X union Y

   Each row is a merge of two sorted sequences (X's and Y's global columns), counting
   each common column only once.
*/
PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m, const PetscInt *xi, const PetscInt *xj, const PetscInt *xltog, const PetscInt *yi, const PetscInt *yj, const PetscInt *yltog, PetscInt *nnz)
{
  PetscInt i, j, k, nzx, nzy;

  PetscFunctionBegin;
  /* Set the number of nonzeros in the new matrix */
  for (i = 0; i < m; i++) {
    const PetscInt *xjj = PetscSafePointerPlusOffset(xj, xi[i]), *yjj = PetscSafePointerPlusOffset(yj, yi[i]);
    nzx    = xi[i + 1] - xi[i];
    nzy    = yi[i + 1] - yi[i];
    nnz[i] = 0;
    for (j = 0, k = 0; j < nzx; j++) {                                /* Point in X */
      for (; k < nzy && yltog[yjj[k]] < xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
      if (k < nzy && yltog[yjj[k]] == xltog[xjj[j]]) k++;             /* Skip duplicate */
      nnz[i]++;
    }
    /* count Y columns past the end of X's row */
    for (; k < nzy; k++) nnz[i]++;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz)
{
  PetscInt    m = Y->rmap->N;
  Mat_SeqAIJ *x = (Mat_SeqAIJ *)X->data;
  Mat_SeqAIJ *y = (Mat_SeqAIJ *)Y->data;

  PetscFunctionBegin;
  /* merge the CSR structures of X and Y using the caller-supplied column maps */
  PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Y = a*X + Y. Three strategies depending on how the nonzero patterns relate:
   - SAME_NONZERO_PATTERN:   apply AXPY directly to the diagonal and off-diagonal blocks;
   - SUBSET_NONZERO_PATTERN: fall back to the generic element-wise implementation;
   - otherwise: build a fresh matrix B preallocated for the union pattern, accumulate
     a*X + Y into it, and replace Y's internals with B's via MatHeaderMerge(). */
static PetscErrorCode MatAXPY_MPIAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
{
  Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data, *yy = (Mat_MPIAIJ *)Y->data;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscCall(MatAXPY(yy->A, a, xx->A, str));
    PetscCall(MatAXPY(yy->B, a, xx->B, str));
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    PetscCall(MatAXPY_Basic(Y, a, X, str));
  } else {
    Mat       B;
    PetscInt *nnz_d, *nnz_o;

    /* per-row counts of the union pattern: nnz_d for the diagonal block, nnz_o for the off-diagonal block */
    PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
    PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
    PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
    PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
    PetscCall(MatSetLayouts(B, Y->rmap, Y->cmap));
    PetscCall(MatSetType(B, ((PetscObject)Y)->type_name));
    PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A, xx->A, nnz_d));
    /* off-diagonal blocks are column-compressed, so their garray maps are needed to merge */
    PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
    PetscCall(MatMPIAIJSetPreallocation(B, 0, nnz_d, 0, nnz_o));
    PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
    /* steal B's guts into Y; B itself is destroyed by the merge */
    PetscCall(MatHeaderMerge(Y, &B));
    PetscCall(PetscFree(nnz_d));
    PetscCall(PetscFree(nnz_o));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat);

/* Complex-conjugate every entry of the matrix; a no-op for real scalar builds. */
static PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
{
  PetscFunctionBegin;
  if (PetscDefined(USE_COMPLEX)) {
    Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

    PetscCall(MatConjugate_SeqAIJ(aij->A));
    PetscCall(MatConjugate_SeqAIJ(aij->B));
  }

  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Keep only the real part of every entry, applied block-wise. */
static PetscErrorCode MatRealPart_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatRealPart(a->A));
  PetscCall(MatRealPart(a->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Keep only the imaginary part of every entry, applied block-wise. */
static PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatImaginaryPart(a->A));
  PetscCall(MatImaginaryPart(a->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* For each local row, find the entry of maximum absolute value and (optionally) its
   global column index: take the winner of the diagonal block and the off-diagonal
   block, translating block-local column indices to global numbering. */
static PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *a = (Mat_MPIAIJ *)A->data;
  PetscInt           i, *idxb = NULL, m = A->rmap->n;
  PetscScalar       *vv;
  Vec                vB, vA;
  const PetscScalar *va, *vb;

  PetscFunctionBegin;
  /* row-wise maxima of the diagonal block; idx[] comes back in A's local column numbering */
  PetscCall(MatCreateVecs(a->A, NULL, &vA));
  PetscCall(MatGetRowMaxAbs(a->A, vA, idx));

  PetscCall(VecGetArrayRead(vA, &va));
  if (idx) {
    /* shift diagonal-block indices to global numbering; a zero maximum means the row
       had no contributing entry, so its index is left untouched */
    for (i = 0; i < m; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  /* row-wise maxima of the off-diagonal block, indices in B's compressed numbering */
  PetscCall(MatCreateVecs(a->B, NULL, &vB));
  PetscCall(PetscMalloc1(m, &idxb));
  PetscCall(MatGetRowMaxAbs(a->B, vB, idxb));

  PetscCall(VecGetArrayWrite(v, &vv));
  PetscCall(VecGetArrayRead(vB, &vb));
  for (i = 0; i < m; i++) {
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
      /* off-diagonal entry wins; garray maps compressed column to global column */
      vv[i] = vb[i];
      if (idx) idx[i] = a->garray[idxb[i]];
    } else {
      vv[i] = va[i];
      /* tie-break: on equal magnitude prefer the smaller global column index */
      if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) idx[i] = a->garray[idxb[i]];
    }
  }
  PetscCall(VecRestoreArrayWrite(v, &vv));
  PetscCall(VecRestoreArrayRead(vA, &va));
  PetscCall(VecRestoreArrayRead(vB, &vb));
  PetscCall(PetscFree(idxb));
  PetscCall(VecDestroy(&vA));
  PetscCall(VecDestroy(&vB));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static
PetscErrorCode MatGetRowSumAbs_MPIAIJ(Mat A, Vec v)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  Vec         vB, vA;

  PetscFunctionBegin;
  /* v[i] = sum of |entries| in local row i: the diagonal and off-diagonal blocks
     partition each row, so the two block-wise sums simply add */
  PetscCall(MatCreateVecs(a->A, NULL, &vA));
  PetscCall(MatGetRowSumAbs(a->A, vA));
  PetscCall(MatCreateVecs(a->B, NULL, &vB));
  PetscCall(MatGetRowSumAbs(a->B, vB));
  PetscCall(VecAXPY(vA, 1.0, vB));
  PetscCall(VecDestroy(&vB));
  PetscCall(VecCopy(vA, v));
  PetscCall(VecDestroy(&vA));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* For each local row, find the entry of minimum absolute value and (optionally) its
   global column index. Entries not stored in the off-diagonal block are implicit
   zeros, so a row that does not fill all off-process columns has minimum |value| 0. */
static PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
  PetscInt           m = A->rmap->n, n = A->cmap->n;
  PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray;
  PetscInt          *diagIdx, *offdiagIdx;
  Vec                diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt           r, j, col, ncols, *bi, *bj;
  Mat                B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* everything is in the diagonal block; delegate directly, writing into v's array */
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMinAbs(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (n == 0) {
    if (m) {
      /* this process owns rows but no columns: every stored row is empty */
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = 0.0;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  /* general case: compute block-wise minima, then merge below */
  PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx));

  /* Get
offdiagIdx[] for implicit 0.0 */ 2259 PetscCall(MatSeqAIJGetArrayRead(B, &bav)); 2260 ba = bav; 2261 bi = b->i; 2262 bj = b->j; 2263 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2264 for (r = 0; r < m; r++) { 2265 ncols = bi[r + 1] - bi[r]; 2266 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2267 offdiagA[r] = *ba; 2268 offdiagIdx[r] = cmap[0]; 2269 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2270 offdiagA[r] = 0.0; 2271 2272 /* Find first hole in the cmap */ 2273 for (j = 0; j < ncols; j++) { 2274 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2275 if (col > j && j < cstart) { 2276 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2277 break; 2278 } else if (col > j + n && j >= cstart) { 2279 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2280 break; 2281 } 2282 } 2283 if (j == ncols && ncols < A->cmap->N - n) { 2284 /* a hole is outside compressed Bcols */ 2285 if (ncols == 0) { 2286 if (cstart) { 2287 offdiagIdx[r] = 0; 2288 } else offdiagIdx[r] = cend; 2289 } else { /* ncols > 0 */ 2290 offdiagIdx[r] = cmap[ncols - 1] + 1; 2291 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2292 } 2293 } 2294 } 2295 2296 for (j = 0; j < ncols; j++) { 2297 if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) { 2298 offdiagA[r] = *ba; 2299 offdiagIdx[r] = cmap[*bj]; 2300 } 2301 ba++; 2302 bj++; 2303 } 2304 } 2305 2306 PetscCall(VecGetArrayWrite(v, &a)); 2307 PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA)); 2308 for (r = 0; r < m; ++r) { 2309 if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) { 2310 a[r] = diagA[r]; 2311 if (idx) idx[r] = cstart + diagIdx[r]; 2312 } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) { 2313 a[r] = diagA[r]; 2314 if (idx) { 2315 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2316 idx[r] = cstart + diagIdx[r]; 2317 } else idx[r] = offdiagIdx[r]; 2318 } 2319 } else { 2320 a[r] = offdiagA[r]; 2321 if (idx) 
      idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Compute the minimum entry (and optionally its global column index) of each locally owned row
   by combining the row minima of the local diagonal block mat->A with those of the off-diagonal
   block mat->B; implicit (unstored) zeros in B are taken into account. */
static PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
  PetscInt           m = A->rmap->n, n = A->cmap->n;
  PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray;
  PetscInt          *diagIdx, *offdiagIdx;
  Vec                diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt           r, j, col, ncols, *bi, *bj;
  Mat                B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* all columns are local on this rank: the diagonal block alone determines the row minima */
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMin(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (n == 0) {
    /* this rank owns no columns: every row minimum defaults to +max with index -1 */
    if (m) {
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = PETSC_MAX_REAL;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscCalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMin(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r]   = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse so already KNOW minimum is 0.0 or lower */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    /* scan the stored entries of this B row for a value below the implicit 0.0 found above */
    for (j = 0; j < ncols; j++) {
      if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {
        offdiagA[r]   = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  /* merge diagonal-block and off-diagonal-block minima; on a tie the smaller global column wins */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Compute the maximum entry (and optionally its global column index) of each locally owned row;
   structure mirrors MatGetRowMin_MPIAIJ above. */
static PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
  PetscInt           m = A->rmap->n, n = A->cmap->n;
  PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray;
  PetscInt          *diagIdx, *offdiagIdx;
  Vec                diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt           r, j, col, ncols, *bi, *bj;
  Mat                B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* all columns are local on this rank: the diagonal block alone determines the row maxima */
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMax(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (n == 0) {
    /* this rank owns no columns: every row maximum defaults to -max with index -1 */
    if (m) {
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = PETSC_MIN_REAL;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMax(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r]   = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    /* scan the stored entries of this B row for a value above the implicit 0.0 found above */
    for (j = 0; j < ncols; j++) {
      if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {
        offdiagA[r]   = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  /* merge diagonal-block and off-diagonal-block maxima; on a tie the smaller global column wins */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Return in *newmat a sequential matrix holding the nonzero structure (no values) of the
   whole parallel matrix, obtained through MatCreateSubMatrix_MPIAIJ_All(). */
PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat, Mat *newmat)
{
  Mat *dummy;

  PetscFunctionBegin;
  PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat, MAT_DO_NOT_GET_VALUES, MAT_INITIAL_MATRIX, &dummy));
  *newmat = *dummy;
  PetscCall(PetscFree(dummy)); /* free only the array wrapper; the Mat itself is handed to the caller */
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Invert the point-block diagonal; all diagonal blocks live in the local diagonal part a->A */
static PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A, const PetscScalar **values)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatInvertBlockDiagonal(a->A, values));
  /* propagate any factorization-error flag from the sequential block to the parallel matrix */
  A->factorerrortype = a->A->factorerrortype;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Fill the matrix's (preallocated or assembled) nonzero locations with random values */
static PetscErrorCode MatSetRandom_MPIAIJ(Mat x, PetscRandom rctx)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)x->data;

  PetscFunctionBegin;
  PetscCheck(x->assembled || x->preallocated, PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
  PetscCall(MatSetRandom(aij->A, rctx));
  if (x->assembled) {
    PetscCall(MatSetRandom(aij->B, rctx));
  } else {
    /* not yet assembled: avoid generating entries of B inside the locally owned column range */
    PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B, x->cmap->rstart, x->cmap->rend, rctx));
  }
  PetscCall(MatAssemblyBegin(x, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(x, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Select the scalable or the basic implementation of MatIncreaseOverlap() for this matrix */
static PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A, PetscBool sc)
{
  PetscFunctionBegin;
  if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
  else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMPIAIJGetNumberNonzeros - gets the number of nonzeros in the matrix on this MPI rank

  Not Collective

  Input Parameter:
. A - the matrix

  Output Parameter:
. nz - the number of nonzeros

  Level: advanced

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`
@*/
PetscErrorCode MatMPIAIJGetNumberNonzeros(Mat A, PetscCount *nz)
{
  Mat_MPIAIJ *maij = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ *aaij = (Mat_SeqAIJ *)maij->A->data, *baij = (Mat_SeqAIJ *)maij->B->data;
  PetscBool   isaij;

  PetscFunctionBegin;
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIAIJ, &isaij));
  PetscCheck(isaij, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)A)->type_name);
  /* the last entries of the CSR row-offset arrays give the nonzero counts of the two blocks */
  *nz = aaij->i[A->rmap->n] + baij->i[A->rmap->n];
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap

  Collective

  Input Parameters:
+ A - the matrix
- sc - `PETSC_TRUE` indicates use the scalable algorithm (default is not to use the scalable algorithm)

  Level: advanced

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`
@*/
PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A, PetscBool sc)
{
  PetscFunctionBegin;
  PetscTryMethod(A, "MatMPIAIJSetUseScalableIncreaseOverlap_C", (Mat, PetscBool), (A, sc));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Process runtime options specific to MATMPIAIJ */
PetscErrorCode MatSetFromOptions_MPIAIJ(Mat A, PetscOptionItems PetscOptionsObject)
{
  PetscBool sc = PETSC_FALSE, flg;

  PetscFunctionBegin;
  PetscOptionsHeadBegin(PetscOptionsObject, "MPIAIJ options");
  /* default reflects the currently installed increaseoverlap implementation */
  if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
  PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable", "Use a scalable algorithm to compute the overlap", "MatIncreaseOverlap", sc, &sc, &flg));
  if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, sc));
  PetscOptionsHeadEnd();
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Y = Y + a*I; ensures a one-entry-per-row preallocation exists before shifting */
static PetscErrorCode MatShift_MPIAIJ(Mat Y, PetscScalar a)
{
  Mat_MPIAIJ *maij = (Mat_MPIAIJ *)Y->data;
  Mat_SeqAIJ *aij  = (Mat_SeqAIJ *)maij->A->data;

  PetscFunctionBegin;
  if (!Y->preallocated) {
    PetscCall(MatMPIAIJSetPreallocation(Y, 1, NULL, 0, NULL));
  } else if (!aij->nz) { /* It does not matter if diagonals of Y only partially lie in maij->A. We just need an estimated preallocation. */
    PetscInt nonew = aij->nonew;
    PetscCall(MatSeqAIJSetPreallocation(maij->A, 1, NULL));
    aij->nonew = nonew; /* restore the no-new-nonzero flag clobbered by re-preallocation */
  }
  PetscCall(MatShift_Basic(Y, a));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Report whether any locally owned row lacks a stored diagonal entry; only the diagonal
   block a->A can contain diagonal entries, so the check is delegated to it. */
static PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A, PetscBool *missing, PetscInt *d)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
  PetscCall(MatMissingDiagonal(a->A, missing, d));
  if (d) {
    PetscInt rstart;
    PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
    *d += rstart; /* convert the local row index reported by the block to a global row index */
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Invert variable-sized diagonal blocks; they all reside in the local diagonal block a->A */
static PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A, PetscInt nblocks, const PetscInt *bsizes, PetscScalar *diag)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatInvertVariableBlockDiagonal(a->A, nblocks, bsizes, diag));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Drop explicitly stored zero entries from both the diagonal and off-diagonal blocks */
static PetscErrorCode MatEliminateZeros_MPIAIJ(Mat A, PetscBool keep)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatEliminateZeros_SeqAIJ(a->A, keep)); // possibly keep zero diagonal coefficients
  PetscCall(MatEliminateZeros_SeqAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Function table for MATMPIAIJ; the /*NN*/ comments give the slot number in struct _MatOps */
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
                                       MatGetRow_MPIAIJ,
                                       MatRestoreRow_MPIAIJ,
                                       MatMult_MPIAIJ,
                                       /* 4*/ MatMultAdd_MPIAIJ,
                                       MatMultTranspose_MPIAIJ,
                                       MatMultTransposeAdd_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*10*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_MPIAIJ,
                                       MatTranspose_MPIAIJ,
                                       /*15*/ MatGetInfo_MPIAIJ,
                                       MatEqual_MPIAIJ,
                                       MatGetDiagonal_MPIAIJ,
                                       MatDiagonalScale_MPIAIJ,
                                       MatNorm_MPIAIJ,
                                       /*20*/ MatAssemblyBegin_MPIAIJ,
                                       MatAssemblyEnd_MPIAIJ,
                                       MatSetOption_MPIAIJ,
                                       MatZeroEntries_MPIAIJ,
                                       /*24*/ MatZeroRows_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*29*/ MatSetUp_MPI_Hash,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIAIJ,
                                       NULL,
                                       /*34*/ MatDuplicate_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*39*/ MatAXPY_MPIAIJ,
                                       MatCreateSubMatrices_MPIAIJ,
                                       MatIncreaseOverlap_MPIAIJ,
                                       MatGetValues_MPIAIJ,
                                       MatCopy_MPIAIJ,
                                       /*44*/ MatGetRowMax_MPIAIJ,
                                       MatScale_MPIAIJ,
                                       MatShift_MPIAIJ,
                                       MatDiagonalSet_MPIAIJ,
                                       MatZeroRowsColumns_MPIAIJ,
                                       /*49*/ MatSetRandom_MPIAIJ,
                                       MatGetRowIJ_MPIAIJ,
                                       MatRestoreRowIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*54*/ MatFDColoringCreate_MPIXAIJ,
                                       NULL,
                                       MatSetUnfactored_MPIAIJ,
                                       MatPermute_MPIAIJ,
                                       NULL,
                                       /*59*/ MatCreateSubMatrix_MPIAIJ,
                                       MatDestroy_MPIAIJ,
                                       MatView_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*64*/ MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatGetRowMaxAbs_MPIAIJ,
                                       /*69*/ MatGetRowMinAbs_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFDColoringApply_AIJ,
                                       MatSetFromOptions_MPIAIJ,
                                       MatFindZeroDiagonals_MPIAIJ,
                                       /*75*/ NULL,
                                       NULL,
                                       NULL,
                                       MatLoad_MPIAIJ,
                                       NULL,
                                       /*80*/ NULL,
                                       NULL,
                                       NULL,
                                       /*83*/ NULL,
                                       NULL,
                                       MatMatMultNumeric_MPIAIJ_MPIAIJ,
                                       MatPtAPNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*89*/ MatBindToCPU_MPIAIJ,
                                       MatProductSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIAIJ,
                                       /*94*/ NULL,
                                       MatSetValuesRow_MPIAIJ,
                                       MatRealPart_MPIAIJ,
                                       MatImaginaryPart_MPIAIJ,
                                       NULL,
                                       /*99*/ NULL,
                                       NULL,
                                       NULL,
                                       MatGetRowMin_MPIAIJ,
                                       NULL,
                                       /*104*/ MatMissingDiagonal_MPIAIJ,
                                       MatGetSeqNonzeroStructure_MPIAIJ,
                                       NULL,
                                       MatGetGhosts_MPIAIJ,
                                       NULL,
                                       /*109*/ NULL,
                                       MatMultDiagonalBlock_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*114*/ MatGetMultiProcBlock_MPIAIJ,
                                       MatFindNonzeroRows_MPIAIJ,
                                       MatGetColumnReductions_MPIAIJ,
                                       MatInvertBlockDiagonal_MPIAIJ,
                                       MatInvertVariableBlockDiagonal_MPIAIJ,
                                       /*119*/ MatCreateSubMatricesMPI_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       /*124*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSetBlockSizes_MPIAIJ,
                                       NULL,
                                       /*129*/ MatFDColoringSetUp_MPIXAIJ,
                                       MatFindOffBlockDiagonalEntries_MPIAIJ,
                                       MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*134*/ NULL,
                                       MatCreateGraph_Simple_AIJ,
                                       NULL,
                                       MatEliminateZeros_MPIAIJ,
                                       MatGetRowSumAbs_MPIAIJ,
                                       /*139*/ NULL,
                                       NULL,
                                       NULL,
                                       MatCopyHashToXAIJ_MPI_Hash};

/* Stash a copy of the numerical values of both sequential blocks (see MatStoreValues()) */
static PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatStoreValues(aij->A));
  PetscCall(MatStoreValues(aij->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Restore the numerical values previously saved with MatStoreValues_MPIAIJ() */
static PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatRetrieveValues(aij->A));
  PetscCall(MatRetrieveValues(aij->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* (Re)create and preallocate the two sequential blocks: A holds the locally owned (diagonal)
   columns, B the off-process (off-diagonal) columns. Any previous column map, ghost data and
   scatter context are destroyed since the nonzero structure is being redefined. */
PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
  PetscMPIInt size;

  PetscFunctionBegin;
  if (B->hash_active) {
    /* leave hash-table assembly mode: reinstall the operations saved in b->cops */
    B->ops[0]      = b->cops;
    B->hash_active = PETSC_FALSE;
  }
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));

#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&b->colmap));
#else
  PetscCall(PetscFree(b->colmap));
#endif
  PetscCall(PetscFree(b->garray));
  PetscCall(VecDestroy(&b->lvec));
  PetscCall(VecScatterDestroy(&b->Mvctx));

  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));

  MatSeqXAIJGetOptions_Private(b->B);
  PetscCall(MatDestroy(&b->B));
  PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
  /* on one rank there is no off-diagonal part, so B gets zero columns */
  PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
  PetscCall(MatSetBlockSizesFromMats(b->B, B, B));
  PetscCall(MatSetType(b->B, MATSEQAIJ));
  MatSeqXAIJRestoreOptions_Private(b->B);

  MatSeqXAIJGetOptions_Private(b->A);
  PetscCall(MatDestroy(&b->A));
  PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
  PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
  PetscCall(MatSetBlockSizesFromMats(b->A, B, B));
  PetscCall(MatSetType(b->A, MATSEQAIJ));
  MatSeqXAIJRestoreOptions_Private(b->A);

  PetscCall(MatSeqAIJSetPreallocation(b->A, d_nz, d_nnz));
  PetscCall(MatSeqAIJSetPreallocation(b->B, o_nz, o_nnz));
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Reset (zero out and reuse) the existing preallocation of both blocks, keeping the memory */
static PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
{
  Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
  PetscBool   ondiagreset, offdiagreset, memoryreset;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscCheck(B->insertmode == NOT_SET_VALUES, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot reset preallocation after setting some values but not yet calling MatAssemblyBegin()/MatAssemblyEnd()");
  if (B->num_ass == 0) PetscFunctionReturn(PETSC_SUCCESS); /* never assembled: nothing to reset */

  PetscCall(MatResetPreallocation_SeqAIJ_Private(b->A, &ondiagreset));
  PetscCall(MatResetPreallocation_SeqAIJ_Private(b->B, &offdiagreset));
  memoryreset = (PetscBool)(ondiagreset || offdiagreset);
  /* all ranks must agree on whether anything was reset so the collective calls below stay matched */
  PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &memoryreset, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)B)));
  if (!memoryreset) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  PetscCheck(B->assembled || B->was_assembled, PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_WRONGSTATE, "Should not need to reset preallocation if the matrix was never assembled");
  PetscCall(MatDisAssemble_MPIAIJ(B, PETSC_TRUE));
  PetscCall(VecScatterDestroy(&b->Mvctx));

  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  /* Log that the state of this object has changed; this will help guarantee that preconditioners get re-setup */
  PetscCall(PetscObjectStateIncrease((PetscObject)B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Duplicate a MATMPIAIJ matrix, optionally copying values (cpvalues); layouts are shared by
   reference, the column map and ghost index array are deep-copied, and the scatter context is
   shared via reference counting. */
PetscErrorCode MatDuplicate_MPIAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
{
  Mat         mat;
  Mat_MPIAIJ *a, *oldmat = (Mat_MPIAIJ *)matin->data;

  PetscFunctionBegin;
  *newmat = NULL;
  PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
  PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
  PetscCall(MatSetBlockSizesFromMats(mat, matin, matin));
  PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
  a = (Mat_MPIAIJ *)mat->data;

  mat->factortype = matin->factortype;
  mat->assembled  = matin->assembled;
  mat->insertmode = NOT_SET_VALUES;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  /* per-object MatGetRow() scratch space is not copied; it is rebuilt on demand */
  a->rowindices   = NULL;
  a->rowvalues    = NULL;
  a->getrowactive = PETSC_FALSE;

  PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
  PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));
  if (matin->hash_active) {
    PetscCall(MatSetUp(mat));
  } else {
    mat->preallocated = matin->preallocated;
    if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
      PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
#else
      PetscCall(PetscMalloc1(mat->cmap->N, &a->colmap));
      PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, mat->cmap->N));
#endif
    } else a->colmap = NULL;
    if (oldmat->garray) {
      PetscInt len;
      len = oldmat->B->cmap->n;
      PetscCall(PetscMalloc1(len + 1, &a->garray));
      if (len) PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
    } else a->garray = NULL;

    /* It may happen MatDuplicate is called with a non-assembled matrix
       In fact, MatDuplicate only requires the matrix to be preallocated
       This may happen inside a DMCreateMatrix_Shell */
    if (oldmat->lvec) PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
    if (oldmat->Mvctx) {
      /* share the scatter context by reference counting rather than copying it */
      a->Mvctx = oldmat->Mvctx;
      PetscCall(PetscObjectReference((PetscObject)oldmat->Mvctx));
    }
    PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
    PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
  }
  PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
  *newmat = mat;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Load a matrix from a viewer; dispatches on the viewer type (binary or HDF5) */
PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
{
  PetscBool isbinary, ishdf5;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
  PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
  /* force binary viewer to load .info file if it has not yet done so */
  PetscCall(PetscViewerSetUp(viewer));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
  if (isbinary) {
    PetscCall(MatLoad_MPIAIJ_Binary(newMat, viewer));
  } else if (ishdf5) {
#if defined(PETSC_HAVE_HDF5)
    PetscCall(MatLoad_AIJ_HDF5(newMat, viewer));
#else
    SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
#endif
  } else {
    SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Load a matrix stored in PETSc binary format: header, per-row lengths, column indices, values */
PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
{
  PetscInt     header[4], M, N, m, nz, rows, cols, sum, i;
  PetscInt    *rowidxs, *colidxs;
  PetscScalar *matvals;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  /* read in matrix header */
  PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
  PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
  M  = header[1];
  N  = header[2];
  nz = header[3];
  PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
  PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
  PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIAIJ");

  /* set block sizes from the viewer's .info file */
  PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
  /* set global sizes if not set already */
  if (mat->rmap->N < 0) mat->rmap->N = M;
  if (mat->cmap->N < 0) mat->cmap->N = N;
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));

  /* check if the matrix sizes are correct */
  PetscCall(MatGetSize(mat, &rows, &cols));
  PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);

  /* read in row lengths and build row indices */
  PetscCall(MatGetLocalSize(mat, &m, NULL));
  PetscCall(PetscMalloc1(m + 1, &rowidxs));
  PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT));
  rowidxs[0] = 0;
  /* prefix-sum the per-row lengths into CSR row offsets */
  for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
  if (nz != PETSC_INT_MAX) {
    /* validate the total against the nonzero count recorded in the header */
    PetscCallMPI(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer)));
    PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);
  }

  /* read in column indices and matrix values */
  PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals));
  PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
  PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
  /* store matrix indices and values */
  PetscCall(MatMPIAIJSetPreallocationCSR(mat, rowidxs, colidxs, matvals));
  PetscCall(PetscFree(rowidxs));
  PetscCall(PetscFree2(colidxs, matvals));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Not scalable because of ISAllGather() unless getting all columns. */
static PetscErrorCode ISGetSeqIS_Private(Mat mat, IS iscol, IS *isseq)
{
  IS          iscol_local;
  PetscBool   isstride;
  PetscMPIInt gisstride = 0;

  PetscFunctionBegin;
  /* check if we are grabbing all columns*/
  PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &isstride));

  if (isstride) {
    PetscInt start, len, mstart, mlen;
    PetscCall(ISStrideGetInfo(iscol, &start, NULL));
    PetscCall(ISGetLocalSize(iscol, &len));
    PetscCall(MatGetOwnershipRangeColumn(mat, &mstart, &mlen));
    if (mstart == start && mlen - mstart == len) gisstride = 1;
  }

  /* gisstride stays 1 only if every rank requests exactly its owned column range */
  PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &gisstride, 1, MPI_INT, MPI_MIN, PetscObjectComm((PetscObject)mat)));
  if (gisstride) {
    PetscInt N;
    PetscCall(MatGetSize(mat, NULL, &N));
    PetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &iscol_local));
    PetscCall(ISSetIdentity(iscol_local));
    PetscCall(PetscInfo(mat, "Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n"));
  } else {
    PetscInt cbs;
    PetscCall(ISGetBlockSize(iscol, &cbs));
    PetscCall(ISAllGather(iscol, &iscol_local));
    PetscCall(ISSetBlockSize(iscol_local, cbs));
  }

  *isseq = iscol_local;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
  (see MatCreateSubMatrix_MPIAIJ_nonscalable)

  Input Parameters:
+ mat - matrix
. isrow - parallel row index set; its local indices are a subset of local rows of `mat`,
          i.e., mat->rstart <= isrow[i] < mat->rend
- iscol - parallel column index set; its local indices are a subset of local columns of `mat`,
          i.e., mat->cstart <= iscol[i] < mat->cend

  Output Parameters:
+ isrow_d - sequential row index set for retrieving mat->A
. iscol_d - sequential column index set for retrieving mat->A
. iscol_o - sequential column index set for retrieving mat->B
- garray - column map; garray[i] indicates global location of iscol_o[i] in `iscol`
*/
static PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat, IS isrow, IS iscol, IS *isrow_d, IS *iscol_d, IS *iscol_o, PetscInt *garray[])
{
  Vec             x, cmap;
  const PetscInt *is_idx;
  PetscScalar    *xarray, *cmaparray;
  PetscInt        ncols, isstart, *idx, m, rstart, *cmap1, count;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ *)mat->data;
  Mat             B = a->B;
  Vec             lvec = a->lvec, lcmap;
  PetscInt        i, cstart, cend, Bn = B->cmap->N;
  MPI_Comm        comm;
  VecScatter      Mvctx = a->Mvctx;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  PetscCall(ISGetLocalSize(iscol, &ncols));

  /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
  PetscCall(MatCreateVecs(mat, &x, NULL));
  PetscCall(VecSet(x, -1.0));
  PetscCall(VecDuplicate(x, &cmap));
  PetscCall(VecSet(cmap, -1.0));

  /* Get start indices */
  PetscCallMPI(MPI_Scan(&ncols, &isstart, 1, MPIU_INT, MPI_SUM, comm));
  isstart -= ncols; /* exclusive prefix sum: global offset of this rank's selected columns */
  PetscCall(MatGetOwnershipRangeColumn(mat, &cstart, &cend));

  PetscCall(ISGetIndices(iscol, &is_idx));
  PetscCall(VecGetArray(x, &xarray));
  PetscCall(VecGetArray(cmap, &cmaparray));
  PetscCall(PetscMalloc1(ncols, &idx));
  for (i = 0; i < ncols; i++) {
    xarray[is_idx[i] - cstart]    = (PetscScalar)is_idx[i];
    cmaparray[is_idx[i] - cstart] = i + isstart;        /* global index of iscol[i] */
    idx[i]                        = is_idx[i] - cstart; /* local index of iscol[i]  */
  }
  PetscCall(VecRestoreArray(x, &xarray));
  PetscCall(VecRestoreArray(cmap, &cmaparray));
  PetscCall(ISRestoreIndices(iscol, &is_idx));

  /* Get iscol_d */
  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, iscol_d));
  PetscCall(ISGetBlockSize(iscol, &i));
  PetscCall(ISSetBlockSize(*iscol_d, i));

  /* Get isrow_d */
  PetscCall(ISGetLocalSize(isrow, &m));
  rstart = mat->rmap->rstart;
  PetscCall(PetscMalloc1(m, &idx));
  PetscCall(ISGetIndices(isrow, &is_idx));
  for (i = 0; i < m; i++) idx[i] = is_idx[i] - rstart;
  PetscCall(ISRestoreIndices(isrow, &is_idx));

  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idx, PETSC_OWN_POINTER, isrow_d));
  PetscCall(ISGetBlockSize(isrow, &i));
  PetscCall(ISSetBlockSize(*isrow_d, i));

  /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
  PetscCall(VecScatterBegin(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));

  PetscCall(VecDuplicate(lvec, &lcmap));

  PetscCall(VecScatterBegin(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));

  /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
  /* off-process column indices */
  count = 0;
  PetscCall(PetscMalloc1(Bn, &idx));
  PetscCall(PetscMalloc1(Bn, &cmap1));

  PetscCall(VecGetArray(lvec, &xarray));
  PetscCall(VecGetArray(lcmap, &cmaparray));
  for (i = 0; i < Bn; i++) {
    if (PetscRealPart(xarray[i]) > -1.0) { /* -1 marks ghost columns not selected by iscol */
      idx[count]   = i;                                     /* local column index in off-diagonal part B */
      cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */
      count++;
    }
  }
  PetscCall(VecRestoreArray(lvec, &xarray));
  PetscCall(VecRestoreArray(lcmap, &cmaparray));

  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_COPY_VALUES, iscol_o));
  /* cannot ensure iscol_o has same blocksize as iscol! */

  PetscCall(PetscFree(idx));
  *garray = cmap1;

  PetscCall(VecDestroy(&x));
  PetscCall(VecDestroy(&cmap));
  PetscCall(VecDestroy(&lcmap));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *submat)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data, *asub;
  Mat         M = NULL;
  MPI_Comm    comm;
  IS          iscol_d, isrow_d, iscol_o;
  Mat         Asub = NULL, Bsub = NULL;
  PetscInt    n, count, M_size, N_size;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));

  if (call == MAT_REUSE_MATRIX) {
    /* Retrieve isrow_d, iscol_d and iscol_o from submat */
    PetscCall(PetscObjectQuery((PetscObject)*submat, "isrow_d", (PetscObject *)&isrow_d));
    PetscCheck(isrow_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "isrow_d passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_d", (PetscObject *)&iscol_d));
    PetscCheck(iscol_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_d passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_o", (PetscObject *)&iscol_o));
    PetscCheck(iscol_o, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_o passed in was not used before, cannot reuse");

    /* Update diagonal and off-diagonal portions of submat */
    asub = (Mat_MPIAIJ *)(*submat)->data;
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->A));
    PetscCall(ISGetLocalSize(iscol_o, &n));
    if (n) PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->B));
    PetscCall(MatAssemblyBegin(*submat, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(*submat, MAT_FINAL_ASSEMBLY));

  } else { /* call == MAT_INITIAL_MATRIX) */
    PetscInt *garray, *garray_compact;
    PetscInt  BsubN;

    /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
    PetscCall(ISGetSeqIS_SameColDist_Private(mat, isrow, iscol, &isrow_d, &iscol_d, &iscol_o, &garray));

    /* Create local submatrices Asub and Bsub */
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Asub));
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Bsub));

    // Compact garray so its not of size Bn
    PetscCall(ISGetSize(iscol_o, &count));
    PetscCall(PetscMalloc1(count, &garray_compact));
    PetscCall(PetscArraycpy(garray_compact, garray, count));

    /* Create submatrix M */
    PetscCall(ISGetSize(isrow, &M_size));
    PetscCall(ISGetSize(iscol, &N_size));
    PetscCall(MatCreateMPIAIJWithSeqAIJ(comm, M_size, N_size, Asub, Bsub, garray_compact, &M));

    /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
    asub = (Mat_MPIAIJ *)M->data;

    PetscCall(ISGetLocalSize(iscol_o, &BsubN));
    n = asub->B->cmap->N;
    if (BsubN > n) {
      /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
      const PetscInt *idx;
      PetscInt        i, j, *idx_new, *subgarray = asub->garray;
      PetscCall(PetscInfo(M, "submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n", n, BsubN));

      PetscCall(PetscMalloc1(n, &idx_new));
      j = 0;
      PetscCall(ISGetIndices(iscol_o, &idx));
      for (i = 0; i < n; i++) {
        if (j >= BsubN) break;
        /* advance j until the compressed ghost column subgarray[i] is matched in garray */
        while (subgarray[i] > garray[j]) j++;

        if (subgarray[i] == garray[j]) {
          idx_new[i] = idx[j++];
        } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT, i, subgarray[i], j, garray[j]);
      }
      PetscCall(ISRestoreIndices(iscol_o, &idx));

      PetscCall(ISDestroy(&iscol_o));
      PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx_new,
PETSC_OWN_POINTER, &iscol_o)); 3336 3337 } else if (BsubN < n) { 3338 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")", BsubN, asub->B->cmap->N); 3339 } 3340 3341 PetscCall(PetscFree(garray)); 3342 *submat = M; 3343 3344 /* Save isrow_d, iscol_d and iscol_o used in processor for next request */ 3345 PetscCall(PetscObjectCompose((PetscObject)M, "isrow_d", (PetscObject)isrow_d)); 3346 PetscCall(ISDestroy(&isrow_d)); 3347 3348 PetscCall(PetscObjectCompose((PetscObject)M, "iscol_d", (PetscObject)iscol_d)); 3349 PetscCall(ISDestroy(&iscol_d)); 3350 3351 PetscCall(PetscObjectCompose((PetscObject)M, "iscol_o", (PetscObject)iscol_o)); 3352 PetscCall(ISDestroy(&iscol_o)); 3353 } 3354 PetscFunctionReturn(PETSC_SUCCESS); 3355 } 3356 3357 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat) 3358 { 3359 IS iscol_local = NULL, isrow_d; 3360 PetscInt csize; 3361 PetscInt n, i, j, start, end; 3362 PetscBool sameRowDist = PETSC_FALSE, sameDist[2], tsameDist[2]; 3363 MPI_Comm comm; 3364 3365 PetscFunctionBegin; 3366 /* If isrow has same processor distribution as mat, 3367 call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */ 3368 if (call == MAT_REUSE_MATRIX) { 3369 PetscCall(PetscObjectQuery((PetscObject)*newmat, "isrow_d", (PetscObject *)&isrow_d)); 3370 if (isrow_d) { 3371 sameRowDist = PETSC_TRUE; 3372 tsameDist[1] = PETSC_TRUE; /* sameColDist */ 3373 } else { 3374 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_local)); 3375 if (iscol_local) { 3376 sameRowDist = PETSC_TRUE; 3377 tsameDist[1] = PETSC_FALSE; /* !sameColDist */ 3378 } 3379 } 3380 } else { 3381 /* Check if isrow has same processor distribution as mat */ 3382 sameDist[0] = PETSC_FALSE; 3383 PetscCall(ISGetLocalSize(isrow, &n)); 3384 if (!n) { 3385 sameDist[0] = PETSC_TRUE; 3386 } else { 3387 
PetscCall(ISGetMinMax(isrow, &i, &j)); 3388 PetscCall(MatGetOwnershipRange(mat, &start, &end)); 3389 if (i >= start && j < end) sameDist[0] = PETSC_TRUE; 3390 } 3391 3392 /* Check if iscol has same processor distribution as mat */ 3393 sameDist[1] = PETSC_FALSE; 3394 PetscCall(ISGetLocalSize(iscol, &n)); 3395 if (!n) { 3396 sameDist[1] = PETSC_TRUE; 3397 } else { 3398 PetscCall(ISGetMinMax(iscol, &i, &j)); 3399 PetscCall(MatGetOwnershipRangeColumn(mat, &start, &end)); 3400 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3401 } 3402 3403 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3404 PetscCallMPI(MPIU_Allreduce(&sameDist, &tsameDist, 2, MPIU_BOOL, MPI_LAND, comm)); 3405 sameRowDist = tsameDist[0]; 3406 } 3407 3408 if (sameRowDist) { 3409 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3410 /* isrow and iscol have same processor distribution as mat */ 3411 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat, isrow, iscol, call, newmat)); 3412 PetscFunctionReturn(PETSC_SUCCESS); 3413 } else { /* sameRowDist */ 3414 /* isrow has same processor distribution as mat */ 3415 if (call == MAT_INITIAL_MATRIX) { 3416 PetscBool sorted; 3417 PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local)); 3418 PetscCall(ISGetLocalSize(iscol_local, &n)); /* local size of iscol_local = global columns of newmat */ 3419 PetscCall(ISGetSize(iscol, &i)); 3420 PetscCheck(n == i, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT, n, i); 3421 3422 PetscCall(ISSorted(iscol_local, &sorted)); 3423 if (sorted) { 3424 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3425 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, iscol_local, MAT_INITIAL_MATRIX, newmat)); 3426 PetscFunctionReturn(PETSC_SUCCESS); 3427 } 3428 } else { /* call == MAT_REUSE_MATRIX */ 3429 IS iscol_sub; 3430 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject 
*)&iscol_sub)); 3431 if (iscol_sub) { 3432 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, NULL, call, newmat)); 3433 PetscFunctionReturn(PETSC_SUCCESS); 3434 } 3435 } 3436 } 3437 } 3438 3439 /* General case: iscol -> iscol_local which has global size of iscol */ 3440 if (call == MAT_REUSE_MATRIX) { 3441 PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local)); 3442 PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 3443 } else { 3444 if (!iscol_local) PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local)); 3445 } 3446 3447 PetscCall(ISGetLocalSize(iscol, &csize)); 3448 PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat, isrow, iscol_local, csize, call, newmat)); 3449 3450 if (call == MAT_INITIAL_MATRIX) { 3451 PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local)); 3452 PetscCall(ISDestroy(&iscol_local)); 3453 } 3454 PetscFunctionReturn(PETSC_SUCCESS); 3455 } 3456 3457 /*@C 3458 MatCreateMPIAIJWithSeqAIJ - creates a `MATMPIAIJ` matrix using `MATSEQAIJ` matrices that contain the "diagonal" 3459 and "off-diagonal" part of the matrix in CSR format. 3460 3461 Collective 3462 3463 Input Parameters: 3464 + comm - MPI communicator 3465 . M - the global row size 3466 . N - the global column size 3467 . A - "diagonal" portion of matrix 3468 . B - if garray is `NULL`, B should be the offdiag matrix using global col ids and of size N - if garray is not `NULL`, B should be the offdiag matrix using local col ids and of size garray 3469 - garray - either `NULL` or the global index of `B` columns 3470 3471 Output Parameter: 3472 . mat - the matrix, with input `A` as its local diagonal matrix 3473 3474 Level: advanced 3475 3476 Notes: 3477 See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3478 3479 `A` and `B` becomes part of output mat. 
The user cannot use `A` and `B` anymore.

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateMPIAIJWithSplitArrays()`
@*/
PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm, PetscInt M, PetscInt N, Mat A, Mat B, PetscInt *garray, Mat *mat)
{
  PetscInt    m, n;
  MatType     mpi_mat_type;
  Mat_MPIAIJ *mpiaij;
  Mat         C;

  PetscFunctionBegin;
  PetscCall(MatCreate(comm, &C));
  PetscCall(MatGetSize(A, &m, &n));
  /* A and B must have the same local row count; their row block sizes must agree */
  PetscCheck(m == B->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Am %" PetscInt_FMT " != Bm %" PetscInt_FMT, m, B->rmap->N);
  PetscCheck(A->rmap->bs == B->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT, A->rmap->bs, B->rmap->bs);

  PetscCall(MatSetSizes(C, m, n, M, N));
  /* Determine the type of MPI matrix that should be created from the type of matrix A, which holds the "diagonal" portion. */
  PetscCall(MatGetMPIMatType_Private(A, &mpi_mat_type));
  PetscCall(MatSetType(C, mpi_mat_type));

  PetscCall(MatSetBlockSizes(C, A->rmap->bs, A->cmap->bs));
  PetscCall(PetscLayoutSetUp(C->rmap));
  PetscCall(PetscLayoutSetUp(C->cmap));

  /* adopt A, B and garray directly; C now owns them (caller must not use them again) */
  mpiaij         = (Mat_MPIAIJ *)C->data;
  mpiaij->A      = A;
  mpiaij->B      = B;
  mpiaij->garray = garray;
  C->preallocated     = PETSC_TRUE;
  C->nooffprocentries = PETSC_TRUE; /* See MatAssemblyBegin_MPIAIJ. In effect, making MatAssemblyBegin a nop */

  PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY));
  /* MatAssemblyEnd is critical here.
     It sets mat->offloadmask according to A and B's, and
     also gets mpiaij->B compacted (if garray is NULL), with its col ids and size reduced
  */
  PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
  PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
  PetscCall(MatSetOption(C, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  *mat = C;
  PetscFunctionReturn(PETSC_SUCCESS);
}

extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat, PetscInt, const IS[], const IS[], MatReuse, PetscBool, Mat *);

/* Submatrix extraction when isrow has the same processor distribution as mat.
   iscol_local (sorted, may contain duplicates) is the sequential gather of iscol;
   it may be NULL for MAT_REUSE_MATRIX, in which case the cached "SubIScol" is used */
PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat, IS isrow, IS iscol, IS iscol_local, MatReuse call, Mat *newmat)
{
  PetscInt        i, m, n, rstart, row, rend, nz, j, bs, cbs;
  PetscInt       *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ *)mat->data;
  Mat             M, Msub, B = a->B;
  MatScalar      *aa;
  Mat_SeqAIJ     *aij;
  PetscInt       *garray = a->garray, *colsub, Ncols;
  PetscInt        count, Bn = B->cmap->N, cstart = mat->cmap->rstart, cend = mat->cmap->rend;
  IS              iscol_sub, iscmap;
  const PetscInt *is_idx, *cmap;
  PetscBool       allcolumns = PETSC_FALSE;
  MPI_Comm        comm;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  if (call == MAT_REUSE_MATRIX) {
    /* retrieve the cached column map, submatrix and index sets composed during MAT_INITIAL_MATRIX */
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
    PetscCheck(iscol_sub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "SubIScol passed in was not used before, cannot reuse");
    PetscCall(ISGetLocalSize(iscol_sub, &count));

    PetscCall(PetscObjectQuery((PetscObject)*newmat, "Subcmap", (PetscObject *)&iscmap));
    PetscCheck(iscmap, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Subcmap passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Msub));
    PetscCheck(Msub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");

    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_REUSE_MATRIX, PETSC_FALSE, &Msub));

  } else { /* call == MAT_INITIAL_MATRIX) */
    PetscBool flg;

    PetscCall(ISGetLocalSize(iscol, &n));
    PetscCall(ISGetSize(iscol, &Ncols));

    /* (1) iscol -> nonscalable iscol_local */
    /* Check for special case: each processor gets entire matrix columns */
    PetscCall(ISIdentity(iscol_local, &flg));
    if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
    PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (allcolumns) {
      iscol_sub = iscol_local;
      PetscCall(PetscObjectReference((PetscObject)iscol_local));
      PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &iscmap));

    } else {
      /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
      PetscInt *idx, *cmap1, k;
      PetscCall(PetscMalloc1(Ncols, &idx));
      PetscCall(PetscMalloc1(Ncols, &cmap1));
      PetscCall(ISGetIndices(iscol_local, &is_idx));
      count = 0;
      k     = 0;
      for (i = 0; i < Ncols; i++) {
        j = is_idx[i];
        if (j >= cstart && j < cend) {
          /* diagonal part of mat */
          idx[count]     = j;
          cmap1[count++] = i; /* column index in submat */
        } else if (Bn) {
          /* off-diagonal part of mat: advance k through the sorted garray to match j */
          if (j == garray[k]) {
            idx[count]     = j;
            cmap1[count++] = i; /* column index in submat */
          } else if (j > garray[k]) {
            while (j > garray[k] && k < Bn - 1) k++;
            if (j == garray[k]) {
              idx[count]     = j;
              cmap1[count++] = i; /* column index in submat */
            }
          }
        }
      }
      PetscCall(ISRestoreIndices(iscol_local, &is_idx));

      /* idx and cmap1 ownership passes to the new ISes */
      PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_OWN_POINTER, &iscol_sub));
      PetscCall(ISGetBlockSize(iscol, &cbs));
      PetscCall(ISSetBlockSize(iscol_sub, cbs));

      PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local), count, cmap1, PETSC_OWN_POINTER, &iscmap));
    }

    /* (3) Create sequential Msub */
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_INITIAL_MATRIX, allcolumns, &Msub));
  }

  PetscCall(ISGetLocalSize(iscol_sub, &count));
  aij = (Mat_SeqAIJ *)Msub->data;
  ii  = aij->i;
  PetscCall(ISGetIndices(iscmap, &cmap));

  /*
      m - number of local rows
      Ncols - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetSize(Msub, &m, NULL));

  if (call == MAT_INITIAL_MATRIX) {
    /* (4) Create parallel newmat */
    PetscMPIInt rank, size;
    PetscInt    csize;

    PetscCallMPI(MPI_Comm_size(comm, &size));
    PetscCallMPI(MPI_Comm_rank(comm, &rank));

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    PetscCall(ISGetLocalSize(iscol, &csize));
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow, &mglobal));
      if (mglobal == Ncols) { /* square matrix */
        nlocal = m;
      } else {
        /* spread columns as evenly as possible; low ranks take the remainder */
        nlocal = Ncols / size + ((Ncols % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    /* prefix-sum of local column counts gives this rank's [rstart, rend) column range */
    PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
    rstart = rend - nlocal;
    PetscCheck(rank != size - 1 || rend == Ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, Ncols);

    /* next, compute all the lengths */
    jj = aij->j;
    PetscCall(PetscMalloc1(2 * m + 1, &dlens));
    olens = dlens + m; /* olens shares the single allocation with dlens */
    for (i = 0; i < m; i++) {
      jend = ii[i + 1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j = 0; j < jend; j++) {
        if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }

    PetscCall(ISGetBlockSize(isrow, &bs));
    PetscCall(ISGetBlockSize(iscol, &cbs));

    PetscCall(MatCreate(comm, &M));
    PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, Ncols));
    PetscCall(MatSetBlockSizes(M, bs, cbs));
    PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
    PetscCall(PetscFree(dlens));

  } else { /* call == MAT_REUSE_MATRIX */
    M = *newmat;
    PetscCall(MatGetLocalSize(M, &i, NULL));
    PetscCheck(i == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }

  /* (5) Set values of Msub to *newmat, translating local column ids through cmap */
  PetscCall(PetscMalloc1(count, &colsub));
  PetscCall(MatGetOwnershipRange(M, &rstart, NULL));

  jj = aij->j;
  PetscCall(MatSeqAIJGetArrayRead(Msub, (const PetscScalar **)&aa));
  for (i = 0; i < m; i++) {
    row = rstart + i;
    nz  = ii[i + 1] - ii[i];
    for (j = 0; j < nz; j++) colsub[j] = cmap[jj[j]];
    PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, colsub, aa, INSERT_VALUES));
    jj += nz;
    aa += nz;
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Msub, (const PetscScalar **)&aa));
  PetscCall(ISRestoreIndices(iscmap, &cmap));

  PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));

  PetscCall(PetscFree(colsub));

  /* save Msub, iscol_sub and iscmap used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    *newmat = M;
    PetscCall(PetscObjectCompose((PetscObject)*newmat, "SubMatrix", (PetscObject)Msub));
    PetscCall(MatDestroy(&Msub));

    PetscCall(PetscObjectCompose((PetscObject)*newmat, "SubIScol", (PetscObject)iscol_sub));
    PetscCall(ISDestroy(&iscol_sub));

    PetscCall(PetscObjectCompose((PetscObject)*newmat, "Subcmap", (PetscObject)iscmap));
    PetscCall(ISDestroy(&iscmap));

    if (iscol_local) {
      PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
      PetscCall(ISDestroy(&iscol_local));
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
    Not great since it makes two copies of the submatrix, first an SeqAIJ
  in local and then by concatenating the local matrices the end result.
  Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()

    This requires a sequential iscol with all indices.
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat)
{
  PetscMPIInt rank, size;
  PetscInt    i, m, n, rstart, row, rend, nz, *cwork, j, bs, cbs;
  PetscInt   *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
  Mat         M, Mreuse;
  MatScalar  *aa, *vwork;
  MPI_Comm    comm;
  Mat_SeqAIJ *aij;
  PetscBool   colflag, allcolumns = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));

  /* Check for special case: each processor gets entire matrix columns;
     all ranks must agree (logical AND across the communicator) */
  PetscCall(ISIdentity(iscol, &colflag));
  PetscCall(ISGetLocalSize(iscol, &n));
  if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
  PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));

  if (call == MAT_REUSE_MATRIX) {
    /* the sequential intermediate matrix was composed onto *newmat during MAT_INITIAL_MATRIX */
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse));
    PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_REUSE_MATRIX, allcolumns, &Mreuse));
  } else {
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_INITIAL_MATRIX, allcolumns, &Mreuse));
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetSize(Mreuse, &m, &n));
  PetscCall(MatGetBlockSizes(Mreuse, &bs, &cbs));
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ *)Mreuse->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow, &mglobal));
      if (mglobal == n) { /* square matrix */
        nlocal = m;
      } else {
        /* spread columns as evenly as possible; low ranks take the remainder */
        nlocal = n / size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    /* prefix-sum of local column counts gives this rank's [rstart, rend) column range */
    PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
    rstart = rend - nlocal;
    PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n);

    /* next, compute all the lengths */
    PetscCall(PetscMalloc1(2 * m + 1, &dlens));
    olens = dlens + m; /* olens shares the single allocation with dlens */
    for (i = 0; i < m; i++) {
      jend = ii[i + 1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j = 0; j < jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    PetscCall(MatCreate(comm, &M));
    PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, n));
    PetscCall(MatSetBlockSizes(M, bs, cbs));
    PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
    PetscCall(PetscFree(dlens));
  } else {
    PetscInt ml, nl;

    M = *newmat;
    PetscCall(MatGetLocalSize(M, &ml, &nl));
    PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  PetscCall(MatGetOwnershipRange(M, &rstart, &rend));
  aij = (Mat_SeqAIJ *)Mreuse->data;
  ii  = aij->i;
  jj  = aij->j;

  /* trigger copy to CPU if needed */
  PetscCall(MatSeqAIJGetArrayRead(Mreuse, (const PetscScalar **)&aa));
  for (i = 0; i < m; i++) {
    row = rstart + i;
    nz  = ii[i + 1] - ii[i];
    /* walk jj/aa row by row; cwork/vwork point at the current row's columns/values */
    cwork = jj;
    jj    = PetscSafePointerPlusOffset(jj, nz);
    vwork = aa;
    aa    = PetscSafePointerPlusOffset(aa, nz);
    PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES));
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Mreuse, (const PetscScalar **)&aa));

  PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse));
    PetscCall(MatDestroy(&Mreuse));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Implementation of MatMPIAIJSetPreallocationCSR() for MATMPIAIJ: counts diagonal vs
   off-diagonal nonzeros per row, preallocates, inserts the CSR values, and records
   the below-block-diagonal counts in Aij->ld */
static PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[])
{
  PetscInt        m, cstart, cend, j, nnz, i, d, *ld;
  PetscInt       *d_nnz, *o_nnz, nnz_max = 0, rstart, ii, irstart;
  const PetscInt *JJ;
  PetscBool       nooffprocentries;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)B->data;

  PetscFunctionBegin;
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  m       = B->rmap->n;
  cstart  = B->cmap->rstart;
  cend    = B->cmap->rend;
  rstart  = B->rmap->rstart;
  irstart = Ii[0]; /* Ii[0] need not be 0; offsets below are taken relative to it */

  PetscCall(PetscCalloc2(m, &d_nnz, m, &o_nnz));

  /* debug-only validation of the CSR structure: nonnegative row lengths and
     column indices within [0, N); assumes columns within a row are sorted */
  if (PetscDefined(USE_DEBUG)) {
    for (i = 0; i < m; i++) {
      nnz = Ii[i + 1] - Ii[i];
      JJ  = PetscSafePointerPlusOffset(J, Ii[i] - irstart);
      PetscCheck(nnz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns", i, nnz);
      PetscCheck(!nnz || !(JJ[0] < 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT, i, JJ[0]);
      PetscCheck(!nnz || !(JJ[nnz - 1] >= B->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")", i, JJ[nnz - 1], B->cmap->N);
    }
  }

  /* split each row's count into diagonal-block (cstart <= col < cend) and off-diagonal parts */
  for (i = 0; i < m; i++) {
    nnz     = Ii[i + 1] - Ii[i];
    JJ      = PetscSafePointerPlusOffset(J, Ii[i] - irstart);
    nnz_max = PetscMax(nnz_max, nnz);
    d       = 0;
    for (j = 0; j < nnz; j++) {
      if (cstart <= JJ[j] && JJ[j] < cend) d++;
    }
    d_nnz[i] = d;
    o_nnz[i] = nnz - d;
  }
  PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
  PetscCall(PetscFree2(d_nnz, o_nnz));

  for (i = 0; i < m; i++) {
    ii = i + rstart;
    PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, Ii[i + 1] - Ii[i], PetscSafePointerPlusOffset(J, Ii[i] - irstart), PetscSafePointerPlusOffset(v, Ii[i] - irstart), INSERT_VALUES));
  }
  /* all inserted entries are locally owned, so skip the off-process stash during assembly;
     restore the user's setting afterwards */
  nooffprocentries    = B->nooffprocentries;
  B->nooffprocentries = PETSC_TRUE;
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  B->nooffprocentries = nooffprocentries;

  /* count number of entries below block diagonal */
  PetscCall(PetscFree(Aij->ld));
  PetscCall(PetscCalloc1(m, &ld));
  Aij->ld = ld;
  for (i = 0; i < m; i++) {
    nnz = Ii[i + 1] - Ii[i];
    j   = 0;
    /* if J is NULL every row is empty (nnz == 0), so J is never dereferenced here */
    while (j < nnz && J[j] < cstart) j++;
    ld[i] = j;
    if (J) J += nnz;
  }

  PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in `MATAIJ` format
  (the default parallel PETSc format).

  Collective

  Input Parameters:
+ B - the matrix
. i - the indices into `j` for the start of each local row (indices start with zero)
. j - the column indices for each local row (indices start with zero)
- v - optional values in the matrix

  Level: developer

  Notes:
  The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc;
  thus you CANNOT change the matrix entries by changing the values of `v` after you have
  called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.

  The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array.

  A convenience routine for this functionality is `MatCreateMPIAIJWithArrays()`.

  You can update the matrix with new numerical values using `MatUpdateMPIAIJWithArrays()` after this call if the column indices in `j` are sorted.

  If you do **not** use `MatUpdateMPIAIJWithArrays()`, the column indices in `j` do not need to be sorted. If you will use
  `MatUpdateMPIAIJWithArrays()`, the column indices **must** be sorted.

  The format which is used for the sparse matrix input, is equivalent to a
  row-major ordering.. i.e for the following matrix, the input data expected is
  as shown
.vb
        1 0 0
        2 0 3     P0
       -------
        4 5 6     P1

     Process0 [P0] rows_owned=[0,1]
        i =  {0,1,3}  [size = nrow+1  = 2+1]
        j =  {0,0,2}  [size = 3]
        v =  {1,2,3}  [size = 3]

     Process1 [P1] rows_owned=[2]
        i =  {0,3}    [size = nrow+1  = 1+1]
        j =  {0,1,2}  [size = 3]
        v =  {4,5,6}  [size = 3]
.ve

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`,
          `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()`, `MatCreateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
@*/
PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
{
  PetscFunctionBegin;
  /* dispatch to the type-specific implementation composed as "MatMPIAIJSetPreallocationCSR_C"
     (no-op if the matrix type does not provide one) */
  PetscTryMethod(B, "MatMPIAIJSetPreallocationCSR_C", (Mat, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, i, j, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in `MATMPIAIJ` format
  (the default parallel PETSc format). For good matrix assembly performance
  the user should preallocate the matrix storage by setting the parameters
  `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).

  Collective

  Input Parameters:
+ B     - the matrix
. d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
          (same value is used for all local rows)
. d_nnz - array containing the number of nonzeros in the various rows of the
          DIAGONAL portion of the local submatrix (possibly different for each row)
          or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `d_nz` is used to specify the nonzero structure.
          The size of this array is equal to the number of local rows, i.e 'm'.
4013 For matrices that will be factored, you must leave room for (and set) 4014 the diagonal entry even if it is zero. 4015 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4016 submatrix (same value is used for all local rows). 4017 - o_nnz - array containing the number of nonzeros in the various rows of the 4018 OFF-DIAGONAL portion of the local submatrix (possibly different for 4019 each row) or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `o_nz` is used to specify the nonzero 4020 structure. The size of this array is equal to the number 4021 of local rows, i.e 'm'. 4022 4023 Example Usage: 4024 Consider the following 8x8 matrix with 34 non-zero values, that is 4025 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4026 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4027 as follows 4028 4029 .vb 4030 1 2 0 | 0 3 0 | 0 4 4031 Proc0 0 5 6 | 7 0 0 | 8 0 4032 9 0 10 | 11 0 0 | 12 0 4033 ------------------------------------- 4034 13 0 14 | 15 16 17 | 0 0 4035 Proc1 0 18 0 | 19 20 21 | 0 0 4036 0 0 0 | 22 23 0 | 24 0 4037 ------------------------------------- 4038 Proc2 25 26 27 | 0 0 28 | 29 0 4039 30 0 0 | 31 32 33 | 0 34 4040 .ve 4041 4042 This can be represented as a collection of submatrices as 4043 .vb 4044 A B C 4045 D E F 4046 G H I 4047 .ve 4048 4049 Where the submatrices A,B,C are owned by proc0, D,E,F are 4050 owned by proc1, G,H,I are owned by proc2. 4051 4052 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4053 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4054 The 'M','N' parameters are 8,8, and have the same values on all procs. 4055 4056 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4057 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4058 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4059 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4060 part as `MATSEQAIJ` matrices. 
For example, proc1 will store [E] as a `MATSEQAIJ` 4061 matrix, and [DF] as another `MATSEQAIJ` matrix. 4062 4063 When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are 4064 allocated for every row of the local DIAGONAL submatrix, and `o_nz` 4065 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 4066 One way to choose `d_nz` and `o_nz` is to use the maximum number of nonzeros over 4067 the local rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4068 In this case, the values of `d_nz`, `o_nz` are 4069 .vb 4070 proc0 dnz = 2, o_nz = 2 4071 proc1 dnz = 3, o_nz = 2 4072 proc2 dnz = 1, o_nz = 4 4073 .ve 4074 We are allocating `m`*(`d_nz`+`o_nz`) storage locations for every proc. This 4075 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4076 for proc3. i.e we are using 12+15+10=37 storage locations to store 4077 34 values. 4078 4079 When `d_nnz`, `o_nnz` parameters are specified, the storage is specified 4080 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4081 In the above case the values for `d_nnz`, `o_nnz` are 4082 .vb 4083 proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2] 4084 proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1] 4085 proc2 d_nnz = [1,1] and o_nnz = [4,4] 4086 .ve 4087 Here the space allocated is sum of all the above values i.e 34, and 4088 hence pre-allocation is perfect. 4089 4090 Level: intermediate 4091 4092 Notes: 4093 If the *_nnz parameter is given then the *_nz parameter is ignored 4094 4095 The `MATAIJ` format, also called compressed row storage (CSR), is compatible with standard Fortran 4096 storage. The stored row and column indices begin with zero. 4097 See [Sparse Matrices](sec_matsparse) for details. 4098 4099 The parallel matrix is partitioned such that the first m0 rows belong to 4100 process 0, the next m1 rows belong to process 1, the next m2 rows belong 4101 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding to
   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
   first row that belongs to the processor, r2 is the last row belonging to
   this processor, and c1-c2 is the range of indices of the local part of a
   vector suitable for applying the matrix to. This is an mxn matrix. In the
   common case of a square matrix, the row and column ranges are the same and
   the DIAGONAL part is also square. The remaining portion of the local
   submatrix (mxN) constitute the OFF-DIAGONAL portion.

   If `o_nnz` and `d_nnz` are specified, then `o_nz` and `d_nz` are ignored.

   You can call `MatGetInfo()` to get information on how effective the preallocation was;
   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
   You can also run with the option `-info` and look for messages with the string
   malloc in them to see if additional memory allocation was needed.

.seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MATMPIAIJ`, `MATAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`,
          `MatGetInfo()`, `PetscSplitOwnership()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
@*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidType(B, 1);
  /* dispatch to the type-specific implementation (e.g. MPIAIJ, MPIAIJCUSPARSE); a no-op if the type does not provide one */
  PetscTryMethod(B, "MatMPIAIJSetPreallocation_C", (Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, d_nz, d_nnz, o_nz, o_nnz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatCreateMPIAIJWithArrays - creates a `MATMPIAIJ` matrix using arrays that contain in standard
  CSR format for the local rows.
4135 4136 Collective 4137 4138 Input Parameters: 4139 + comm - MPI communicator 4140 . m - number of local rows (Cannot be `PETSC_DECIDE`) 4141 . n - This value should be the same as the local size used in creating the 4142 x vector for the matrix-vector product $ y = Ax$. (or `PETSC_DECIDE` to have 4143 calculated if `N` is given) For square matrices n is almost always `m`. 4144 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given) 4145 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given) 4146 . i - row indices (of length m+1); that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 4147 . j - global column indices 4148 - a - optional matrix values 4149 4150 Output Parameter: 4151 . mat - the matrix 4152 4153 Level: intermediate 4154 4155 Notes: 4156 The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc; 4157 thus you CANNOT change the matrix entries by changing the values of `a[]` after you have 4158 called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays. 4159 4160 The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array. 4161 4162 Once you have created the matrix you can update it with new numerical values using `MatUpdateMPIAIJWithArray()` 4163 4164 If you do **not** use `MatUpdateMPIAIJWithArray()`, the column indices in `j` do not need to be sorted. If you will use 4165 `MatUpdateMPIAIJWithArrays()`, the column indices **must** be sorted. 

   The format which is used for the sparse matrix input, is equivalent to a
   row-major ordering, i.e., for the following matrix, the input data expected is
   as shown
.vb
        1 0 0
        2 0 3     P0
       -------
        4 5 6     P1

     Process0 [P0] rows_owned=[0,1]
        i =  {0,1,3}  [size = nrow+1  = 2+1]
        j =  {0,0,2}  [size = 3]
        v =  {1,2,3}  [size = 3]

     Process1 [P1] rows_owned=[2]
        i =  {0,3}    [size = nrow+1  = 1+1]
        j =  {0,1,2}  [size = 3]
        v =  {4,5,6}  [size = 3]
.ve

.seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
          `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
@*/
PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
{
  PetscFunctionBegin;
  /* i may be NULL on a rank that owns no rows; otherwise CSR requires i[0] == 0 */
  PetscCheck(!i || !i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatSetSizes(*mat, m, n, M, N));
  /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */
  PetscCall(MatSetType(*mat, MATMPIAIJ));
  /* the CSR arrays are copied into the internal storage; the caller keeps ownership of i, j, a */
  PetscCall(MatMPIAIJSetPreallocationCSR(*mat, i, j, a));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatUpdateMPIAIJWithArrays - updates a `MATMPIAIJ` matrix using arrays that contain in standard
  CSR format for the local rows.
Only the numerical values are updated the other arrays must be identical to what was passed 4206 from `MatCreateMPIAIJWithArrays()` 4207 4208 Deprecated: Use `MatUpdateMPIAIJWithArray()` 4209 4210 Collective 4211 4212 Input Parameters: 4213 + mat - the matrix 4214 . m - number of local rows (Cannot be `PETSC_DECIDE`) 4215 . n - This value should be the same as the local size used in creating the 4216 x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have 4217 calculated if N is given) For square matrices n is almost always m. 4218 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 4219 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 4220 . Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix 4221 . J - column indices 4222 - v - matrix values 4223 4224 Level: deprecated 4225 4226 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4227 `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4228 @*/ 4229 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[]) 4230 { 4231 PetscInt nnz, i; 4232 PetscBool nooffprocentries; 4233 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)mat->data; 4234 Mat_SeqAIJ *Ad = (Mat_SeqAIJ *)Aij->A->data; 4235 PetscScalar *ad, *ao; 4236 PetscInt ldi, Iii, md; 4237 const PetscInt *Adi = Ad->i; 4238 PetscInt *ld = Aij->ld; 4239 4240 PetscFunctionBegin; 4241 PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 4242 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4243 PetscCheck(m == mat->rmap->n, 
PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()"); 4244 PetscCheck(n == mat->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()"); 4245 4246 PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad)); 4247 PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao)); 4248 4249 for (i = 0; i < m; i++) { 4250 if (PetscDefined(USE_DEBUG)) { 4251 for (PetscInt j = Ii[i] + 1; j < Ii[i + 1]; ++j) { 4252 PetscCheck(J[j] >= J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is not sorted", j - Ii[i], J[j], i); 4253 PetscCheck(J[j] != J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is identical to previous entry", j - Ii[i], J[j], i); 4254 } 4255 } 4256 nnz = Ii[i + 1] - Ii[i]; 4257 Iii = Ii[i]; 4258 ldi = ld[i]; 4259 md = Adi[i + 1] - Adi[i]; 4260 PetscCall(PetscArraycpy(ao, v + Iii, ldi)); 4261 PetscCall(PetscArraycpy(ad, v + Iii + ldi, md)); 4262 PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md)); 4263 ad += md; 4264 ao += nnz - md; 4265 } 4266 nooffprocentries = mat->nooffprocentries; 4267 mat->nooffprocentries = PETSC_TRUE; 4268 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad)); 4269 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao)); 4270 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A)); 4271 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B)); 4272 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 4273 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 4274 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 4275 mat->nooffprocentries = nooffprocentries; 4276 PetscFunctionReturn(PETSC_SUCCESS); 4277 } 4278 4279 /*@ 4280 MatUpdateMPIAIJWithArray - updates an `MATMPIAIJ` matrix using an array that contains the 
nonzero values 4281 4282 Collective 4283 4284 Input Parameters: 4285 + mat - the matrix 4286 - v - matrix values, stored by row 4287 4288 Level: intermediate 4289 4290 Notes: 4291 The matrix must have been obtained with `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` 4292 4293 The column indices in the call to `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` must have been sorted for this call to work correctly 4294 4295 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4296 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4297 @*/ 4298 PetscErrorCode MatUpdateMPIAIJWithArray(Mat mat, const PetscScalar v[]) 4299 { 4300 PetscInt nnz, i, m; 4301 PetscBool nooffprocentries; 4302 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)mat->data; 4303 Mat_SeqAIJ *Ad = (Mat_SeqAIJ *)Aij->A->data; 4304 Mat_SeqAIJ *Ao = (Mat_SeqAIJ *)Aij->B->data; 4305 PetscScalar *ad, *ao; 4306 const PetscInt *Adi = Ad->i, *Adj = Ao->i; 4307 PetscInt ldi, Iii, md; 4308 PetscInt *ld = Aij->ld; 4309 4310 PetscFunctionBegin; 4311 m = mat->rmap->n; 4312 4313 PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad)); 4314 PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao)); 4315 Iii = 0; 4316 for (i = 0; i < m; i++) { 4317 nnz = Adi[i + 1] - Adi[i] + Adj[i + 1] - Adj[i]; 4318 ldi = ld[i]; 4319 md = Adi[i + 1] - Adi[i]; 4320 PetscCall(PetscArraycpy(ad, v + Iii + ldi, md)); 4321 ad += md; 4322 if (ao) { 4323 PetscCall(PetscArraycpy(ao, v + Iii, ldi)); 4324 PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md)); 4325 ao += nnz - md; 4326 } 4327 Iii += nnz; 4328 } 4329 nooffprocentries = mat->nooffprocentries; 4330 mat->nooffprocentries = PETSC_TRUE; 4331 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad)); 4332 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao)); 4333 
PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A)); 4334 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B)); 4335 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 4336 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 4337 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 4338 mat->nooffprocentries = nooffprocentries; 4339 PetscFunctionReturn(PETSC_SUCCESS); 4340 } 4341 4342 /*@ 4343 MatCreateAIJ - Creates a sparse parallel matrix in `MATAIJ` format 4344 (the default parallel PETSc format). For good matrix assembly performance 4345 the user should preallocate the matrix storage by setting the parameters 4346 `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). 4347 4348 Collective 4349 4350 Input Parameters: 4351 + comm - MPI communicator 4352 . m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given) 4353 This value should be the same as the local size used in creating the 4354 y vector for the matrix-vector product y = Ax. 4355 . n - This value should be the same as the local size used in creating the 4356 x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have 4357 calculated if N is given) For square matrices n is almost always m. 4358 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 4359 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 4360 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4361 (same value is used for all local rows) 4362 . d_nnz - array containing the number of nonzeros in the various rows of the 4363 DIAGONAL portion of the local submatrix (possibly different for each row) 4364 or `NULL`, if `d_nz` is used to specify the nonzero structure. 4365 The size of this array is equal to the number of local rows, i.e 'm'. 4366 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4367 submatrix (same value is used for all local rows). 
4368 - o_nnz - array containing the number of nonzeros in the various rows of the 4369 OFF-DIAGONAL portion of the local submatrix (possibly different for 4370 each row) or `NULL`, if `o_nz` is used to specify the nonzero 4371 structure. The size of this array is equal to the number 4372 of local rows, i.e 'm'. 4373 4374 Output Parameter: 4375 . A - the matrix 4376 4377 Options Database Keys: 4378 + -mat_no_inode - Do not use inodes 4379 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4380 - -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in `MatMult()` of sparse parallel matrices. 4381 See viewer types in manual of `MatView()`. Of them, ascii_matlab, draw or binary cause the `VecScatter` 4382 to be viewed as a matrix. Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one `MatMult()` call. 4383 4384 Level: intermediate 4385 4386 Notes: 4387 It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, 4388 MatXXXXSetPreallocation() paradigm instead of this routine directly. 4389 [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`] 4390 4391 If the *_nnz parameter is given then the *_nz parameter is ignored 4392 4393 The `m`,`n`,`M`,`N` parameters specify the size of the matrix, and its partitioning across 4394 processors, while `d_nz`,`d_nnz`,`o_nz`,`o_nnz` parameters specify the approximate 4395 storage requirements for this matrix. 4396 4397 If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one 4398 processor than it must be used on all processors that share the object for 4399 that argument. 4400 4401 If `m` and `n` are not `PETSC_DECIDE`, then the values determine the `PetscLayout` of the matrix and the ranges returned by 4402 `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, and `MatGetOwnershipRangesColumn()`. 
4403 4404 The user MUST specify either the local or global matrix dimensions 4405 (possibly both). 4406 4407 The parallel matrix is partitioned across processors such that the 4408 first `m0` rows belong to process 0, the next `m1` rows belong to 4409 process 1, the next `m2` rows belong to process 2, etc., where 4410 `m0`, `m1`, `m2`... are the input parameter `m` on each MPI process. I.e., each MPI process stores 4411 values corresponding to [m x N] submatrix. 4412 4413 The columns are logically partitioned with the n0 columns belonging 4414 to 0th partition, the next n1 columns belonging to the next 4415 partition etc.. where n0,n1,n2... are the input parameter 'n'. 4416 4417 The DIAGONAL portion of the local submatrix on any given processor 4418 is the submatrix corresponding to the rows and columns m,n 4419 corresponding to the given processor. i.e diagonal matrix on 4420 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4421 etc. The remaining portion of the local submatrix [m x (N-n)] 4422 constitute the OFF-DIAGONAL portion. The example below better 4423 illustrates this concept. The two matrices, the DIAGONAL portion and 4424 the OFF-DIAGONAL portion are each stored as `MATSEQAIJ` matrices. 4425 4426 For a square global matrix we define each processor's diagonal portion 4427 to be its local rows and the corresponding columns (a square submatrix); 4428 each processor's off-diagonal portion encompasses the remainder of the 4429 local matrix (a rectangular submatrix). 4430 4431 If `o_nnz`, `d_nnz` are specified, then `o_nz`, and `d_nz` are ignored. 4432 4433 When calling this routine with a single process communicator, a matrix of 4434 type `MATSEQAIJ` is returned. 
If a matrix of type `MATMPIAIJ` is desired for this 4435 type of communicator, use the construction mechanism 4436 .vb 4437 MatCreate(..., &A); 4438 MatSetType(A, MATMPIAIJ); 4439 MatSetSizes(A, m, n, M, N); 4440 MatMPIAIJSetPreallocation(A, ...); 4441 .ve 4442 4443 By default, this format uses inodes (identical nodes) when possible. 4444 We search for consecutive rows with the same nonzero structure, thereby 4445 reusing matrix information to achieve increased efficiency. 4446 4447 Example Usage: 4448 Consider the following 8x8 matrix with 34 non-zero values, that is 4449 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4450 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4451 as follows 4452 4453 .vb 4454 1 2 0 | 0 3 0 | 0 4 4455 Proc0 0 5 6 | 7 0 0 | 8 0 4456 9 0 10 | 11 0 0 | 12 0 4457 ------------------------------------- 4458 13 0 14 | 15 16 17 | 0 0 4459 Proc1 0 18 0 | 19 20 21 | 0 0 4460 0 0 0 | 22 23 0 | 24 0 4461 ------------------------------------- 4462 Proc2 25 26 27 | 0 0 28 | 29 0 4463 30 0 0 | 31 32 33 | 0 34 4464 .ve 4465 4466 This can be represented as a collection of submatrices as 4467 4468 .vb 4469 A B C 4470 D E F 4471 G H I 4472 .ve 4473 4474 Where the submatrices A,B,C are owned by proc0, D,E,F are 4475 owned by proc1, G,H,I are owned by proc2. 4476 4477 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4478 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4479 The 'M','N' parameters are 8,8, and have the same values on all procs. 4480 4481 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4482 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4483 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4484 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4485 part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ` 4486 matrix, and [DF] as another SeqAIJ matrix. 

   When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
   allocated for every row of the local DIAGONAL submatrix, and `o_nz`
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose `d_nz` and `o_nz` is to use the maximum number of nonzeros over
   the local rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of `d_nz`,`o_nz` are
.vb
     proc0  dnz = 2, o_nz = 2
     proc1  dnz = 3, o_nz = 2
     proc2  dnz = 1, o_nz = 4
.ve
   We are allocating m*(`d_nz`+`o_nz`) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2. i.e we are using 12+15+10=37 storage locations to store
   34 values.

   When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for `d_nnz`, `o_nnz` are
.vb
     proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2 d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is sum of all the above values i.e 34, and
   hence pre-allocation is perfect.

.seealso: [](ch_matrices), `Mat`, [Sparse Matrix Creation](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
          `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()`, `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`,
          `MatGetOwnershipRangesColumn()`, `PetscLayout`
@*/
PetscErrorCode MatCreateAIJ(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
{
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCall(MatCreate(comm, A));
  PetscCall(MatSetSizes(*A, m, n, M, N));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (size > 1) {
    PetscCall(MatSetType(*A, MATMPIAIJ));
    PetscCall(MatMPIAIJSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz));
  } else {
    /* on a single-process communicator a MATSEQAIJ matrix is created instead (see the manual page Notes) */
    PetscCall(MatSetType(*A, MATSEQAIJ));
    PetscCall(MatSeqAIJSetPreallocation(*A, d_nz, d_nnz));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatMPIAIJGetSeqAIJ - Returns the local pieces of this distributed matrix

  Not Collective

  Input Parameter:
. A - The `MATMPIAIJ` matrix

  Output Parameters:
+ Ad     - The local diagonal block as a `MATSEQAIJ` matrix
. Ao     - The local off-diagonal block as a `MATSEQAIJ` matrix
- colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix

  Level: intermediate

  Note:
  The rows in `Ad` and `Ao` are in [0, Nr), where Nr is the number of local rows on this process. The columns
  in `Ad` are in [0, Nc) where Nc is the number of local columns. The columns are `Ao` are in [0, Nco), where Nco is
  the number of nonzero columns in the local off-diagonal piece of the matrix `A`. The array colmap maps these
  local column numbers to global column numbers in the original matrix.

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATSEQAIJ`
@*/
PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[])
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  PetscBool   flg;

  PetscFunctionBegin;
  /* prefix match so derived types whose names begin with "mpiaij" are accepted as well */
  PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &flg));
  PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIAIJ matrix as input");
  if (Ad) *Ad = a->A;
  if (Ao) *Ao = a->B;
  if (colmap) *colmap = a->garray;
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
{
  PetscInt     m, N, i, rstart, nnz, Ii;
  PetscInt    *indx;
  PetscScalar *values;
  MatType      rootType;

  PetscFunctionBegin;
  PetscCall(MatGetSize(inmat, &m, &N));
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    PetscInt *dnz, *onz, sum, bs, cbs;

    if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N));
    /* Check sum(n) = N */
    PetscCallMPI(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm));
    PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N);

    /* prefix sum of local row counts gives this rank's first global row */
    PetscCallMPI(MPI_Scan(&m, &rstart, 1, MPIU_INT, MPI_SUM, comm));
    rstart -= m;

    MatPreallocateBegin(comm, m, n, dnz, onz);
    for (i = 0; i < m; i++) {
      PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, NULL));
      PetscCall(MatPreallocateSet(i + rstart, nnz, indx, dnz, onz));
      PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, NULL));
    }

    PetscCall(MatCreate(comm, outmat));
    PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
    PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
4604 PetscCall(MatSetBlockSizes(*outmat, bs, cbs)); 4605 PetscCall(MatGetRootType_Private(inmat, &rootType)); 4606 PetscCall(MatSetType(*outmat, rootType)); 4607 PetscCall(MatSeqAIJSetPreallocation(*outmat, 0, dnz)); 4608 PetscCall(MatMPIAIJSetPreallocation(*outmat, 0, dnz, 0, onz)); 4609 MatPreallocateEnd(dnz, onz); 4610 PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 4611 } 4612 4613 /* numeric phase */ 4614 PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL)); 4615 for (i = 0; i < m; i++) { 4616 PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, &values)); 4617 Ii = i + rstart; 4618 PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES)); 4619 PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, &values)); 4620 } 4621 PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY)); 4622 PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY)); 4623 PetscFunctionReturn(PETSC_SUCCESS); 4624 } 4625 4626 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void **data) 4627 { 4628 Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)*data; 4629 4630 PetscFunctionBegin; 4631 if (!merge) PetscFunctionReturn(PETSC_SUCCESS); 4632 PetscCall(PetscFree(merge->id_r)); 4633 PetscCall(PetscFree(merge->len_s)); 4634 PetscCall(PetscFree(merge->len_r)); 4635 PetscCall(PetscFree(merge->bi)); 4636 PetscCall(PetscFree(merge->bj)); 4637 PetscCall(PetscFree(merge->buf_ri[0])); 4638 PetscCall(PetscFree(merge->buf_ri)); 4639 PetscCall(PetscFree(merge->buf_rj[0])); 4640 PetscCall(PetscFree(merge->buf_rj)); 4641 PetscCall(PetscFree(merge->coi)); 4642 PetscCall(PetscFree(merge->coj)); 4643 PetscCall(PetscFree(merge->owners_co)); 4644 PetscCall(PetscLayoutDestroy(&merge->rowmap)); 4645 PetscCall(PetscFree(merge)); 4646 PetscFunctionReturn(PETSC_SUCCESS); 4647 } 4648 4649 #include <../src/mat/utils/freespace.h> 4650 #include <petscbt.h> 4651 4652 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat, Mat mpimat) 4653 { 4654 MPI_Comm comm; 4655 Mat_SeqAIJ 
*a = (Mat_SeqAIJ *)seqmat->data; 4656 PetscMPIInt size, rank, taga, *len_s; 4657 PetscInt N = mpimat->cmap->N, i, j, *owners, *ai = a->i, *aj, m; 4658 PetscMPIInt proc, k; 4659 PetscInt **buf_ri, **buf_rj; 4660 PetscInt anzi, *bj_i, *bi, *bj, arow, bnzi, nextaj; 4661 PetscInt nrows, **buf_ri_k, **nextrow, **nextai; 4662 MPI_Request *s_waits, *r_waits; 4663 MPI_Status *status; 4664 const MatScalar *aa, *a_a; 4665 MatScalar **abuf_r, *ba_i; 4666 Mat_Merge_SeqsToMPI *merge; 4667 PetscContainer container; 4668 4669 PetscFunctionBegin; 4670 PetscCall(PetscObjectGetComm((PetscObject)mpimat, &comm)); 4671 PetscCall(PetscLogEventBegin(MAT_Seqstompinum, seqmat, 0, 0, 0)); 4672 4673 PetscCallMPI(MPI_Comm_size(comm, &size)); 4674 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 4675 4676 PetscCall(PetscObjectQuery((PetscObject)mpimat, "MatMergeSeqsToMPI", (PetscObject *)&container)); 4677 PetscCheck(container, PetscObjectComm((PetscObject)mpimat), PETSC_ERR_PLIB, "Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic"); 4678 PetscCall(PetscContainerGetPointer(container, (void **)&merge)); 4679 PetscCall(MatSeqAIJGetArrayRead(seqmat, &a_a)); 4680 aa = a_a; 4681 4682 bi = merge->bi; 4683 bj = merge->bj; 4684 buf_ri = merge->buf_ri; 4685 buf_rj = merge->buf_rj; 4686 4687 PetscCall(PetscMalloc1(size, &status)); 4688 owners = merge->rowmap->range; 4689 len_s = merge->len_s; 4690 4691 /* send and recv matrix values */ 4692 PetscCall(PetscObjectGetNewTag((PetscObject)mpimat, &taga)); 4693 PetscCall(PetscPostIrecvScalar(comm, taga, merge->nrecv, merge->id_r, merge->len_r, &abuf_r, &r_waits)); 4694 4695 PetscCall(PetscMalloc1(merge->nsend + 1, &s_waits)); 4696 for (proc = 0, k = 0; proc < size; proc++) { 4697 if (!len_s[proc]) continue; 4698 i = owners[proc]; 4699 PetscCallMPI(MPIU_Isend(aa + ai[i], len_s[proc], MPIU_MATSCALAR, proc, taga, comm, s_waits + k)); 4700 k++; 4701 } 4702 4703 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, r_waits, status)); 4704 if (merge->nsend) 
PetscCallMPI(MPI_Waitall(merge->nsend, s_waits, status)); 4705 PetscCall(PetscFree(status)); 4706 4707 PetscCall(PetscFree(s_waits)); 4708 PetscCall(PetscFree(r_waits)); 4709 4710 /* insert mat values of mpimat */ 4711 PetscCall(PetscMalloc1(N, &ba_i)); 4712 PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai)); 4713 4714 for (k = 0; k < merge->nrecv; k++) { 4715 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4716 nrows = *buf_ri_k[k]; 4717 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4718 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4719 } 4720 4721 /* set values of ba */ 4722 m = merge->rowmap->n; 4723 for (i = 0; i < m; i++) { 4724 arow = owners[rank] + i; 4725 bj_i = bj + bi[i]; /* col indices of the i-th row of mpimat */ 4726 bnzi = bi[i + 1] - bi[i]; 4727 PetscCall(PetscArrayzero(ba_i, bnzi)); 4728 4729 /* add local non-zero vals of this proc's seqmat into ba */ 4730 anzi = ai[arow + 1] - ai[arow]; 4731 aj = a->j + ai[arow]; 4732 aa = a_a + ai[arow]; 4733 nextaj = 0; 4734 for (j = 0; nextaj < anzi; j++) { 4735 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4736 ba_i[j] += aa[nextaj++]; 4737 } 4738 } 4739 4740 /* add received vals into ba */ 4741 for (k = 0; k < merge->nrecv; k++) { /* k-th received message */ 4742 /* i-th row */ 4743 if (i == *nextrow[k]) { 4744 anzi = *(nextai[k] + 1) - *nextai[k]; 4745 aj = buf_rj[k] + *nextai[k]; 4746 aa = abuf_r[k] + *nextai[k]; 4747 nextaj = 0; 4748 for (j = 0; nextaj < anzi; j++) { 4749 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4750 ba_i[j] += aa[nextaj++]; 4751 } 4752 } 4753 nextrow[k]++; 4754 nextai[k]++; 4755 } 4756 } 4757 PetscCall(MatSetValues(mpimat, 1, &arow, bnzi, bj_i, ba_i, INSERT_VALUES)); 4758 } 4759 PetscCall(MatSeqAIJRestoreArrayRead(seqmat, &a_a)); 4760 PetscCall(MatAssemblyBegin(mpimat, MAT_FINAL_ASSEMBLY)); 4761 
PetscCall(MatAssemblyEnd(mpimat, MAT_FINAL_ASSEMBLY)); 4762 4763 PetscCall(PetscFree(abuf_r[0])); 4764 PetscCall(PetscFree(abuf_r)); 4765 PetscCall(PetscFree(ba_i)); 4766 PetscCall(PetscFree3(buf_ri_k, nextrow, nextai)); 4767 PetscCall(PetscLogEventEnd(MAT_Seqstompinum, seqmat, 0, 0, 0)); 4768 PetscFunctionReturn(PETSC_SUCCESS); 4769 } 4770 4771 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, Mat *mpimat) 4772 { 4773 Mat B_mpi; 4774 Mat_SeqAIJ *a = (Mat_SeqAIJ *)seqmat->data; 4775 PetscMPIInt size, rank, tagi, tagj, *len_s, *len_si, *len_ri; 4776 PetscInt **buf_rj, **buf_ri, **buf_ri_k; 4777 PetscInt M = seqmat->rmap->n, N = seqmat->cmap->n, i, *owners, *ai = a->i, *aj = a->j; 4778 PetscInt len, *dnz, *onz, bs, cbs; 4779 PetscInt k, anzi, *bi, *bj, *lnk, nlnk, arow, bnzi; 4780 PetscInt nrows, *buf_s, *buf_si, *buf_si_i, **nextrow, **nextai; 4781 MPI_Request *si_waits, *sj_waits, *ri_waits, *rj_waits; 4782 MPI_Status *status; 4783 PetscFreeSpaceList free_space = NULL, current_space = NULL; 4784 PetscBT lnkbt; 4785 Mat_Merge_SeqsToMPI *merge; 4786 PetscContainer container; 4787 4788 PetscFunctionBegin; 4789 PetscCall(PetscLogEventBegin(MAT_Seqstompisym, seqmat, 0, 0, 0)); 4790 4791 /* make sure it is a PETSc comm */ 4792 PetscCall(PetscCommDuplicate(comm, &comm, NULL)); 4793 PetscCallMPI(MPI_Comm_size(comm, &size)); 4794 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 4795 4796 PetscCall(PetscNew(&merge)); 4797 PetscCall(PetscMalloc1(size, &status)); 4798 4799 /* determine row ownership */ 4800 PetscCall(PetscLayoutCreate(comm, &merge->rowmap)); 4801 PetscCall(PetscLayoutSetLocalSize(merge->rowmap, m)); 4802 PetscCall(PetscLayoutSetSize(merge->rowmap, M)); 4803 PetscCall(PetscLayoutSetBlockSize(merge->rowmap, 1)); 4804 PetscCall(PetscLayoutSetUp(merge->rowmap)); 4805 PetscCall(PetscMalloc1(size, &len_si)); 4806 PetscCall(PetscMalloc1(size, &merge->len_s)); 4807 4808 m = merge->rowmap->n; 4809 owners = merge->rowmap->range; 
4810 4811 /* determine the number of messages to send, their lengths */ 4812 len_s = merge->len_s; 4813 4814 len = 0; /* length of buf_si[] */ 4815 merge->nsend = 0; 4816 for (PetscMPIInt proc = 0; proc < size; proc++) { 4817 len_si[proc] = 0; 4818 if (proc == rank) { 4819 len_s[proc] = 0; 4820 } else { 4821 PetscCall(PetscMPIIntCast(owners[proc + 1] - owners[proc] + 1, &len_si[proc])); 4822 PetscCall(PetscMPIIntCast(ai[owners[proc + 1]] - ai[owners[proc]], &len_s[proc])); /* num of rows to be sent to [proc] */ 4823 } 4824 if (len_s[proc]) { 4825 merge->nsend++; 4826 nrows = 0; 4827 for (i = owners[proc]; i < owners[proc + 1]; i++) { 4828 if (ai[i + 1] > ai[i]) nrows++; 4829 } 4830 PetscCall(PetscMPIIntCast(2 * (nrows + 1), &len_si[proc])); 4831 len += len_si[proc]; 4832 } 4833 } 4834 4835 /* determine the number and length of messages to receive for ij-structure */ 4836 PetscCall(PetscGatherNumberOfMessages(comm, NULL, len_s, &merge->nrecv)); 4837 PetscCall(PetscGatherMessageLengths2(comm, merge->nsend, merge->nrecv, len_s, len_si, &merge->id_r, &merge->len_r, &len_ri)); 4838 4839 /* post the Irecv of j-structure */ 4840 PetscCall(PetscCommGetNewTag(comm, &tagj)); 4841 PetscCall(PetscPostIrecvInt(comm, tagj, merge->nrecv, merge->id_r, merge->len_r, &buf_rj, &rj_waits)); 4842 4843 /* post the Isend of j-structure */ 4844 PetscCall(PetscMalloc2(merge->nsend, &si_waits, merge->nsend, &sj_waits)); 4845 4846 for (PetscMPIInt proc = 0, k = 0; proc < size; proc++) { 4847 if (!len_s[proc]) continue; 4848 i = owners[proc]; 4849 PetscCallMPI(MPIU_Isend(aj + ai[i], len_s[proc], MPIU_INT, proc, tagj, comm, sj_waits + k)); 4850 k++; 4851 } 4852 4853 /* receives and sends of j-structure are complete */ 4854 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, rj_waits, status)); 4855 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, sj_waits, status)); 4856 4857 /* send and recv i-structure */ 4858 PetscCall(PetscCommGetNewTag(comm, &tagi)); 4859 
PetscCall(PetscPostIrecvInt(comm, tagi, merge->nrecv, merge->id_r, len_ri, &buf_ri, &ri_waits)); 4860 4861 PetscCall(PetscMalloc1(len + 1, &buf_s)); 4862 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4863 for (PetscMPIInt proc = 0, k = 0; proc < size; proc++) { 4864 if (!len_s[proc]) continue; 4865 /* form outgoing message for i-structure: 4866 buf_si[0]: nrows to be sent 4867 [1:nrows]: row index (global) 4868 [nrows+1:2*nrows+1]: i-structure index 4869 */ 4870 nrows = len_si[proc] / 2 - 1; 4871 buf_si_i = buf_si + nrows + 1; 4872 buf_si[0] = nrows; 4873 buf_si_i[0] = 0; 4874 nrows = 0; 4875 for (i = owners[proc]; i < owners[proc + 1]; i++) { 4876 anzi = ai[i + 1] - ai[i]; 4877 if (anzi) { 4878 buf_si_i[nrows + 1] = buf_si_i[nrows] + anzi; /* i-structure */ 4879 buf_si[nrows + 1] = i - owners[proc]; /* local row index */ 4880 nrows++; 4881 } 4882 } 4883 PetscCallMPI(MPIU_Isend(buf_si, len_si[proc], MPIU_INT, proc, tagi, comm, si_waits + k)); 4884 k++; 4885 buf_si += len_si[proc]; 4886 } 4887 4888 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, ri_waits, status)); 4889 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, si_waits, status)); 4890 4891 PetscCall(PetscInfo(seqmat, "nsend: %d, nrecv: %d\n", merge->nsend, merge->nrecv)); 4892 for (i = 0; i < merge->nrecv; i++) PetscCall(PetscInfo(seqmat, "recv len_ri=%d, len_rj=%d from [%d]\n", len_ri[i], merge->len_r[i], merge->id_r[i])); 4893 4894 PetscCall(PetscFree(len_si)); 4895 PetscCall(PetscFree(len_ri)); 4896 PetscCall(PetscFree(rj_waits)); 4897 PetscCall(PetscFree2(si_waits, sj_waits)); 4898 PetscCall(PetscFree(ri_waits)); 4899 PetscCall(PetscFree(buf_s)); 4900 PetscCall(PetscFree(status)); 4901 4902 /* compute a local seq matrix in each processor */ 4903 /* allocate bi array and free space for accumulating nonzero column info */ 4904 PetscCall(PetscMalloc1(m + 1, &bi)); 4905 bi[0] = 0; 4906 4907 /* create and initialize a linked list */ 4908 nlnk = N + 1; 4909 
PetscCall(PetscLLCreate(N, N, nlnk, lnk, lnkbt)); 4910 4911 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4912 len = ai[owners[rank + 1]] - ai[owners[rank]]; 4913 PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2, len) + 1, &free_space)); 4914 4915 current_space = free_space; 4916 4917 /* determine symbolic info for each local row */ 4918 PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai)); 4919 4920 for (k = 0; k < merge->nrecv; k++) { 4921 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4922 nrows = *buf_ri_k[k]; 4923 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4924 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4925 } 4926 4927 MatPreallocateBegin(comm, m, n, dnz, onz); 4928 len = 0; 4929 for (i = 0; i < m; i++) { 4930 bnzi = 0; 4931 /* add local non-zero cols of this proc's seqmat into lnk */ 4932 arow = owners[rank] + i; 4933 anzi = ai[arow + 1] - ai[arow]; 4934 aj = a->j + ai[arow]; 4935 PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt)); 4936 bnzi += nlnk; 4937 /* add received col data into lnk */ 4938 for (k = 0; k < merge->nrecv; k++) { /* k-th received message */ 4939 if (i == *nextrow[k]) { /* i-th row */ 4940 anzi = *(nextai[k] + 1) - *nextai[k]; 4941 aj = buf_rj[k] + *nextai[k]; 4942 PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt)); 4943 bnzi += nlnk; 4944 nextrow[k]++; 4945 nextai[k]++; 4946 } 4947 } 4948 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4949 4950 /* if free space is not available, make more free space */ 4951 if (current_space->local_remaining < bnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi, current_space->total_array_size), ¤t_space)); 4952 /* copy data into free space, then initialize lnk */ 4953 PetscCall(PetscLLClean(N, N, bnzi, lnk, current_space->array, lnkbt)); 4954 PetscCall(MatPreallocateSet(i + owners[rank], bnzi, 
current_space->array, dnz, onz)); 4955 4956 current_space->array += bnzi; 4957 current_space->local_used += bnzi; 4958 current_space->local_remaining -= bnzi; 4959 4960 bi[i + 1] = bi[i] + bnzi; 4961 } 4962 4963 PetscCall(PetscFree3(buf_ri_k, nextrow, nextai)); 4964 4965 PetscCall(PetscMalloc1(bi[m] + 1, &bj)); 4966 PetscCall(PetscFreeSpaceContiguous(&free_space, bj)); 4967 PetscCall(PetscLLDestroy(lnk, lnkbt)); 4968 4969 /* create symbolic parallel matrix B_mpi */ 4970 PetscCall(MatGetBlockSizes(seqmat, &bs, &cbs)); 4971 PetscCall(MatCreate(comm, &B_mpi)); 4972 if (n == PETSC_DECIDE) { 4973 PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, N)); 4974 } else { 4975 PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 4976 } 4977 PetscCall(MatSetBlockSizes(B_mpi, bs, cbs)); 4978 PetscCall(MatSetType(B_mpi, MATMPIAIJ)); 4979 PetscCall(MatMPIAIJSetPreallocation(B_mpi, 0, dnz, 0, onz)); 4980 MatPreallocateEnd(dnz, onz); 4981 PetscCall(MatSetOption(B_mpi, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE)); 4982 4983 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4984 B_mpi->assembled = PETSC_FALSE; 4985 merge->bi = bi; 4986 merge->bj = bj; 4987 merge->buf_ri = buf_ri; 4988 merge->buf_rj = buf_rj; 4989 merge->coi = NULL; 4990 merge->coj = NULL; 4991 merge->owners_co = NULL; 4992 4993 PetscCall(PetscCommDestroy(&comm)); 4994 4995 /* attach the supporting struct to B_mpi for reuse */ 4996 PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container)); 4997 PetscCall(PetscContainerSetPointer(container, merge)); 4998 PetscCall(PetscContainerSetCtxDestroy(container, MatDestroy_MPIAIJ_SeqsToMPI)); 4999 PetscCall(PetscObjectCompose((PetscObject)B_mpi, "MatMergeSeqsToMPI", (PetscObject)container)); 5000 PetscCall(PetscContainerDestroy(&container)); 5001 *mpimat = B_mpi; 5002 5003 PetscCall(PetscLogEventEnd(MAT_Seqstompisym, seqmat, 0, 0, 0)); 5004 PetscFunctionReturn(PETSC_SUCCESS); 5005 } 5006 5007 /*@ 5008 
MatCreateMPIAIJSumSeqAIJ - Creates a `MATMPIAIJ` matrix by adding sequential 5009 matrices from each processor 5010 5011 Collective 5012 5013 Input Parameters: 5014 + comm - the communicators the parallel matrix will live on 5015 . seqmat - the input sequential matrices 5016 . m - number of local rows (or `PETSC_DECIDE`) 5017 . n - number of local columns (or `PETSC_DECIDE`) 5018 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5019 5020 Output Parameter: 5021 . mpimat - the parallel matrix generated 5022 5023 Level: advanced 5024 5025 Note: 5026 The dimensions of the sequential matrix in each processor MUST be the same. 5027 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 5028 destroyed when `mpimat` is destroyed. Call `PetscObjectQuery()` to access `seqmat`. 5029 5030 .seealso: [](ch_matrices), `Mat`, `MatCreateAIJ()` 5031 @*/ 5032 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, MatReuse scall, Mat *mpimat) 5033 { 5034 PetscMPIInt size; 5035 5036 PetscFunctionBegin; 5037 PetscCallMPI(MPI_Comm_size(comm, &size)); 5038 if (size == 1) { 5039 PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0)); 5040 if (scall == MAT_INITIAL_MATRIX) { 5041 PetscCall(MatDuplicate(seqmat, MAT_COPY_VALUES, mpimat)); 5042 } else { 5043 PetscCall(MatCopy(seqmat, *mpimat, SAME_NONZERO_PATTERN)); 5044 } 5045 PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0)); 5046 PetscFunctionReturn(PETSC_SUCCESS); 5047 } 5048 PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0)); 5049 if (scall == MAT_INITIAL_MATRIX) PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm, seqmat, m, n, mpimat)); 5050 PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat, *mpimat)); 5051 PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0)); 5052 PetscFunctionReturn(PETSC_SUCCESS); 5053 } 5054 5055 /*@ 5056 MatAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATAIJ` matrix. 
5057 5058 Not Collective 5059 5060 Input Parameter: 5061 . A - the matrix 5062 5063 Output Parameter: 5064 . A_loc - the local sequential matrix generated 5065 5066 Level: developer 5067 5068 Notes: 5069 The matrix is created by taking `A`'s local rows and putting them into a sequential matrix 5070 with `mlocal` rows and `n` columns. Where `mlocal` is obtained with `MatGetLocalSize()` and 5071 `n` is the global column count obtained with `MatGetSize()` 5072 5073 In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix. 5074 5075 For parallel matrices this creates an entirely new matrix. If the matrix is sequential it merely increases the reference count. 5076 5077 Destroy the matrix with `MatDestroy()` 5078 5079 .seealso: [](ch_matrices), `Mat`, `MatMPIAIJGetLocalMat()` 5080 @*/ 5081 PetscErrorCode MatAIJGetLocalMat(Mat A, Mat *A_loc) 5082 { 5083 PetscBool mpi; 5084 5085 PetscFunctionBegin; 5086 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &mpi)); 5087 if (mpi) { 5088 PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, A_loc)); 5089 } else { 5090 *A_loc = A; 5091 PetscCall(PetscObjectReference((PetscObject)*A_loc)); 5092 } 5093 PetscFunctionReturn(PETSC_SUCCESS); 5094 } 5095 5096 /*@ 5097 MatMPIAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix. 5098 5099 Not Collective 5100 5101 Input Parameters: 5102 + A - the matrix 5103 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5104 5105 Output Parameter: 5106 . A_loc - the local sequential matrix generated 5107 5108 Level: developer 5109 5110 Notes: 5111 The matrix is created by taking all `A`'s local rows and putting them into a sequential 5112 matrix with `mlocal` rows and `n` columns.`mlocal` is the row count obtained with 5113 `MatGetLocalSize()` and `n` is the global column count obtained with `MatGetSize()`. 5114 5115 In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix. 
  When `A` is sequential and `MAT_INITIAL_MATRIX` is requested, the matrix returned is the diagonal part of `A` (which contains the entire matrix),
  with its reference count increased by one. Hence changing values of `A_loc` changes `A`. If `MAT_REUSE_MATRIX` is requested on a sequential matrix
  then `MatCopy`(Adiag,*`A_loc`,`SAME_NONZERO_PATTERN`) is called to fill `A_loc`. Thus one can preallocate the appropriate sequential matrix `A_loc`
  and then call this routine with `MAT_REUSE_MATRIX`. In this case, one can modify the values of `A_loc` without affecting the original sequential matrix.

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()`
@*/
PetscErrorCode MatMPIAIJGetLocalMat(Mat A, MatReuse scall, Mat *A_loc)
{
  Mat_MPIAIJ        *mpimat = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ        *mat, *a, *b;
  PetscInt          *ai, *aj, *bi, *bj, *cmap = mpimat->garray; /* garray maps local off-diag columns to global columns */
  const PetscScalar *aa, *ba, *aav, *bav;
  PetscScalar       *ca, *cam;
  PetscMPIInt        size;
  PetscInt           am = A->rmap->n, i, j, k, cstart = A->cmap->rstart;
  PetscInt          *ci, *cj, col, ncols_d, ncols_o, jo;
  PetscBool          match;

  PetscFunctionBegin;
  /* prefix match so MPIAIJ subtypes whose type name begins with "mpiaij" are also accepted */
  PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &match));
  PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    /* one rank: the diagonal block already is the whole matrix */
    if (scall == MAT_INITIAL_MATRIX) {
      PetscCall(PetscObjectReference((PetscObject)mpimat->A));
      *A_loc = mpimat->A;
    } else if (scall == MAT_REUSE_MATRIX) {
      PetscCall(MatCopy(mpimat->A, *A_loc, SAME_NONZERO_PATTERN));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
  a  = (Mat_SeqAIJ *)mpimat->A->data;
  b  = (Mat_SeqAIJ *)mpimat->B->data;
  ai = a->i;
  aj = a->j;
  bi = b->i;
  bj = b->j;
  PetscCall(MatSeqAIJGetArrayRead(mpimat->A, &aav));
  PetscCall(MatSeqAIJGetArrayRead(mpimat->B, &bav));
  /* aa/ba are walking cursors over the value arrays; aav/bav keep the originals for the restore calls */
  aa = aav;
  ba = bav;
  if (scall == MAT_INITIAL_MATRIX) {
    /* row i of the merged matrix holds all entries of row i: off-diag left of the
       diagonal block, then the diagonal block, then off-diag right of it, so the
       resulting row is sorted by global column index */
    PetscCall(PetscMalloc1(1 + am, &ci));
    ci[0] = 0;
    for (i = 0; i < am; i++) ci[i + 1] = ci[i] + (ai[i + 1] - ai[i]) + (bi[i + 1] - bi[i]);
    PetscCall(PetscMalloc1(1 + ci[am], &cj));
    PetscCall(PetscMalloc1(1 + ci[am], &ca));
    k = 0;
    for (i = 0; i < am; i++) {
      ncols_o = bi[i + 1] - bi[i];
      ncols_d = ai[i + 1] - ai[i];
      /* off-diagonal portion of A (global columns before cstart) */
      for (jo = 0; jo < ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        cj[k] = col;
        bj++;
        ca[k++] = *ba++;
      }
      /* diagonal portion of A (shift local indices by cstart to get global columns) */
      for (j = 0; j < ncols_d; j++) {
        cj[k]   = cstart + *aj++;
        ca[k++] = *aa++;
      }
      /* off-diagonal portion of A (remaining columns, at or after cstart + local width) */
      for (j = jo; j < ncols_o; j++) {
        cj[k]   = cmap[*bj++];
        ca[k++] = *ba++;
      }
    }
    /* put together the new matrix */
    PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, A->cmap->N, ci, cj, ca, A_loc));
    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    mat          = (Mat_SeqAIJ *)(*A_loc)->data;
    mat->free_a  = PETSC_TRUE;
    mat->free_ij = PETSC_TRUE;
    mat->nonew   = 0;
  } else if (scall == MAT_REUSE_MATRIX) {
    /* structure already exists: only refill the values, in the same three-phase order as above */
    mat = (Mat_SeqAIJ *)(*A_loc)->data;
    ci  = mat->i;
    cj  = mat->j;
    PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &cam));
    for (i = 0; i < am; i++) {
      /* off-diagonal portion of A */
      ncols_o = bi[i + 1] - bi[i];
      for (jo = 0; jo < ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        *cam++ = *ba++;
        bj++;
      }
      /* diagonal portion of A */
      ncols_d = ai[i + 1] - ai[i];
      for (j = 0; j < ncols_d; j++) *cam++ = *aa++;
      /* off-diagonal portion of A */
      for (j = jo; j < ncols_o; j++) {
        *cam++ = *ba++;
        bj++;
      }
    }
    PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &cam));
  } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
  PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A, &aav));
  PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B, &bav));
  PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMPIAIJGetLocalMatMerge - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into a sequential matrix with
  mlocal rows and n columns.
Where n is the sum of the number of columns of the diagonal and off-diagonal part 5232 5233 Not Collective 5234 5235 Input Parameters: 5236 + A - the matrix 5237 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5238 5239 Output Parameters: 5240 + glob - sequential `IS` with global indices associated with the columns of the local sequential matrix generated (can be `NULL`) 5241 - A_loc - the local sequential matrix generated 5242 5243 Level: developer 5244 5245 Note: 5246 This is different from `MatMPIAIJGetLocalMat()` since the first columns in the returning matrix are those associated with the diagonal 5247 part, then those associated with the off-diagonal part (in its local ordering) 5248 5249 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()` 5250 @*/ 5251 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A, MatReuse scall, IS *glob, Mat *A_loc) 5252 { 5253 Mat Ao, Ad; 5254 const PetscInt *cmap; 5255 PetscMPIInt size; 5256 PetscErrorCode (*f)(Mat, MatReuse, IS *, Mat *); 5257 5258 PetscFunctionBegin; 5259 PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &cmap)); 5260 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 5261 if (size == 1) { 5262 if (scall == MAT_INITIAL_MATRIX) { 5263 PetscCall(PetscObjectReference((PetscObject)Ad)); 5264 *A_loc = Ad; 5265 } else if (scall == MAT_REUSE_MATRIX) { 5266 PetscCall(MatCopy(Ad, *A_loc, SAME_NONZERO_PATTERN)); 5267 } 5268 if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad), Ad->cmap->n, Ad->cmap->rstart, 1, glob)); 5269 PetscFunctionReturn(PETSC_SUCCESS); 5270 } 5271 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJGetLocalMatMerge_C", &f)); 5272 PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0)); 5273 if (f) { 5274 PetscCall((*f)(A, scall, glob, A_loc)); 5275 } else { 5276 Mat_SeqAIJ *a = (Mat_SeqAIJ *)Ad->data; 5277 Mat_SeqAIJ *b = (Mat_SeqAIJ *)Ao->data; 5278 Mat_SeqAIJ *c; 5279 
PetscInt *ai = a->i, *aj = a->j; 5280 PetscInt *bi = b->i, *bj = b->j; 5281 PetscInt *ci, *cj; 5282 const PetscScalar *aa, *ba; 5283 PetscScalar *ca; 5284 PetscInt i, j, am, dn, on; 5285 5286 PetscCall(MatGetLocalSize(Ad, &am, &dn)); 5287 PetscCall(MatGetLocalSize(Ao, NULL, &on)); 5288 PetscCall(MatSeqAIJGetArrayRead(Ad, &aa)); 5289 PetscCall(MatSeqAIJGetArrayRead(Ao, &ba)); 5290 if (scall == MAT_INITIAL_MATRIX) { 5291 PetscInt k; 5292 PetscCall(PetscMalloc1(1 + am, &ci)); 5293 PetscCall(PetscMalloc1(ai[am] + bi[am], &cj)); 5294 PetscCall(PetscMalloc1(ai[am] + bi[am], &ca)); 5295 ci[0] = 0; 5296 for (i = 0, k = 0; i < am; i++) { 5297 const PetscInt ncols_o = bi[i + 1] - bi[i]; 5298 const PetscInt ncols_d = ai[i + 1] - ai[i]; 5299 ci[i + 1] = ci[i] + ncols_o + ncols_d; 5300 /* diagonal portion of A */ 5301 for (j = 0; j < ncols_d; j++, k++) { 5302 cj[k] = *aj++; 5303 ca[k] = *aa++; 5304 } 5305 /* off-diagonal portion of A */ 5306 for (j = 0; j < ncols_o; j++, k++) { 5307 cj[k] = dn + *bj++; 5308 ca[k] = *ba++; 5309 } 5310 } 5311 /* put together the new matrix */ 5312 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, dn + on, ci, cj, ca, A_loc)); 5313 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5314 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5315 c = (Mat_SeqAIJ *)(*A_loc)->data; 5316 c->free_a = PETSC_TRUE; 5317 c->free_ij = PETSC_TRUE; 5318 c->nonew = 0; 5319 PetscCall(MatSetType(*A_loc, ((PetscObject)Ad)->type_name)); 5320 } else if (scall == MAT_REUSE_MATRIX) { 5321 PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &ca)); 5322 for (i = 0; i < am; i++) { 5323 const PetscInt ncols_d = ai[i + 1] - ai[i]; 5324 const PetscInt ncols_o = bi[i + 1] - bi[i]; 5325 /* diagonal portion of A */ 5326 for (j = 0; j < ncols_d; j++) *ca++ = *aa++; 5327 /* off-diagonal portion of A */ 5328 for (j = 0; j < ncols_o; j++) *ca++ = *ba++; 5329 } 5330 PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &ca)); 5331 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall); 5332 PetscCall(MatSeqAIJRestoreArrayRead(Ad, &aa)); 5333 PetscCall(MatSeqAIJRestoreArrayRead(Ao, &aa)); 5334 if (glob) { 5335 PetscInt cst, *gidx; 5336 5337 PetscCall(MatGetOwnershipRangeColumn(A, &cst, NULL)); 5338 PetscCall(PetscMalloc1(dn + on, &gidx)); 5339 for (i = 0; i < dn; i++) gidx[i] = cst + i; 5340 for (i = 0; i < on; i++) gidx[i + dn] = cmap[i]; 5341 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad), dn + on, gidx, PETSC_OWN_POINTER, glob)); 5342 } 5343 } 5344 PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0)); 5345 PetscFunctionReturn(PETSC_SUCCESS); 5346 } 5347 5348 /*@C 5349 MatMPIAIJGetLocalMatCondensed - Creates a `MATSEQAIJ` matrix from an `MATMPIAIJ` matrix by taking all its local rows and NON-ZERO columns 5350 5351 Not Collective 5352 5353 Input Parameters: 5354 + A - the matrix 5355 . scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5356 . row - index set of rows to extract (or `NULL`) 5357 - col - index set of columns to extract (or `NULL`) 5358 5359 Output Parameter: 5360 . 
A_loc - the local sequential matrix generated

  Level: developer

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`
@*/
PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A, MatReuse scall, IS *row, IS *col, Mat *A_loc)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  PetscInt    i, start, end, ncols, nzA, nzB, *cmap, imark, *idx;
  IS          isrowa, iscola;
  Mat        *aloc;
  PetscBool   match;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &match));
  PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
  PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed, A, 0, 0, 0));
  if (!row) {
    /* default row set: all locally owned rows */
    start = A->rmap->rstart;
    end   = A->rmap->rend;
    PetscCall(ISCreateStride(PETSC_COMM_SELF, end - start, start, 1, &isrowa));
  } else {
    isrowa = *row;
  }
  if (!col) {
    /* default column set: every column with a nonzero in the local rows,
       assembled in ascending global order by splicing the off-diagonal
       columns (cmap) around the contiguous diagonal-block columns */
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    PetscCall(PetscMalloc1(nzA + nzB, &idx));
    ncols = 0;
    /* off-diagonal columns before the diagonal block */
    for (i = 0; i < nzB; i++) {
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    /* the diagonal block's own columns */
    for (i = 0; i < nzA; i++) idx[ncols++] = start + i;
    /* remaining off-diagonal columns after the diagonal block */
    for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i];
    PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &iscola));
  } else {
    iscola = *col;
  }
  if (scall != MAT_INITIAL_MATRIX) {
    /* MatCreateSubMatrices() expects an array of matrices when reusing */
    PetscCall(PetscMalloc1(1, &aloc));
    aloc[0] = *A_loc;
  }
  PetscCall(MatCreateSubMatrices(A, 1, &isrowa, &iscola, scall, &aloc));
  if (!col) { /* attach global id of condensed columns */
    PetscCall(PetscObjectCompose((PetscObject)aloc[0], "_petsc_GetLocalMatCondensed_iscol", (PetscObject)iscola));
  }
  *A_loc = aloc[0];
  PetscCall(PetscFree(aloc));
  /* destroy only the index sets created here, not caller-provided ones */
  if (!row) PetscCall(ISDestroy(&isrowa));
  if (!col) PetscCall(ISDestroy(&iscola));
  PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed, A, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
 * Create a sequential AIJ matrix based on row indices. A whole row is extracted once it is matched.
 * Rows could be local or remote. The routine is designed to be scalable in memory so that nothing is based
 * on a global size.
 * */
static PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P, IS rows, Mat *P_oth)
{
  Mat_MPIAIJ            *p  = (Mat_MPIAIJ *)P->data;
  Mat_SeqAIJ            *pd = (Mat_SeqAIJ *)p->A->data, *po = (Mat_SeqAIJ *)p->B->data, *p_oth;
  PetscInt               plocalsize, nrows, *ilocal, *oilocal, i, lidx, *nrcols, *nlcols, ncol;
  PetscMPIInt            owner;
  PetscSFNode           *iremote, *oiremote;
  const PetscInt        *lrowindices;
  PetscSF                sf, osf;
  PetscInt               pcstart, *roffsets, *loffsets, *pnnz, j;
  PetscInt               ontotalcols, dntotalcols, ntotalcols, nout;
  MPI_Comm               comm;
  ISLocalToGlobalMapping mapping;
  const PetscScalar     *pd_a, *po_a;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)P, &comm));
  /* plocalsize is the number of roots
   * nrows is the number of leaves
   * */
  PetscCall(MatGetLocalSize(P, &plocalsize, NULL));
  PetscCall(ISGetLocalSize(rows, &nrows));
  PetscCall(PetscCalloc1(nrows, &iremote));
  PetscCall(ISGetIndices(rows, &lrowindices));
  for (i = 0; i < nrows; i++) {
    /* Find a remote index and an owner for a row
     * The row could be local or remote
     * */
    owner = 0;
    lidx  = 0;
    PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, &lidx));
    iremote[i].index = lidx;
    iremote[i].rank  = owner;
  }
  /* Create SF to communicate how many nonzero columns for each row */
  PetscCall(PetscSFCreate(comm, &sf));
  /* SF will figure out the number of nonzero columns for each row, and their
   * offsets
   * */
  PetscCall(PetscSFSetGraph(sf, plocalsize, nrows, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFSetUp(sf));

  /* per-row counts and running offsets; diagonal and off-diagonal values are interleaved in pairs
     so one MPIU_2INT broadcast moves both at once */
  PetscCall(PetscCalloc1(2 * (plocalsize + 1), &roffsets));
  PetscCall(PetscCalloc1(2 * plocalsize, &nrcols));
  PetscCall(PetscCalloc1(nrows, &pnnz));
  roffsets[0] = 0;
  roffsets[1] = 0;
  for (i = 0; i < plocalsize; i++) {
    /* diagonal */
    nrcols[i * 2 + 0] = pd->i[i + 1] - pd->i[i];
    /* off-diagonal */
    nrcols[i * 2 + 1] = po->i[i + 1] - po->i[i];
    /* compute offsets so that we know the relative location for each row */
    roffsets[(i + 1) * 2 + 0] = roffsets[i * 2 + 0] + nrcols[i * 2 + 0];
    roffsets[(i + 1) * 2 + 1] = roffsets[i * 2 + 1] + nrcols[i * 2 + 1];
  }
  PetscCall(PetscCalloc1(2 * nrows, &nlcols));
  PetscCall(PetscCalloc1(2 * nrows, &loffsets));
  /* 'r' means root, and 'l' means leaf */
  PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));
  PetscCall(PetscFree(roffsets));
  PetscCall(PetscFree(nrcols));
  dntotalcols = 0;
  ontotalcols = 0;
  ncol        = 0;
  for (i = 0; i < nrows; i++) {
    pnnz[i] = nlcols[i * 2 + 0] + nlcols[i * 2 + 1];
    ncol    = PetscMax(pnnz[i], ncol);
    /* diagonal */
    dntotalcols += nlcols[i * 2 + 0];
    /* off-diagonal */
    ontotalcols += nlcols[i * 2 + 1];
  }
  /* We do not need to figure the right number of columns
   * since all the calculations will be done by going through the raw data
   * */
  PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, nrows, ncol, 0, pnnz, P_oth));
  PetscCall(MatSetUp(*P_oth));
  PetscCall(PetscFree(pnnz));
  p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
  /* diagonal */
  PetscCall(PetscCalloc1(dntotalcols, &iremote));
  /* off-diagonal */
  PetscCall(PetscCalloc1(ontotalcols, &oiremote));
  /* diagonal */
  PetscCall(PetscCalloc1(dntotalcols, &ilocal));
  /* off-diagonal */
  PetscCall(PetscCalloc1(ontotalcols, &oilocal));
  dntotalcols = 0;
  ontotalcols = 0;
  ntotalcols  = 0;
  for (i = 0; i < nrows; i++) {
    owner = 0;
    PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, NULL));
    /* Set iremote for diag matrix */
    for (j = 0; j < nlcols[i * 2 + 0]; j++) {
      iremote[dntotalcols].index = loffsets[i * 2 + 0] + j;
      iremote[dntotalcols].rank  = owner;
      /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */
      ilocal[dntotalcols++] = ntotalcols++;
    }
    /* off-diagonal */
    for (j = 0; j < nlcols[i * 2 + 1]; j++) {
      oiremote[ontotalcols].index = loffsets[i * 2 + 1] + j;
      oiremote[ontotalcols].rank  = owner;
      oilocal[ontotalcols++]      = ntotalcols++;
    }
  }
  PetscCall(ISRestoreIndices(rows, &lrowindices));
  PetscCall(PetscFree(loffsets));
  PetscCall(PetscFree(nlcols));
  PetscCall(PetscSFCreate(comm, &sf));
  /* P serves as roots and P_oth is leaves
   * Diag matrix
   * */
  PetscCall(PetscSFSetGraph(sf, pd->i[plocalsize], dntotalcols, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFSetUp(sf));

  PetscCall(PetscSFCreate(comm, &osf));
  /* off-diagonal */
  PetscCall(PetscSFSetGraph(osf, po->i[plocalsize], ontotalcols, oilocal, PETSC_OWN_POINTER, oiremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(osf));
  PetscCall(PetscSFSetUp(osf));
  PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
  PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
  /* operate on the matrix internal data to save memory */
  PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
  PetscCall(MatGetOwnershipRangeColumn(P, &pcstart, NULL));
  /* Convert to global indices for diag matrix (undone below once the Bcast has completed) */
  for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] += pcstart;
  PetscCall(PetscSFBcastBegin(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
  /* We want P_oth store global indices */
  PetscCall(ISLocalToGlobalMappingCreate(comm, 1, p->B->cmap->n, p->garray, PETSC_COPY_VALUES, &mapping));
  /* Use memory scalable approach */
  PetscCall(ISLocalToGlobalMappingSetType(mapping, ISLOCALTOGLOBALMAPPINGHASH));
  /* in-place local->global conversion of the off-diagonal column indices (reversed below) */
  PetscCall(ISLocalToGlobalMappingApply(mapping, po->i[plocalsize], po->j, po->j));
  PetscCall(PetscSFBcastBegin(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
  /* Convert back to local indices */
  for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] -= pcstart;
  PetscCall(PetscSFBcastEnd(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
  nout = 0;
  /* restore po->j to local indices; every global index must map back (IS_GTOLM_DROP drops none here) */
  PetscCall(ISGlobalToLocalMappingApply(mapping, IS_GTOLM_DROP, po->i[plocalsize], po->j, &nout, po->j));
  PetscCheck(nout == po->i[plocalsize], comm, PETSC_ERR_ARG_INCOMP, "n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ", po->i[plocalsize], nout);
  PetscCall(ISLocalToGlobalMappingDestroy(&mapping));
  /* Exchange values */
  PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
  PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
  PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
  /* Stop PETSc from shrinking memory */
  for (i = 0; i < nrows; i++) p_oth->ilen[i] = p_oth->imax[i];
  PetscCall(MatAssemblyBegin(*P_oth, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*P_oth, MAT_FINAL_ASSEMBLY));
  /* Attach PetscSF objects to P_oth so that we can reuse it later */
  PetscCall(PetscObjectCompose((PetscObject)*P_oth, "diagsf", (PetscObject)sf));
  PetscCall(PetscObjectCompose((PetscObject)*P_oth, "offdiagsf", (PetscObject)osf));
  PetscCall(PetscSFDestroy(&sf));
  PetscCall(PetscSFDestroy(&osf));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
 * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
 * This supports MPIAIJ and MAIJ
 * */
PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A, Mat P, PetscInt dof, MatReuse reuse, Mat *P_oth)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *p = (Mat_MPIAIJ *)P->data;
  Mat_SeqAIJ *p_oth;
  IS          rows, map;
  PetscHMapI  hamp;
  PetscInt    i, htsize, *rowindices, off, *mapping, key, count;
  MPI_Comm    comm;
  PetscSF     sf, osf;
  PetscBool   has;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, P, 0, 0));
  /* If it is the first time, create an index set of off-diag nonzero columns of A,
   * and then create a submatrix (that often is an overlapping matrix)
   * */
  if (reuse == MAT_INITIAL_MATRIX) {
    /* Use a hash table to figure out unique keys */
    PetscCall(PetscHMapICreateWithSize(a->B->cmap->n, &hamp));
    PetscCall(PetscCalloc1(a->B->cmap->n, &mapping));
    count = 0;
    /* Assume that a->garray is sorted, otherwise the following does not make sense */
    for (i = 0; i < a->B->cmap->n; i++) {
      key = a->garray[i] / dof; /* collapse dof consecutive columns onto one key */
      PetscCall(PetscHMapIHas(hamp, key, &has));
      if (!has) {
        mapping[i] = count;
        PetscCall(PetscHMapISet(hamp, key, count++));
      } else {
        /* Current 'i' has the same key as the previous step */
        mapping[i] = count - 1;
      }
    }
    PetscCall(ISCreateGeneral(comm, a->B->cmap->n, mapping, PETSC_OWN_POINTER, &map));
PetscCall(PetscHMapIGetSize(hamp, &htsize)); 5633 PetscCheck(htsize == count, comm, PETSC_ERR_ARG_INCOMP, " Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT, htsize, count); 5634 PetscCall(PetscCalloc1(htsize, &rowindices)); 5635 off = 0; 5636 PetscCall(PetscHMapIGetKeys(hamp, &off, rowindices)); 5637 PetscCall(PetscHMapIDestroy(&hamp)); 5638 PetscCall(PetscSortInt(htsize, rowindices)); 5639 PetscCall(ISCreateGeneral(comm, htsize, rowindices, PETSC_OWN_POINTER, &rows)); 5640 /* In case, the matrix was already created but users want to recreate the matrix */ 5641 PetscCall(MatDestroy(P_oth)); 5642 PetscCall(MatCreateSeqSubMatrixWithRows_Private(P, rows, P_oth)); 5643 PetscCall(PetscObjectCompose((PetscObject)*P_oth, "aoffdiagtopothmapping", (PetscObject)map)); 5644 PetscCall(ISDestroy(&map)); 5645 PetscCall(ISDestroy(&rows)); 5646 } else if (reuse == MAT_REUSE_MATRIX) { 5647 /* If matrix was already created, we simply update values using SF objects 5648 * that as attached to the matrix earlier. 
5649 */ 5650 const PetscScalar *pd_a, *po_a; 5651 5652 PetscCall(PetscObjectQuery((PetscObject)*P_oth, "diagsf", (PetscObject *)&sf)); 5653 PetscCall(PetscObjectQuery((PetscObject)*P_oth, "offdiagsf", (PetscObject *)&osf)); 5654 PetscCheck(sf && osf, comm, PETSC_ERR_ARG_NULL, "Matrix is not initialized yet"); 5655 p_oth = (Mat_SeqAIJ *)(*P_oth)->data; 5656 /* Update values in place */ 5657 PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a)); 5658 PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a)); 5659 PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5660 PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5661 PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5662 PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5663 PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a)); 5664 PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a)); 5665 } else SETERRQ(comm, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown reuse type"); 5666 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, P, 0, 0)); 5667 PetscFunctionReturn(PETSC_SUCCESS); 5668 } 5669 5670 /*@C 5671 MatGetBrowsOfAcols - Returns `IS` that contain rows of `B` that equal to nonzero columns of local `A` 5672 5673 Collective 5674 5675 Input Parameters: 5676 + A - the first matrix in `MATMPIAIJ` format 5677 . B - the second matrix in `MATMPIAIJ` format 5678 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5679 5680 Output Parameters: 5681 + rowb - On input index sets of rows of B to extract (or `NULL`), modified on output 5682 . 
colb - On input index sets of columns of B to extract (or `NULL`), modified on output
- B_seq - the sequential matrix generated

  Level: developer

.seealso: `Mat`, `MATMPIAIJ`, `IS`, `MatReuse`
@*/
PetscErrorCode MatGetBrowsOfAcols(Mat A, Mat B, MatReuse scall, IS *rowb, IS *colb, Mat *B_seq)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  PetscInt   *idx, i, start, ncols, nzA, nzB, *cmap, imark;
  IS          isrowb, iscolb;
  Mat        *bseq = NULL;

  PetscFunctionBegin;
  /* Columns of A index rows of B, hence the column layout of A must match the row layout of B */
  PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
             A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
  PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols, A, B, 0, 0));

  if (scall == MAT_INITIAL_MATRIX) {
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    /* Merge the (sorted) off-diagonal column map with the local column range, preserving global order */
    PetscCall(PetscMalloc1(nzA + nzB, &idx));
    ncols = 0;
    for (i = 0; i < nzB; i++) { /* row < local row index */
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i = 0; i < nzA; i++) idx[ncols++] = start + i;   /* local rows */
    for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
    PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &isrowb));
    PetscCall(ISCreateStride(PETSC_COMM_SELF, B->cmap->N, 0, 1, &iscolb)); /* all columns of B */
  } else {
    PetscCheck(rowb && colb, PETSC_COMM_SELF, PETSC_ERR_SUP, "IS rowb and colb must be provided for MAT_REUSE_MATRIX");
    isrowb = *rowb;
    iscolb = *colb;
    /* MatCreateSubMatrices() expects an array of Mat for reuse; wrap *B_seq in a length-1 array */
    PetscCall(PetscMalloc1(1, &bseq));
    bseq[0] = *B_seq;
  }
  PetscCall(MatCreateSubMatrices(B, 1, &isrowb, &iscolb, scall, &bseq));
  *B_seq = bseq[0];
  PetscCall(PetscFree(bseq));
  /* Return the index sets to the caller when requested, otherwise clean them up */
  if (!rowb) {
    PetscCall(ISDestroy(&isrowb));
  } else {
    *rowb = isrowb;
  }
  if (!colb) {
    PetscCall(ISDestroy(&iscolb));
  } else {
    *colb = iscolb;
  }
  PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols, A, B, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatGetBrowsOfAoCols_MPIAIJ - Creates a `MATSEQAIJ` matrix by taking rows of B that equal to nonzero columns
  of the OFF-DIAGONAL portion of local A

  Collective

  Input Parameters:
+ A,B - the matrices in `MATMPIAIJ` format
- scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`

  Output Parameter:
+ startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
. startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
. bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
- B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N

  Developer Note:
  This directly accesses information inside the VecScatter associated with the matrix-vector product
  for this matrix. This is not desirable..
5760 5761 Level: developer 5762 5763 */ 5764 5765 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A, Mat B, MatReuse scall, PetscInt **startsj_s, PetscInt **startsj_r, MatScalar **bufa_ptr, Mat *B_oth) 5766 { 5767 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5768 VecScatter ctx; 5769 MPI_Comm comm; 5770 const PetscMPIInt *rprocs, *sprocs; 5771 PetscMPIInt nrecvs, nsends; 5772 const PetscInt *srow, *rstarts, *sstarts; 5773 PetscInt *rowlen, *bufj, *bufJ, ncols = 0, aBn = a->B->cmap->n, row, *b_othi, *b_othj, *rvalues = NULL, *svalues = NULL, *cols, sbs, rbs; 5774 PetscInt i, j, k = 0, l, ll, nrows, *rstartsj = NULL, *sstartsj, len; 5775 PetscScalar *b_otha, *bufa, *bufA, *vals = NULL; 5776 MPI_Request *reqs = NULL, *rwaits = NULL, *swaits = NULL; 5777 PetscMPIInt size, tag, rank, nreqs; 5778 5779 PetscFunctionBegin; 5780 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 5781 PetscCallMPI(MPI_Comm_size(comm, &size)); 5782 5783 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 5784 A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 5785 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, B, 0, 0)); 5786 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 5787 5788 if (size == 1) { 5789 startsj_s = NULL; 5790 bufa_ptr = NULL; 5791 *B_oth = NULL; 5792 PetscFunctionReturn(PETSC_SUCCESS); 5793 } 5794 5795 ctx = a->Mvctx; 5796 tag = ((PetscObject)ctx)->tag; 5797 5798 PetscCall(VecScatterGetRemote_Private(ctx, PETSC_TRUE /*send*/, &nsends, &sstarts, &srow, &sprocs, &sbs)); 5799 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5800 PetscCall(VecScatterGetRemoteOrdered_Private(ctx, PETSC_FALSE /*recv*/, &nrecvs, &rstarts, NULL /*indices not needed*/, &rprocs, &rbs)); 5801 
PetscCall(PetscMPIIntCast(nsends + nrecvs, &nreqs)); 5802 PetscCall(PetscMalloc1(nreqs, &reqs)); 5803 rwaits = reqs; 5804 swaits = PetscSafePointerPlusOffset(reqs, nrecvs); 5805 5806 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5807 if (scall == MAT_INITIAL_MATRIX) { 5808 /* i-array */ 5809 /* post receives */ 5810 if (nrecvs) PetscCall(PetscMalloc1(rbs * (rstarts[nrecvs] - rstarts[0]), &rvalues)); /* rstarts can be NULL when nrecvs=0 */ 5811 for (i = 0; i < nrecvs; i++) { 5812 rowlen = rvalues + rstarts[i] * rbs; 5813 nrows = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of indices to be received */ 5814 PetscCallMPI(MPIU_Irecv(rowlen, nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i)); 5815 } 5816 5817 /* pack the outgoing message */ 5818 PetscCall(PetscMalloc2(nsends + 1, &sstartsj, nrecvs + 1, &rstartsj)); 5819 5820 sstartsj[0] = 0; 5821 rstartsj[0] = 0; 5822 len = 0; /* total length of j or a array to be sent */ 5823 if (nsends) { 5824 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5825 PetscCall(PetscMalloc1(sbs * (sstarts[nsends] - sstarts[0]), &svalues)); 5826 } 5827 for (i = 0; i < nsends; i++) { 5828 rowlen = svalues + (sstarts[i] - sstarts[0]) * sbs; 5829 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5830 for (j = 0; j < nrows; j++) { 5831 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5832 for (l = 0; l < sbs; l++) { 5833 PetscCall(MatGetRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); /* rowlength */ 5834 5835 rowlen[j * sbs + l] = ncols; 5836 5837 len += ncols; 5838 PetscCall(MatRestoreRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); 5839 } 5840 k++; 5841 } 5842 PetscCallMPI(MPIU_Isend(rowlen, nrows * sbs, MPIU_INT, sprocs[i], tag, comm, swaits + i)); 5843 5844 sstartsj[i + 1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5845 } 5846 /* recvs and sends of i-array are completed */ 5847 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5848 
PetscCall(PetscFree(svalues)); 5849 5850 /* allocate buffers for sending j and a arrays */ 5851 PetscCall(PetscMalloc1(len + 1, &bufj)); 5852 PetscCall(PetscMalloc1(len + 1, &bufa)); 5853 5854 /* create i-array of B_oth */ 5855 PetscCall(PetscMalloc1(aBn + 2, &b_othi)); 5856 5857 b_othi[0] = 0; 5858 len = 0; /* total length of j or a array to be received */ 5859 k = 0; 5860 for (i = 0; i < nrecvs; i++) { 5861 rowlen = rvalues + (rstarts[i] - rstarts[0]) * rbs; 5862 nrows = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of rows to be received */ 5863 for (j = 0; j < nrows; j++) { 5864 b_othi[k + 1] = b_othi[k] + rowlen[j]; 5865 PetscCall(PetscIntSumError(rowlen[j], len, &len)); 5866 k++; 5867 } 5868 rstartsj[i + 1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5869 } 5870 PetscCall(PetscFree(rvalues)); 5871 5872 /* allocate space for j and a arrays of B_oth */ 5873 PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_othj)); 5874 PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_otha)); 5875 5876 /* j-array */ 5877 /* post receives of j-array */ 5878 for (i = 0; i < nrecvs; i++) { 5879 nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */ 5880 PetscCallMPI(MPIU_Irecv(b_othj + rstartsj[i], nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i)); 5881 } 5882 5883 /* pack the outgoing message j-array */ 5884 if (nsends) k = sstarts[0]; 5885 for (i = 0; i < nsends; i++) { 5886 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5887 bufJ = bufj + sstartsj[i]; 5888 for (j = 0; j < nrows; j++) { 5889 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5890 for (ll = 0; ll < sbs; ll++) { 5891 PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL)); 5892 for (l = 0; l < ncols; l++) *bufJ++ = cols[l]; 5893 PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL)); 5894 } 5895 } 5896 PetscCallMPI(MPIU_Isend(bufj + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_INT, sprocs[i], tag, comm, swaits + i)); 5897 } 5898 5899 /* 
recvs and sends of j-array are completed */ 5900 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5901 } else if (scall == MAT_REUSE_MATRIX) { 5902 sstartsj = *startsj_s; 5903 rstartsj = *startsj_r; 5904 bufa = *bufa_ptr; 5905 PetscCall(MatSeqAIJGetArrayWrite(*B_oth, &b_otha)); 5906 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container"); 5907 5908 /* a-array */ 5909 /* post receives of a-array */ 5910 for (i = 0; i < nrecvs; i++) { 5911 nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */ 5912 PetscCallMPI(MPIU_Irecv(b_otha + rstartsj[i], nrows, MPIU_SCALAR, rprocs[i], tag, comm, rwaits + i)); 5913 } 5914 5915 /* pack the outgoing message a-array */ 5916 if (nsends) k = sstarts[0]; 5917 for (i = 0; i < nsends; i++) { 5918 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5919 bufA = bufa + sstartsj[i]; 5920 for (j = 0; j < nrows; j++) { 5921 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5922 for (ll = 0; ll < sbs; ll++) { 5923 PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals)); 5924 for (l = 0; l < ncols; l++) *bufA++ = vals[l]; 5925 PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals)); 5926 } 5927 } 5928 PetscCallMPI(MPIU_Isend(bufa + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_SCALAR, sprocs[i], tag, comm, swaits + i)); 5929 } 5930 /* recvs and sends of a-array are completed */ 5931 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5932 PetscCall(PetscFree(reqs)); 5933 5934 if (scall == MAT_INITIAL_MATRIX) { 5935 Mat_SeqAIJ *b_oth; 5936 5937 /* put together the new matrix */ 5938 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, aBn, B->cmap->N, b_othi, b_othj, b_otha, B_oth)); 5939 5940 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5941 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5942 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 5943 b_oth->free_a = PETSC_TRUE; 5944 b_oth->free_ij = PETSC_TRUE; 5945 b_oth->nonew = 0; 5946 5947 PetscCall(PetscFree(bufj)); 5948 if (!startsj_s || !bufa_ptr) { 5949 PetscCall(PetscFree2(sstartsj, rstartsj)); 5950 PetscCall(PetscFree(bufa_ptr)); 5951 } else { 5952 *startsj_s = sstartsj; 5953 *startsj_r = rstartsj; 5954 *bufa_ptr = bufa; 5955 } 5956 } else if (scall == MAT_REUSE_MATRIX) { 5957 PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth, &b_otha)); 5958 } 5959 5960 PetscCall(VecScatterRestoreRemote_Private(ctx, PETSC_TRUE, &nsends, &sstarts, &srow, &sprocs, &sbs)); 5961 PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx, PETSC_FALSE, &nrecvs, &rstarts, NULL, &rprocs, &rbs)); 5962 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, B, 0, 0)); 5963 PetscFunctionReturn(PETSC_SUCCESS); 5964 } 5965 5966 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat, MatType, MatReuse, Mat *); 5967 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat, MatType, MatReuse, Mat *); 5968 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat, MatType, MatReuse, Mat *); 5969 #if defined(PETSC_HAVE_MKL_SPARSE) 5970 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat, MatType, MatReuse, Mat *); 5971 #endif 5972 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat, MatType, MatReuse, Mat *); 5973 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *); 5974 #if defined(PETSC_HAVE_ELEMENTAL) 5975 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat, MatType, MatReuse, Mat *); 5976 #endif 5977 #if defined(PETSC_HAVE_SCALAPACK) 5978 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *); 5979 #endif 5980 #if defined(PETSC_HAVE_HYPRE) 5981 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *); 5982 #endif 5983 #if defined(PETSC_HAVE_CUDA) 5984 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat, MatType, 
MatReuse, Mat *);
#endif
#if defined(PETSC_HAVE_HIP)
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJHIPSPARSE(Mat, MatType, MatReuse, Mat *);
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat, MatType, MatReuse, Mat *);
#endif
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat, MatType, MatReuse, Mat *);
PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);

/*
    Computes (B'*A')' since computing B*A directly is untenable

           n                       p                          p
        [       ]       [       ]         [                  ]
      m [   A   ]  *  n [   B   ]   =   m [         C        ]
        [       ]       [       ]         [                  ]

*/
static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A, Mat B, Mat C)
{
  Mat At, Bt, Ct;

  PetscFunctionBegin;
  PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &At));
  PetscCall(MatTranspose(B, MAT_INITIAL_MATRIX, &Bt));
  /* Ct = B^T * A^T, then C = Ct^T */
  PetscCall(MatMatMult(Bt, At, MAT_INITIAL_MATRIX, PETSC_CURRENT, &Ct));
  PetscCall(MatDestroy(&At));
  PetscCall(MatDestroy(&Bt));
  PetscCall(MatTransposeSetPrecursor(Ct, C)); /* transpose in place into the already set-up C */
  PetscCall(MatTranspose(Ct, MAT_REUSE_MATRIX, &C));
  PetscCall(MatDestroy(&Ct));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Symbolic phase: sets sizes/type of C and installs the numeric routine; `fill` is unused here */
static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A, Mat B, PetscReal fill, Mat C)
{
  PetscBool cisdense;

  PetscFunctionBegin;
  PetscCheck(A->cmap->n == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT, A->cmap->n, B->rmap->n);
  PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N));
  PetscCall(MatSetBlockSizesFromMats(C, A, B));
  /* Keep C's type if it is already some dense type, otherwise make it the same type as A */
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, MATMPIDENSEHIP, ""));
  if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
  PetscCall(MatSetUp(C));

  C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Product setup for C = A*B with A MPIDENSE and B MPIAIJ */
static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
{
  Mat_Product *product = C->product;
  Mat          A = product->A, B = product->B;

  PetscFunctionBegin;
  PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
             A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
  C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
  C->ops->productsymbolic = MatProductSymbolic_AB;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Dispatcher: only the AB product is supported for MPIDense*MPIAIJ */
PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
{
  Mat_Product *product = C->product;

  PetscFunctionBegin;
  if (product->type == MATPRODUCT_AB) PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix

  Input Parameters:

    j1,rowBegin1,rowEnd1,jmap1: describe the first set of nonzeros (Set1)
    j2,rowBegin2,rowEnd2,jmap2: describe the second set of nonzeros (Set2)

    mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat

    For Set1, j1[] contains column indices of the nonzeros.
    For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k
    respectively (note rowEnd1[k] is not necessarily equal to rowBegin1[k+1]). Indices in this range of j1[] are sorted,
    but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1.

    Similar for Set2.

  This routine merges the two sets of nonzeros row by row and removes repeats.

  Output Parameters: (memory is allocated by the caller)

    i[],j[]: the CSR of the merged matrix, which has m rows.
    imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix.
    imap2[]: similar to imap1[], but for Set2.
    Note we order nonzeros row-by-row and from left to right.
*/
static PetscErrorCode MatMergeEntries_Internal(Mat mat, const PetscInt j1[], const PetscInt j2[], const PetscCount rowBegin1[], const PetscCount rowEnd1[], const PetscCount rowBegin2[], const PetscCount rowEnd2[], const PetscCount jmap1[], const PetscCount jmap2[], PetscCount imap1[], PetscCount imap2[], PetscInt i[], PetscInt j[])
{
  PetscInt   r, m; /* Row index of mat */
  PetscCount t, t1, t2, b1, e1, b2, e2;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(mat, &m, NULL));
  t1 = t2 = t = 0; /* Count of unique nonzeros in Set1, Set2 and the merged set respectively */
  i[0] = 0;
  for (r = 0; r < m; r++) { /* Do row by row merging */
    b1 = rowBegin1[r];
    e1 = rowEnd1[r];
    b2 = rowBegin2[r];
    e2 = rowEnd2[r];
    while (b1 < e1 && b2 < e2) {
      if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */
        j[t]      = j1[b1];
        imap1[t1] = t;
        imap2[t2] = t;
        b1 += jmap1[t1 + 1] - jmap1[t1]; /* Jump to next unique local nonzero */
        b2 += jmap2[t2 + 1] - jmap2[t2]; /* Jump to next unique remote nonzero */
        t1++;
        t2++;
        t++;
      } else if (j1[b1] < j2[b2]) {
        j[t]      = j1[b1];
        imap1[t1] = t;
        b1 += jmap1[t1 + 1] - jmap1[t1];
        t1++;
        t++;
      } else {
        j[t]      = j2[b2];
        imap2[t2] = t;
        b2 += jmap2[t2 + 1] - jmap2[t2];
        t2++;
        t++;
      }
    }
    /* Merge the remaining in either j1[] or j2[] */
    while (b1 < e1) {
      j[t]      = j1[b1];
      imap1[t1] = t;
      b1 += jmap1[t1 + 1] - jmap1[t1];
      t1++;
      t++;
    }
    while (b2 < e2) {
      j[t]      = j2[b2];
      imap2[t2] = t;
      b2 += jmap2[t2 + 1] - jmap2[t2];
      t2++;
      t++;
    }
    PetscCall(PetscIntCast(t, i + r + 1));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block

  Input Parameters:
    mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m.
    n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[]
      respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n.

    i[] is already sorted, but within a row, j[] is not sorted and might have repeats.
    i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting.

  Output Parameters:
    j[],perm[]: the routine needs to sort j[] within each row along with perm[].
    rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller.
      They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block,
      and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block.

    Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine.
      Atot: number of entries belonging to the diagonal block.
      Annz: number of unique nonzeros belonging to the diagonal block.
      Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count
        repeats (i.e., same 'i,j' pair).
      Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block.
More precisely, Ajmap[t+1] - Ajmap[t]
        is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0.

      Atot: number of entries belonging to the diagonal block
      Annz: number of unique nonzeros belonging to the diagonal block.

    Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block.

    Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1().
*/
static PetscErrorCode MatSplitEntries_Internal(Mat mat, PetscCount n, const PetscInt i[], PetscInt j[], PetscCount perm[], PetscCount rowBegin[], PetscCount rowMid[], PetscCount rowEnd[], PetscCount *Atot_, PetscCount **Aperm_, PetscCount *Annz_, PetscCount **Ajmap_, PetscCount *Btot_, PetscCount **Bperm_, PetscCount *Bnnz_, PetscCount **Bjmap_)
{
  PetscInt    cstart, cend, rstart, rend, row, col;
  PetscCount  Atot = 0, Btot = 0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */
  PetscCount  Annz = 0, Bnnz = 0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */
  PetscCount  k, m, p, q, r, s, mid;
  PetscCount *Aperm, *Bperm, *Ajmap, *Bjmap;

  PetscFunctionBegin;
  PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
  PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
  m = rend - rstart;

  /* Skip negative rows (entries flagged by the caller to be ignored) */
  for (k = 0; k < n; k++)
    if (i[k] >= 0) break;

  /* Process [k,n): sort and partition each local row into diag and offdiag portions,
     fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz.
  */
  while (k < n) {
    row = i[k];
    /* Entries in [k,s) are in one row. Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */
    for (s = k; s < n; s++)
      if (i[s] != row) break;

    /* Shift diag columns to range of [-PETSC_INT_MAX, -1] */
    for (p = k; p < s; p++) {
      if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_INT_MAX;
    }
    PetscCall(PetscSortIntWithCountArray(s - k, j + k, perm + k));
    PetscCall(PetscSortedIntUpperBound(j, k, s, -1, &mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */
    rowBegin[row - rstart] = k;
    rowMid[row - rstart]   = mid;
    rowEnd[row - rstart]   = s;
    PetscCheck(k == s || j[s - 1] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index %" PetscInt_FMT " is >= matrix column size %" PetscInt_FMT, j[s - 1], mat->cmap->N);

    /* Count nonzeros of this diag/offdiag row, which might have repeats */
    Atot += mid - k;
    Btot += s - mid;

    /* Count unique nonzeros of this diag row */
    for (p = k; p < mid;) {
      col = j[p];
      do {
        j[p] += PETSC_INT_MAX; /* Revert the modified diagonal indices */
        p++;
      } while (p < mid && j[p] == col);
      Annz++;
    }

    /* Count unique nonzeros of this offdiag row */
    for (p = mid; p < s;) {
      col = j[p];
      do {
        p++;
      } while (p < s && j[p] == col);
      Bnnz++;
    }
    k = s;
  }

  /* Allocation according to Atot, Btot, Annz, Bnnz */
  PetscCall(PetscMalloc1(Atot, &Aperm));
  PetscCall(PetscMalloc1(Btot, &Bperm));
  PetscCall(PetscMalloc1(Annz + 1, &Ajmap));
  PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap));

  /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */
  Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0;
  for (r = 0; r < m; r++) {
    k   = rowBegin[r];
    mid = rowMid[r];
    s   = rowEnd[r];
    PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Aperm, Atot), PetscSafePointerPlusOffset(perm, k), mid - k));
    PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Bperm, Btot), PetscSafePointerPlusOffset(perm, mid), s - mid));
    Atot += mid - k;
    Btot += s - mid;

    /* Scan column indices in this row and find out how many repeats each unique nonzero has */
    for (p = k; p < mid;) {
      col = j[p];
      q   = p;
      do {
        p++;
      } while (p < mid && j[p] == col);
      Ajmap[Annz + 1] = Ajmap[Annz] + (p - q);
      Annz++;
    }

    for (p = mid; p < s;) {
      col = j[p];
      q   = p;
      do {
        p++;
      } while (p < s && j[p] == col);
      Bjmap[Bnnz + 1] = Bjmap[Bnnz] + (p - q);
      Bnnz++;
    }
  }
  /* Output */
  *Aperm_ = Aperm;
  *Annz_  = Annz;
  *Atot_  = Atot;
  *Ajmap_ = Ajmap;
  *Bperm_ = Bperm;
  *Bnnz_  = Bnnz;
  *Btot_  = Btot;
  *Bjmap_ = Bjmap;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix

  Input Parameters:
    nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[]
    nnz:  number of unique nonzeros in the merged matrix
    imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix
    jmap[nnz1+1]: i-th nonzero in the set has jmap[i+1] - jmap[i] repeats in the set

  Output Parameter: (memory is allocated by the caller)
    jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set

  Example:
    nnz1 = 4
    nnz  = 6
    imap = [1,3,4,5]
    jmap = [0,3,5,6,7]
  then
    jmap_new = [0,0,3,3,5,6,7]
*/
static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1, PetscCount nnz, const PetscCount imap[], const PetscCount jmap[], PetscCount jmap_new[])
{
  PetscCount k, p;

  PetscFunctionBegin;
  jmap_new[0] = 0;
  p           = nnz; /* p loops over jmap_new[] backwards */
  for (k = nnz1 - 1; k >= 0; k--) { /* k loops over imap[] */
    for (; p > imap[k]; p--) jmap_new[p] = jmap[k + 1];
  }
  for (; p >= 0; p--) jmap_new[p] = jmap[0];
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Destructor for the COO assembly data attached to an MPIAIJ matrix; frees the SF and all index/permutation arrays */
static PetscErrorCode MatCOOStructDestroy_MPIAIJ(void **data)
{
  MatCOOStruct_MPIAIJ *coo = (MatCOOStruct_MPIAIJ *)*data;

  PetscFunctionBegin;
  PetscCall(PetscSFDestroy(&coo->sf));
  PetscCall(PetscFree(coo->Aperm1));
  PetscCall(PetscFree(coo->Bperm1));
  PetscCall(PetscFree(coo->Ajmap1));
  PetscCall(PetscFree(coo->Bjmap1));
  PetscCall(PetscFree(coo->Aimap2));
  PetscCall(PetscFree(coo->Bimap2));
  PetscCall(PetscFree(coo->Aperm2));
  PetscCall(PetscFree(coo->Bperm2));
  PetscCall(PetscFree(coo->Ajmap2));
  PetscCall(PetscFree(coo->Bjmap2));
  PetscCall(PetscFree(coo->Cperm1));
  PetscCall(PetscFree2(coo->sendbuf, coo->recvbuf));
  PetscCall(PetscFree(coo));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, PetscInt coo_i[], PetscInt coo_j[])
{
  MPI_Comm             comm;
  PetscMPIInt          rank, size;
  PetscInt             m, n, M, N, rstart, rend, cstart, cend; /* Sizes, indices of row/col, therefore with type PetscInt */
  PetscCount           k, p, q, rem;                           /* Loop variables over coo arrays */
  Mat_MPIAIJ          *mpiaij = (Mat_MPIAIJ *)mat->data;
  PetscContainer       container;
  MatCOOStruct_MPIAIJ *coo;

  PetscFunctionBegin;
  /* Wipe any previous assembly/communication state; COO preallocation rebuilds it from scratch */
  PetscCall(PetscFree(mpiaij->garray));
  PetscCall(VecDestroy(&mpiaij->lvec));
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&mpiaij->colmap));
#else
  PetscCall(PetscFree(mpiaij->colmap));
#endif
  PetscCall(VecScatterDestroy(&mpiaij->Mvctx));
  mat->assembled     = PETSC_FALSE;
  mat->was_assembled = PETSC_FALSE;

  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
PetscCallMPI(MPI_Comm_rank(comm, &rank)); 6368 PetscCall(PetscLayoutSetUp(mat->rmap)); 6369 PetscCall(PetscLayoutSetUp(mat->cmap)); 6370 PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend)); 6371 PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend)); 6372 PetscCall(MatGetLocalSize(mat, &m, &n)); 6373 PetscCall(MatGetSize(mat, &M, &N)); 6374 6375 /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */ 6376 /* entries come first, then local rows, then remote rows. */ 6377 PetscCount n1 = coo_n, *perm1; 6378 PetscInt *i1 = coo_i, *j1 = coo_j; 6379 6380 PetscCall(PetscMalloc1(n1, &perm1)); 6381 for (k = 0; k < n1; k++) perm1[k] = k; 6382 6383 /* Manipulate indices so that entries with negative row or col indices will have smallest 6384 row indices, local entries will have greater but negative row indices, and remote entries 6385 will have positive row indices. 6386 */ 6387 for (k = 0; k < n1; k++) { 6388 if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_INT_MIN; /* e.g., -2^31, minimal to move them ahead */ 6389 else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_INT_MAX; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_INT_MAX, -1] */ 6390 else { 6391 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_USER_INPUT, "MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows"); 6392 if (mpiaij->donotstash) i1[k] = PETSC_INT_MIN; /* Ignore offproc entries as if they had negative indices */ 6393 } 6394 } 6395 6396 /* Sort by row; after that, [0,k) have ignored entries, [k,rem) have local rows and [rem,n1) have remote rows */ 6397 PetscCall(PetscSortIntWithIntCountArrayPair(n1, i1, j1, perm1)); 6398 6399 /* Advance k to the first entry we need to take care of */ 6400 for (k = 0; k < n1; k++) 6401 if (i1[k] > PETSC_INT_MIN) break; 6402 PetscCount i1start = k; 6403 6404 PetscCall(PetscSortedIntUpperBound(i1, k, n1, rend - 1 - PETSC_INT_MAX, &rem)); /* rem is upper bound of the last local row */ 6405 for (; k < rem; k++) 
i1[k] += PETSC_INT_MAX; /* Revert row indices of local rows*/ 6406 6407 PetscCheck(i1 == NULL || i1[n1 - 1] < M, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "COO row index %" PetscInt_FMT " is >= the matrix row size %" PetscInt_FMT, i1[n1 - 1], M); 6408 6409 /* Send remote rows to their owner */ 6410 /* Find which rows should be sent to which remote ranks*/ 6411 PetscInt nsend = 0; /* Number of MPI ranks to send data to */ 6412 PetscMPIInt *sendto; /* [nsend], storing remote ranks */ 6413 PetscInt *nentries; /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */ 6414 const PetscInt *ranges; 6415 PetscInt maxNsend = size >= 128 ? 128 : size; /* Assume max 128 neighbors; realloc when needed */ 6416 6417 PetscCall(PetscLayoutGetRanges(mat->rmap, &ranges)); 6418 PetscCall(PetscMalloc2(maxNsend, &sendto, maxNsend, &nentries)); 6419 for (k = rem; k < n1;) { 6420 PetscMPIInt owner; 6421 PetscInt firstRow, lastRow; 6422 6423 /* Locate a row range */ 6424 firstRow = i1[k]; /* first row of this owner */ 6425 PetscCall(PetscLayoutFindOwner(mat->rmap, firstRow, &owner)); 6426 lastRow = ranges[owner + 1] - 1; /* last row of this owner */ 6427 6428 /* Find the first index 'p' in [k,n) with i1[p] belonging to next owner */ 6429 PetscCall(PetscSortedIntUpperBound(i1, k, n1, lastRow, &p)); 6430 6431 /* All entries in [k,p) belong to this remote owner */ 6432 if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */ 6433 PetscMPIInt *sendto2; 6434 PetscInt *nentries2; 6435 PetscInt maxNsend2 = (maxNsend <= size / 2) ? 
maxNsend * 2 : size; 6436 6437 PetscCall(PetscMalloc2(maxNsend2, &sendto2, maxNsend2, &nentries2)); 6438 PetscCall(PetscArraycpy(sendto2, sendto, maxNsend)); 6439 PetscCall(PetscArraycpy(nentries2, nentries2, maxNsend + 1)); 6440 PetscCall(PetscFree2(sendto, nentries2)); 6441 sendto = sendto2; 6442 nentries = nentries2; 6443 maxNsend = maxNsend2; 6444 } 6445 sendto[nsend] = owner; 6446 PetscCall(PetscIntCast(p - k, &nentries[nsend])); 6447 nsend++; 6448 k = p; 6449 } 6450 6451 /* Build 1st SF to know offsets on remote to send data */ 6452 PetscSF sf1; 6453 PetscInt nroots = 1, nroots2 = 0; 6454 PetscInt nleaves = nsend, nleaves2 = 0; 6455 PetscInt *offsets; 6456 PetscSFNode *iremote; 6457 6458 PetscCall(PetscSFCreate(comm, &sf1)); 6459 PetscCall(PetscMalloc1(nsend, &iremote)); 6460 PetscCall(PetscMalloc1(nsend, &offsets)); 6461 for (k = 0; k < nsend; k++) { 6462 iremote[k].rank = sendto[k]; 6463 iremote[k].index = 0; 6464 nleaves2 += nentries[k]; 6465 PetscCheck(nleaves2 >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF leaves is too large for PetscInt"); 6466 } 6467 PetscCall(PetscSFSetGraph(sf1, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 6468 PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1, MPIU_INT, PETSC_MEMTYPE_HOST, &nroots2 /*rootdata*/, PETSC_MEMTYPE_HOST, nentries /*leafdata*/, PETSC_MEMTYPE_HOST, offsets /*leafupdate*/, MPI_SUM)); 6469 PetscCall(PetscSFFetchAndOpEnd(sf1, MPIU_INT, &nroots2, nentries, offsets, MPI_SUM)); /* Would nroots2 overflow, we check offsets[] below */ 6470 PetscCall(PetscSFDestroy(&sf1)); 6471 PetscAssert(nleaves2 == n1 - rem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT, nleaves2, n1 - rem); 6472 6473 /* Build 2nd SF to send remote COOs to their owner */ 6474 PetscSF sf2; 6475 nroots = nroots2; 6476 nleaves = nleaves2; 6477 PetscCall(PetscSFCreate(comm, &sf2)); 6478 PetscCall(PetscSFSetFromOptions(sf2)); 6479 
PetscCall(PetscMalloc1(nleaves, &iremote)); 6480 p = 0; 6481 for (k = 0; k < nsend; k++) { 6482 PetscCheck(offsets[k] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF roots is too large for PetscInt"); 6483 for (q = 0; q < nentries[k]; q++, p++) { 6484 iremote[p].rank = sendto[k]; 6485 PetscCall(PetscIntCast(offsets[k] + q, &iremote[p].index)); 6486 } 6487 } 6488 PetscCall(PetscSFSetGraph(sf2, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 6489 6490 /* Send the remote COOs to their owner */ 6491 PetscInt n2 = nroots, *i2, *j2; /* Buffers for received COOs from other ranks, along with a permutation array */ 6492 PetscCount *perm2; /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */ 6493 PetscCall(PetscMalloc3(n2, &i2, n2, &j2, n2, &perm2)); 6494 PetscAssert(rem == 0 || i1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null"); 6495 PetscAssert(rem == 0 || j1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null"); 6496 PetscInt *i1prem = PetscSafePointerPlusOffset(i1, rem); 6497 PetscInt *j1prem = PetscSafePointerPlusOffset(j1, rem); 6498 PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, i1prem, PETSC_MEMTYPE_HOST, i2, MPI_REPLACE)); 6499 PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, i1prem, i2, MPI_REPLACE)); 6500 PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, j1prem, PETSC_MEMTYPE_HOST, j2, MPI_REPLACE)); 6501 PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, j1prem, j2, MPI_REPLACE)); 6502 6503 PetscCall(PetscFree(offsets)); 6504 PetscCall(PetscFree2(sendto, nentries)); 6505 6506 /* Sort received COOs by row along with the permutation array */ 6507 for (k = 0; k < n2; k++) perm2[k] = k; 6508 PetscCall(PetscSortIntWithIntCountArrayPair(n2, i2, j2, perm2)); 6509 6510 /* sf2 only sends contiguous leafdata to contiguous rootdata. 
We record the permutation which will be used to fill leafdata */ 6511 PetscCount *Cperm1; 6512 PetscAssert(rem == 0 || perm1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null"); 6513 PetscCount *perm1prem = PetscSafePointerPlusOffset(perm1, rem); 6514 PetscCall(PetscMalloc1(nleaves, &Cperm1)); 6515 PetscCall(PetscArraycpy(Cperm1, perm1prem, nleaves)); 6516 6517 /* Support for HYPRE matrices, kind of a hack. 6518 Swap min column with diagonal so that diagonal values will go first */ 6519 PetscBool hypre; 6520 PetscCall(PetscStrcmp("_internal_COO_mat_for_hypre", ((PetscObject)mat)->name, &hypre)); 6521 if (hypre) { 6522 PetscInt *minj; 6523 PetscBT hasdiag; 6524 6525 PetscCall(PetscBTCreate(m, &hasdiag)); 6526 PetscCall(PetscMalloc1(m, &minj)); 6527 for (k = 0; k < m; k++) minj[k] = PETSC_INT_MAX; 6528 for (k = i1start; k < rem; k++) { 6529 if (j1[k] < cstart || j1[k] >= cend) continue; 6530 const PetscInt rindex = i1[k] - rstart; 6531 if ((j1[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex)); 6532 minj[rindex] = PetscMin(minj[rindex], j1[k]); 6533 } 6534 for (k = 0; k < n2; k++) { 6535 if (j2[k] < cstart || j2[k] >= cend) continue; 6536 const PetscInt rindex = i2[k] - rstart; 6537 if ((j2[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex)); 6538 minj[rindex] = PetscMin(minj[rindex], j2[k]); 6539 } 6540 for (k = i1start; k < rem; k++) { 6541 const PetscInt rindex = i1[k] - rstart; 6542 if (j1[k] < cstart || j1[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue; 6543 if (j1[k] == minj[rindex]) j1[k] = i1[k] + (cstart - rstart); 6544 else if ((j1[k] - cstart) == rindex) j1[k] = minj[rindex]; 6545 } 6546 for (k = 0; k < n2; k++) { 6547 const PetscInt rindex = i2[k] - rstart; 6548 if (j2[k] < cstart || j2[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue; 6549 if (j2[k] == minj[rindex]) j2[k] = i2[k] + (cstart - rstart); 6550 else if ((j2[k] - cstart) == rindex) j2[k] = minj[rindex]; 6551 } 6552 
PetscCall(PetscBTDestroy(&hasdiag)); 6553 PetscCall(PetscFree(minj)); 6554 } 6555 6556 /* Split local COOs and received COOs into diag/offdiag portions */ 6557 PetscCount *rowBegin1, *rowMid1, *rowEnd1; 6558 PetscCount *Ajmap1, *Aperm1, *Bjmap1, *Bperm1; 6559 PetscCount Annz1, Bnnz1, Atot1, Btot1; 6560 PetscCount *rowBegin2, *rowMid2, *rowEnd2; 6561 PetscCount *Ajmap2, *Aperm2, *Bjmap2, *Bperm2; 6562 PetscCount Annz2, Bnnz2, Atot2, Btot2; 6563 6564 PetscCall(PetscCalloc3(m, &rowBegin1, m, &rowMid1, m, &rowEnd1)); 6565 PetscCall(PetscCalloc3(m, &rowBegin2, m, &rowMid2, m, &rowEnd2)); 6566 PetscCall(MatSplitEntries_Internal(mat, rem, i1, j1, perm1, rowBegin1, rowMid1, rowEnd1, &Atot1, &Aperm1, &Annz1, &Ajmap1, &Btot1, &Bperm1, &Bnnz1, &Bjmap1)); 6567 PetscCall(MatSplitEntries_Internal(mat, n2, i2, j2, perm2, rowBegin2, rowMid2, rowEnd2, &Atot2, &Aperm2, &Annz2, &Ajmap2, &Btot2, &Bperm2, &Bnnz2, &Bjmap2)); 6568 6569 /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */ 6570 PetscInt *Ai, *Bi; 6571 PetscInt *Aj, *Bj; 6572 6573 PetscCall(PetscMalloc1(m + 1, &Ai)); 6574 PetscCall(PetscMalloc1(m + 1, &Bi)); 6575 PetscCall(PetscMalloc1(Annz1 + Annz2, &Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */ 6576 PetscCall(PetscMalloc1(Bnnz1 + Bnnz2, &Bj)); 6577 6578 PetscCount *Aimap1, *Bimap1, *Aimap2, *Bimap2; 6579 PetscCall(PetscMalloc1(Annz1, &Aimap1)); 6580 PetscCall(PetscMalloc1(Bnnz1, &Bimap1)); 6581 PetscCall(PetscMalloc1(Annz2, &Aimap2)); 6582 PetscCall(PetscMalloc1(Bnnz2, &Bimap2)); 6583 6584 PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowBegin1, rowMid1, rowBegin2, rowMid2, Ajmap1, Ajmap2, Aimap1, Aimap2, Ai, Aj)); 6585 PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowMid1, rowEnd1, rowMid2, rowEnd2, Bjmap1, Bjmap2, Bimap1, Bimap2, Bi, Bj)); 6586 6587 /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we */ 6588 /* expect nonzeros in A/B most likely have local contributing 
entries */ 6589 PetscInt Annz = Ai[m]; 6590 PetscInt Bnnz = Bi[m]; 6591 PetscCount *Ajmap1_new, *Bjmap1_new; 6592 6593 PetscCall(PetscMalloc1(Annz + 1, &Ajmap1_new)); 6594 PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap1_new)); 6595 6596 PetscCall(ExpandJmap_Internal(Annz1, Annz, Aimap1, Ajmap1, Ajmap1_new)); 6597 PetscCall(ExpandJmap_Internal(Bnnz1, Bnnz, Bimap1, Bjmap1, Bjmap1_new)); 6598 6599 PetscCall(PetscFree(Aimap1)); 6600 PetscCall(PetscFree(Ajmap1)); 6601 PetscCall(PetscFree(Bimap1)); 6602 PetscCall(PetscFree(Bjmap1)); 6603 PetscCall(PetscFree3(rowBegin1, rowMid1, rowEnd1)); 6604 PetscCall(PetscFree3(rowBegin2, rowMid2, rowEnd2)); 6605 PetscCall(PetscFree(perm1)); 6606 PetscCall(PetscFree3(i2, j2, perm2)); 6607 6608 Ajmap1 = Ajmap1_new; 6609 Bjmap1 = Bjmap1_new; 6610 6611 /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */ 6612 if (Annz < Annz1 + Annz2) { 6613 PetscInt *Aj_new; 6614 PetscCall(PetscMalloc1(Annz, &Aj_new)); 6615 PetscCall(PetscArraycpy(Aj_new, Aj, Annz)); 6616 PetscCall(PetscFree(Aj)); 6617 Aj = Aj_new; 6618 } 6619 6620 if (Bnnz < Bnnz1 + Bnnz2) { 6621 PetscInt *Bj_new; 6622 PetscCall(PetscMalloc1(Bnnz, &Bj_new)); 6623 PetscCall(PetscArraycpy(Bj_new, Bj, Bnnz)); 6624 PetscCall(PetscFree(Bj)); 6625 Bj = Bj_new; 6626 } 6627 6628 /* Create new submatrices for on-process and off-process coupling */ 6629 PetscScalar *Aa, *Ba; 6630 MatType rtype; 6631 Mat_SeqAIJ *a, *b; 6632 PetscObjectState state; 6633 PetscCall(PetscCalloc1(Annz, &Aa)); /* Zero matrix on device */ 6634 PetscCall(PetscCalloc1(Bnnz, &Ba)); 6635 /* make Aj[] local, i.e, based off the start column of the diagonal portion */ 6636 if (cstart) { 6637 for (k = 0; k < Annz; k++) Aj[k] -= cstart; 6638 } 6639 6640 PetscCall(MatGetRootType_Private(mat, &rtype)); 6641 6642 MatSeqXAIJGetOptions_Private(mpiaij->A); 6643 PetscCall(MatDestroy(&mpiaij->A)); 6644 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, Ai, Aj, Aa, &mpiaij->A)); 6645 
PetscCall(MatSetBlockSizesFromMats(mpiaij->A, mat, mat)); 6646 MatSeqXAIJRestoreOptions_Private(mpiaij->A); 6647 6648 MatSeqXAIJGetOptions_Private(mpiaij->B); 6649 PetscCall(MatDestroy(&mpiaij->B)); 6650 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, mat->cmap->N, Bi, Bj, Ba, &mpiaij->B)); 6651 PetscCall(MatSetBlockSizesFromMats(mpiaij->B, mat, mat)); 6652 MatSeqXAIJRestoreOptions_Private(mpiaij->B); 6653 6654 PetscCall(MatSetUpMultiply_MPIAIJ(mat)); 6655 mat->was_assembled = PETSC_TRUE; // was_assembled in effect means the Mvctx is built; doing so avoids redundant MatSetUpMultiply_MPIAIJ 6656 state = mpiaij->A->nonzerostate + mpiaij->B->nonzerostate; 6657 PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat))); 6658 6659 a = (Mat_SeqAIJ *)mpiaij->A->data; 6660 b = (Mat_SeqAIJ *)mpiaij->B->data; 6661 a->free_a = PETSC_TRUE; 6662 a->free_ij = PETSC_TRUE; 6663 b->free_a = PETSC_TRUE; 6664 b->free_ij = PETSC_TRUE; 6665 a->maxnz = a->nz; 6666 b->maxnz = b->nz; 6667 6668 /* conversion must happen AFTER multiply setup */ 6669 PetscCall(MatConvert(mpiaij->A, rtype, MAT_INPLACE_MATRIX, &mpiaij->A)); 6670 PetscCall(MatConvert(mpiaij->B, rtype, MAT_INPLACE_MATRIX, &mpiaij->B)); 6671 PetscCall(VecDestroy(&mpiaij->lvec)); 6672 PetscCall(MatCreateVecs(mpiaij->B, &mpiaij->lvec, NULL)); 6673 6674 // Put the COO struct in a container and then attach that to the matrix 6675 PetscCall(PetscMalloc1(1, &coo)); 6676 coo->n = coo_n; 6677 coo->sf = sf2; 6678 coo->sendlen = nleaves; 6679 coo->recvlen = nroots; 6680 coo->Annz = Annz; 6681 coo->Bnnz = Bnnz; 6682 coo->Annz2 = Annz2; 6683 coo->Bnnz2 = Bnnz2; 6684 coo->Atot1 = Atot1; 6685 coo->Atot2 = Atot2; 6686 coo->Btot1 = Btot1; 6687 coo->Btot2 = Btot2; 6688 coo->Ajmap1 = Ajmap1; 6689 coo->Aperm1 = Aperm1; 6690 coo->Bjmap1 = Bjmap1; 6691 coo->Bperm1 = Bperm1; 6692 coo->Aimap2 = Aimap2; 6693 coo->Ajmap2 = Ajmap2; 6694 coo->Aperm2 = Aperm2; 6695 coo->Bimap2 = Bimap2; 6696 
coo->Bjmap2 = Bjmap2; 6697 coo->Bperm2 = Bperm2; 6698 coo->Cperm1 = Cperm1; 6699 // Allocate in preallocation. If not used, it has zero cost on host 6700 PetscCall(PetscMalloc2(coo->sendlen, &coo->sendbuf, coo->recvlen, &coo->recvbuf)); 6701 PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container)); 6702 PetscCall(PetscContainerSetPointer(container, coo)); 6703 PetscCall(PetscContainerSetCtxDestroy(container, MatCOOStructDestroy_MPIAIJ)); 6704 PetscCall(PetscObjectCompose((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject)container)); 6705 PetscCall(PetscContainerDestroy(&container)); 6706 PetscFunctionReturn(PETSC_SUCCESS); 6707 } 6708 6709 static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat, const PetscScalar v[], InsertMode imode) 6710 { 6711 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)mat->data; 6712 Mat A = mpiaij->A, B = mpiaij->B; 6713 PetscScalar *Aa, *Ba; 6714 PetscScalar *sendbuf, *recvbuf; 6715 const PetscCount *Ajmap1, *Ajmap2, *Aimap2; 6716 const PetscCount *Bjmap1, *Bjmap2, *Bimap2; 6717 const PetscCount *Aperm1, *Aperm2, *Bperm1, *Bperm2; 6718 const PetscCount *Cperm1; 6719 PetscContainer container; 6720 MatCOOStruct_MPIAIJ *coo; 6721 6722 PetscFunctionBegin; 6723 PetscCall(PetscObjectQuery((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject *)&container)); 6724 PetscCheck(container, PetscObjectComm((PetscObject)mat), PETSC_ERR_PLIB, "Not found MatCOOStruct on this matrix"); 6725 PetscCall(PetscContainerGetPointer(container, (void **)&coo)); 6726 sendbuf = coo->sendbuf; 6727 recvbuf = coo->recvbuf; 6728 Ajmap1 = coo->Ajmap1; 6729 Ajmap2 = coo->Ajmap2; 6730 Aimap2 = coo->Aimap2; 6731 Bjmap1 = coo->Bjmap1; 6732 Bjmap2 = coo->Bjmap2; 6733 Bimap2 = coo->Bimap2; 6734 Aperm1 = coo->Aperm1; 6735 Aperm2 = coo->Aperm2; 6736 Bperm1 = coo->Bperm1; 6737 Bperm2 = coo->Bperm2; 6738 Cperm1 = coo->Cperm1; 6739 6740 PetscCall(MatSeqAIJGetArray(A, &Aa)); /* Might read and write matrix values */ 6741 PetscCall(MatSeqAIJGetArray(B, &Ba)); 6742 6743 /* Pack 
entries to be sent to remote */ 6744 for (PetscCount i = 0; i < coo->sendlen; i++) sendbuf[i] = v[Cperm1[i]]; 6745 6746 /* Send remote entries to their owner and overlap the communication with local computation */ 6747 PetscCall(PetscSFReduceWithMemTypeBegin(coo->sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, sendbuf, PETSC_MEMTYPE_HOST, recvbuf, MPI_REPLACE)); 6748 /* Add local entries to A and B */ 6749 for (PetscCount i = 0; i < coo->Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */ 6750 PetscScalar sum = 0.0; /* Do partial summation first to improve numerical stability */ 6751 for (PetscCount k = Ajmap1[i]; k < Ajmap1[i + 1]; k++) sum += v[Aperm1[k]]; 6752 Aa[i] = (imode == INSERT_VALUES ? 0.0 : Aa[i]) + sum; 6753 } 6754 for (PetscCount i = 0; i < coo->Bnnz; i++) { 6755 PetscScalar sum = 0.0; 6756 for (PetscCount k = Bjmap1[i]; k < Bjmap1[i + 1]; k++) sum += v[Bperm1[k]]; 6757 Ba[i] = (imode == INSERT_VALUES ? 0.0 : Ba[i]) + sum; 6758 } 6759 PetscCall(PetscSFReduceEnd(coo->sf, MPIU_SCALAR, sendbuf, recvbuf, MPI_REPLACE)); 6760 6761 /* Add received remote entries to A and B */ 6762 for (PetscCount i = 0; i < coo->Annz2; i++) { 6763 for (PetscCount k = Ajmap2[i]; k < Ajmap2[i + 1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]]; 6764 } 6765 for (PetscCount i = 0; i < coo->Bnnz2; i++) { 6766 for (PetscCount k = Bjmap2[i]; k < Bjmap2[i + 1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]]; 6767 } 6768 PetscCall(MatSeqAIJRestoreArray(A, &Aa)); 6769 PetscCall(MatSeqAIJRestoreArray(B, &Ba)); 6770 PetscFunctionReturn(PETSC_SUCCESS); 6771 } 6772 6773 /*MC 6774 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 6775 6776 Options Database Keys: 6777 . 
-mat_type mpiaij - sets the matrix type to `MATMPIAIJ` during a call to `MatSetFromOptions()` 6778 6779 Level: beginner 6780 6781 Notes: 6782 `MatSetValues()` may be called for this matrix type with a `NULL` argument for the numerical values, 6783 in this case the values associated with the rows and columns one passes in are set to zero 6784 in the matrix 6785 6786 `MatSetOptions`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this no 6787 space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored 6788 6789 .seealso: [](ch_matrices), `Mat`, `MATSEQAIJ`, `MATAIJ`, `MatCreateAIJ()` 6790 M*/ 6791 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 6792 { 6793 Mat_MPIAIJ *b; 6794 PetscMPIInt size; 6795 6796 PetscFunctionBegin; 6797 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 6798 6799 PetscCall(PetscNew(&b)); 6800 B->data = (void *)b; 6801 B->ops[0] = MatOps_Values; 6802 B->assembled = PETSC_FALSE; 6803 B->insertmode = NOT_SET_VALUES; 6804 b->size = size; 6805 6806 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank)); 6807 6808 /* build cache for off array entries formed */ 6809 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash)); 6810 6811 b->donotstash = PETSC_FALSE; 6812 b->colmap = NULL; 6813 b->garray = NULL; 6814 b->roworiented = PETSC_TRUE; 6815 6816 /* stuff used for matrix vector multiply */ 6817 b->lvec = NULL; 6818 b->Mvctx = NULL; 6819 6820 /* stuff for MatGetRow() */ 6821 b->rowindices = NULL; 6822 b->rowvalues = NULL; 6823 b->getrowactive = PETSC_FALSE; 6824 6825 /* flexible pointer used in CUSPARSE classes */ 6826 b->spptr = NULL; 6827 6828 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetUseScalableIncreaseOverlap_C", MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ)); 6829 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIAIJ)); 6830 
PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIAIJ)); 6831 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_MPIAIJ)); 6832 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocation_C", MatMPIAIJSetPreallocation_MPIAIJ)); 6833 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetPreallocation_C", MatResetPreallocation_MPIAIJ)); 6834 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetHash_C", MatResetHash_MPIAIJ)); 6835 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocationCSR_C", MatMPIAIJSetPreallocationCSR_MPIAIJ)); 6836 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIAIJ)); 6837 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijperm_C", MatConvert_MPIAIJ_MPIAIJPERM)); 6838 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijsell_C", MatConvert_MPIAIJ_MPIAIJSELL)); 6839 #if defined(PETSC_HAVE_CUDA) 6840 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcusparse_C", MatConvert_MPIAIJ_MPIAIJCUSPARSE)); 6841 #endif 6842 #if defined(PETSC_HAVE_HIP) 6843 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijhipsparse_C", MatConvert_MPIAIJ_MPIAIJHIPSPARSE)); 6844 #endif 6845 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 6846 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijkokkos_C", MatConvert_MPIAIJ_MPIAIJKokkos)); 6847 #endif 6848 #if defined(PETSC_HAVE_MKL_SPARSE) 6849 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijmkl_C", MatConvert_MPIAIJ_MPIAIJMKL)); 6850 #endif 6851 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcrl_C", MatConvert_MPIAIJ_MPIAIJCRL)); 6852 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpibaij_C", MatConvert_MPIAIJ_MPIBAIJ)); 
6853 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisbaij_C", MatConvert_MPIAIJ_MPISBAIJ)); 6854 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense)); 6855 #if defined(PETSC_HAVE_ELEMENTAL) 6856 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_elemental_C", MatConvert_MPIAIJ_Elemental)); 6857 #endif 6858 #if defined(PETSC_HAVE_SCALAPACK) 6859 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_scalapack_C", MatConvert_AIJ_ScaLAPACK)); 6860 #endif 6861 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_is_C", MatConvert_XAIJ_IS)); 6862 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisell_C", MatConvert_MPIAIJ_MPISELL)); 6863 #if defined(PETSC_HAVE_HYPRE) 6864 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_hypre_C", MatConvert_AIJ_HYPRE)); 6865 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", MatProductSetFromOptions_Transpose_AIJ_AIJ)); 6866 #endif 6867 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_is_mpiaij_C", MatProductSetFromOptions_IS_XAIJ)); 6868 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpiaij_C", MatProductSetFromOptions_MPIAIJ)); 6869 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetPreallocationCOO_C", MatSetPreallocationCOO_MPIAIJ)); 6870 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetValuesCOO_C", MatSetValuesCOO_MPIAIJ)); 6871 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIAIJ)); 6872 PetscFunctionReturn(PETSC_SUCCESS); 6873 } 6874 6875 /*@ 6876 MatCreateMPIAIJWithSplitArrays - creates a `MATMPIAIJ` matrix using arrays that contain the "diagonal" 6877 and "off-diagonal" part of the matrix in CSR format. 
6878 6879 Collective 6880 6881 Input Parameters: 6882 + comm - MPI communicator 6883 . m - number of local rows (Cannot be `PETSC_DECIDE`) 6884 . n - This value should be the same as the local size used in creating the 6885 x vector for the matrix-vector product $y = Ax$. (or `PETSC_DECIDE` to have 6886 calculated if `N` is given) For square matrices `n` is almost always `m`. 6887 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given) 6888 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given) 6889 . i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 6890 . j - column indices, which must be local, i.e., based off the start column of the diagonal portion 6891 . a - matrix values 6892 . oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix 6893 . oj - column indices, which must be global, representing global columns in the `MATMPIAIJ` matrix 6894 - oa - matrix values 6895 6896 Output Parameter: 6897 . mat - the matrix 6898 6899 Level: advanced 6900 6901 Notes: 6902 The `i`, `j`, and `a` arrays ARE NOT copied by this routine into the internal format used by PETSc (even in Fortran). The user 6903 must free the arrays once the matrix has been destroyed and not before. 6904 6905 The `i` and `j` indices are 0 based 6906 6907 See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix 6908 6909 This sets local rows and cannot be used to set off-processor values. 6910 6911 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 6912 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 6913 not easily support in-place reassembly. 
It is recommended to use MatSetValues() (or a variant thereof) because 6914 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 6915 keep track of the underlying array. Use `MatSetOption`(A,`MAT_NO_OFF_PROC_ENTRIES`,`PETSC_TRUE`) to disable all 6916 communication if it is known that only local entries will be set. 6917 6918 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 6919 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()` 6920 @*/ 6921 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt i[], PetscInt j[], PetscScalar a[], PetscInt oi[], PetscInt oj[], PetscScalar oa[], Mat *mat) 6922 { 6923 Mat_MPIAIJ *maij; 6924 6925 PetscFunctionBegin; 6926 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 6927 PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 6928 PetscCheck(oi[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "oi (row indices) must start with 0"); 6929 PetscCall(MatCreate(comm, mat)); 6930 PetscCall(MatSetSizes(*mat, m, n, M, N)); 6931 PetscCall(MatSetType(*mat, MATMPIAIJ)); 6932 maij = (Mat_MPIAIJ *)(*mat)->data; 6933 6934 (*mat)->preallocated = PETSC_TRUE; 6935 6936 PetscCall(PetscLayoutSetUp((*mat)->rmap)); 6937 PetscCall(PetscLayoutSetUp((*mat)->cmap)); 6938 6939 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, i, j, a, &maij->A)); 6940 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, (*mat)->cmap->N, oi, oj, oa, &maij->B)); 6941 6942 PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 6943 PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY)); 6944 PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY)); 6945 PetscCall(MatSetOption(*mat, 
MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE)); 6946 PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 6947 PetscFunctionReturn(PETSC_SUCCESS); 6948 } 6949 6950 typedef struct { 6951 Mat *mp; /* intermediate products */ 6952 PetscBool *mptmp; /* is the intermediate product temporary ? */ 6953 PetscInt cp; /* number of intermediate products */ 6954 6955 /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */ 6956 PetscInt *startsj_s, *startsj_r; 6957 PetscScalar *bufa; 6958 Mat P_oth; 6959 6960 /* may take advantage of merging product->B */ 6961 Mat Bloc; /* B-local by merging diag and off-diag */ 6962 6963 /* cusparse does not have support to split between symbolic and numeric phases. 6964 When api_user is true, we don't need to update the numerical values 6965 of the temporary storage */ 6966 PetscBool reusesym; 6967 6968 /* support for COO values insertion */ 6969 PetscScalar *coo_v, *coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */ 6970 PetscInt **own; /* own[i] points to address of on-process COO indices for Mat mp[i] */ 6971 PetscInt **off; /* off[i] points to address of off-process COO indices for Mat mp[i] */ 6972 PetscBool hasoffproc; /* if true, have off-process values insertion (i.e. 
AtB or PtAP) */ 6973 PetscSF sf; /* used for non-local values insertion and memory malloc */ 6974 PetscMemType mtype; 6975 6976 /* customization */ 6977 PetscBool abmerge; 6978 PetscBool P_oth_bind; 6979 } MatMatMPIAIJBACKEND; 6980 6981 static PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data) 6982 { 6983 MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND *)data; 6984 PetscInt i; 6985 6986 PetscFunctionBegin; 6987 PetscCall(PetscFree2(mmdata->startsj_s, mmdata->startsj_r)); 6988 PetscCall(PetscFree(mmdata->bufa)); 6989 PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_v)); 6990 PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_w)); 6991 PetscCall(MatDestroy(&mmdata->P_oth)); 6992 PetscCall(MatDestroy(&mmdata->Bloc)); 6993 PetscCall(PetscSFDestroy(&mmdata->sf)); 6994 for (i = 0; i < mmdata->cp; i++) PetscCall(MatDestroy(&mmdata->mp[i])); 6995 PetscCall(PetscFree2(mmdata->mp, mmdata->mptmp)); 6996 PetscCall(PetscFree(mmdata->own[0])); 6997 PetscCall(PetscFree(mmdata->own)); 6998 PetscCall(PetscFree(mmdata->off[0])); 6999 PetscCall(PetscFree(mmdata->off)); 7000 PetscCall(PetscFree(mmdata)); 7001 PetscFunctionReturn(PETSC_SUCCESS); 7002 } 7003 7004 /* Copy selected n entries with indices in idx[] of A to v[]. 
7005 If idx is NULL, copy the whole data array of A to v[] 7006 */ 7007 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[]) 7008 { 7009 PetscErrorCode (*f)(Mat, PetscInt, const PetscInt[], PetscScalar[]); 7010 7011 PetscFunctionBegin; 7012 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJCopySubArray_C", &f)); 7013 if (f) { 7014 PetscCall((*f)(A, n, idx, v)); 7015 } else { 7016 const PetscScalar *vv; 7017 7018 PetscCall(MatSeqAIJGetArrayRead(A, &vv)); 7019 if (n && idx) { 7020 PetscScalar *w = v; 7021 const PetscInt *oi = idx; 7022 PetscInt j; 7023 7024 for (j = 0; j < n; j++) *w++ = vv[*oi++]; 7025 } else { 7026 PetscCall(PetscArraycpy(v, vv, n)); 7027 } 7028 PetscCall(MatSeqAIJRestoreArrayRead(A, &vv)); 7029 } 7030 PetscFunctionReturn(PETSC_SUCCESS); 7031 } 7032 7033 static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C) 7034 { 7035 MatMatMPIAIJBACKEND *mmdata; 7036 PetscInt i, n_d, n_o; 7037 7038 PetscFunctionBegin; 7039 MatCheckProduct(C, 1); 7040 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 7041 mmdata = (MatMatMPIAIJBACKEND *)C->product->data; 7042 if (!mmdata->reusesym) { /* update temporary matrices */ 7043 if (mmdata->P_oth) PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A, C->product->B, MAT_REUSE_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7044 if (mmdata->Bloc) PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B, MAT_REUSE_MATRIX, NULL, &mmdata->Bloc)); 7045 } 7046 mmdata->reusesym = PETSC_FALSE; 7047 7048 for (i = 0; i < mmdata->cp; i++) { 7049 PetscCheck(mmdata->mp[i]->ops->productnumeric, PetscObjectComm((PetscObject)mmdata->mp[i]), PETSC_ERR_PLIB, "Missing numeric op for %s", MatProductTypes[mmdata->mp[i]->product->type]); 7050 PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i])); 7051 } 7052 for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) { 7053 PetscInt noff; 7054 7055 
PetscCall(PetscIntCast(mmdata->off[i + 1] - mmdata->off[i], &noff)); 7056 if (mmdata->mptmp[i]) continue; 7057 if (noff) { 7058 PetscInt nown; 7059 7060 PetscCall(PetscIntCast(mmdata->own[i + 1] - mmdata->own[i], &nown)); 7061 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], noff, mmdata->off[i], mmdata->coo_w + n_o)); 7062 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], nown, mmdata->own[i], mmdata->coo_v + n_d)); 7063 n_o += noff; 7064 n_d += nown; 7065 } else { 7066 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mmdata->mp[i]->data; 7067 7068 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], mm->nz, NULL, mmdata->coo_v + n_d)); 7069 n_d += mm->nz; 7070 } 7071 } 7072 if (mmdata->hasoffproc) { /* offprocess insertion */ 7073 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d)); 7074 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d)); 7075 } 7076 PetscCall(MatSetValuesCOO(C, mmdata->coo_v, INSERT_VALUES)); 7077 PetscFunctionReturn(PETSC_SUCCESS); 7078 } 7079 7080 /* Support for Pt * A, A * P, or Pt * A * P */ 7081 #define MAX_NUMBER_INTERMEDIATE 4 7082 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C) 7083 { 7084 Mat_Product *product = C->product; 7085 Mat A, P, mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */ 7086 Mat_MPIAIJ *a, *p; 7087 MatMatMPIAIJBACKEND *mmdata; 7088 ISLocalToGlobalMapping P_oth_l2g = NULL; 7089 IS glob = NULL; 7090 const char *prefix; 7091 char pprefix[256]; 7092 const PetscInt *globidx, *P_oth_idx; 7093 PetscInt i, j, cp, m, n, M, N, *coo_i, *coo_j; 7094 PetscCount ncoo, ncoo_d, ncoo_o, ncoo_oown; 7095 PetscInt cmapt[MAX_NUMBER_INTERMEDIATE], rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. 
                                                                                          */
  /* type-0: consecutive, start from 0; type-1: consecutive with */
  /* a base offset; type-2: sparse with a local to global map table */
  const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE], *rmapa[MAX_NUMBER_INTERMEDIATE]; /* col/row local to global map array (table) for type-2 map type */

  MatProductType ptype;
  PetscBool      mptmp[MAX_NUMBER_INTERMEDIATE], hasoffproc = PETSC_FALSE, iscuda, iship, iskokk;
  PetscMPIInt    size;

  PetscFunctionBegin;
  MatCheckProduct(C, 1);
  PetscCheck(!product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
  ptype = product->type;
  /* treat A^t B as A B when A is known symmetric; record that we relied on the symmetry */
  if (product->A->symmetric == PETSC_BOOL3_TRUE && ptype == MATPRODUCT_AtB) {
    ptype                                          = MATPRODUCT_AB;
    product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE;
  }
  /* determine C's local/global sizes and whether product values must be scattered to other ranks */
  switch (ptype) {
  case MATPRODUCT_AB:
    A          = product->A;
    P          = product->B;
    m          = A->rmap->n;
    n          = P->cmap->n;
    M          = A->rmap->N;
    N          = P->cmap->N;
    hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */
    break;
  case MATPRODUCT_AtB:
    P          = product->A;
    A          = product->B;
    m          = P->cmap->n;
    n          = A->cmap->n;
    M          = P->cmap->N;
    N          = A->cmap->N;
    hasoffproc = PETSC_TRUE;
    break;
  case MATPRODUCT_PtAP:
    A          = product->A;
    P          = product->B;
    m          = P->cmap->n;
    n          = P->cmap->n;
    M          = P->cmap->N;
    N          = P->cmap->N;
    hasoffproc = PETSC_TRUE;
    break;
  default:
    SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]);
  }
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C), &size));
  if (size == 1) hasoffproc = PETSC_FALSE; /* single rank: everything is local */

  /* defaults */
  for (i = 0; i < MAX_NUMBER_INTERMEDIATE; i++) {
    mp[i]    = NULL;
    mptmp[i] = PETSC_FALSE;
    rmapt[i] = -1;
    cmapt[i] = -1;
    rmapa[i] = NULL;
    cmapa[i] = NULL;
  }

  /* customization: option names depend on whether the user called the legacy API (MatMatMult/MatPtAP)
     or the MatProduct API */
  PetscCall(PetscNew(&mmdata));
  mmdata->reusesym = product->api_user;
  if (ptype == MATPRODUCT_AB) {
    if (product->api_user) {
      PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatMult", "Mat");
      PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL));
      PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
      PetscOptionsEnd();
    } else {
      PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat");
      PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL));
      PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
      PetscOptionsEnd();
    }
  } else if (ptype == MATPRODUCT_PtAP) {
    if (product->api_user) {
      PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatPtAP", "Mat");
      PetscCall(PetscOptionsBool("-matptap_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
      PetscOptionsEnd();
    } else {
      PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat");
      PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
      PetscOptionsEnd();
    }
  }
  a = (Mat_MPIAIJ *)A->data;
  p = (Mat_MPIAIJ *)P->data;
  PetscCall(MatSetSizes(C, m, n, M, N));
  PetscCall(PetscLayoutSetUp(C->rmap));
  PetscCall(PetscLayoutSetUp(C->cmap));
  PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
  PetscCall(MatGetOptionsPrefix(C, &prefix));

  /* build the list of intermediate sequential products mp[0..cp);
     each gets the prefix "backend_p<k>_" appended so it can be tuned individually */
  cp = 0;
  switch (ptype) {
  case MATPRODUCT_AB: /* A * P */
    PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));

    /* A_diag * P_local (merged or not) */
    if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */
      /* P is product->B */
      PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
      PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      PetscCall(ISGetIndices(glob, &globidx));
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    } else { /* A_diag * P_diag and A_diag * P_off */
      PetscCall(MatProductCreate(a->A, p->A, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 1;
      cmapt[cp] = 1;
      mptmp[cp] = PETSC_FALSE;
      cp++;
      PetscCall(MatProductCreate(a->A, p->B, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = p->garray; /* P's off-diag columns map through garray */
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }

    /* A_off * P_other */
    if (mmdata->P_oth) {
      PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); /* make P_oth use local col ids */
      PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx));
      PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)a->B)->type_name));
      PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind));
      PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = P_oth_idx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }
    break;

  case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */
    /* A is product->B */
    PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
    if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */
      PetscCall(MatProductCreate(mmdata->Bloc, mmdata->Bloc, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      PetscCall(ISGetIndices(glob, &globidx));
      rmapt[cp] = 2;
      rmapa[cp] = globidx;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    } else {
      PetscCall(MatProductCreate(p->A, mmdata->Bloc, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      PetscCall(ISGetIndices(glob, &globidx));
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
      PetscCall(MatProductCreate(p->B, mmdata->Bloc, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 2;
      rmapa[cp] = p->garray; /* rows of P_off^t map through garray -> off-process rows of C */
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }
    break;
  case MATPRODUCT_PtAP:
    PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
    /* P is product->B */
    PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
    PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp]));
    PetscCall(MatProductSetType(mp[cp], MATPRODUCT_PtAP));
    PetscCall(MatProductSetFill(mp[cp], product->fill));
    PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
    PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
    PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
    mp[cp]->product->api_user = product->api_user;
    PetscCall(MatProductSetFromOptions(mp[cp]));
    PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
    PetscCall(ISGetIndices(glob, &globidx));
    rmapt[cp] = 2;
    rmapa[cp] = globidx;
    cmapt[cp] = 2;
    cmapa[cp] = globidx;
    mptmp[cp] = PETSC_FALSE;
    cp++;
    if (mmdata->P_oth) {
      PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g));
      PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx));
      PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)a->B)->type_name));
      PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind));
      /* mp[1] = A_off * P_oth is only a temporary feeding mp[2] below */
      PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      mptmp[cp] = PETSC_TRUE;
      cp++;
      PetscCall(MatProductCreate(mmdata->Bloc, mp[1], NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 2;
      rmapa[cp] = globidx;
      cmapt[cp] = 2;
      cmapa[cp] = P_oth_idx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }
    break;
  default:
    SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]);
  }
  /* sanity check */
  if (size > 1)
    for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected offproc map type for product %" PetscInt_FMT, i);

  PetscCall(PetscMalloc2(cp, &mmdata->mp, cp, &mmdata->mptmp));
  for (i = 0; i < cp; i++) {
    mmdata->mp[i]    = mp[i];
    mmdata->mptmp[i] = mptmp[i];
  }
  mmdata->cp             = cp;
  C->product->data       = mmdata;
  C->product->destroy    = MatDestroy_MatMatMPIAIJBACKEND;
  C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND;

  /* memory type: pick the memory space matching C's backend so COO buffers live where the values do */
  mmdata->mtype = PETSC_MEMTYPE_HOST;
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iscuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, ""));
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iship, MATSEQAIJHIPSPARSE, MATMPIAIJHIPSPARSE, ""));
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iskokk, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, ""));
  if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA;
  else if (iship) mmdata->mtype = PETSC_MEMTYPE_HIP;
  else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS;

  /* prepare coo coordinates for values insertion */

  /* count total nonzeros of those intermediate seqaij Mats
    ncoo_d: # of nonzeros of matrices that do not have offproc entries
    ncoo_o: # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs
    ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally
  */
  for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) {
    Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data;
    if (mptmp[cp]) continue;
    if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scatter to all processes (might include self) */
      const PetscInt *rmap = rmapa[cp];
      const PetscInt  mr   = mp[cp]->rmap->n;
      const PetscInt  rs   = C->rmap->rstart;
      const PetscInt  re   = C->rmap->rend;
      const PetscInt *ii   = mm->i;
      for (i = 0; i < mr; i++) {
        const PetscInt gr = rmap[i];
        const PetscInt nz = ii[i + 1] - ii[i];
        if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */
        else ncoo_oown += nz;                  /* this row is local */
      }
    } else ncoo_d += mm->nz;
  }

  /*
    ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc

    ncoo = ncoo_d + ncoo_oown + ncoo2, which ncoo2 is number of nonzeros inserted to me by other procs.

    off[0] points to a big index array, which is shared by off[1,2,...]. Similarly, for own[0].

    off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others
    own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally
    so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others.

    coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc.
    Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaining part stores i of nonzeros I will receive.
  */
  PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->off)); /* +1 to make a csr-like data structure */
  PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->own));

  /* gather (i,j) of nonzeros inserted by remote procs */
  if (hasoffproc) {
    PetscSF  msf;
    PetscInt ncoo2, *coo_i2, *coo_j2;

    PetscCall(PetscMalloc1(ncoo_o, &mmdata->off[0]));
    PetscCall(PetscMalloc1(ncoo_oown, &mmdata->own[0]));
    PetscCall(PetscMalloc2(ncoo_o, &coo_i, ncoo_o, &coo_j)); /* to collect (i,j) of entries to be sent to others */

    for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) {
      Mat_SeqAIJ *mm     = (Mat_SeqAIJ *)mp[cp]->data;
      PetscInt   *idxoff = mmdata->off[cp];
      PetscInt   *idxown = mmdata->own[cp];
      if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */
        const PetscInt *rmap = rmapa[cp];
        const PetscInt *cmap = cmapa[cp];
        const PetscInt *ii   = mm->i;
        PetscInt       *coi  = coo_i + ncoo_o;
        PetscInt       *coj  = coo_j + ncoo_o;
        const PetscInt  mr   = mp[cp]->rmap->n;
        const PetscInt  rs   = C->rmap->rstart;
        const PetscInt  re   = C->rmap->rend;
        const PetscInt  cs   = C->cmap->rstart;
        for (i = 0; i < mr; i++) {
          const PetscInt *jj = mm->j + ii[i];
          const PetscInt  gr = rmap[i];
          const PetscInt  nz = ii[i + 1] - ii[i];
          if (gr < rs || gr >= re) { /* this is an offproc row */
            for (j = ii[i]; j < ii[i + 1]; j++) {
              *coi++    = gr;
              *idxoff++ = j;
            }
            if (!cmapt[cp]) { /* already global */
              for (j = 0; j < nz; j++) *coj++ = jj[j];
            } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
              for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
            } else { /* offdiag */
              for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
            }
            ncoo_o += nz;
          } else { /* this is a local row */
            for (j = ii[i]; j < ii[i + 1]; j++) *idxown++ = j;
          }
        }
      }
      /* csr-like: segment ends become the next segment's starts */
      mmdata->off[cp + 1] = idxoff;
      mmdata->own[cp + 1] = idxown;
    }

    PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf));
    PetscInt incoo_o;
    PetscCall(PetscIntCast(ncoo_o, &incoo_o));
    PetscCall(PetscSFSetGraphLayout(mmdata->sf, C->rmap, incoo_o /*nleaves*/, NULL /*ilocal*/, PETSC_OWN_POINTER, coo_i));
    PetscCall(PetscSFGetMultiSF(mmdata->sf, &msf));
    PetscCall(PetscSFGetGraph(msf, &ncoo2 /*nroots*/, NULL, NULL, NULL));
    ncoo = ncoo_d + ncoo_oown + ncoo2;
    PetscCall(PetscMalloc2(ncoo, &coo_i2, ncoo, &coo_j2));
    PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */
    PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown));
    PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown));
    PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown));
    PetscCall(PetscFree2(coo_i, coo_j));
    /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */
    PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo_o * sizeof(PetscScalar), (void **)&mmdata->coo_w));
    coo_i = coo_i2;
    coo_j = coo_j2;
  } else { /* no offproc values insertion */
    ncoo = ncoo_d;
    PetscCall(PetscMalloc2(ncoo, &coo_i, ncoo, &coo_j));

    /* an empty sf keeps the numeric phase uniform */
    PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf));
    PetscCall(PetscSFSetGraph(mmdata->sf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER));
    PetscCall(PetscSFSetUp(mmdata->sf));
  }
  mmdata->hasoffproc = hasoffproc;

  /* gather (i,j) of nonzeros inserted locally */
  for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) {
    Mat_SeqAIJ     *mm   = (Mat_SeqAIJ *)mp[cp]->data;
    PetscInt       *coi  = coo_i + ncoo_d;
    PetscInt       *coj  = coo_j + ncoo_d;
    const PetscInt *jj   = mm->j;
    const PetscInt *ii   = mm->i;
    const PetscInt *cmap = cmapa[cp];
    const PetscInt *rmap = rmapa[cp];
    const PetscInt  mr   = mp[cp]->rmap->n;
    const PetscInt  rs   = C->rmap->rstart;
    const PetscInt  re   = C->rmap->rend;
    const PetscInt  cs   = C->cmap->rstart;

    if (mptmp[cp]) continue;
    if (rmapt[cp] == 1) { /* consecutive rows */
      /* fill coo_i */
      for (i = 0; i < mr; i++) {
        const PetscInt gr = i + rs;
        for (j = ii[i]; j < ii[i + 1]; j++) coi[j] = gr;
      }
      /* fill coo_j */
      if (!cmapt[cp]) { /* type-0, already global */
        PetscCall(PetscArraycpy(coj, jj, mm->nz));
      } else if (cmapt[cp] == 1) { /* type-1, local to global for consecutive columns of C */
        for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */
      } else { /* type-2, local to global for sparse columns */
        for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]];
      }
      ncoo_d += mm->nz;
    } else if (rmapt[cp] == 2) { /* sparse rows */
      for (i = 0; i < mr; i++) {
        const PetscInt *jj = mm->j + ii[i];
        const PetscInt  gr = rmap[i];
        const PetscInt  nz = ii[i + 1] - ii[i];
        if (gr >= rs && gr < re) { /* local rows */
          for (j = ii[i]; j < ii[i + 1]; j++) *coi++ = gr;
          if (!cmapt[cp]) { /* type-0, already global */
            for (j = 0; j < nz; j++) *coj++ = jj[j];
          } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
            for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
          } else { /* type-2, local to global for sparse columns */
            for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
          }
          ncoo_d += nz;
        }
      }
    }
  }
  if (glob) PetscCall(ISRestoreIndices(glob, &globidx));
  PetscCall(ISDestroy(&glob));
  if (P_oth_l2g) PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g, &P_oth_idx));
  PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g));
  /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to
     this proc */
  PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo * sizeof(PetscScalar), (void **)&mmdata->coo_v));

  /* set block sizes */
  A = product->A;
  P = product->B;
  switch (ptype) {
  case MATPRODUCT_PtAP:
    PetscCall(MatSetBlockSizes(C, P->cmap->bs, P->cmap->bs));
    break;
  case MATPRODUCT_RARt:
    PetscCall(MatSetBlockSizes(C, P->rmap->bs, P->rmap->bs));
    break;
  case MATPRODUCT_ABC:
    PetscCall(MatSetBlockSizesFromMats(C, A, product->C));
    break;
  case MATPRODUCT_AB:
    PetscCall(MatSetBlockSizesFromMats(C, A, P));
    break;
  case MATPRODUCT_AtB:
    PetscCall(MatSetBlockSizes(C, A->cmap->bs, P->cmap->bs));
    break;
  case MATPRODUCT_ABt:
    PetscCall(MatSetBlockSizes(C, A->rmap->bs, P->rmap->bs));
    break;
  default:
    SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for ProductType %s", MatProductTypes[ptype]);
  }

  /* preallocate with COO data */
  PetscCall(MatSetPreallocationCOO(C, ncoo, coo_i, coo_j));
  PetscCall(PetscFree2(coo_i, coo_j));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Select the backend product implementation for AB, AtB, and PtAP when A and B share a (device)
   matrix type; command-line options allow forcing the plain CPU path per product type. */
PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat)
{
  Mat_Product *product = mat->product;
#if defined(PETSC_HAVE_DEVICE)
  PetscBool match  = PETSC_FALSE;
  PetscBool usecpu = PETSC_FALSE;
#else
  PetscBool match = PETSC_TRUE;
#endif

  PetscFunctionBegin;
  MatCheckProduct(mat, 1);
#if defined(PETSC_HAVE_DEVICE)
  /* only use the backend when neither operand is pinned to the CPU and their types match */
  if (!product->A->boundtocpu && !product->B->boundtocpu) PetscCall(PetscObjectTypeCompare((PetscObject)product->B, ((PetscObject)product->A)->type_name, &match));
  if (match) { /* we can always fallback to the CPU if requested */
    switch (product->type) {
    case MATPRODUCT_AB:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatMatMult", "Mat");
        PetscCall(PetscOptionsBool("-matmatmult_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AB", "Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      }
      break;
    case MATPRODUCT_AtB:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatTransposeMatMult", "Mat");
        PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AtB", "Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      }
      break;
    case MATPRODUCT_PtAP:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatPtAP", "Mat");
        PetscCall(PetscOptionsBool("-matptap_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_PtAP", "Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      }
      break;
    default:
      break;
    }
    match = (PetscBool)!usecpu;
  }
#endif
  if (match) {
    switch (product->type) {
    case MATPRODUCT_AB:
    case MATPRODUCT_AtB:
    case MATPRODUCT_PtAP:
      mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND;
      break;
    default:
      break;
    }
  }
  /* fallback to
MPIAIJ ops */ 7672 if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat)); 7673 PetscFunctionReturn(PETSC_SUCCESS); 7674 } 7675 7676 /* 7677 Produces a set of block column indices of the matrix row, one for each block represented in the original row 7678 7679 n - the number of block indices in cc[] 7680 cc - the block indices (must be large enough to contain the indices) 7681 */ 7682 static inline PetscErrorCode MatCollapseRow(Mat Amat, PetscInt row, PetscInt bs, PetscInt *n, PetscInt *cc) 7683 { 7684 PetscInt cnt = -1, nidx, j; 7685 const PetscInt *idx; 7686 7687 PetscFunctionBegin; 7688 PetscCall(MatGetRow(Amat, row, &nidx, &idx, NULL)); 7689 if (nidx) { 7690 cnt = 0; 7691 cc[cnt] = idx[0] / bs; 7692 for (j = 1; j < nidx; j++) { 7693 if (cc[cnt] < idx[j] / bs) cc[++cnt] = idx[j] / bs; 7694 } 7695 } 7696 PetscCall(MatRestoreRow(Amat, row, &nidx, &idx, NULL)); 7697 *n = cnt + 1; 7698 PetscFunctionReturn(PETSC_SUCCESS); 7699 } 7700 7701 /* 7702 Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows 7703 7704 ncollapsed - the number of block indices 7705 collapsed - the block indices (must be large enough to contain the indices) 7706 */ 7707 static inline PetscErrorCode MatCollapseRows(Mat Amat, PetscInt start, PetscInt bs, PetscInt *w0, PetscInt *w1, PetscInt *w2, PetscInt *ncollapsed, PetscInt **collapsed) 7708 { 7709 PetscInt i, nprev, *cprev = w0, ncur = 0, *ccur = w1, *merged = w2, *cprevtmp; 7710 7711 PetscFunctionBegin; 7712 PetscCall(MatCollapseRow(Amat, start, bs, &nprev, cprev)); 7713 for (i = start + 1; i < start + bs; i++) { 7714 PetscCall(MatCollapseRow(Amat, i, bs, &ncur, ccur)); 7715 PetscCall(PetscMergeIntArray(nprev, cprev, ncur, ccur, &nprev, &merged)); 7716 cprevtmp = cprev; 7717 cprev = merged; 7718 merged = cprevtmp; 7719 } 7720 *ncollapsed = nprev; 7721 if (collapsed) *collapsed = cprev; 7722 PetscFunctionReturn(PETSC_SUCCESS); 7723 } 7724 7725 /* 7726 
MatCreateGraph_Simple_AIJ - create simple scalar matrix (graph) from potentially blocked matrix 7727 7728 Input Parameter: 7729 . Amat - matrix 7730 - symmetrize - make the result symmetric 7731 + scale - scale with diagonal 7732 7733 Output Parameter: 7734 . a_Gmat - output scalar graph >= 0 7735 7736 */ 7737 PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, PetscReal filter, PetscInt index_size, PetscInt index[], Mat *a_Gmat) 7738 { 7739 PetscInt Istart, Iend, Ii, jj, kk, ncols, nloc, NN, MM, bs; 7740 MPI_Comm comm; 7741 Mat Gmat; 7742 PetscBool ismpiaij, isseqaij; 7743 Mat a, b, c; 7744 MatType jtype; 7745 7746 PetscFunctionBegin; 7747 PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm)); 7748 PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend)); 7749 PetscCall(MatGetSize(Amat, &MM, &NN)); 7750 PetscCall(MatGetBlockSize(Amat, &bs)); 7751 nloc = (Iend - Istart) / bs; 7752 7753 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATSEQAIJ, &isseqaij)); 7754 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATMPIAIJ, &ismpiaij)); 7755 PetscCheck(isseqaij || ismpiaij, comm, PETSC_ERR_USER, "Require (MPI)AIJ matrix type"); 7756 7757 /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */ 7758 /* A solution consists in providing a new API, MatAIJGetCollapsedAIJ, and each class can provide a fast 7759 implementation */ 7760 if (bs > 1) { 7761 PetscCall(MatGetType(Amat, &jtype)); 7762 PetscCall(MatCreate(comm, &Gmat)); 7763 PetscCall(MatSetType(Gmat, jtype)); 7764 PetscCall(MatSetSizes(Gmat, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE)); 7765 PetscCall(MatSetBlockSizes(Gmat, 1, 1)); 7766 if (isseqaij || ((Mat_MPIAIJ *)Amat->data)->garray) { 7767 PetscInt *d_nnz, *o_nnz; 7768 MatScalar *aa, val, *AA; 7769 PetscInt *aj, *ai, *AJ, nc, nmax = 0; 7770 7771 if (isseqaij) { 7772 a = Amat; 7773 b = NULL; 7774 } else { 7775 Mat_MPIAIJ *d = (Mat_MPIAIJ *)Amat->data; 
7776 a = d->A; 7777 b = d->B; 7778 } 7779 PetscCall(PetscInfo(Amat, "New bs>1 Graph. nloc=%" PetscInt_FMT "\n", nloc)); 7780 PetscCall(PetscMalloc2(nloc, &d_nnz, (isseqaij ? 0 : nloc), &o_nnz)); 7781 for (c = a, kk = 0; c && kk < 2; c = b, kk++) { 7782 PetscInt *nnz = (c == a) ? d_nnz : o_nnz; 7783 const PetscInt *cols1, *cols2; 7784 7785 for (PetscInt brow = 0, nc1, nc2, ok = 1; brow < nloc * bs; brow += bs) { // block rows 7786 PetscCall(MatGetRow(c, brow, &nc2, &cols2, NULL)); 7787 nnz[brow / bs] = nc2 / bs; 7788 if (nc2 % bs) ok = 0; 7789 if (nnz[brow / bs] > nmax) nmax = nnz[brow / bs]; 7790 for (PetscInt ii = 1; ii < bs; ii++) { // check for non-dense blocks 7791 PetscCall(MatGetRow(c, brow + ii, &nc1, &cols1, NULL)); 7792 if (nc1 != nc2) ok = 0; 7793 else { 7794 for (PetscInt jj = 0; jj < nc1 && ok == 1; jj++) { 7795 if (cols1[jj] != cols2[jj]) ok = 0; 7796 if (cols1[jj] % bs != jj % bs) ok = 0; 7797 } 7798 } 7799 PetscCall(MatRestoreRow(c, brow + ii, &nc1, &cols1, NULL)); 7800 } 7801 PetscCall(MatRestoreRow(c, brow, &nc2, &cols2, NULL)); 7802 if (!ok) { 7803 PetscCall(PetscFree2(d_nnz, o_nnz)); 7804 PetscCall(PetscInfo(Amat, "Found sparse blocks - revert to slow method\n")); 7805 goto old_bs; 7806 } 7807 } 7808 } 7809 PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz)); 7810 PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz)); 7811 PetscCall(PetscFree2(d_nnz, o_nnz)); 7812 PetscCall(PetscMalloc2(nmax, &AA, nmax, &AJ)); 7813 // diag 7814 for (PetscInt brow = 0, n, grow; brow < nloc * bs; brow += bs) { // block rows 7815 Mat_SeqAIJ *aseq = (Mat_SeqAIJ *)a->data; 7816 7817 ai = aseq->i; 7818 n = ai[brow + 1] - ai[brow]; 7819 aj = aseq->j + ai[brow]; 7820 for (PetscInt k = 0; k < n; k += bs) { // block columns 7821 AJ[k / bs] = aj[k] / bs + Istart / bs; // diag starts at (Istart,Istart) 7822 val = 0; 7823 if (index_size == 0) { 7824 for (PetscInt ii = 0; ii < bs; ii++) { // rows in block 7825 aa = aseq->a + ai[brow + ii] + k; 7826 for (PetscInt jj = 
0; jj < bs; jj++) { // columns in block 7827 val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm 7828 } 7829 } 7830 } else { // use (index,index) value if provided 7831 for (PetscInt iii = 0; iii < index_size; iii++) { // rows in block 7832 PetscInt ii = index[iii]; 7833 aa = aseq->a + ai[brow + ii] + k; 7834 for (PetscInt jjj = 0; jjj < index_size; jjj++) { // columns in block 7835 PetscInt jj = index[jjj]; 7836 val += PetscAbs(PetscRealPart(aa[jj])); 7837 } 7838 } 7839 } 7840 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%" PetscInt_FMT ") >= nmax (%" PetscInt_FMT ")", k / bs, nmax); 7841 AA[k / bs] = val; 7842 } 7843 grow = Istart / bs + brow / bs; 7844 PetscCall(MatSetValues(Gmat, 1, &grow, n / bs, AJ, AA, ADD_VALUES)); 7845 } 7846 // off-diag 7847 if (ismpiaij) { 7848 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Amat->data; 7849 const PetscScalar *vals; 7850 const PetscInt *cols, *garray = aij->garray; 7851 7852 PetscCheck(garray, PETSC_COMM_SELF, PETSC_ERR_USER, "No garray ?"); 7853 for (PetscInt brow = 0, grow; brow < nloc * bs; brow += bs) { // block rows 7854 PetscCall(MatGetRow(b, brow, &ncols, &cols, NULL)); 7855 for (PetscInt k = 0, cidx = 0; k < ncols; k += bs, cidx++) { 7856 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs >= nmax"); 7857 AA[k / bs] = 0; 7858 AJ[cidx] = garray[cols[k]] / bs; 7859 } 7860 nc = ncols / bs; 7861 PetscCall(MatRestoreRow(b, brow, &ncols, &cols, NULL)); 7862 if (index_size == 0) { 7863 for (PetscInt ii = 0; ii < bs; ii++) { // rows in block 7864 PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals)); 7865 for (PetscInt k = 0; k < ncols; k += bs) { 7866 for (PetscInt jj = 0; jj < bs; jj++) { // cols in block 7867 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%" PetscInt_FMT ") >= nmax (%" PetscInt_FMT ")", k / bs, nmax); 7868 AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj])); 7869 } 7870 } 7871 PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals)); 7872 } 7873 } else { // use (index,index) 
value if provided 7874 for (PetscInt iii = 0; iii < index_size; iii++) { // rows in block 7875 PetscInt ii = index[iii]; 7876 PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals)); 7877 for (PetscInt k = 0; k < ncols; k += bs) { 7878 for (PetscInt jjj = 0; jjj < index_size; jjj++) { // cols in block 7879 PetscInt jj = index[jjj]; 7880 AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj])); 7881 } 7882 } 7883 PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals)); 7884 } 7885 } 7886 grow = Istart / bs + brow / bs; 7887 PetscCall(MatSetValues(Gmat, 1, &grow, nc, AJ, AA, ADD_VALUES)); 7888 } 7889 } 7890 PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY)); 7891 PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY)); 7892 PetscCall(PetscFree2(AA, AJ)); 7893 } else { 7894 const PetscScalar *vals; 7895 const PetscInt *idx; 7896 PetscInt *d_nnz, *o_nnz, *w0, *w1, *w2; 7897 old_bs: 7898 /* 7899 Determine the preallocation needed for the scalar matrix derived from the vector matrix. 7900 */ 7901 PetscCall(PetscInfo(Amat, "OLD bs>1 CreateGraph\n")); 7902 PetscCall(PetscMalloc2(nloc, &d_nnz, (isseqaij ? 
0 : nloc), &o_nnz)); 7903 if (isseqaij) { 7904 PetscInt max_d_nnz; 7905 7906 /* 7907 Determine exact preallocation count for (sequential) scalar matrix 7908 */ 7909 PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat, &max_d_nnz)); 7910 max_d_nnz = PetscMin(nloc, bs * max_d_nnz); 7911 PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2)); 7912 for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) PetscCall(MatCollapseRows(Amat, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL)); 7913 PetscCall(PetscFree3(w0, w1, w2)); 7914 } else if (ismpiaij) { 7915 Mat Daij, Oaij; 7916 const PetscInt *garray; 7917 PetscInt max_d_nnz; 7918 7919 PetscCall(MatMPIAIJGetSeqAIJ(Amat, &Daij, &Oaij, &garray)); 7920 /* 7921 Determine exact preallocation count for diagonal block portion of scalar matrix 7922 */ 7923 PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij, &max_d_nnz)); 7924 max_d_nnz = PetscMin(nloc, bs * max_d_nnz); 7925 PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2)); 7926 for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) PetscCall(MatCollapseRows(Daij, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL)); 7927 PetscCall(PetscFree3(w0, w1, w2)); 7928 /* 7929 Over estimate (usually grossly over), preallocation count for off-diagonal portion of scalar matrix 7930 */ 7931 for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) { 7932 o_nnz[jj] = 0; 7933 for (kk = 0; kk < bs; kk++) { /* rows that get collapsed to a single row */ 7934 PetscCall(MatGetRow(Oaij, Ii + kk, &ncols, NULL, NULL)); 7935 o_nnz[jj] += ncols; 7936 PetscCall(MatRestoreRow(Oaij, Ii + kk, &ncols, NULL, NULL)); 7937 } 7938 if (o_nnz[jj] > (NN / bs - nloc)) o_nnz[jj] = NN / bs - nloc; 7939 } 7940 } else SETERRQ(comm, PETSC_ERR_USER, "Require AIJ matrix type"); 7941 /* get scalar copy (norms) of matrix */ 7942 PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz)); 7943 PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz)); 7944 PetscCall(PetscFree2(d_nnz, o_nnz)); 7945 for (Ii = Istart; Ii < Iend; Ii++) { 
7946 PetscInt dest_row = Ii / bs; 7947 7948 PetscCall(MatGetRow(Amat, Ii, &ncols, &idx, &vals)); 7949 for (jj = 0; jj < ncols; jj++) { 7950 PetscInt dest_col = idx[jj] / bs; 7951 PetscScalar sv = PetscAbs(PetscRealPart(vals[jj])); 7952 7953 PetscCall(MatSetValues(Gmat, 1, &dest_row, 1, &dest_col, &sv, ADD_VALUES)); 7954 } 7955 PetscCall(MatRestoreRow(Amat, Ii, &ncols, &idx, &vals)); 7956 } 7957 PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY)); 7958 PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY)); 7959 } 7960 } else { 7961 if (symmetrize || filter >= 0 || scale) PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat)); 7962 else { 7963 Gmat = Amat; 7964 PetscCall(PetscObjectReference((PetscObject)Gmat)); 7965 } 7966 if (isseqaij) { 7967 a = Gmat; 7968 b = NULL; 7969 } else { 7970 Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data; 7971 a = d->A; 7972 b = d->B; 7973 } 7974 if (filter >= 0 || scale) { 7975 /* take absolute value of each entry */ 7976 for (c = a, kk = 0; c && kk < 2; c = b, kk++) { 7977 MatInfo info; 7978 PetscScalar *avals; 7979 7980 PetscCall(MatGetInfo(c, MAT_LOCAL, &info)); 7981 PetscCall(MatSeqAIJGetArray(c, &avals)); 7982 for (int jj = 0; jj < info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]); 7983 PetscCall(MatSeqAIJRestoreArray(c, &avals)); 7984 } 7985 } 7986 } 7987 if (symmetrize) { 7988 PetscBool isset, issym; 7989 7990 PetscCall(MatIsSymmetricKnown(Amat, &isset, &issym)); 7991 if (!isset || !issym) { 7992 Mat matTrans; 7993 7994 PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans)); 7995 PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric == PETSC_BOOL3_TRUE ? 
SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN));
      PetscCall(MatDestroy(&matTrans));
    }
    PetscCall(MatSetOption(Gmat, MAT_SYMMETRIC, PETSC_TRUE));
  } else if (Amat != Gmat) PetscCall(MatPropagateSymmetryOptions(Amat, Gmat));
  if (scale) {
    /* scale c for all diagonal values = 1 or -1 */
    Vec diag;

    PetscCall(MatCreateVecs(Gmat, &diag, NULL));
    PetscCall(MatGetDiagonal(Gmat, diag));
    PetscCall(VecReciprocal(diag));
    PetscCall(VecSqrtAbs(diag));
    PetscCall(MatDiagonalScale(Gmat, diag, diag));
    PetscCall(VecDestroy(&diag));
  }
  PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view"));
  if (filter >= 0) {
    PetscCall(MatFilter(Gmat, filter, PETSC_TRUE, PETSC_TRUE));
    PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_filter_graph_view"));
  }
  *a_Gmat = Gmat;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
    Special version for direct calls from Fortran
*/

/* Change these macros so can be used in void function */
/* Identical to PetscCallVoid, except it assigns to *_ierr, the Fortran error-return
   argument that must be in scope wherever this PetscCall() is used below */
#undef PetscCall
#define PetscCall(...) \
  do { \
    PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \
    if (PetscUnlikely(ierr_msv_mpiaij)) { \
      *_ierr = PetscError(PETSC_COMM_SELF, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr_msv_mpiaij, PETSC_ERROR_REPEAT, " "); \
      return; \
    } \
  } while (0)

/* Like the standard SETERRQ, but reports through *_ierr and returns void */
#undef SETERRQ
#define SETERRQ(comm, ierr, ...) \
  do { \
    *_ierr = PetscError(comm, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr, PETSC_ERROR_INITIAL, __VA_ARGS__); \
    return; \
  } while (0)

/* Map the C symbol to the Fortran compiler's name-mangling scheme
   (all caps, no underscore, or the default trailing underscore) */
#if defined(PETSC_HAVE_FORTRAN_CAPS)
  #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
  #define matsetvaluesmpiaij_ matsetvaluesmpiaij
#else
#endif

/* matsetvaluesmpiaij_ - Fortran binding for MatSetValues() on an MPIAIJ matrix,
   called directly from Fortran for speed (bypassing the generated interface).

   All scalar arguments arrive as pointers (Fortran passes by reference):
     mmat  - the matrix
     mm/im - number of rows and their global row indices
     mn/in - number of columns and their global column indices
     v     - logically two-dimensional block of values (row- or column-oriented
             depending on aij->roworiented)
     maddv - ADD_VALUES or INSERT_VALUES
     _ierr - error code written by the PetscCall()/SETERRQ() macros redefined above

   Locally owned rows are inserted immediately into the diagonal (A) or
   off-diagonal (B) sequential blocks via the MatSetValues_SeqAIJ_{A,B}_Private()
   macros; rows owned by other ranks are stashed for assembly unless
   mat->donotstash is set. */
PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat, PetscInt *mm, const PetscInt im[], PetscInt *mn, const PetscInt in[], const PetscScalar v[], InsertMode *maddv, PetscErrorCode *_ierr)
{
  Mat         mat  = *mmat;
  PetscInt    m = *mm, n = *mn;
  InsertMode  addv = *maddv;
  Mat_MPIAIJ *aij  = (Mat_MPIAIJ *)mat->data;
  PetscScalar value;

  MatCheckPreallocated(mat, 1);
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
  else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values");
  {
    PetscInt  i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
    PetscInt  cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
    PetscBool roworiented = aij->roworiented;

    /* Some Variables required in the macro */
    Mat         A     = aij->A;
    Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
    PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
    MatScalar  *aa;
    PetscBool   ignorezeroentries = ((a->ignorezeroentries && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
    Mat         B     = aij->B;
    Mat_SeqAIJ *b     = (Mat_SeqAIJ *)B->data;
    PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
    MatScalar  *ba;
    /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
     * cannot use "#if defined" inside a macro. */
    PETSC_UNUSED PetscBool inserted = PETSC_FALSE;

    /* Scratch state consumed by the MatSetValues_SeqAIJ_{A,B}_Private() search/insert
       macros; the exact names are required by those macros */
    PetscInt  *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
    PetscInt   nonew = a->nonew;
    MatScalar *ap1, *ap2;

    PetscFunctionBegin;
    PetscCall(MatSeqAIJGetArray(A, &aa));
    PetscCall(MatSeqAIJGetArray(B, &ba));
    for (i = 0; i < m; i++) {
      if (im[i] < 0) continue; /* negative row indices are silently skipped */
      PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
      if (im[i] >= rstart && im[i] < rend) {
        /* locally owned row: reset the per-row binary-search state for both blocks */
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j = 0; j < n; j++) {
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          /* diagonal entries are kept even when ignoring zeros */
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
          if (in[j] >= cstart && in[j] < cend) {
            /* column lies in the diagonal block A */
            col = in[j] - cstart;
            MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
          } else if (in[j] < 0) continue;
          else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
            SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          } else {
            /* off-diagonal block B: translate the global column to B's compressed local numbering */
            if (mat->was_assembled) {
              if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
              PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col));
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              if (col < 0 && !((Mat_SeqAIJ *)aij->A->data)->nonew) {
                /* column not yet present and new nonzeros are allowed: disassemble so B can grow */
                PetscCall(MatDisAssemble_MPIAIJ(mat, PETSC_FALSE));
                col = in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
                B        = aij->B;
                b        = (Mat_SeqAIJ *)B->data;
                bimax    = b->imax;
                bi       = b->i;
                bilen    = b->ilen;
                bj       = b->j;
                rp2      = bj + bi[row];
                ap2      = ba + bi[row];
                rmax2    = bimax[row];
                nrow2    = bilen[row];
                low2     = 0;
                high2    = nrow2;
                bm       = aij->B->rmap->n;
                ba       = b->a;
                inserted = PETSC_FALSE;
              }
            } else col = in[j];
            MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
          }
        }
      } else if (!aij->donotstash) {
        /* row owned by another rank: stash values for MatAssemblyBegin/End() */
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
    PetscCall(MatSeqAIJRestoreArray(A, &aa));
    PetscCall(MatSeqAIJRestoreArray(B, &ba));
  }
  PetscFunctionReturnVoid();
}

/* Undefining these here since they were redefined from their original definition above! No
 * other PETSc functions should be defined past this point, as it is impossible to recover the
 * original definitions */
#undef PetscCall
#undef SETERRQ