1 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 2 #include <petsc/private/vecimpl.h> 3 #include <petsc/private/sfimpl.h> 4 #include <petsc/private/isimpl.h> 5 #include <petscblaslapack.h> 6 #include <petscsf.h> 7 #include <petsc/private/hashmapi.h> 8 9 /* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and MatAssemblyEnd_MPI_Hash() */ 10 #define TYPE AIJ 11 #define TYPE_AIJ 12 #include "../src/mat/impls/aij/mpi/mpihashmat.h" 13 #undef TYPE 14 #undef TYPE_AIJ 15 16 static PetscErrorCode MatReset_MPIAIJ(Mat mat) 17 { 18 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 19 20 PetscFunctionBegin; 21 PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N)); 22 PetscCall(MatStashDestroy_Private(&mat->stash)); 23 PetscCall(VecDestroy(&aij->diag)); 24 PetscCall(MatDestroy(&aij->A)); 25 PetscCall(MatDestroy(&aij->B)); 26 #if defined(PETSC_USE_CTABLE) 27 PetscCall(PetscHMapIDestroy(&aij->colmap)); 28 #else 29 PetscCall(PetscFree(aij->colmap)); 30 #endif 31 PetscCall(PetscFree(aij->garray)); 32 PetscCall(VecDestroy(&aij->lvec)); 33 PetscCall(VecScatterDestroy(&aij->Mvctx)); 34 PetscCall(PetscFree2(aij->rowvalues, aij->rowindices)); 35 PetscCall(PetscFree(aij->ld)); 36 PetscFunctionReturn(PETSC_SUCCESS); 37 } 38 39 static PetscErrorCode MatResetHash_MPIAIJ(Mat mat) 40 { 41 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 42 /* Save the nonzero states of the component matrices because those are what are used to determine 43 the nonzero state of mat */ 44 PetscObjectState Astate = aij->A->nonzerostate, Bstate = aij->B->nonzerostate; 45 46 PetscFunctionBegin; 47 PetscCall(MatReset_MPIAIJ(mat)); 48 PetscCall(MatSetUp_MPI_Hash(mat)); 49 aij->A->nonzerostate = ++Astate, aij->B->nonzerostate = ++Bstate; 50 PetscFunctionReturn(PETSC_SUCCESS); 51 } 52 53 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 54 { 55 PetscFunctionBegin; 56 PetscCall(MatReset_MPIAIJ(mat)); 57 58 PetscCall(PetscFree(mat->data)); 59 
60 /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */ 61 PetscCall(PetscObjectCompose((PetscObject)mat, "MatMergeSeqsToMPI", NULL)); 62 63 PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL)); 64 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL)); 65 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL)); 66 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatIsTranspose_C", NULL)); 67 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocation_C", NULL)); 68 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetPreallocation_C", NULL)); 69 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetHash_C", NULL)); 70 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocationCSR_C", NULL)); 71 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL)); 72 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpibaij_C", NULL)); 73 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisbaij_C", NULL)); 74 #if defined(PETSC_HAVE_CUDA) 75 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcusparse_C", NULL)); 76 #endif 77 #if defined(PETSC_HAVE_HIP) 78 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijhipsparse_C", NULL)); 79 #endif 80 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 81 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijkokkos_C", NULL)); 82 #endif 83 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL)); 84 #if defined(PETSC_HAVE_ELEMENTAL) 85 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_elemental_C", NULL)); 86 #endif 87 #if defined(PETSC_HAVE_SCALAPACK) 88 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_scalapack_C", NULL)); 89 #endif 90 #if defined(PETSC_HAVE_HYPRE) 
91 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_hypre_C", NULL)); 92 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", NULL)); 93 #endif 94 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL)); 95 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_is_mpiaij_C", NULL)); 96 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpiaij_C", NULL)); 97 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetUseScalableIncreaseOverlap_C", NULL)); 98 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijperm_C", NULL)); 99 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijsell_C", NULL)); 100 #if defined(PETSC_HAVE_MKL_SPARSE) 101 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijmkl_C", NULL)); 102 #endif 103 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcrl_C", NULL)); 104 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL)); 105 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisell_C", NULL)); 106 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetPreallocationCOO_C", NULL)); 107 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetValuesCOO_C", NULL)); 108 PetscFunctionReturn(PETSC_SUCCESS); 109 } 110 111 static PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) 112 { 113 Mat B; 114 115 PetscFunctionBegin; 116 PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &B)); 117 PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject)B)); 118 PetscCall(MatGetRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done)); 119 
PetscCall(MatDestroy(&B)); 120 PetscFunctionReturn(PETSC_SUCCESS); 121 } 122 123 static PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) 124 { 125 Mat B; 126 127 PetscFunctionBegin; 128 PetscCall(PetscObjectQuery((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject *)&B)); 129 PetscCall(MatRestoreRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done)); 130 PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", NULL)); 131 PetscFunctionReturn(PETSC_SUCCESS); 132 } 133 134 /*MC 135 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 136 137 This matrix type is identical to` MATSEQAIJ` when constructed with a single process communicator, 138 and `MATMPIAIJ` otherwise. As a result, for single process communicators, 139 `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported 140 for communicators controlling multiple processes. It is recommended that you call both of 141 the above preallocation routines for simplicity. 142 143 Options Database Key: 144 . -mat_type aij - sets the matrix type to `MATAIJ` during a call to `MatSetFromOptions()` 145 146 Developer Note: 147 Level: beginner 148 149 Subclasses include `MATAIJCUSPARSE`, `MATAIJPERM`, `MATAIJSELL`, `MATAIJMKL`, `MATAIJCRL`, `MATAIJKOKKOS`,and also automatically switches over to use inodes when 150 enough exist. 151 152 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateAIJ()`, `MatCreateSeqAIJ()`, `MATSEQAIJ`, `MATMPIAIJ` 153 M*/ 154 155 /*MC 156 MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices. 157 158 This matrix type is identical to `MATSEQAIJCRL` when constructed with a single process communicator, 159 and `MATMPIAIJCRL` otherwise. 
   As a result, for single process communicators,
   `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Key:
. -mat_type aijcrl - sets the matrix type to `MATMPIAIJCRL` during a call to `MatSetFromOptions()`

   Level: beginner

.seealso: [](ch_matrices), `Mat`, `MatCreateMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL`
M*/

/*
  Binds (flg = PETSC_TRUE) or unbinds the matrix to the CPU, forwarding the request to
  the diagonal block A, the off-diagonal block B, and the local work vectors.
*/
static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A, PetscBool flg)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) || defined(PETSC_HAVE_VIENNACL)
  A->boundtocpu = flg;
#endif
  if (a->A) PetscCall(MatBindToCPU(a->A, flg));
  if (a->B) PetscCall(MatBindToCPU(a->B, flg));

  /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
   * This maybe seems a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
   * to differ from the parent matrix.
   */
  if (a->lvec) PetscCall(VecBindToCPU(a->lvec, flg));
  if (a->diag) PetscCall(VecBindToCPU(a->diag, flg));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Propagates block sizes to the sub-matrices. Note the off-diagonal block B gets a
  column block size of 1 (its columns are addressed individually through garray).
*/
static PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data;

  PetscFunctionBegin;
  if (mat->A) {
    PetscCall(MatSetBlockSizes(mat->A, rbs, cbs));
    PetscCall(MatSetBlockSizes(mat->B, rbs, 1));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Returns in keptrows an IS (global indices, local ownership) of the rows that contain at
  least one stored entry with a nonzero value. keptrows is left NULL when no process has
  an all-zero row (the Allreduce of the per-process zero-row counts is zero).
*/
static PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M, IS *keptrows)
{
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ *)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ *)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ *)mat->B->data;
  const PetscInt  *ia, *ib;
  const MatScalar *aa, *bb, *aav, *bav;
  PetscInt         na, nb, i, j, *rows, cnt = 0, n0rows; /* cnt counts locally zero rows */
  PetscInt         m = M->rmap->n, rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = NULL;

  ia = a->i;
  ib = b->i;
  PetscCall(MatSeqAIJGetArrayRead(mat->A, &aav));
  PetscCall(MatSeqAIJGetArrayRead(mat->B, &bav));
  /* first pass: count rows whose stored values are all zero (or that store nothing) */
  for (i = 0; i < m; i++) {
    na = ia[i + 1] - ia[i];
    nb = ib[i + 1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = aav + ia[i];
    for (j = 0; j < na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = PetscSafePointerPlusOffset(bav, ib[i]);
    for (j = 0; j < nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
  ok1:;
  }
  PetscCallMPI(MPIU_Allreduce(&cnt, &n0rows, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)M)));
  if (!n0rows) { /* no zero rows anywhere: nothing to filter, keptrows stays NULL */
    PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
    PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  PetscCall(PetscMalloc1(M->rmap->n - cnt, &rows));
  cnt = 0;
  /* second pass: record the global indices of the rows that do have a nonzero */
  for (i = 0; i < m; i++) {
    na = ia[i + 1] - ia[i];
    nb = ib[i + 1] - ib[i];
    if (!na && !nb) continue;
    aa = aav + ia[i];
    for (j = 0; j < na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = PetscSafePointerPlusOffset(bav, ib[i]);
    for (j = 0; j < nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
  ok2:;
  }
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), cnt, rows, PETSC_OWN_POINTER, keptrows));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Sets the diagonal of Y from vector D. When Y is assembled and the row/column layouts
  are congruent the diagonal lives entirely in the local block A, so the operation is
  forwarded there; otherwise the generic (value-stashing) default path is used.
*/
static PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y, Vec D, InsertMode is)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Y->data;
  PetscBool   cong;

  PetscFunctionBegin;
  PetscCall(MatHasCongruentLayouts(Y, &cong));
  if (Y->assembled && cong) {
    PetscCall(MatDiagonalSet(aij->A, D, is));
  } else {
    PetscCall(MatDiagonalSet_Default(Y, D, is));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Returns the (globally indexed) locally owned rows whose diagonal entry is zero;
  the search runs on the diagonal block A, and local indices are shifted by rstart.
*/
static PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M, IS *zrows)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)M->data;
  PetscInt    i, rstart, nrows, *rows;

  PetscFunctionBegin;
  *zrows = NULL;
  PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A, &nrows, &rows));
  PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
  for (i = 0; i < nrows; i++) rows[i] += rstart;
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), nrows, rows, PETSC_OWN_POINTER, zrows));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Computes a per-column reduction (1/2/inf-norm, or sum/mean of real or imaginary parts)
  over the global matrix: each process accumulates contributions of its A (columns offset
  by cmap->rstart) and B (columns mapped through garray) blocks into a length-n work
  array, which is then combined across processes with MAX (for the inf-norm) or SUM.
*/
static PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A, PetscInt type, PetscReal *reductions)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)A->data;
  PetscInt           i, m, n, *garray = aij->garray;
  Mat_SeqAIJ        *a_aij = (Mat_SeqAIJ *)aij->A->data;
  Mat_SeqAIJ        *b_aij = (Mat_SeqAIJ *)aij->B->data;
  PetscReal         *work;
  const PetscScalar *dummy;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A, &m, &n));
  PetscCall(PetscCalloc1(n, &work));
  /* NOTE(review): get/restore with an unused pointer - presumably done to force the
     values onto the host before a_aij->a / b_aij->a are read directly below; confirm */
  PetscCall(MatSeqAIJGetArrayRead(aij->A, &dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &dummy));
  PetscCall(MatSeqAIJGetArrayRead(aij->B, &dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &dummy));
  if (type == NORM_2) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i] * a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i] * b_aij->a[i]);
  } else if (type == NORM_1) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
  } else if (type == NORM_INFINITY) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]), work[garray[b_aij->j[i]]]);
  } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]);
  } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]);
  } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
  if (type == NORM_INFINITY) {
    PetscCallMPI(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
  } else {
    PetscCallMPI(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscCall(PetscFree(work));
  if (type == NORM_2) {
    /* the loops above accumulated squares; finish the 2-norm */
    for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < n; i++) reductions[i] /= m;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Returns the (global) indices of locally owned rows that have an entry outside the
  diagonal block: the union of the off-block-diagonal rows of A and the nonzero rows
  of B, sorted with duplicates removed and shifted to global numbering.
*/
static PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A, IS *is)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
  IS              sis, gis;
  const PetscInt *isis, *igis;
  PetscInt        n, *iis, nsis, ngis, rstart, i;

  PetscFunctionBegin;
  PetscCall(MatFindOffBlockDiagonalEntries(a->A, &sis));
  PetscCall(MatFindNonzeroRows(a->B, &gis));
  PetscCall(ISGetSize(gis, &ngis));
  PetscCall(ISGetSize(sis, &nsis));
  PetscCall(ISGetIndices(sis, &isis));
  PetscCall(ISGetIndices(gis, &igis));

  /* concatenate the two index lists, then sort and unique them */
  PetscCall(PetscMalloc1(ngis + nsis, &iis));
  PetscCall(PetscArraycpy(iis, igis, ngis));
  PetscCall(PetscArraycpy(iis + ngis, isis, nsis));
  n = ngis + nsis;
  PetscCall(PetscSortRemoveDupsInt(&n, iis));
  PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
  for (i = 0; i < n; i++) iis[i] += rstart;
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A), n, iis, PETSC_OWN_POINTER, is));

  PetscCall(ISRestoreIndices(sis, &isis));
  PetscCall(ISRestoreIndices(gis, &igis));
  PetscCall(ISDestroy(&sis));
  PetscCall(ISDestroy(&gis));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it it is not scalable (each processor
  has an order N integer array but is fast to access.
*/
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt    n = aij->B->cmap->n, i;

  PetscFunctionBegin;
  PetscCheck(!n || aij->garray, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  /* store local index + 1 so that 0 (the hash-map miss default) can mean "not present" */
  PetscCall(PetscHMapICreateWithSize(n, &aij->colmap));
  for (i = 0; i < n; i++) PetscCall(PetscHMapISet(aij->colmap, aij->garray[i] + 1, i + 1));
#else
  /* dense array of length cmap->N: entry is local index + 1, 0 means "not present" */
  PetscCall(PetscCalloc1(mat->cmap->N + 1, &aij->colmap));
  for (i = 0; i < n; i++) aij->colmap[aij->garray[i]] = i + 1;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Insert/add one value into the diagonal block A at local (row, col). Performs a binary
  search (narrowed to 5 then a linear scan) over the existing row, updates in place if
  found, otherwise reallocates if permitted and shifts later entries up to make room.
  Relies on surrounding locals rp1/ap1/nrow1/low1/high1/lastcol1/rmax1, aa/ai/aj/aimax,
  nonew, ignorezeroentries, t, _i, N, am, and a; (orow, ocol) are the original global
  indices used only in the error message.
*/
#define MatSetValues_SeqAIJ_A_Private(row, col, value, addv, orow, ocol) \
  do { \
    if (col <= lastcol1) low1 = 0; \
    else high1 = nrow1; \
    lastcol1 = col; \
    while (high1 - low1 > 5) { \
      t = (low1 + high1) / 2; \
      if (rp1[t] > col) high1 = t; \
      else low1 = t; \
    } \
    for (_i = low1; _i < high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap1[_i] += value; \
          /* Not sure LogFlops will slow down the code or not */ \
          (void)PetscLogFlops(1.0); \
        } else ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries && row != col) { \
      low1  = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    if (nonew == 1) { \
      low1  = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A, am, 1, nrow1, row, col, rmax1, aa, ai, aj, rp1, ap1, aimax, nonew, MatScalar); \
    N = nrow1++ - 1; \
    a->nz++; \
    high1++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp1 + _i + 1, rp1 + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap1 + _i + 1, ap1 + _i, N - _i + 1)); \
    rp1[_i] = col; \
    ap1[_i] = value; \
  a_noinsert:; \
    ailen[row] = nrow1; \
  } while (0)

/*
  Same as MatSetValues_SeqAIJ_A_Private() but for the off-diagonal block B, using the
  "2"-suffixed locals (rp2/ap2/nrow2/low2/high2/lastcol2/rmax2, ba/bi/bj/bimax, bm, b).
  Note: unlike the A variant, the zero-value skip has no "row != col" test here.
*/
#define MatSetValues_SeqAIJ_B_Private(row, col, value, addv, orow, ocol) \
  do { \
    if (col <= lastcol2) low2 = 0; \
    else high2 = nrow2; \
    lastcol2 = col; \
    while (high2 - low2 > 5) { \
      t = (low2 + high2) / 2; \
      if (rp2[t] > col) high2 = t; \
      else low2 = t; \
    } \
    for (_i = low2; _i < high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap2[_i] += value; \
          (void)PetscLogFlops(1.0); \
        } else ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) { \
      low2  = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    if (nonew == 1) { \
      low2  = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B, bm, 1, nrow2, row, col, rmax2, ba, bi, bj, rp2, ap2, bimax, nonew, MatScalar); \
    N = nrow2++ - 1; \
    b->nz++; \
    high2++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp2 + _i + 1, rp2 + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap2 + _i + 1, ap2 + _i, N - _i + 1)); \
    rp2[_i] = col; \
    ap2[_i] = value; \
  b_noinsert:; \
    bilen[row] = nrow2; \
  } while (0)

/*
  Overwrites the values of one locally owned row (given by its global index) from the
  dense-in-sparsity array v, which must be ordered as the row is stored globally:
  the B entries left of the diagonal block, then all A entries, then the B entries
  right of the diagonal block. Only the values change; the nonzero pattern must match.
*/
static PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A, PetscInt row, const PetscScalar v[])
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ  *a = (Mat_SeqAIJ *)mat->A->data, *b = (Mat_SeqAIJ *)mat->B->data;
  PetscInt     l, *garray = mat->garray, diag;
  PetscScalar *aa, *ba;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  PetscCall(MatGetOwnershipRange(A, &diag, NULL));
  row = row - diag; /* convert to local row index */
  for (l = 0; l < b->i[row + 1] - b->i[row]; l++) {
    if (garray[b->j[b->i[row] + l]] > diag) break;
  }
  if (l) {
    PetscCall(MatSeqAIJGetArray(mat->B, &ba));
    PetscCall(PetscArraycpy(ba + b->i[row], v, l));
    PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
  }

  /* diagonal part */
  if (a->i[row + 1] - a->i[row]) {
    PetscCall(MatSeqAIJGetArray(mat->A, &aa));
    PetscCall(PetscArraycpy(aa + a->i[row], v + l, a->i[row + 1] - a->i[row]));
    PetscCall(MatSeqAIJRestoreArray(mat->A, &aa));
  }

  /* right of diagonal part */
  if (b->i[row + 1] - b->i[row] - l) {
    PetscCall(MatSeqAIJGetArray(mat->B, &ba));
    PetscCall(PetscArraycpy(ba + b->i[row] + l, v + l + a->i[row + 1] - a->i[row], b->i[row + 1] - b->i[row] - l));
    PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatSetValues() implementation for MPIAIJ. Locally owned rows are inserted directly:
  columns inside [cstart, cend) go into the diagonal block A, all others into the
  off-diagonal block B (translated through colmap when the matrix was previously
  assembled; if a genuinely new off-diagonal column appears, B is first disassembled
  back to global column indices and the macro working variables are reinitialized,
  since b and its arrays have been replaced). Rows owned by other processes are
  queued in the stash for communication at assembly time.
*/
PetscErrorCode MatSetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscScalar value = 0.0;
  PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
  PetscBool   roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat         A = aij->A;
  Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
  PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
  PetscBool   ignorezeroentries = a->ignorezeroentries;
  Mat         B = aij->B;
  Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data;
  PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
  MatScalar  *aa, *ba;
  PetscInt   *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
  PetscInt    nonew;
  MatScalar  *ap1, *ap2;

  PetscFunctionBegin;
  PetscCall(MatSeqAIJGetArray(A, &aa));
  PetscCall(MatSeqAIJGetArray(B, &ba));
  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue; /* negative rows are silently ignored */
    PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    if (im[i] >= rstart && im[i] < rend) { /* locally owned row: direct insertion */
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = PetscSafePointerPlusOffset(aj, ai[row]);
      ap1      = PetscSafePointerPlusOffset(aa, ai[row]);
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = PetscSafePointerPlusOffset(bj, bi[row]);
      ap2      = PetscSafePointerPlusOffset(ba, bi[row]);
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j = 0; j < n; j++) {
        if (v) value = roworiented ? v[i * n + j] : v[i + j * m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
        if (in[j] >= cstart && in[j] < cend) { /* column in the diagonal block */
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
        } else if (in[j] < 0) {
          continue; /* negative columns are silently ignored */
        } else {
          PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          if (mat->was_assembled) {
            if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); /* map global col ids to local ones */
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ *)aij->B->data)->nonew) { /* col < 0 means in[j] is a new col for B */
              PetscCall(MatDisAssemble_MPIAIJ(mat, PETSC_FALSE)); /* Change aij->B from reduced/local format to expanded/global format */
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ *)B->data;
              bimax = b->imax;
              bi    = b->i;
              bilen = b->ilen;
              bj    = b->j;
              ba    = b->a;
              rp2   = PetscSafePointerPlusOffset(bj, bi[row]);
              ap2   = PetscSafePointerPlusOffset(ba, bi[row]);
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
              if (1 == ((Mat_SeqAIJ *)aij->B->data)->nonew) {
                PetscCall(PetscInfo(mat, "Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n", (double)PetscRealPart(value), im[i], in[j]));
              } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
            }
          } else col = in[j]; /* not yet assembled: B still uses global column indices */
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
        }
      }
    } else { /* off-process row: stash the values for assembly-time communication */
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i * n), (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i), m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
  }
  PetscCall(MatSeqAIJRestoreArray(A, &aa)); /* aa, bb might have been free'd due to reallocation above. But we don't access them here */
  PetscCall(MatSeqAIJRestoreArray(B, &ba));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
    This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
    The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
    No off-processor parts off the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  Mat         A = aij->A; /* diagonal part of the matrix */
  Mat         B = aij->B; /* off-diagonal part of the matrix */
  Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, col;
  PetscInt   *ailen = a->ilen, *aj = a->j;
  PetscInt   *bilen = b->ilen, *bj = b->j;
  PetscInt    am = aij->A->rmap->n, j;
  PetscInt    diag_so_far = 0, dnz;
  PetscInt    offd_so_far = 0, onz;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j = 0; j < am; j++) {
    dnz = onz = 0;
    /* Iterate over all non-zero columns of the current row */
    for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[diag_so_far++] = mat_j[col] - cstart; /* A stores local column indices */
        dnz++;
      } else { /* off-diagonal entries */
        bj[offd_so_far++] = mat_j[col]; /* B keeps global column indices here */
        onz++;
      }
    }
    ailen[j] = dnz;
    bilen[j] = onz;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
    This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
    The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
681 No off-processor parts off the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ. 682 Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart; 683 would not be true and the more complex MatSetValues_MPIAIJ has to be used. 684 */ 685 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[], const PetscScalar mat_a[]) 686 { 687 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 688 Mat A = aij->A; /* diagonal part of the matrix */ 689 Mat B = aij->B; /* off-diagonal part of the matrix */ 690 Mat_SeqAIJ *aijd = (Mat_SeqAIJ *)aij->A->data, *aijo = (Mat_SeqAIJ *)aij->B->data; 691 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 692 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 693 PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend; 694 PetscInt *ailen = a->ilen, *aj = a->j; 695 PetscInt *bilen = b->ilen, *bj = b->j; 696 PetscInt am = aij->A->rmap->n, j; 697 PetscInt *full_diag_i = aijd->i, *full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point. 
*/ 698 PetscInt col, dnz_row, onz_row, rowstart_diag, rowstart_offd; 699 PetscScalar *aa = a->a, *ba = b->a; 700 701 PetscFunctionBegin; 702 /* Iterate over all rows of the matrix */ 703 for (j = 0; j < am; j++) { 704 dnz_row = onz_row = 0; 705 rowstart_offd = full_offd_i[j]; 706 rowstart_diag = full_diag_i[j]; 707 /* Iterate over all non-zero columns of the current row */ 708 for (col = mat_i[j]; col < mat_i[j + 1]; col++) { 709 /* If column is in the diagonal */ 710 if (mat_j[col] >= cstart && mat_j[col] < cend) { 711 aj[rowstart_diag + dnz_row] = mat_j[col] - cstart; 712 aa[rowstart_diag + dnz_row] = mat_a[col]; 713 dnz_row++; 714 } else { /* off-diagonal entries */ 715 bj[rowstart_offd + onz_row] = mat_j[col]; 716 ba[rowstart_offd + onz_row] = mat_a[col]; 717 onz_row++; 718 } 719 } 720 ailen[j] = dnz_row; 721 bilen[j] = onz_row; 722 } 723 PetscFunctionReturn(PETSC_SUCCESS); 724 } 725 726 static PetscErrorCode MatGetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[]) 727 { 728 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 729 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend; 730 PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col; 731 732 PetscFunctionBegin; 733 for (i = 0; i < m; i++) { 734 if (idxm[i] < 0) continue; /* negative row */ 735 PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1); 736 PetscCheck(idxm[i] >= rstart && idxm[i] < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported, row requested %" PetscInt_FMT " range [%" PetscInt_FMT " %" PetscInt_FMT ")", idxm[i], rstart, rend); 737 row = idxm[i] - rstart; 738 for (j = 0; j < n; j++) { 739 if (idxn[j] < 0) continue; /* negative column */ 740 PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, 
idxn[j], mat->cmap->N - 1);
      if (idxn[j] >= cstart && idxn[j] < cend) {
        /* Column owned locally: query the diagonal block with a local column index */
        col = idxn[j] - cstart;
        PetscCall(MatGetValues(aij->A, 1, &row, 1, &col, v + i * n + j));
      } else {
        /* Off-process column: translate the global index to a local column of B via colmap */
        if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
        PetscCall(PetscHMapIGetWithDefault(aij->colmap, idxn[j] + 1, 0, &col));
        col--; /* colmap stores indices shifted by one so that 0 can mean "not present" */
#else
        col = aij->colmap[idxn[j]] - 1;
#endif
        /* A column absent from the off-diagonal block reads back as an explicit zero */
        if ((col < 0) || (aij->garray[col] != idxn[j])) *(v + i * n + j) = 0.0;
        else PetscCall(MatGetValues(aij->B, 1, &row, 1, &col, v + i * n + j));
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatAssemblyBegin_MPIAIJ - Starts communication of stashed off-process entries
  to their owning ranks. A no-op when stashing is disabled or the caller promised
  no off-process entries (mat->nooffprocentries).
*/
static PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt    nstash, reallocs;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
  PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
  PetscCall(PetscInfo(aij->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatAssemblyEnd_MPIAIJ - Completes assembly: drains the stash of received
  off-process entries into the local blocks, assembles the diagonal (A) and
  off-diagonal (B) sequential blocks, handles collective disassembly detection,
  and updates the global nonzero state. Collective on the matrix communicator.
*/
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *)mat->data;
  PetscMPIInt  n;
  PetscInt     i, j, rstart, ncols, flg;
  PetscInt    *row, *col;
  PetscBool    other_disassembled;
  PetscScalar *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    /* Drain every incoming stash message and insert its entries locally */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
  /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
  if (mat->boundtocpu) {
    PetscCall(MatBindToCPU(aij->A, PETSC_TRUE));
    PetscCall(MatBindToCPU(aij->B, PETSC_TRUE));
  }
#endif
  PetscCall(MatAssemblyBegin(aij->A, mode));
  PetscCall(MatAssemblyEnd(aij->A, mode));

  /* determine if any processor has disassembled, if so we must
     also disassemble ourself, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ *)aij->B->data)->nonew) {
    PetscCallMPI(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globally it does not */
      PetscCall(MatDisAssemble_MPIAIJ(mat, PETSC_FALSE));
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIAIJ(mat));
  PetscCall(MatSetOption(aij->B, MAT_USE_INODES, PETSC_FALSE));
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscCall(MatAssemblyBegin(aij->B, mode));
  PetscCall(MatAssemblyEnd(aij->B, mode));

  /* Row-access scratch buffers are invalidated by assembly; release them */
  PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));

  aij->rowvalues = NULL;

  /* Cached diagonal is stale after assembly */
  PetscCall(VecDestroy(&aij->diag));

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ *)aij->A->data)->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
  }
#if defined(PETSC_HAVE_DEVICE)
  mat->offloadmask = PETSC_OFFLOAD_BOTH;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Zero all stored entries in both the diagonal and off-diagonal blocks (pattern kept) */
static PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatZeroEntries(l->A));
  PetscCall(MatZeroEntries(l->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatZeroRows_MPIAIJ - Zeros the given global rows (owned locally after mapping),
  optionally placing `diag` on the diagonal and fixing the right-hand side so that
  x keeps its value in the zeroed rows. Collective.
*/
static PetscErrorCode MatZeroRows_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data;
  PetscInt   *lrows;
  PetscInt    r, len;
  PetscBool   cong;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows));
  PetscCall(MatHasCongruentLayouts(A, &cong));
  /* fix right-hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArray(b, &bb));
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArray(b, &bb));
  }

  if (diag != 0.0 && cong) {
    /* Congruent layouts: let the diagonal block place diag, zero rows of B */
    PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
    Mat_SeqAIJ *aijA = (Mat_SeqAIJ *)mat->A->data;
    Mat_SeqAIJ *aijB = (Mat_SeqAIJ *)mat->B->data;
    PetscInt    nnwA, nnwB;
    PetscBool   nnzA, nnzB;

    /* Save the nonew flags so they can be restored after the diagonal insertions */
    nnwA = aijA->nonew;
    nnwB = aijB->nonew;
    nnzA = aijA->keepnonzeropattern;
    nnzB = aijB->keepnonzeropattern;
    if (!nnzA) {
      PetscCall(PetscInfo(mat->A, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n"));
      aijA->nonew = 0;
    }
    if (!nnzB) {
      PetscCall(PetscInfo(mat->B, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n"));
      aijB->nonew = 0;
    }
    /* Must zero here before the next loop */
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      if (row >= A->cmap->N) continue; /* no diagonal entry exists past the last column */
      PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
    }
    aijA->nonew = nnwA;
    aijB->nonew = nnwB;
  } else {
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  }
  PetscCall(PetscFree(lrows));
  PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ *)mat->A->data)->keepnonzeropattern || !((Mat_SeqAIJ *)mat->A->data)->nonew) {
    PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
    PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatZeroRowsColumns_MPIAIJ - Zeros the given rows AND the matching columns,
  optionally placing `diag` on the diagonal and adjusting b for eliminated
  column contributions. Collective.
*/
static PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIAIJ        *l = (Mat_MPIAIJ *)A->data;
  PetscInt           n = A->rmap->n;
  PetscInt           i, j, r, m, len = 0;
  PetscInt          *lrows, *owners = A->rmap->range;
  PetscMPIInt        p = 0;
  PetscSFNode       *rrows;
  PetscSF            sf;
  const PetscScalar *xx;
  PetscScalar       *bb, *mask, *aij_a;
  Vec                xmask, lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ *)l->B->data;
  const PetscInt    *aj, *ii, *ridx;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  PetscCall(PetscMalloc1(n, &lrows));
  for (r = 0; r < n; ++r) lrows[r] = -1;
  PetscCall(PetscMalloc1(N, &rrows));
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N);
    if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */
      PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p));
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
  /* Collect flags for rows to be zeroed */
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFDestroy(&sf));
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r)
    if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b));
  /* handle off-diagonal part of matrix: build a 0/1 mask of eliminated columns,
     scattered into the ghost (lvec) layout used by B */
  PetscCall(MatCreateVecs(A, &xmask, NULL));
  PetscCall(VecDuplicate(l->lvec, &lmask));
  PetscCall(VecGetArray(xmask, &bb));
  for (i = 0; i < len; i++) bb[lrows[i]] = 1;
  PetscCall(VecRestoreArray(xmask, &bb));
  PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecDestroy(&xmask));
  if (x && b) { /* this code is buggy when the row and column layout don't match */
    PetscBool cong;

    PetscCall(MatHasCongruentLayouts(A, &cong));
    PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
    PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecGetArrayRead(l->lvec, &xx));
    PetscCall(VecGetArray(b, &bb));
  }
  PetscCall(VecGetArray(lmask, &mask));
  /* remove zeroed rows of off-diagonal matrix */
  PetscCall(MatSeqAIJGetArray(l->B, &aij_a));
  ii = aij->i;
  for (i = 0; i < len; i++) PetscCall(PetscArrayzero(PetscSafePointerPlusOffset(aij_a, ii[lrows[i]]), ii[lrows[i] + 1] - ii[lrows[i]]));
  /* loop over all elements of off process part of matrix zeroing removed columns*/
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex; /* maps compressed row slots back to actual local rows */
    for (i = 0; i < m; i++) {
      n  = ii[i + 1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];

      for (j = 0; j < n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          /* move the eliminated column's contribution to the right-hand side before zeroing */
          if (b) bb[*ridx] -= *aa * xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i = 0; i < m; i++) {
      n  = ii[i + 1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];
      for (j = 0; j < n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa * xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x && b) {
    PetscCall(VecRestoreArray(b, &bb));
    PetscCall(VecRestoreArrayRead(l->lvec, &xx));
  }
  PetscCall(MatSeqAIJRestoreArray(l->B, &aij_a));
  PetscCall(VecRestoreArray(lmask, &mask));
  PetscCall(VecDestroy(&lmask));
  PetscCall(PetscFree(lrows));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ *)l->A->data)->nonew) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatMult_MPIAIJ - y = A*x. Overlaps the ghost-value scatter of x with the
  local diagonal-block product, then adds the off-diagonal contribution.
*/
static PetscErrorCode MatMult_MPIAIJ(Mat A, Vec xx, Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  PetscInt    nt;
  VecScatter  Mvctx = a->Mvctx;

  PetscFunctionBegin;
  PetscCall(VecGetLocalSize(xx, &nt));
  PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")", A->cmap->n, nt);
  /* start communication of ghost values, compute yy = A_diag*xx meanwhile */
  PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscUseTypeMethod(a->A, mult, xx, yy);
  PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  /* yy += B*lvec (off-process columns) */
  PetscUseTypeMethod(a->B, multadd, a->lvec, yy, yy);
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Apply only the diagonal block: xx = A_diag * bb (no communication) */
static PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A, Vec bb, Vec xx)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatMultDiagonalBlock(a->A, bb, xx));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* zz = yy + A*xx, same communication/overlap structure as MatMult_MPIAIJ */
static PetscErrorCode MatMultAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  VecScatter  Mvctx = a->Mvctx;

  PetscFunctionBegin;
  PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
  PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES,
SCATTER_FORWARD));
  PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatMultTranspose_MPIAIJ - yy = A^T * xx. Computes both block products locally,
  then scatter-adds the ghost contributions back to their owners (reverse scatter).
*/
static PetscErrorCode MatMultTranspose_MPIAIJ(Mat A, Vec xx, Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  /* do nondiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
  /* do local part */
  PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
  /* add partial results together */
  PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
  PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatIsTranspose_MPIAIJ - Tests whether Bmat equals Amat^T within tol.
  Cheap collective test on the diagonal blocks first; only if that passes does
  it extract and compare the off-diagonal parts via MatCreateSubMatrices.
*/
static PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat, Mat Bmat, PetscReal tol, PetscBool *f)
{
  MPI_Comm    comm;
  Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)Amat->data, *Bij = (Mat_MPIAIJ *)Bmat->data;
  Mat         Adia = Aij->A, Bdia = Bij->A, Aoff, Boff, *Aoffs, *Boffs;
  IS          Me, Notme;
  PetscInt    M, N, first, last, *notme, i;
  PetscBool   lf;
  PetscMPIInt size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  PetscCall(MatIsTranspose(Adia, Bdia, tol, &lf));
  PetscCallMPI(MPIU_Allreduce(&lf, f, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)Amat)));
  if (!*f) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (size == 1) PetscFunctionReturn(PETSC_SUCCESS);

  /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */
  PetscCall(MatGetSize(Amat, &M, &N));
  PetscCall(MatGetOwnershipRange(Amat, &first, &last));
  /* notme = all global columns NOT owned by this rank */
  PetscCall(PetscMalloc1(N - last + first, &notme));
  for (i = 0; i < first; i++) notme[i] = i;
  for (i = last; i < M; i++) notme[i - last + first] = i;
  PetscCall(ISCreateGeneral(MPI_COMM_SELF, N - last + first, notme, PETSC_COPY_VALUES, &Notme));
  PetscCall(ISCreateStride(MPI_COMM_SELF, last - first, first, 1, &Me));
  PetscCall(MatCreateSubMatrices(Amat, 1, &Me, &Notme, MAT_INITIAL_MATRIX, &Aoffs));
  Aoff = Aoffs[0];
  PetscCall(MatCreateSubMatrices(Bmat, 1, &Notme, &Me, MAT_INITIAL_MATRIX, &Boffs));
  Boff = Boffs[0];
  PetscCall(MatIsTranspose(Aoff, Boff, tol, f));
  PetscCall(MatDestroyMatrices(1, &Aoffs));
  PetscCall(MatDestroyMatrices(1, &Boffs));
  PetscCall(ISDestroy(&Me));
  PetscCall(ISDestroy(&Notme));
  PetscCall(PetscFree(notme));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* zz = yy + A^T * xx; same reverse-scatter accumulation as MatMultTranspose */
static PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  /* do nondiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
  /* do local part */
  PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
  /* add partial results together */
  PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
  PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
static PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A, Vec v)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCheck(A->rmap->N == A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
  PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "row partition must equal col partition");
  PetscCall(MatGetDiagonal(a->A, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Scale both blocks by aa (A = aa*A) */
static PetscErrorCode MatScale_MPIAIJ(Mat A, PetscScalar aa)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatScale(a->A, aa));
  PetscCall(MatScale(a->B, aa));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatView_MPIAIJ_Binary - Writes the parallel matrix to a binary viewer in the
  PETSc binary format: header, per-row lengths, global column indices, values.
  Each row is emitted in global column order: off-diagonal entries left of the
  diagonal block, then the diagonal block, then the remaining off-diagonal entries.
*/
static PetscErrorCode MatView_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)mat->data;
  Mat_SeqAIJ        *A   = (Mat_SeqAIJ *)aij->A->data;
  Mat_SeqAIJ        *B   = (Mat_SeqAIJ *)aij->B->data;
  const PetscInt    *garray = aij->garray;
  const PetscScalar *aa, *ba;
  PetscInt           header[4], M, N, m, rs, cs, cnt, i, ja, jb;
  PetscInt64         nz, hnz;
  PetscInt          *rowlens;
  PetscInt          *colidxs;
  PetscScalar       *matvals;
  PetscMPIInt        rank;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  M  = mat->rmap->N;
  N  = mat->cmap->N;
  m  = mat->rmap->n;
  rs = mat->rmap->rstart;
  cs = mat->cmap->rstart;
  nz = A->nz + B->nz;

  /* write matrix header */
  header[0] = MAT_FILE_CLASSID;
  header[1] = M;
  header[2] = N;
  /* total nonzero count is only needed (and only valid) on rank 0 */
  PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_INT64, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
  if (rank == 0) PetscCall(PetscIntCast(hnz, &header[3]));
  PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));

  /* fill in and store row lengths */
  PetscCall(PetscMalloc1(m, &rowlens));
  for (i = 0; i < m; i++) rowlens[i] = A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i];
  PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
  PetscCall(PetscFree(rowlens));

  /* fill in and store column
indices */
  PetscCall(PetscMalloc1(nz, &colidxs));
  for (cnt = 0, i = 0; i < m; i++) {
    /* off-diagonal entries with global column < diagonal-block start come first */
    for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
      if (garray[B->j[jb]] > cs) break;
      colidxs[cnt++] = garray[B->j[jb]];
    }
    /* then the diagonal block (local columns shifted by cs) */
    for (ja = A->i[i]; ja < A->i[i + 1]; ja++) colidxs[cnt++] = A->j[ja] + cs;
    /* then the remaining off-diagonal entries */
    for (; jb < B->i[i + 1]; jb++) colidxs[cnt++] = garray[B->j[jb]];
  }
  PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
  PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
  PetscCall(PetscFree(colidxs));

  /* fill in and store nonzero values, in the same per-row order as the column indices */
  PetscCall(MatSeqAIJGetArrayRead(aij->A, &aa));
  PetscCall(MatSeqAIJGetArrayRead(aij->B, &ba));
  PetscCall(PetscMalloc1(nz, &matvals));
  for (cnt = 0, i = 0; i < m; i++) {
    for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
      if (garray[B->j[jb]] > cs) break;
      matvals[cnt++] = ba[jb];
    }
    for (ja = A->i[i]; ja < A->i[i + 1]; ja++) matvals[cnt++] = aa[ja];
    for (; jb < B->i[i + 1]; jb++) matvals[cnt++] = ba[jb];
  }
  PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &aa));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &ba));
  PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
  PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
  PetscCall(PetscFree(matvals));

  /* write block size option to the viewer's .info file */
  PetscCall(MatView_Binary_BlockSizes(mat, viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}

#include <petscdraw.h>
/*
  MatView_MPIAIJ_ASCIIorDraworSocket - Viewer dispatch for ASCII (info/detail/
  load-balance formats handled per-rank), binary, draw, and socket viewers.
  The fallback path gathers the whole matrix onto rank 0 and views it there.
*/
static PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
{
  Mat_MPIAIJ       *aij  = (Mat_MPIAIJ *)mat->data;
  PetscMPIInt       rank = aij->rank, size = aij->size;
  PetscBool         isdraw, iascii, isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  if (iascii) {
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_LOAD_BALANCE) {
      /* report min/avg/max local nonzero counts across ranks */
      PetscInt i, nmax = 0, nmin = PETSC_INT_MAX, navg = 0, *nz, nzlocal = ((Mat_SeqAIJ *)aij->A->data)->nz + ((Mat_SeqAIJ *)aij->B->data)->nz;
      PetscCall(PetscMalloc1(size, &nz));
      PetscCallMPI(MPI_Allgather(&nzlocal, 1, MPIU_INT, nz, 1, MPIU_INT, PetscObjectComm((PetscObject)mat)));
      for (i = 0; i < size; i++) {
        nmax = PetscMax(nmax, nz[i]);
        nmin = PetscMin(nmin, nz[i]);
        navg += nz[i];
      }
      PetscCall(PetscFree(nz));
      navg = navg / size;
      PetscCall(PetscViewerASCIIPrintf(viewer, "Load Balance - Nonzeros: Min %" PetscInt_FMT "  avg %" PetscInt_FMT "  max %" PetscInt_FMT "\n", nmin, navg, nmax));
      PetscFunctionReturn(PETSC_SUCCESS);
    }
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo   info;
      PetscInt *inodes = NULL;

      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(MatInodeGetInodeSizes(aij->A, NULL, &inodes, NULL));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      if (!inodes) {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                     info.memory));
      } else {
        PetscCall(
          PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, info.memory));
      }
      PetscCall(MatGetInfo(aij->A, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(MatGetInfo(aij->B, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(aij->Mvctx, viewer));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount, inodelimit, *inodes;
      PetscCall(MatInodeGetInodeSizes(aij->A, &inodecount, &inodes, &inodelimit));
      if (inodes) {
        PetscCall(PetscViewerASCIIPrintf(viewer, "using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n", inodecount, inodelimit));
      } else {
        PetscCall(PetscViewerASCIIPrintf(viewer, "not using I-node (on process 0) routines\n"));
      }
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(PETSC_SUCCESS);
    }
  } else if (isbinary) {
    if (size == 1) {
      PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
      PetscCall(MatView(aij->A, viewer));
    } else {
      PetscCall(MatView_MPIAIJ_Binary(mat, viewer));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (iascii && size == 1) {
    PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
    PetscCall(MatView(aij->A, viewer));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
  }

  { /* assemble the entire matrix onto first processor */
    Mat A = NULL, Av;
    IS  isrow, iscol;

    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->rmap->N : 0, 0, 1, &isrow));
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->cmap->N : 0, 0, 1, &iscol));
    PetscCall(MatCreateSubMatrix(mat, isrow, iscol, MAT_INITIAL_MATRIX, &A));
    PetscCall(MatMPIAIJGetSeqAIJ(A, &Av, NULL, NULL));
    /* The commented code uses MatCreateSubMatrices instead */
    /*
      Mat *AA, A = NULL, Av;
      IS  isrow,iscol;

      PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow));
      PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol));
      PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA));
      if (rank == 0) {
        PetscCall(PetscObjectReference((PetscObject)AA[0]));
        A    = AA[0];
        Av   = AA[0];
      }
      PetscCall(MatDestroySubMatrices(1,&AA));
    */
    PetscCall(ISDestroy(&iscol));
    PetscCall(ISDestroy(&isrow));
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    if (rank == 0) {
      if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)Av, ((PetscObject)mat)->name));
      PetscCall(MatView_SeqAIJ(Av, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Top-level viewer entry point: forwards supported viewer types, ignores others */
PetscErrorCode MatView_MPIAIJ(Mat mat, PetscViewer viewer)
{
  PetscBool iascii, isdraw, issocket, isbinary;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
  if (iascii || isdraw || isbinary || issocket) PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat, viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatSOR_MPIAIJ - Block-Jacobi style SOR: each outer iteration scatters the
  current solution's ghost values, moves the off-diagonal contribution to the
  right-hand side (bb1 = bb - B*x), then runs local SOR sweeps on the diagonal
  block. Only the "local" sweep variants (and Eisenstat) are supported in parallel.
*/
static PetscErrorCode MatSOR_MPIAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data;
  Vec         bb1 = NULL;
  PetscBool   hasop;

  PetscFunctionBegin;
  if (flag == SOR_APPLY_UPPER) {
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag,
fshift, lits, 1, xx));
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  /* bb1 (modified right-hand side) is needed unless a single zero-guess sweep suffices */
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) PetscCall(VecDuplicate(bb, &bb1));

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* with a zero guess the first iteration needs no ghost values */
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }

    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_EISENSTAT) {
    Vec xx1;

    PetscCall(VecDuplicate(bb, &xx1));
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx));

    PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
    if (!mat->diag) {
      /* lazily build and cache the diagonal for the pointwise product below */
      PetscCall(MatCreateVecs(matin, &mat->diag, NULL));
      PetscCall(MatGetDiagonal(matin, mat->diag));
    }
    PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop));
    if (hasop) {
      PetscCall(MatMultDiagonalBlock(matin, xx, bb1));
    } else {
      PetscCall(VecPointwiseMult(bb1, mat->diag, xx));
    }
    PetscCall(VecAYPX(bb1, (omega - 2.0) / omega, bb));

    PetscCall(MatMultAdd(mat->B, mat->lvec, bb1, bb1));

    /* local sweep */
    PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1));
    PetscCall(VecAXPY(xx, 1.0, xx1));
    PetscCall(VecDestroy(&xx1));
  } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel SOR not supported");

  PetscCall(VecDestroy(&bb1));

  matin->factorerrortype = mat->A->factorerrortype;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatPermute_MPIAIJ - Builds B = P_r * A * P_c for row/column permutations rowp,
  colp. Uses PetscSF to invert the (distributed) permutations, precomputes the
  permuted diagonal/off-diagonal nonzero counts, then inserts the permuted entries.
*/
static PetscErrorCode MatPermute_MPIAIJ(Mat A, IS rowp, IS colp, Mat *B)
{
  Mat             aA, aB, Aperm;
  const PetscInt *rwant, *cwant, *gcols, *ai, *bi, *aj, *bj;
  PetscScalar    *aa, *ba;
  PetscInt        i, j, m, n, ng, anz, bnz, *dnnz, *onnz, *tdnnz, *tonnz, *rdest, *cdest, *work, *gcdest;
  PetscSF         rowsf, sf;
  IS              parcolp = NULL;
  PetscBool       done;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(A, &m, &n));
  PetscCall(ISGetIndices(rowp, &rwant));
  PetscCall(ISGetIndices(colp, &cwant));
  PetscCall(PetscMalloc3(PetscMax(m, n), &work, m, &rdest, n, &cdest));

  /* Invert row permutation to find out where my rows should go */
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &rowsf));
  PetscCall(PetscSFSetGraphLayout(rowsf, A->rmap, A->rmap->n, NULL, PETSC_OWN_POINTER, rwant));
  PetscCall(PetscSFSetFromOptions(rowsf));
  for (i = 0; i < m; i++) work[i] = A->rmap->rstart + i;
  PetscCall(PetscSFReduceBegin(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));

  /* Invert column permutation to find out where my columns should go */
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraphLayout(sf, A->cmap, A->cmap->n, NULL, PETSC_OWN_POINTER, cwant));
  PetscCall(PetscSFSetFromOptions(sf));
  for (i = 0; i < n; i++) work[i] = A->cmap->rstart + i;
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, work, cdest, MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, work, cdest, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));

  PetscCall(ISRestoreIndices(rowp, &rwant));
  PetscCall(ISRestoreIndices(colp, &cwant));
  PetscCall(MatMPIAIJGetSeqAIJ(A, &aA, &aB, &gcols));

  /* Find out where my gcols should go */
  PetscCall(MatGetSize(aB, NULL, &ng));
  PetscCall(PetscMalloc1(ng, &gcdest));
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraphLayout(sf, A->cmap, ng, NULL, PETSC_OWN_POINTER, gcols));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFBcastBegin(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));

  /* Count permuted diagonal/off-diagonal nonzeros per local row */
  PetscCall(PetscCalloc4(m, &dnnz, m, &onnz, m, &tdnnz, m, &tonnz));
  PetscCall(MatGetRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
  PetscCall(MatGetRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
  for (i = 0; i < m; i++) {
    PetscInt    row = rdest[i];
    PetscMPIInt rowner;
    PetscCall(PetscLayoutFindOwner(A->rmap, row, &rowner));
    for (j = ai[i]; j < ai[i + 1]; j++) {
      PetscInt    col = cdest[aj[j]];
      PetscMPIInt cowner;
      PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); /* Could build an index for the columns to eliminate this search */
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
    for (j = bi[i]; j < bi[i + 1]; j++) {
      PetscInt    col = gcdest[bj[j]];
      PetscMPIInt cowner;
      PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner));
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
  }
  /* Ship the counts to the ranks that will own the permuted rows */
  PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&rowsf));

  PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A), A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N, 0, tdnnz, 0, tonnz, &Aperm));
  PetscCall(MatSeqAIJGetArray(aA, &aa));
  PetscCall(MatSeqAIJGetArray(aB, &ba));
  for (i = 0; i < m; i++) {
    PetscInt *acols = dnnz, *bcols = onnz; /* Repurpose now-unneeded arrays */
    PetscInt  j0, rowlen;
    rowlen = ai[i + 1] - ai[i];
    for (j0 = j = 0; j < rowlen; j0 = j) { /* rowlen could be larger than number of rows m, so sum in batches */
      for (; j < PetscMin(rowlen, j0 + m); j++) acols[j - j0] = cdest[aj[ai[i] + j]];
      PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, acols, aa + ai[i] + j0, INSERT_VALUES));
    }
    rowlen = bi[i + 1] - bi[i];
    for (j0 = j = 0; j < rowlen; j0 = j) {
      for (; j < PetscMin(rowlen, j0 + m); j++) bcols[j - j0] = gcdest[bj[bi[i] + j]];
      PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, bcols, ba + bi[i] + j0, INSERT_VALUES));
    }
  }
  PetscCall(MatAssemblyBegin(Aperm, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(Aperm, MAT_FINAL_ASSEMBLY));
  PetscCall(MatRestoreRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
  PetscCall(MatRestoreRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
  PetscCall(MatSeqAIJRestoreArray(aA, &aa));
  PetscCall(MatSeqAIJRestoreArray(aB, &ba));
  PetscCall(PetscFree4(dnnz, onnz, tdnnz, tonnz));
  PetscCall(PetscFree3(work, rdest, cdest));
  PetscCall(PetscFree(gcdest));
  if (parcolp) PetscCall(ISDestroy(&colp));
  *B = Aperm;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Report the ghost (off-process) column count and, optionally, their global indices */
static PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatGetSize(aij->B, NULL, nghosts));
  if (ghosts) *ghosts = aij->garray;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatGetInfo_MPIAIJ - Accumulates MatInfo statistics over the diagonal and
  off-diagonal blocks; the isend[] array sums both blocks' local counters
  (reduced across ranks in code that continues past this chunk).
*/
static PetscErrorCode MatGetInfo_MPIAIJ(Mat matin, MatInfoType flag, MatInfo *info)
{
  Mat_MPIAIJ    *mat = (Mat_MPIAIJ *)matin->data;
  Mat            A = mat->A, B = mat->B;
  PetscLogDouble isend[5], irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;
  PetscCall(MatGetInfo(A, MAT_LOCAL, info));

  isend[0] = info->nz_used;
  isend[1] = info->nz_allocated;
  isend[2] = info->nz_unneeded;
  isend[3] = info->memory;
  isend[4] = info->mallocs;

  PetscCall(MatGetInfo(B, MAT_LOCAL, info));

  isend[0] += info->nz_used;
  isend[1] += info->nz_allocated;
  isend[2] += info->nz_unneeded;
  isend[3] += info->memory;
  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Set a matrix option. Most options are forwarded to both the diagonal (a->A) and
   off-diagonal (a->B) sequential blocks; a few are recorded on the parallel matrix
   itself (row orientation, stashing, single-IS submatrix extraction). */
PetscErrorCode MatSetOption_MPIAIJ(Mat A, MatOption op, PetscBool flg)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
  case MAT_FORM_EXPLICIT_TRANSPOSE:
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A, 1);
    a->roworiented = flg;

    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_SORTED_FULL:
    PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
  case MAT_SPD:
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
  case MAT_SPD_ETERNAL:
    /* if the diagonal matrix is square it inherits some of the properties above */
    if (a->A && A->rmap->n == A->cmap->n) PetscCall(MatSetOption(a->A, op, flg));
    break;
  case MAT_SUBMAT_SINGLEIS:
    A->submat_singleis = flg;
    break;
  case MAT_STRUCTURE_ONLY:
    /* The option is handled directly by MatSetOption() */
    break;
  default:
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Return one locally owned row of the parallel matrix, merging the diagonal (A) and
   off-diagonal (B) blocks into a single row sorted by global column index. Only rows in
   [rstart, rend) may be requested; must be paired with MatRestoreRow_MPIAIJ(). */
PetscErrorCode MatGetRow_MPIAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)matin->data;
  PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
  PetscInt     i, *cworkA, *cworkB, **pcA, **pcB, cstart = matin->cmap->rstart;
  PetscInt     nztot, nzA, nzB, lrow, rstart = matin->rmap->rstart, rend = matin->rmap->rend;
  PetscInt    *cmap, *idx_p;

  PetscFunctionBegin;
  PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
     */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ *)mat->A->data, *Ba = (Mat_SeqAIJ *)mat->B->data;
    PetscInt    max = 1, tmp;
    for (i = 0; i < matin->rmap->n; i++) {
      /* combined row length over both blocks */
      tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    PetscCall(PetscMalloc2(max, &mat->rowvalues, max, &mat->rowindices));
  }

  PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows");
  lrow = row - rstart;

  /* pass NULL to the block getrow()s for outputs the caller did not request */
  pvA = &vworkA;
  pcA = &cworkA;
  pvB = &vworkB;
  pcB = &cworkB;
  if (!v) {
    pvA = NULL;
    pvB = NULL;
  }
  if (!idx) {
    pcA = NULL;
    if (!v) pcB = NULL;
  }
  PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1; /* number of B entries whose global column precedes the diagonal block */
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i = 0; i < nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
        for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i]];
        } else {
          /* imark was not computed above (v == NULL); find it here */
          for (i = 0; i < nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart + cworkA[i];
        for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = NULL;
      if (v) *v = NULL;
    }
  }
  *nz = nztot;
  PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Companion of MatGetRow_MPIAIJ(): only clears the "row active" flag; the work arrays
   are cached on the matrix and reused by subsequent MatGetRow() calls. */
PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCheck(aij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first");
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Compute a matrix norm (Frobenius, 1-norm, or infinity-norm) of the parallel matrix by
   combining the diagonal (A) and off-diagonal (B) blocks and reducing over the communicator.
   The 2-norm is not supported. */
static PetscErrorCode MatNorm_MPIAIJ(Mat mat, NormType type, PetscReal *norm)
{
  Mat_MPIAIJ      *aij = (Mat_MPIAIJ *)mat->data;
  Mat_SeqAIJ      *amat = (Mat_SeqAIJ *)aij->A->data, *bmat = (Mat_SeqAIJ *)aij->B->data;
  PetscInt         i, j, cstart = mat->cmap->rstart;
  PetscReal        sum = 0.0;
  const MatScalar *v, *amata, *bmata;

  PetscFunctionBegin;
  if (aij->size == 1) {
    /* single-process matrix: defer entirely to the sequential norm */
    PetscCall(MatNorm(aij->A, type, norm));
  } else {
    PetscCall(MatSeqAIJGetArrayRead(aij->A, &amata));
    PetscCall(MatSeqAIJGetArrayRead(aij->B, &bmata));
    if (type == NORM_FROBENIUS) {
      v = amata;
      for (i = 0; i < amat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      v = bmata;
      for (i = 0; i < bmat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      PetscCallMPI(MPIU_Allreduce(&sum, norm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      *norm = PetscSqrtReal(*norm);
      PetscCall(PetscLogFlops(2.0 * amat->nz + 2.0 * bmat->nz));
    } else if (type == NORM_1) { /* max column norm */
      PetscReal *tmp;
      PetscInt  *jj, *garray = aij->garray;
      PetscCall(PetscCalloc1(mat->cmap->N + 1, &tmp));
      *norm = 0.0;
      v     = amata;
      jj    = amat->j;
      for (j = 0; j < amat->nz; j++) {
        tmp[cstart + *jj++] += PetscAbsScalar(*v);
        v++;
      }
      v  = bmata;
      jj = bmat->j;
      for (j = 0; j < bmat->nz; j++) {
        tmp[garray[*jj++]] += PetscAbsScalar(*v);
        v++;
      }
      /* sum per-column absolute sums over all processes, then take the maximum */
      PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, tmp, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      for (j = 0; j < mat->cmap->N; j++) {
        if (tmp[j] > *norm) *norm = tmp[j];
      }
      PetscCall(PetscFree(tmp));
      PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      for (j = 0; j < aij->A->rmap->n; j++) {
        v   = PetscSafePointerPlusOffset(amata, amat->i[j]);
        sum = 0.0;
        for (i = 0; i < amat->i[j + 1] - amat->i[j]; i++) {
          sum += PetscAbsScalar(*v);
          v++;
        }
        v = PetscSafePointerPlusOffset(bmata, bmat->i[j]);
        for (i = 0; i < bmat->i[j + 1] - bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v);
          v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      PetscCallMPI(MPIU_Allreduce(&ntemp, norm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat)));
      PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
    } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for two norm");
    PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &amata));
    PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &bmata));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Transpose a parallel AIJ matrix. For MAT_INITIAL_MATRIX (or in-place), the result is
   preallocated exactly using PetscSF to account for off-process contributions; the diagonal
   block is transposed locally and the off-diagonal entries are inserted via MatSetValues(). */
static PetscErrorCode MatTranspose_MPIAIJ(Mat A, MatReuse reuse, Mat *matout)
{
  Mat_MPIAIJ      *a = (Mat_MPIAIJ *)A->data, *b;
  Mat_SeqAIJ      *Aloc = (Mat_SeqAIJ *)a->A->data, *Bloc = (Mat_SeqAIJ *)a->B->data, *sub_B_diag;
  PetscInt         M = A->rmap->N, N = A->cmap->N, ma, na, mb, nb, row, *cols, *cols_tmp, *B_diag_ilen, i, ncol, A_diag_ncol;
  const PetscInt  *ai, *aj, *bi, *bj, *B_diag_i;
  Mat              B, A_diag, *B_diag;
  const MatScalar *pbv, *bv;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
  ma = A->rmap->n;
  na = A->cmap->n;
  mb = a->B->rmap->n;
  nb = a->B->cmap->n;
  ai = Aloc->i;
  aj = Aloc->j;
  bi = Bloc->i;
  bj = Bloc->j;
  if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
    PetscInt            *d_nnz, *g_nnz, *o_nnz;
    PetscSFNode         *oloc;
    PETSC_UNUSED PetscSF sf;

    PetscCall(PetscMalloc4(na, &d_nnz, na, &o_nnz, nb, &g_nnz, nb, &oloc));
    /* compute d_nnz for preallocation */
    PetscCall(PetscArrayzero(d_nnz, na));
    for (i = 0; i < ai[ma]; i++) d_nnz[aj[i]]++;
    /* compute local off-diagonal contributions */
    PetscCall(PetscArrayzero(g_nnz, nb));
    for (i = 0; i < bi[ma]; i++) g_nnz[bj[i]]++;
    /* map those to global */
    PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
    PetscCall(PetscSFSetGraphLayout(sf, A->cmap, nb, NULL, PETSC_USE_POINTER, a->garray));
    PetscCall(PetscSFSetFromOptions(sf));
    PetscCall(PetscArrayzero(o_nnz, na));
    PetscCall(PetscSFReduceBegin(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
    PetscCall(PetscSFReduceEnd(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
    PetscCall(PetscSFDestroy(&sf));

    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
    PetscCall(MatSetBlockSizes(B, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs)));
    PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
    PetscCall(PetscFree4(d_nnz, o_nnz, g_nnz, oloc));
  } else {
    B = *matout;
    PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
  }

  b           = (Mat_MPIAIJ *)B->data;
  A_diag      = a->A;
  B_diag      = &b->A;
  sub_B_diag  = (Mat_SeqAIJ *)(*B_diag)->data;
  A_diag_ncol = A_diag->cmap->N;
  B_diag_ilen = sub_B_diag->ilen;
  B_diag_i    = sub_B_diag->i;

  /* Set ilen for diagonal of B */
  for (i = 0; i < A_diag_ncol; i++) B_diag_ilen[i] = B_diag_i[i + 1] - B_diag_i[i];

  /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done
     very quickly (=without using MatSetValues), because all writes are local. */
  PetscCall(MatTransposeSetPrecursor(A_diag, *B_diag));
  PetscCall(MatTranspose(A_diag, MAT_REUSE_MATRIX, B_diag));

  /* copy over the B part */
  PetscCall(PetscMalloc1(bi[mb], &cols));
  PetscCall(MatSeqAIJGetArrayRead(a->B, &bv));
  pbv = bv;
  row = A->rmap->rstart;
  for (i = 0; i < bi[mb]; i++) cols[i] = a->garray[bj[i]];
  cols_tmp = cols;
  for (i = 0; i < mb; i++) {
    ncol = bi[i + 1] - bi[i];
    /* insert row i of B as column `row` of the transpose */
    PetscCall(MatSetValues(B, ncol, cols_tmp, 1, &row, pbv, INSERT_VALUES));
    row++;
    if (pbv) pbv += ncol;
    if (cols_tmp) cols_tmp += ncol;
  }
  PetscCall(PetscFree(cols));
  PetscCall(MatSeqAIJRestoreArrayRead(a->B, &bv));

  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    /* in-place transpose: replace A's contents with B's */
    PetscCall(MatHeaderMerge(A, &B));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Scale the matrix rows by ll and columns by rr: mat = diag(ll) * mat * diag(rr).
   The scatter of rr to the ghost vector is started early so communication overlaps
   with the scaling of the local blocks. */
static PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat, Vec ll, Vec rr)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  Mat         a = aij->A, b = aij->B;
  PetscInt    s1, s2, s3;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(mat, &s2, &s3));
  if (rr) {
    PetscCall(VecGetLocalSize(rr, &s1));
    PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size");
    /* Overlap communication with computation.
     */
    PetscCall(VecScatterBegin(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
  }
  if (ll) {
    PetscCall(VecGetLocalSize(ll, &s1));
    PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size");
    PetscUseTypeMethod(b, diagonalscale, ll, NULL);
  }
  /* scale the diagonal block */
  PetscUseTypeMethod(a, diagonalscale, ll, rr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    PetscCall(VecScatterEnd(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscUseTypeMethod(b, diagonalscale, NULL, aij->lvec);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Mark the matrix as unfactored; only the diagonal block carries factorization state here. */
static PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatSetUnfactored(a->A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Compare two MPIAIJ matrices for equality: their diagonal and off-diagonal blocks are
   compared locally, then the result is AND-reduced over the communicator so all ranks agree. */
static PetscErrorCode MatEqual_MPIAIJ(Mat A, Mat B, PetscBool *flag)
{
  Mat_MPIAIJ *matB = (Mat_MPIAIJ *)B->data, *matA = (Mat_MPIAIJ *)A->data;
  Mat         a, b, c, d;
  PetscBool   flg;

  PetscFunctionBegin;
  a = matA->A;
  b = matA->B;
  c = matB->A;
  d = matB->B;

  PetscCall(MatEqual(a, c, &flg));
  if (flg) PetscCall(MatEqual(b, d, &flg));
  PetscCallMPI(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Copy A into B. Fast block-wise copy is used only for identical nonzero patterns AND
   identical copy implementations; otherwise falls back to MatCopy_Basic(). */
static PetscErrorCode MatCopy_MPIAIJ(Mat A, Mat B, MatStructure str)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;

  PetscFunctionBegin;
  /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
  if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
    /* because of the column compression in the off-processor part of the matrix a->B,
       the number of columns in a->B and b->B may be different, hence we cannot call
       the MatCopy() directly on the two parts. If need be, we can provide a more
       efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
       then copying the submatrices */
    PetscCall(MatCopy_Basic(A, B, str));
  } else {
    PetscCall(MatCopy(a->A, b->A, str));
    PetscCall(MatCopy(a->B, b->B, str));
  }
  PetscCall(PetscObjectStateIncrease((PetscObject)B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   Computes the number of nonzeros per row needed for preallocation when X and Y
   have different nonzero structure. Merges the (sorted) column lists of each row of X
   and Y through the provided local-to-global maps, counting union entries once.
*/
PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m, const PetscInt *xi, const PetscInt *xj, const PetscInt *xltog, const PetscInt *yi, const PetscInt *yj, const PetscInt *yltog, PetscInt *nnz)
{
  PetscInt i, j, k, nzx, nzy;

  PetscFunctionBegin;
  /* Set the number of nonzeros in the new matrix */
  for (i = 0; i < m; i++) {
    const PetscInt *xjj = PetscSafePointerPlusOffset(xj, xi[i]), *yjj = PetscSafePointerPlusOffset(yj, yi[i]);
    nzx    = xi[i + 1] - xi[i];
    nzy    = yi[i + 1] - yi[i];
    nnz[i] = 0;
    for (j = 0, k = 0; j < nzx; j++) {                                /* Point in X */
      for (; k < nzy && yltog[yjj[k]] < xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
      if (k < nzy && yltog[yjj[k]] == xltog[xjj[j]]) k++;             /* Skip duplicate */
      nnz[i]++;
    }
    for (; k < nzy; k++) nnz[i]++; /* remaining Y-only entries */
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz)
{
  PetscInt    m = Y->rmap->N; /* Y is a sequential (off-diagonal) block, so N == local row count */
  Mat_SeqAIJ *x = (Mat_SeqAIJ *)X->data;
  Mat_SeqAIJ *y = (Mat_SeqAIJ *)Y->data;

  PetscFunctionBegin;
  PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Y = a*X + Y. Fast path for identical/subset nonzero patterns; otherwise builds a new,
   exactly preallocated matrix for the union pattern and merges it into Y. */
static PetscErrorCode MatAXPY_MPIAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
{
  Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data, *yy = (Mat_MPIAIJ *)Y->data;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscCall(MatAXPY(yy->A, a, xx->A, str));
    PetscCall(MatAXPY(yy->B, a, xx->B, str));
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    PetscCall(MatAXPY_Basic(Y, a, X, str));
  } else {
    Mat       B;
    PetscInt *nnz_d, *nnz_o;

    PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
    PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
    PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
    PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
    PetscCall(MatSetLayouts(B, Y->rmap, Y->cmap));
    PetscCall(MatSetType(B, ((PetscObject)Y)->type_name));
    PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A, xx->A, nnz_d));
    PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
    PetscCall(MatMPIAIJSetPreallocation(B, 0, nnz_d, 0, nnz_o));
    PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
    PetscCall(MatHeaderMerge(Y, &B)); /* B's contents replace Y; B itself is consumed */
    PetscCall(PetscFree(nnz_d));
    PetscCall(PetscFree(nnz_o));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat);

/* Complex-conjugate every entry; a no-op for real builds. */
static PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
{
  PetscFunctionBegin;
  if (PetscDefined(USE_COMPLEX)) {
    Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

    PetscCall(MatConjugate_SeqAIJ(aij->A));
    PetscCall(MatConjugate_SeqAIJ(aij->B));
  }
PetscFunctionReturn(PETSC_SUCCESS); 2143 } 2144 2145 static PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2146 { 2147 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2148 2149 PetscFunctionBegin; 2150 PetscCall(MatRealPart(a->A)); 2151 PetscCall(MatRealPart(a->B)); 2152 PetscFunctionReturn(PETSC_SUCCESS); 2153 } 2154 2155 static PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2156 { 2157 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2158 2159 PetscFunctionBegin; 2160 PetscCall(MatImaginaryPart(a->A)); 2161 PetscCall(MatImaginaryPart(a->B)); 2162 PetscFunctionReturn(PETSC_SUCCESS); 2163 } 2164 2165 static PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2166 { 2167 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2168 PetscInt i, *idxb = NULL, m = A->rmap->n; 2169 PetscScalar *vv; 2170 Vec vB, vA; 2171 const PetscScalar *va, *vb; 2172 2173 PetscFunctionBegin; 2174 PetscCall(MatCreateVecs(a->A, NULL, &vA)); 2175 PetscCall(MatGetRowMaxAbs(a->A, vA, idx)); 2176 2177 PetscCall(VecGetArrayRead(vA, &va)); 2178 if (idx) { 2179 for (i = 0; i < m; i++) { 2180 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2181 } 2182 } 2183 2184 PetscCall(MatCreateVecs(a->B, NULL, &vB)); 2185 PetscCall(PetscMalloc1(m, &idxb)); 2186 PetscCall(MatGetRowMaxAbs(a->B, vB, idxb)); 2187 2188 PetscCall(VecGetArrayWrite(v, &vv)); 2189 PetscCall(VecGetArrayRead(vB, &vb)); 2190 for (i = 0; i < m; i++) { 2191 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2192 vv[i] = vb[i]; 2193 if (idx) idx[i] = a->garray[idxb[i]]; 2194 } else { 2195 vv[i] = va[i]; 2196 if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) idx[i] = a->garray[idxb[i]]; 2197 } 2198 } 2199 PetscCall(VecRestoreArrayWrite(v, &vv)); 2200 PetscCall(VecRestoreArrayRead(vA, &va)); 2201 PetscCall(VecRestoreArrayRead(vB, &vb)); 2202 PetscCall(PetscFree(idxb)); 2203 PetscCall(VecDestroy(&vA)); 2204 PetscCall(VecDestroy(&vB)); 2205 PetscFunctionReturn(PETSC_SUCCESS); 2206 } 2207 2208 static 
PetscErrorCode MatGetRowSumAbs_MPIAIJ(Mat A, Vec v) 2209 { 2210 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2211 Vec vB, vA; 2212 2213 PetscFunctionBegin; 2214 PetscCall(MatCreateVecs(a->A, NULL, &vA)); 2215 PetscCall(MatGetRowSumAbs(a->A, vA)); 2216 PetscCall(MatCreateVecs(a->B, NULL, &vB)); 2217 PetscCall(MatGetRowSumAbs(a->B, vB)); 2218 PetscCall(VecAXPY(vA, 1.0, vB)); 2219 PetscCall(VecDestroy(&vB)); 2220 PetscCall(VecCopy(vA, v)); 2221 PetscCall(VecDestroy(&vA)); 2222 PetscFunctionReturn(PETSC_SUCCESS); 2223 } 2224 2225 static PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2226 { 2227 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 2228 PetscInt m = A->rmap->n, n = A->cmap->n; 2229 PetscInt cstart = A->cmap->rstart, cend = A->cmap->rend; 2230 PetscInt *cmap = mat->garray; 2231 PetscInt *diagIdx, *offdiagIdx; 2232 Vec diagV, offdiagV; 2233 PetscScalar *a, *diagA, *offdiagA; 2234 const PetscScalar *ba, *bav; 2235 PetscInt r, j, col, ncols, *bi, *bj; 2236 Mat B = mat->B; 2237 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 2238 2239 PetscFunctionBegin; 2240 /* When a process holds entire A and other processes have no entry */ 2241 if (A->cmap->N == n) { 2242 PetscCall(VecGetArrayWrite(v, &diagA)); 2243 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV)); 2244 PetscCall(MatGetRowMinAbs(mat->A, diagV, idx)); 2245 PetscCall(VecDestroy(&diagV)); 2246 PetscCall(VecRestoreArrayWrite(v, &diagA)); 2247 PetscFunctionReturn(PETSC_SUCCESS); 2248 } else if (n == 0) { 2249 if (m) { 2250 PetscCall(VecGetArrayWrite(v, &a)); 2251 for (r = 0; r < m; r++) { 2252 a[r] = 0.0; 2253 if (idx) idx[r] = -1; 2254 } 2255 PetscCall(VecRestoreArrayWrite(v, &a)); 2256 } 2257 PetscFunctionReturn(PETSC_SUCCESS); 2258 } 2259 2260 PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx)); 2261 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2262 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2263 PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx)); 2264 2265 /* Get 
offdiagIdx[] for implicit 0.0 */ 2266 PetscCall(MatSeqAIJGetArrayRead(B, &bav)); 2267 ba = bav; 2268 bi = b->i; 2269 bj = b->j; 2270 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2271 for (r = 0; r < m; r++) { 2272 ncols = bi[r + 1] - bi[r]; 2273 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2274 offdiagA[r] = *ba; 2275 offdiagIdx[r] = cmap[0]; 2276 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2277 offdiagA[r] = 0.0; 2278 2279 /* Find first hole in the cmap */ 2280 for (j = 0; j < ncols; j++) { 2281 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2282 if (col > j && j < cstart) { 2283 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2284 break; 2285 } else if (col > j + n && j >= cstart) { 2286 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2287 break; 2288 } 2289 } 2290 if (j == ncols && ncols < A->cmap->N - n) { 2291 /* a hole is outside compressed Bcols */ 2292 if (ncols == 0) { 2293 if (cstart) { 2294 offdiagIdx[r] = 0; 2295 } else offdiagIdx[r] = cend; 2296 } else { /* ncols > 0 */ 2297 offdiagIdx[r] = cmap[ncols - 1] + 1; 2298 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2299 } 2300 } 2301 } 2302 2303 for (j = 0; j < ncols; j++) { 2304 if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) { 2305 offdiagA[r] = *ba; 2306 offdiagIdx[r] = cmap[*bj]; 2307 } 2308 ba++; 2309 bj++; 2310 } 2311 } 2312 2313 PetscCall(VecGetArrayWrite(v, &a)); 2314 PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA)); 2315 for (r = 0; r < m; ++r) { 2316 if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) { 2317 a[r] = diagA[r]; 2318 if (idx) idx[r] = cstart + diagIdx[r]; 2319 } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) { 2320 a[r] = diagA[r]; 2321 if (idx) { 2322 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2323 idx[r] = cstart + diagIdx[r]; 2324 } else idx[r] = offdiagIdx[r]; 2325 } 2326 } else { 2327 a[r] = offdiagA[r]; 2328 if (idx) 
      idx[r] = offdiagIdx[r];
    }
  }
  /* Release the work arrays/vectors of the preceding row-reduction computation */
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Computes v[r] = min over all columns of locally owned row r (MatGetRowMin); the
   optional idx[r] returns the global column index attaining the minimum. The diagonal
   block mat->A and off-diagonal block mat->B are reduced separately and then merged;
   implicit zeros of the sparse off-diagonal block count as candidate values. */
static PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat    = (Mat_MPIAIJ *)A->data;
  PetscInt           m      = A->rmap->n, n = A->cmap->n;
  PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
  PetscInt          *cmap   = mat->garray;
  PetscInt          *diagIdx, *offdiagIdx;
  Vec                diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt           r, j, col, ncols, *bi, *bj;
  Mat                B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMin(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (n == 0) {
    if (m) {
      /* no local columns at all: the min over an empty row is the +inf sentinel */
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = PETSC_MAX_REAL;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscCalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMin(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r]   = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse so we already KNOW the minimum is 0.0 or lower */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        /* NOTE(review): comparing the loop index j against cstart below looks
           suspicious (one would expect the global column to be tested); behavior
           preserved as-is -- verify against upstream PETSc */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    /* scan the explicit entries of this B row for a smaller value */
    for (j = 0; j < ncols; j++) {
      if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {
        offdiagA[r]   = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  /* merge the diagonal-block and off-diagonal-block minima, preferring the
     smaller global column index on ties */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Computes v[r] = max over all columns of locally owned row r (MatGetRowMax); the
   optional idx[r] returns the global column index attaining the maximum. Mirrors
   MatGetRowMin_MPIAIJ() with the comparison direction reversed. */
static PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat    = (Mat_MPIAIJ *)A->data;
  PetscInt           m      = A->rmap->n, n = A->cmap->n;
  PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
  PetscInt          *cmap   = mat->garray;
  PetscInt          *diagIdx, *offdiagIdx;
  Vec                diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt           r, j, col, ncols, *bi, *bj;
  Mat                B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMax(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (n == 0) {
    if (m) {
      /* no local columns at all: the max over an empty row is the -inf sentinel */
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = PETSC_MIN_REAL;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMax(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r]   = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    /* scan the explicit entries of this B row for a larger value */
    for (j = 0; j < ncols; j++) {
      if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {
        offdiagA[r]   = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  /* merge the diagonal-block and off-diagonal-block maxima, preferring the
     smaller global column index on ties */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Returns in *newmat a sequential matrix with the same nonzero structure as mat;
   values are not copied (MAT_DO_NOT_GET_VALUES) */
PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat, Mat *newmat)
{
  Mat *dummy;

  PetscFunctionBegin;
  PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat, MAT_DO_NOT_GET_VALUES, MAT_INITIAL_MATRIX, &dummy));
  *newmat = *dummy;
  PetscCall(PetscFree(dummy));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Inverts the point-block diagonal; delegates to the local diagonal block a->A */
static PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A, const PetscScalar **values)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatInvertBlockDiagonal(a->A, values));
  A->factorerrortype = a->A->factorerrortype; /* propagate any factorization failure (e.g. zero pivot) */
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Fills the matrix with random values. For a preallocated-but-unassembled matrix,
   the owned column range is skipped when filling B so that entries land in the
   correct (off-diagonal) block; a final assembly is always performed. */
static PetscErrorCode MatSetRandom_MPIAIJ(Mat x, PetscRandom rctx)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)x->data;

  PetscFunctionBegin;
  PetscCheck(x->assembled || x->preallocated, PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
  PetscCall(MatSetRandom(aij->A, rctx));
  if (x->assembled) {
    PetscCall(MatSetRandom(aij->B, rctx));
  } else {
    PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B, x->cmap->rstart, x->cmap->rend, rctx));
  }
  PetscCall(MatAssemblyBegin(x, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(x, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Selects the scalable or non-scalable MatIncreaseOverlap implementation by
   swapping the entry in the matrix ops table */
static PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A, PetscBool sc)
{
  PetscFunctionBegin;
  if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
  else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMPIAIJGetNumberNonzeros - gets the number of nonzeros in the matrix on this MPI rank

  Not Collective

  Input Parameter:
. A - the matrix

  Output Parameter:
. nz - the number of nonzeros

  Level: advanced

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`
@*/
PetscErrorCode MatMPIAIJGetNumberNonzeros(Mat A, PetscCount *nz)
{
  Mat_MPIAIJ *maij = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ *aaij = (Mat_SeqAIJ *)maij->A->data, *baij = (Mat_SeqAIJ *)maij->B->data;
  PetscBool   isaij;

  PetscFunctionBegin;
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIAIJ, &isaij));
  PetscCheck(isaij, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)A)->type_name);
  /* local count = nonzeros of the diagonal block plus those of the off-diagonal block */
  *nz = aaij->i[A->rmap->n] + baij->i[A->rmap->n];
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap

  Collective

  Input Parameters:
+ A  - the matrix
- sc - `PETSC_TRUE` indicates use the scalable algorithm (default is not to use the scalable algorithm)

  Level: advanced

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`
@*/
PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A, PetscBool sc)
{
  PetscFunctionBegin;
  PetscTryMethod(A, "MatMPIAIJSetUseScalableIncreaseOverlap_C", (Mat, PetscBool), (A, sc));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Processes MATMPIAIJ-specific options (-mat_increase_overlap_scalable) */
PetscErrorCode MatSetFromOptions_MPIAIJ(Mat A, PetscOptionItems *PetscOptionsObject)
{
  PetscBool sc = PETSC_FALSE, flg;

  PetscFunctionBegin;
  PetscOptionsHeadBegin(PetscOptionsObject, "MPIAIJ options");
  /* default reflects the currently installed implementation */
  if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
  PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable", "Use a scalable algorithm to compute the overlap", "MatIncreaseOverlap", sc, &sc, &flg));
  if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, sc));
  PetscOptionsHeadEnd();
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Y = Y + a*I; ensures at least a one-entry-per-row diagonal preallocation first
   so MatShift_Basic() does not trigger an error on new nonzeros */
static PetscErrorCode MatShift_MPIAIJ(Mat Y, PetscScalar a)
{
  Mat_MPIAIJ *maij = (Mat_MPIAIJ *)Y->data;
  Mat_SeqAIJ *aij  = (Mat_SeqAIJ *)maij->A->data;

  PetscFunctionBegin;
  if (!Y->preallocated) {
    PetscCall(MatMPIAIJSetPreallocation(Y, 1, NULL, 0, NULL));
  } else if (!aij->nz) { /* It does not matter if diagonals of Y only partially lie in maij->A. We just need an estimated preallocation. */
    PetscInt nonew = aij->nonew;
    PetscCall(MatSeqAIJSetPreallocation(maij->A, 1, NULL));
    aij->nonew = nonew; /* restore the no-new-nonzero flag clobbered by re-preallocation */
  }
  PetscCall(MatShift_Basic(Y, a));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Reports whether any diagonal entry is absent from the nonzero pattern; optional
   d returns the first such row in global numbering */
static PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A, PetscBool *missing, PetscInt *d)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
  PetscCall(MatMissingDiagonal(a->A, missing, d));
  if (d) {
    PetscInt rstart;
    PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
    *d += rstart; /* convert the local row reported by the diagonal block to global */
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Inverts the variable-size block diagonal; delegates to the local diagonal block */
static PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A, PetscInt nblocks, const PetscInt *bsizes, PetscScalar *diag)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatInvertVariableBlockDiagonal(a->A, nblocks, bsizes, diag));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Removes explicitly stored zeros from both blocks; `keep` optionally retains
   zero diagonal coefficients in the diagonal block */
static PetscErrorCode MatEliminateZeros_MPIAIJ(Mat A, PetscBool keep)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatEliminateZeros_SeqAIJ(a->A, keep)); // possibly keep zero diagonal coefficients
  PetscCall(MatEliminateZeros_SeqAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Function (ops) table for MATMPIAIJ; the numeric comments give the slot index in
   struct _MatOps. NULL slots are operations not implemented for this type. */
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
                                       MatGetRow_MPIAIJ,
                                       MatRestoreRow_MPIAIJ,
                                       MatMult_MPIAIJ,
                                       /* 4*/ MatMultAdd_MPIAIJ,
                                       MatMultTranspose_MPIAIJ,
                                       MatMultTransposeAdd_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*10*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_MPIAIJ,
                                       MatTranspose_MPIAIJ,
                                       /*15*/ MatGetInfo_MPIAIJ,
                                       MatEqual_MPIAIJ,
                                       MatGetDiagonal_MPIAIJ,
                                       MatDiagonalScale_MPIAIJ,
                                       MatNorm_MPIAIJ,
                                       /*20*/ MatAssemblyBegin_MPIAIJ,
                                       MatAssemblyEnd_MPIAIJ,
                                       MatSetOption_MPIAIJ,
                                       MatZeroEntries_MPIAIJ,
                                       /*24*/ MatZeroRows_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*29*/ MatSetUp_MPI_Hash,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIAIJ,
                                       NULL,
                                       /*34*/ MatDuplicate_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*39*/ MatAXPY_MPIAIJ,
                                       MatCreateSubMatrices_MPIAIJ,
                                       MatIncreaseOverlap_MPIAIJ,
                                       MatGetValues_MPIAIJ,
                                       MatCopy_MPIAIJ,
                                       /*44*/ MatGetRowMax_MPIAIJ,
                                       MatScale_MPIAIJ,
                                       MatShift_MPIAIJ,
                                       MatDiagonalSet_MPIAIJ,
                                       MatZeroRowsColumns_MPIAIJ,
                                       /*49*/ MatSetRandom_MPIAIJ,
                                       MatGetRowIJ_MPIAIJ,
                                       MatRestoreRowIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*54*/ MatFDColoringCreate_MPIXAIJ,
                                       NULL,
                                       MatSetUnfactored_MPIAIJ,
                                       MatPermute_MPIAIJ,
                                       NULL,
                                       /*59*/ MatCreateSubMatrix_MPIAIJ,
                                       MatDestroy_MPIAIJ,
                                       MatView_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*64*/ NULL,
                                       MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*69*/ MatGetRowMaxAbs_MPIAIJ,
                                       MatGetRowMinAbs_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*75*/ MatFDColoringApply_AIJ,
                                       MatSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFindZeroDiagonals_MPIAIJ,
                                       /*80*/ NULL,
                                       NULL,
                                       NULL,
                                       /*83*/ MatLoad_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*89*/ NULL,
                                       NULL,
                                       MatMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatBindToCPU_MPIAIJ,
                                       /*99*/ MatProductSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIAIJ,
                                       NULL,
                                       /*104*/ MatSetValuesRow_MPIAIJ,
                                       MatRealPart_MPIAIJ,
                                       MatImaginaryPart_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*109*/ NULL,
                                       NULL,
                                       MatGetRowMin_MPIAIJ,
                                       NULL,
                                       MatMissingDiagonal_MPIAIJ,
                                       /*114*/ MatGetSeqNonzeroStructure_MPIAIJ,
                                       NULL,
                                       MatGetGhosts_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*119*/ MatMultDiagonalBlock_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatGetMultiProcBlock_MPIAIJ,
                                       /*124*/ MatFindNonzeroRows_MPIAIJ,
                                       MatGetColumnReductions_MPIAIJ,
                                       MatInvertBlockDiagonal_MPIAIJ,
                                       MatInvertVariableBlockDiagonal_MPIAIJ,
                                       MatCreateSubMatricesMPI_MPIAIJ,
                                       /*129*/ NULL,
                                       NULL,
                                       NULL,
                                       MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*139*/ MatSetBlockSizes_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFDColoringSetUp_MPIXAIJ,
                                       MatFindOffBlockDiagonalEntries_MPIAIJ,
                                       MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
                                       /*145*/ NULL,
                                       NULL,
                                       NULL,
                                       MatCreateGraph_Simple_AIJ,
                                       NULL,
                                       /*150*/ NULL,
                                       MatEliminateZeros_MPIAIJ,
                                       MatGetRowSumAbs_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*155*/ NULL,
                                       MatCopyHashToXAIJ_MPI_Hash};

/* Stashes a copy of the current numerical values of both blocks (MatStoreValues) */
static PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatStoreValues(aij->A));
  PetscCall(MatStoreValues(aij->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Restores the numerical values previously stashed by MatStoreValues_MPIAIJ() */
static PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatRetrieveValues(aij->A));
  PetscCall(MatRetrieveValues(aij->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Preallocates the diagonal (d_nz/d_nnz) and off-diagonal (o_nz/o_nnz) sequential
   blocks. Both blocks are destroyed and recreated, so any previously stored data
   is discarded; on a single rank the off-diagonal block is created empty. */
PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
  PetscMPIInt size;

  PetscFunctionBegin;
  if (B->hash_active) {
    /* leave hash-based MatSetValues mode and restore the cached ops table */
    B->ops[0]      = b->cops;
    B->hash_active = PETSC_FALSE;
  }
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));

#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&b->colmap));
#else
  PetscCall(PetscFree(b->colmap));
#endif
  PetscCall(PetscFree(b->garray));
  PetscCall(VecDestroy(&b->lvec));
  PetscCall(VecScatterDestroy(&b->Mvctx));

  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));

  MatSeqXAIJGetOptions_Private(b->B);
  PetscCall(MatDestroy(&b->B));
  PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
  PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
  PetscCall(MatSetBlockSizesFromMats(b->B, B, B));
  PetscCall(MatSetType(b->B, MATSEQAIJ));
  MatSeqXAIJRestoreOptions_Private(b->B);

  MatSeqXAIJGetOptions_Private(b->A);
  PetscCall(MatDestroy(&b->A));
  PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
  PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
  PetscCall(MatSetBlockSizesFromMats(b->A, B, B));
  PetscCall(MatSetType(b->A, MATSEQAIJ));
  MatSeqXAIJRestoreOptions_Private(b->A);

  PetscCall(MatSeqAIJSetPreallocation(b->A, d_nz, d_nnz));
  PetscCall(MatSeqAIJSetPreallocation(b->B, o_nz, o_nnz));
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Resets the preallocation of both blocks so values can be reinserted; an
   assembled matrix is first disassembled back into its pre-assembly form */
static PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
{
  Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  if (B->assembled || B->was_assembled) PetscCall(MatDisAssemble_MPIAIJ(B, PETSC_TRUE));
  else {
#if defined(PETSC_USE_CTABLE)
    PetscCall(PetscHMapIDestroy(&b->colmap));
#else
    PetscCall(PetscFree(b->colmap));
#endif
    PetscCall(PetscFree(b->garray));
    PetscCall(VecDestroy(&b->lvec));
  }
  PetscCall(VecScatterDestroy(&b->Mvctx));

  PetscCall(MatResetPreallocation(b->A));
  PetscCall(MatResetPreallocation(b->B));
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Duplicates the matrix: structure always, values depending on cpvalues */
PetscErrorCode MatDuplicate_MPIAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
{
  Mat         mat;
  Mat_MPIAIJ *a, *oldmat = (Mat_MPIAIJ *)matin->data;

  PetscFunctionBegin;
  *newmat = NULL;
  PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
  PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
  PetscCall(MatSetBlockSizesFromMats(mat, matin, matin));
  PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
  a = (Mat_MPIAIJ *)mat->data;

  mat->factortype = matin->factortype;
  mat->assembled  = matin->assembled;
  mat->insertmode = NOT_SET_VALUES;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = NULL;
  a->rowvalues    = NULL;
  a->getrowactive = PETSC_FALSE;

  PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
  PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));
  if (matin->hash_active) {
    /* source is still in hash (unpreallocated) insertion mode; set the copy up the same way */
    PetscCall(MatSetUp(mat));
  } else {
    mat->preallocated = matin->preallocated;
    if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
      PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
#else
      PetscCall(PetscMalloc1(mat->cmap->N, &a->colmap));
      PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, mat->cmap->N));
#endif
    } else a->colmap = NULL;
    if (oldmat->garray) {
      PetscInt len;
      len = oldmat->B->cmap->n;
      PetscCall(PetscMalloc1(len + 1, &a->garray));
      if (len) PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
    } else a->garray = NULL;

    /* It may happen MatDuplicate is called with a non-assembled matrix
       In fact, MatDuplicate only requires the matrix to be preallocated
       This may happen inside a DMCreateMatrix_Shell */
    if (oldmat->lvec) PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
    if (oldmat->Mvctx) {
      /* the scatter context is shared by reference, not copied */
      a->Mvctx = oldmat->Mvctx;
      PetscCall(PetscObjectReference((PetscObject)oldmat->Mvctx));
    }
    PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
    PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
  }
  PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
  *newmat = mat;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Loads a MATMPIAIJ matrix from a viewer; dispatches to the binary or HDF5 reader */
PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
{
  PetscBool isbinary, ishdf5;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
  PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
  /* force binary viewer to load .info file if it has not yet done so */
  PetscCall(PetscViewerSetUp(viewer));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
  if (isbinary) {
    PetscCall(MatLoad_MPIAIJ_Binary(newMat, viewer));
  } else if (ishdf5) {
#if defined(PETSC_HAVE_HDF5)
    PetscCall(MatLoad_AIJ_HDF5(newMat, viewer));
#else
    SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
#endif
  } else {
    SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Reads the header, row lengths, column indices and values from a binary viewer
   and assembles them into mat via MatMPIAIJSetPreallocationCSR() */
PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
{
  PetscInt     header[4], M, N, m, nz, rows, cols, sum, i;
  PetscInt    *rowidxs, *colidxs;
  PetscScalar *matvals;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  /* read in matrix header */
  PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
  PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
  M  = header[1];
  N  = header[2];
  nz = header[3];
  PetscCheck(M >= 0,
PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
  PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
  PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIAIJ");

  /* set block sizes from the viewer's .info file */
  PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
  /* set global sizes if not set already */
  if (mat->rmap->N < 0) mat->rmap->N = M;
  if (mat->cmap->N < 0) mat->cmap->N = N;
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));

  /* check if the matrix sizes are correct */
  PetscCall(MatGetSize(mat, &rows, &cols));
  PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);

  /* read in row lengths and build row indices */
  PetscCall(MatGetLocalSize(mat, &m, NULL));
  PetscCall(PetscMalloc1(m + 1, &rowidxs));
  PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT));
  rowidxs[0] = 0;
  /* prefix-sum the per-row lengths into CSR row offsets */
  for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
  if (nz != PETSC_INT_MAX) {
    /* sanity check: sum of all row lengths must match the header nonzero count */
    PetscCallMPI(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer)));
    PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);
  }

  /* read in column indices and matrix values */
  PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals));
  PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
  PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
  /* store matrix indices and values */
  PetscCall(MatMPIAIJSetPreallocationCSR(mat, rowidxs, colidxs, matvals));
  PetscCall(PetscFree(rowidxs));
  PetscCall(PetscFree2(colidxs, matvals));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Not scalable because of ISAllGather() unless getting all columns. */
static PetscErrorCode ISGetSeqIS_Private(Mat mat, IS iscol, IS *isseq)
{
  IS          iscol_local;
  PetscBool   isstride;
  PetscMPIInt gisstride = 0;

  PetscFunctionBegin;
  /* check if we are grabbing all columns*/
  PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &isstride));

  if (isstride) {
    PetscInt start, len, mstart, mlen;
    PetscCall(ISStrideGetInfo(iscol, &start, NULL));
    PetscCall(ISGetLocalSize(iscol, &len));
    PetscCall(MatGetOwnershipRangeColumn(mat, &mstart, &mlen));
    if (mstart == start && mlen - mstart == len) gisstride = 1;
  }

  /* all ranks must agree that the full column space is being requested */
  PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &gisstride, 1, MPI_INT, MPI_MIN, PetscObjectComm((PetscObject)mat)));
  if (gisstride) {
    PetscInt N;
    PetscCall(MatGetSize(mat, NULL, &N));
    PetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &iscol_local));
    PetscCall(ISSetIdentity(iscol_local));
    PetscCall(PetscInfo(mat, "Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n"));
  } else {
    PetscInt cbs;
    PetscCall(ISGetBlockSize(iscol, &cbs));
    PetscCall(ISAllGather(iscol, &iscol_local));
    PetscCall(ISSetBlockSize(iscol_local, cbs));
  }

  *isseq = iscol_local;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
  (see MatCreateSubMatrix_MPIAIJ_nonscalable)

Input Parameters: 3167 + mat - matrix 3168 . isrow - parallel row index set; its local indices are a subset of local columns of `mat`, 3169 i.e., mat->rstart <= isrow[i] < mat->rend 3170 - iscol - parallel column index set; its local indices are a subset of local columns of `mat`, 3171 i.e., mat->cstart <= iscol[i] < mat->cend 3172 3173 Output Parameters: 3174 + isrow_d - sequential row index set for retrieving mat->A 3175 . iscol_d - sequential column index set for retrieving mat->A 3176 . iscol_o - sequential column index set for retrieving mat->B 3177 - garray - column map; garray[i] indicates global location of iscol_o[i] in `iscol` 3178 */ 3179 static PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat, IS isrow, IS iscol, IS *isrow_d, IS *iscol_d, IS *iscol_o, PetscInt *garray[]) 3180 { 3181 Vec x, cmap; 3182 const PetscInt *is_idx; 3183 PetscScalar *xarray, *cmaparray; 3184 PetscInt ncols, isstart, *idx, m, rstart, *cmap1, count; 3185 Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data; 3186 Mat B = a->B; 3187 Vec lvec = a->lvec, lcmap; 3188 PetscInt i, cstart, cend, Bn = B->cmap->N; 3189 MPI_Comm comm; 3190 VecScatter Mvctx = a->Mvctx; 3191 3192 PetscFunctionBegin; 3193 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3194 PetscCall(ISGetLocalSize(iscol, &ncols)); 3195 3196 /* (1) iscol is a sub-column vector of mat, pad it with '-1.' 
to form a full vector x */ 3197 PetscCall(MatCreateVecs(mat, &x, NULL)); 3198 PetscCall(VecSet(x, -1.0)); 3199 PetscCall(VecDuplicate(x, &cmap)); 3200 PetscCall(VecSet(cmap, -1.0)); 3201 3202 /* Get start indices */ 3203 PetscCallMPI(MPI_Scan(&ncols, &isstart, 1, MPIU_INT, MPI_SUM, comm)); 3204 isstart -= ncols; 3205 PetscCall(MatGetOwnershipRangeColumn(mat, &cstart, &cend)); 3206 3207 PetscCall(ISGetIndices(iscol, &is_idx)); 3208 PetscCall(VecGetArray(x, &xarray)); 3209 PetscCall(VecGetArray(cmap, &cmaparray)); 3210 PetscCall(PetscMalloc1(ncols, &idx)); 3211 for (i = 0; i < ncols; i++) { 3212 xarray[is_idx[i] - cstart] = (PetscScalar)is_idx[i]; 3213 cmaparray[is_idx[i] - cstart] = i + isstart; /* global index of iscol[i] */ 3214 idx[i] = is_idx[i] - cstart; /* local index of iscol[i] */ 3215 } 3216 PetscCall(VecRestoreArray(x, &xarray)); 3217 PetscCall(VecRestoreArray(cmap, &cmaparray)); 3218 PetscCall(ISRestoreIndices(iscol, &is_idx)); 3219 3220 /* Get iscol_d */ 3221 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, iscol_d)); 3222 PetscCall(ISGetBlockSize(iscol, &i)); 3223 PetscCall(ISSetBlockSize(*iscol_d, i)); 3224 3225 /* Get isrow_d */ 3226 PetscCall(ISGetLocalSize(isrow, &m)); 3227 rstart = mat->rmap->rstart; 3228 PetscCall(PetscMalloc1(m, &idx)); 3229 PetscCall(ISGetIndices(isrow, &is_idx)); 3230 for (i = 0; i < m; i++) idx[i] = is_idx[i] - rstart; 3231 PetscCall(ISRestoreIndices(isrow, &is_idx)); 3232 3233 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idx, PETSC_OWN_POINTER, isrow_d)); 3234 PetscCall(ISGetBlockSize(isrow, &i)); 3235 PetscCall(ISSetBlockSize(*isrow_d, i)); 3236 3237 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3238 PetscCall(VecScatterBegin(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD)); 3239 PetscCall(VecScatterEnd(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD)); 3240 3241 PetscCall(VecDuplicate(lvec, &lcmap)); 3242 3243 PetscCall(VecScatterBegin(Mvctx, 
cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD)); 3244 PetscCall(VecScatterEnd(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD)); 3245 3246 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3247 /* off-process column indices */ 3248 count = 0; 3249 PetscCall(PetscMalloc1(Bn, &idx)); 3250 PetscCall(PetscMalloc1(Bn, &cmap1)); 3251 3252 PetscCall(VecGetArray(lvec, &xarray)); 3253 PetscCall(VecGetArray(lcmap, &cmaparray)); 3254 for (i = 0; i < Bn; i++) { 3255 if (PetscRealPart(xarray[i]) > -1.0) { 3256 idx[count] = i; /* local column index in off-diagonal part B */ 3257 cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3258 count++; 3259 } 3260 } 3261 PetscCall(VecRestoreArray(lvec, &xarray)); 3262 PetscCall(VecRestoreArray(lcmap, &cmaparray)); 3263 3264 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_COPY_VALUES, iscol_o)); 3265 /* cannot ensure iscol_o has same blocksize as iscol! */ 3266 3267 PetscCall(PetscFree(idx)); 3268 *garray = cmap1; 3269 3270 PetscCall(VecDestroy(&x)); 3271 PetscCall(VecDestroy(&cmap)); 3272 PetscCall(VecDestroy(&lcmap)); 3273 PetscFunctionReturn(PETSC_SUCCESS); 3274 } 3275 3276 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3277 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *submat) 3278 { 3279 Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data, *asub; 3280 Mat M = NULL; 3281 MPI_Comm comm; 3282 IS iscol_d, isrow_d, iscol_o; 3283 Mat Asub = NULL, Bsub = NULL; 3284 PetscInt n; 3285 3286 PetscFunctionBegin; 3287 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3288 3289 if (call == MAT_REUSE_MATRIX) { 3290 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3291 PetscCall(PetscObjectQuery((PetscObject)*submat, "isrow_d", (PetscObject *)&isrow_d)); 3292 PetscCheck(isrow_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "isrow_d passed in was not used before, cannot 
reuse"); 3293 3294 PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_d", (PetscObject *)&iscol_d)); 3295 PetscCheck(iscol_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_d passed in was not used before, cannot reuse"); 3296 3297 PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_o", (PetscObject *)&iscol_o)); 3298 PetscCheck(iscol_o, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_o passed in was not used before, cannot reuse"); 3299 3300 /* Update diagonal and off-diagonal portions of submat */ 3301 asub = (Mat_MPIAIJ *)(*submat)->data; 3302 PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->A)); 3303 PetscCall(ISGetLocalSize(iscol_o, &n)); 3304 if (n) PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->B)); 3305 PetscCall(MatAssemblyBegin(*submat, MAT_FINAL_ASSEMBLY)); 3306 PetscCall(MatAssemblyEnd(*submat, MAT_FINAL_ASSEMBLY)); 3307 3308 } else { /* call == MAT_INITIAL_MATRIX) */ 3309 PetscInt *garray; 3310 PetscInt BsubN; 3311 3312 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) 
*/ 3313 PetscCall(ISGetSeqIS_SameColDist_Private(mat, isrow, iscol, &isrow_d, &iscol_d, &iscol_o, &garray)); 3314 3315 /* Create local submatrices Asub and Bsub */ 3316 PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Asub)); 3317 PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Bsub)); 3318 3319 /* Create submatrix M */ 3320 PetscCall(MatCreateMPIAIJWithSeqAIJ(comm, Asub, Bsub, garray, &M)); 3321 3322 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3323 asub = (Mat_MPIAIJ *)M->data; 3324 3325 PetscCall(ISGetLocalSize(iscol_o, &BsubN)); 3326 n = asub->B->cmap->N; 3327 if (BsubN > n) { 3328 /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */ 3329 const PetscInt *idx; 3330 PetscInt i, j, *idx_new, *subgarray = asub->garray; 3331 PetscCall(PetscInfo(M, "submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n", n, BsubN)); 3332 3333 PetscCall(PetscMalloc1(n, &idx_new)); 3334 j = 0; 3335 PetscCall(ISGetIndices(iscol_o, &idx)); 3336 for (i = 0; i < n; i++) { 3337 if (j >= BsubN) break; 3338 while (subgarray[i] > garray[j]) j++; 3339 3340 if (subgarray[i] == garray[j]) { 3341 idx_new[i] = idx[j++]; 3342 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT, i, subgarray[i], j, garray[j]); 3343 } 3344 PetscCall(ISRestoreIndices(iscol_o, &idx)); 3345 3346 PetscCall(ISDestroy(&iscol_o)); 3347 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx_new, PETSC_OWN_POINTER, &iscol_o)); 3348 3349 } else if (BsubN < n) { 3350 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")", BsubN, asub->B->cmap->N); 3351 } 3352 3353 PetscCall(PetscFree(garray)); 3354 *submat = M; 3355 3356 /* Save isrow_d, 
iscol_d and iscol_o used in processor for next request */
    PetscCall(PetscObjectCompose((PetscObject)M, "isrow_d", (PetscObject)isrow_d));
    PetscCall(ISDestroy(&isrow_d));

    PetscCall(PetscObjectCompose((PetscObject)M, "iscol_d", (PetscObject)iscol_d));
    PetscCall(ISDestroy(&iscol_d));

    PetscCall(PetscObjectCompose((PetscObject)M, "iscol_o", (PetscObject)iscol_o));
    PetscCall(ISDestroy(&iscol_o));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Dispatch entry for MatCreateSubMatrix() on MATMPIAIJ: picks the cheapest extraction
   strategy. If isrow (and possibly iscol) has the same processor distribution as mat,
   specialized routines avoid gathering a global-sized iscol on every rank; otherwise it
   falls back to the nonscalable path. On MAT_REUSE_MATRIX the strategy chosen at
   MAT_INITIAL_MATRIX time is recovered from objects composed on *newmat
   ("isrow_d", "SubIScol", "ISAllGather"). Collective on mat. */
PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
{
  IS        iscol_local = NULL, isrow_d;
  PetscInt  csize;
  PetscInt  n, i, j, start, end;
  PetscBool sameRowDist = PETSC_FALSE, sameDist[2], tsameDist[2];
  MPI_Comm  comm;

  PetscFunctionBegin;
  /* If isrow has same processor distribution as mat,
     call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    /* Reuse: recover which strategy created *newmat from the composed markers */
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "isrow_d", (PetscObject *)&isrow_d));
    if (isrow_d) {
      sameRowDist  = PETSC_TRUE;
      tsameDist[1] = PETSC_TRUE; /* sameColDist */
    } else {
      PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_local));
      if (iscol_local) {
        sameRowDist  = PETSC_TRUE;
        tsameDist[1] = PETSC_FALSE; /* !sameColDist */
      }
    }
  } else {
    /* Check if isrow has same processor distribution as mat */
    sameDist[0] = PETSC_FALSE;
    PetscCall(ISGetLocalSize(isrow, &n));
    if (!n) {
      sameDist[0] = PETSC_TRUE; /* empty local IS trivially satisfies the ownership test */
    } else {
      PetscCall(ISGetMinMax(isrow, &i, &j));
      PetscCall(MatGetOwnershipRange(mat, &start, &end));
      if (i >= start && j < end) sameDist[0] = PETSC_TRUE;
    }

    /* Check if iscol has same processor distribution as mat */
    sameDist[1] = PETSC_FALSE;
    PetscCall(ISGetLocalSize(iscol, &n));
    if (!n) {
      sameDist[1] = PETSC_TRUE;
    } else {
      PetscCall(ISGetMinMax(iscol, &i, &j));
      PetscCall(MatGetOwnershipRangeColumn(mat, &start, &end));
      if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
    }

    /* All ranks must agree on the strategy, hence the logical-AND reduction */
    PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
    PetscCallMPI(MPIU_Allreduce(&sameDist, &tsameDist, 2, MPIU_BOOL, MPI_LAND, comm));
    sameRowDist = tsameDist[0];
  }

  if (sameRowDist) {
    if (tsameDist[1]) { /* sameRowDist & sameColDist */
      /* isrow and iscol have same processor distribution as mat */
      PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat, isrow, iscol, call, newmat));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else { /* sameRowDist */
      /* isrow has same processor distribution as mat */
      if (call == MAT_INITIAL_MATRIX) {
        PetscBool sorted;
        PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
        PetscCall(ISGetLocalSize(iscol_local, &n)); /* local size of iscol_local = global columns of newmat */
        PetscCall(ISGetSize(iscol, &i));
        PetscCheck(n == i, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT, n, i);

        PetscCall(ISSorted(iscol_local, &sorted));
        if (sorted) {
          /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
          PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, iscol_local, MAT_INITIAL_MATRIX, newmat));
          PetscFunctionReturn(PETSC_SUCCESS);
        }
        /* unsorted iscol_local: fall through to the general path below, which reuses iscol_local */
      } else { /* call == MAT_REUSE_MATRIX */
        IS iscol_sub;
        PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
        if (iscol_sub) {
          PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, NULL, call, newmat));
          PetscFunctionReturn(PETSC_SUCCESS);
        }
      }
    }
  }

  /* General case: iscol -> iscol_local which has global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
    PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
  } else {
    if (!iscol_local) PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
  }

  PetscCall(ISGetLocalSize(iscol, &csize));
  PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat, isrow, iscol_local, csize, call, newmat));

  if (call == MAT_INITIAL_MATRIX) {
    /* stash the gathered column IS on the submatrix so a later MAT_REUSE_MATRIX call can find it */
    PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
    PetscCall(ISDestroy(&iscol_local));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatCreateMPIAIJWithSeqAIJ - creates a `MATMPIAIJ` matrix using `MATSEQAIJ` matrices that contain the "diagonal"
  and "off-diagonal" part of the matrix in CSR format.

  Collective

  Input Parameters:
+ comm   - MPI communicator
. A      - "diagonal" portion of matrix
. B      - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
- garray - global index of `B` columns

  Output Parameter:
. mat - the matrix, with input `A` as its local diagonal matrix

  Level: advanced

  Notes:
  See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix.

  `A` becomes part of output mat, `B` is destroyed by this routine. The user cannot use `A` and `B` anymore.
.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateMPIAIJWithSplitArrays()`
@*/
PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm, Mat A, Mat B, const PetscInt garray[], Mat *mat)
{
  Mat_MPIAIJ        *maij;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data, *bnew;
  PetscInt          *oi = b->i, *oj = b->j, i, nz, col;
  const PetscScalar *oa;
  Mat                Bnew;
  PetscInt           m, n, N;
  MatType            mpi_mat_type;

  PetscFunctionBegin;
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatGetSize(A, &m, &n));
  PetscCheck(m == B->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Am %" PetscInt_FMT " != Bm %" PetscInt_FMT, m, B->rmap->N);
  PetscCheck(PetscAbs(A->rmap->bs) == PetscAbs(B->rmap->bs), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT, A->rmap->bs, B->rmap->bs);
  /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
  /* PetscCheck(A->cmap->bs == B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */

  /* Get global columns of mat */
  PetscCallMPI(MPIU_Allreduce(&n, &N, 1, MPIU_INT, MPI_SUM, comm));

  PetscCall(MatSetSizes(*mat, m, n, PETSC_DECIDE, N));
  /* Determine the type of MPI matrix that should be created from the type of matrix A, which holds the "diagonal" portion. */
  PetscCall(MatGetMPIMatType_Private(A, &mpi_mat_type));
  PetscCall(MatSetType(*mat, mpi_mat_type));

  if (A->rmap->bs > 1 || A->cmap->bs > 1) PetscCall(MatSetBlockSizes(*mat, A->rmap->bs, A->cmap->bs));
  maij = (Mat_MPIAIJ *)(*mat)->data;

  (*mat)->preallocated = PETSC_TRUE;

  PetscCall(PetscLayoutSetUp((*mat)->rmap));
  PetscCall(PetscLayoutSetUp((*mat)->cmap));

  /* Set A as diagonal portion of *mat; ownership of A transfers to *mat */
  maij->A = A;

  /* Map B's local (compressed) column indices to global indices in place, using garray */
  nz = oi[m];
  for (i = 0; i < nz; i++) {
    col   = oj[i];
    oj[i] = garray[col];
  }

  /* Set Bnew as off-diagonal portion of *mat; Bnew aliases B's i/j/a arrays */
  PetscCall(MatSeqAIJGetArrayRead(B, &oa));
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, N, oi, oj, (PetscScalar *)oa, &Bnew));
  PetscCall(MatSeqAIJRestoreArrayRead(B, &oa));
  bnew        = (Mat_SeqAIJ *)Bnew->data;
  bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
  maij->B     = Bnew;

  PetscCheck(B->rmap->N == Bnew->rmap->N, PETSC_COMM_SELF, PETSC_ERR_PLIB, "BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT, B->rmap->N, Bnew->rmap->N);

  /* Hand ownership of the i/j/a arrays from B to Bnew: clear B's free flags so
     destroying B does not free the arrays now owned by Bnew */
  b->free_a  = PETSC_FALSE;
  b->free_ij = PETSC_FALSE;
  PetscCall(MatDestroy(&B));

  bnew->free_a  = PETSC_TRUE;
  bnew->free_ij = PETSC_TRUE;

  /* condense columns of maij->B */
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
  PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat, PetscInt, const IS[], const IS[], MatReuse, PetscBool, Mat *);

/* Extract a parallel submatrix when isrow has the same processor distribution as mat.
   iscol_local must be sorted (may contain duplicates); pass NULL on MAT_REUSE_MATRIX. */
PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat, IS isrow, IS iscol, IS iscol_local, MatReuse call, Mat *newmat)
{
  PetscInt        i, m, n, rstart, row, rend, nz, j, bs, cbs;
  PetscInt       *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ *)mat->data;
  Mat             M, Msub, B = a->B;
  MatScalar      *aa;
  Mat_SeqAIJ     *aij;
  PetscInt       *garray = a->garray, *colsub, Ncols;
  PetscInt        count, Bn = B->cmap->N, cstart = mat->cmap->rstart, cend = mat->cmap->rend;
  IS              iscol_sub, iscmap;
  const PetscInt *is_idx, *cmap;
  PetscBool       allcolumns = PETSC_FALSE;
  MPI_Comm        comm;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  if (call == MAT_REUSE_MATRIX) {
    /* Recover the pieces stashed on *newmat by the MAT_INITIAL_MATRIX call */
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
    PetscCheck(iscol_sub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "SubIScol passed in was not used before, cannot reuse");
    PetscCall(ISGetLocalSize(iscol_sub, &count));

    PetscCall(PetscObjectQuery((PetscObject)*newmat, "Subcmap", (PetscObject *)&iscmap));
    PetscCheck(iscmap, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Subcmap passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Msub));
    PetscCheck(Msub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");

    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_REUSE_MATRIX, PETSC_FALSE, &Msub));

  } else { /* call == MAT_INITIAL_MATRIX) */
    PetscBool flg;

    PetscCall(ISGetLocalSize(iscol, &n));
    PetscCall(ISGetSize(iscol, &Ncols));

    /* (1) iscol -> nonscalable iscol_local */
    /* Check for special case: each processor gets entire matrix columns */
    PetscCall(ISIdentity(iscol_local, &flg));
    if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
    PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (allcolumns) {
      iscol_sub = iscol_local;
      PetscCall(PetscObjectReference((PetscObject)iscol_local));
      PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &iscmap));

    } else {
      /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
      PetscInt *idx, *cmap1, k;
      PetscCall(PetscMalloc1(Ncols, &idx));
      PetscCall(PetscMalloc1(Ncols, &cmap1));
      PetscCall(ISGetIndices(iscol_local, &is_idx));
      count = 0;
      k     = 0;
      /* Merge-scan of the sorted iscol_local against this rank's columns:
         keep an index if it lies in the diagonal block [cstart,cend) or appears
         in garray (the off-diagonal global column list) */
      for (i = 0; i < Ncols; i++) {
        j = is_idx[i];
        if (j >= cstart && j < cend) {
          /* diagonal part of mat */
          idx[count]     = j;
          cmap1[count++] = i; /* column index in submat */
        } else if (Bn) {
          /* off-diagonal part of mat */
          if (j == garray[k]) {
            idx[count]     = j;
            cmap1[count++] = i; /* column index in submat */
          } else if (j > garray[k]) {
            while (j > garray[k] && k < Bn - 1) k++;
            if (j == garray[k]) {
              idx[count]     = j;
              cmap1[count++] = i; /* column index in submat */
            }
          }
        }
      }
      PetscCall(ISRestoreIndices(iscol_local, &is_idx));

      PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_OWN_POINTER, &iscol_sub));
      PetscCall(ISGetBlockSize(iscol, &cbs));
      PetscCall(ISSetBlockSize(iscol_sub, cbs));

      PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local), count, cmap1, PETSC_OWN_POINTER, &iscmap));
    }

    /* (3) Create sequential Msub */
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_INITIAL_MATRIX, allcolumns, &Msub));
  }

  PetscCall(ISGetLocalSize(iscol_sub, &count));
  aij = (Mat_SeqAIJ *)Msub->data;
  ii  = aij->i;
  PetscCall(ISGetIndices(iscmap, &cmap));

  /*
      m - number of local rows
      Ncols - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetSize(Msub, &m, NULL));

  if (call == MAT_INITIAL_MATRIX) {
    /* (4) Create parallel newmat */
    PetscMPIInt rank, size;
    PetscInt    csize;

    PetscCallMPI(MPI_Comm_size(comm, &size));
    PetscCallMPI(MPI_Comm_rank(comm, &rank));

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    PetscCall(ISGetLocalSize(iscol, &csize));
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow, &mglobal));
      if (mglobal == Ncols) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = Ncols / size + ((Ncols % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    /* prefix-sum of local column counts gives this rank's [rstart,rend) column range */
    PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
    rstart = rend - nlocal;
    PetscCheck(rank != size - 1 || rend == Ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, Ncols);

    /* next, compute all the lengths */
    jj = aij->j;
    PetscCall(PetscMalloc1(2 * m + 1, &dlens));
    olens = dlens + m; /* olens shares the single allocation with dlens */
    for (i = 0; i < m; i++) {
      jend = ii[i + 1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j = 0; j < jend; j++) {
        if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }

    PetscCall(ISGetBlockSize(isrow, &bs));
    PetscCall(ISGetBlockSize(iscol, &cbs));

    PetscCall(MatCreate(comm, &M));
    PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, Ncols));
    PetscCall(MatSetBlockSizes(M, bs, cbs));
    PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
    PetscCall(PetscFree(dlens));

  } else { /* call == MAT_REUSE_MATRIX */
    M = *newmat;
    PetscCall(MatGetLocalSize(M, &i, NULL));
    PetscCheck(i == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
        The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
        rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }

  /* (5) Set values of Msub to *newmat */
  PetscCall(PetscMalloc1(count, &colsub));
  PetscCall(MatGetOwnershipRange(M, &rstart, NULL));

  jj = aij->j;
  PetscCall(MatSeqAIJGetArrayRead(Msub, (const PetscScalar **)&aa));
  for (i = 0; i < m; i++) {
    row = rstart + i;
    nz  = ii[i + 1] - ii[i];
    /* translate Msub's local column indices to submatrix-global ones via cmap */
    for (j = 0; j < nz; j++) colsub[j] = cmap[jj[j]];
    PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, colsub, aa, INSERT_VALUES));
    jj += nz;
    aa += nz;
  }
  /* NOTE(review): aa has been advanced past the start of the array at this point;
     presumably MatSeqAIJRestoreArrayRead() does not require the original pointer value — confirm */
  PetscCall(MatSeqAIJRestoreArrayRead(Msub, (const PetscScalar **)&aa));
  PetscCall(ISRestoreIndices(iscmap, &cmap));

  PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));

  PetscCall(PetscFree(colsub));

  /* save Msub, iscol_sub and iscmap used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    *newmat = M;
    PetscCall(PetscObjectCompose((PetscObject)*newmat, "SubMatrix", (PetscObject)Msub));
    PetscCall(MatDestroy(&Msub));

    PetscCall(PetscObjectCompose((PetscObject)*newmat, "SubIScol", (PetscObject)iscol_sub));
    PetscCall(ISDestroy(&iscol_sub));

    PetscCall(PetscObjectCompose((PetscObject)*newmat, "Subcmap", (PetscObject)iscmap));
    PetscCall(ISDestroy(&iscmap));

    if (iscol_local) {
      PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
      PetscCall(ISDestroy(&iscol_local));
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   Not great since it makes two copies of the submatrix, first an SeqAIJ
   in local and then by concatenating the local matrices the end result.
   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()

   This requires a sequential iscol with all indices.
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat)
{
  PetscMPIInt rank, size;
  PetscInt    i, m, n, rstart, row, rend, nz, *cwork, j, bs, cbs;
  PetscInt   *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
  Mat         M, Mreuse;
  MatScalar  *aa, *vwork;
  MPI_Comm    comm;
  Mat_SeqAIJ *aij;
  PetscBool   colflag, allcolumns = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));

  /* Check for special case: each processor gets entire matrix columns */
  PetscCall(ISIdentity(iscol, &colflag));
  PetscCall(ISGetLocalSize(iscol, &n));
  if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
  /* all ranks must agree on allcolumns before the collective call below */
  PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));

  if (call == MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse));
    PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_REUSE_MATRIX, allcolumns, &Mreuse));
  } else {
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_INITIAL_MATRIX, allcolumns, &Mreuse));
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetSize(Mreuse, &m, &n));
  PetscCall(MatGetBlockSizes(Mreuse, &bs, &cbs));
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ *)Mreuse->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow, &mglobal));
      if (mglobal == n) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = n / size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    /* prefix-sum of local column counts gives this rank's [rstart,rend) column range */
    PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
    rstart = rend - nlocal;
    PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n);

    /* next, compute all the lengths */
    PetscCall(PetscMalloc1(2 * m + 1, &dlens));
    olens = dlens + m; /* olens shares the single allocation with dlens */
    for (i = 0; i < m; i++) {
      jend = ii[i + 1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j = 0; j < jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    PetscCall(MatCreate(comm, &M));
    PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, n));
    PetscCall(MatSetBlockSizes(M, bs, cbs));
    PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
    PetscCall(PetscFree(dlens));
  } else {
    PetscInt ml, nl;

    M = *newmat;
    PetscCall(MatGetLocalSize(M, &ml, &nl));
    PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
        The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
        rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  PetscCall(MatGetOwnershipRange(M, &rstart, &rend));
  aij = (Mat_SeqAIJ *)Mreuse->data;
  ii  = aij->i;
  jj  = aij->j;

  /* trigger copy to CPU if needed */
  PetscCall(MatSeqAIJGetArrayRead(Mreuse, (const PetscScalar **)&aa));
  for (i = 0; i < m; i++) {
    row   = rstart + i;
    nz    = ii[i + 1] - ii[i];
    cwork = jj;
    jj    = PetscSafePointerPlusOffset(jj, nz);
    vwork = aa;
    aa    = PetscSafePointerPlusOffset(aa, nz);
    PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES));
  }
  /* NOTE(review): aa has been advanced past the start of the array here;
     presumably MatSeqAIJRestoreArrayRead() does not require the original pointer value — confirm */
  PetscCall(MatSeqAIJRestoreArrayRead(Mreuse, (const PetscScalar **)&aa));

  PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse));
    PetscCall(MatDestroy(&Mreuse));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Implementation of MatMPIAIJSetPreallocationCSR(): preallocates from local CSR arrays
   (Ii may start at a nonzero offset irstart) and inserts the provided values. */
static PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[])
{
  PetscInt        m, cstart, cend, j, nnz, i, d, *ld;
  PetscInt       *d_nnz, *o_nnz, nnz_max = 0, rstart, ii, irstart;
  const PetscInt *JJ;
  PetscBool       nooffprocentries;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)B->data;

  PetscFunctionBegin;
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  m       = B->rmap->n;
  cstart  = B->cmap->rstart;
  cend    = B->cmap->rend;
  rstart  = B->rmap->rstart;
  irstart = Ii[0]; /* Ii need not start at 0; all row offsets are taken relative to Ii[0] */

  PetscCall(PetscCalloc2(m, &d_nnz, m, &o_nnz));

  if (PetscDefined(USE_DEBUG)) {
    /* sanity-check the CSR input: nonnegative row lengths, column indices in range */
    for (i = 0; i < m; i++) {
      nnz = Ii[i + 1] - Ii[i];
      JJ  = PetscSafePointerPlusOffset(J, Ii[i] - irstart);
      PetscCheck(nnz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns", i, nnz);
      PetscCheck(!nnz || !(JJ[0] < 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT, i, JJ[0]);
      PetscCheck(!nnz || !(JJ[nnz - 1] >= B->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")", i, JJ[nnz - 1], B->cmap->N);
    }
  }

  /* count per-row nonzeros falling in the diagonal block [cstart,cend) vs off-diagonal */
  for (i = 0; i < m; i++) {
    nnz = Ii[i + 1] - Ii[i];
    JJ  = PetscSafePointerPlusOffset(J, Ii[i] - irstart);
    nnz_max = PetscMax(nnz_max, nnz);
    d       = 0;
    for (j = 0; j < nnz; j++) {
      if (cstart <= JJ[j] && JJ[j] < cend) d++;
    }
    d_nnz[i] = d;
    o_nnz[i] = nnz - d;
  }
  PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
  PetscCall(PetscFree2(d_nnz, o_nnz));

  for (i = 0; i < m; i++) {
    ii = i + rstart;
    PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, Ii[i + 1] - Ii[i], PetscSafePointerPlusOffset(J, Ii[i] - irstart), PetscSafePointerPlusOffset(v, Ii[i] - irstart), INSERT_VALUES));
  }
  /* all inserted entries are local, so skip the off-process stash communication */
  nooffprocentries    = B->nooffprocentries;
  B->nooffprocentries = PETSC_TRUE;
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  B->nooffprocentries = nooffprocentries;

  /* count number of entries below block diagonal */
  PetscCall(PetscFree(Aij->ld));
  PetscCall(PetscCalloc1(m, &ld));
  Aij->ld = ld;
  for (i = 0; i < m; i++) {
    nnz = Ii[i + 1] - Ii[i];
    j   = 0;
    while (j < nnz && J[j] < cstart) j++;
    ld[i] = j;
    if (J) J += nnz;
  }

  PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in `MATAIJ` format
  (the default parallel PETSc format).

  Collective

  Input Parameters:
+ B - the matrix
. i - the indices into `j` for the start of each local row (indices start with zero)
. j - the column indices for each local row (indices start with zero)
- v - optional values in the matrix

  Level: developer

  Notes:
  The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc;
  thus you CANNOT change the matrix entries by changing the values of `v` after you have
  called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.

  The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array.

  A convenience routine for this functionality is `MatCreateMPIAIJWithArrays()`.

  You can update the matrix with new numerical values using `MatUpdateMPIAIJWithArrays()` after this call if the column indices in `j` are sorted.

  If you do **not** use `MatUpdateMPIAIJWithArrays()`, the column indices in `j` do not need to be sorted. If you will use
  `MatUpdateMPIAIJWithArrays()`, the column indices **must** be sorted.

  The format which is used for the sparse matrix input, is equivalent to a
  row-major ordering, i.e. for the following matrix, the input data expected is
  as shown
.vb
        1 0 0
        2 0 3     P0
       -------
        4 5 6     P1

     Process0 [P0] rows_owned=[0,1]
        i =  {0,1,3}  [size = nrow+1  = 2+1]
        j =  {0,0,2}  [size = 3]
        v =  {1,2,3}  [size = 3]

     Process1 [P1] rows_owned=[2]
        i =  {0,3}    [size = nrow+1  = 1+1]
        j =  {0,1,2}  [size = 3]
        v =  {4,5,6}  [size = 3]
.ve

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`,
          `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()`, `MatCreateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
@*/
PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
{
  PetscFunctionBegin;
  /* NOTE(review): unlike MatMPIAIJSetPreallocation(), this wrapper performs no
     PetscValidHeaderSpecific/PetscValidType checks on B — confirm whether intentional */
  PetscTryMethod(B, "MatMPIAIJSetPreallocationCSR_C", (Mat, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, i, j, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in `MATMPIAIJ` format
  (the default parallel PETSc format). For good matrix assembly performance
  the user should preallocate the matrix storage by setting the parameters
  `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).

  Collective

  Input Parameters:
+ B     - the matrix
. d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
          (same value is used for all local rows)
. d_nnz - array containing the number of nonzeros in the various rows of the
          DIAGONAL portion of the local submatrix (possibly different for each row)
          or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `d_nz` is used to specify the nonzero structure.
          The size of this array is equal to the number of local rows, i.e 'm'.
4051 For matrices that will be factored, you must leave room for (and set) 4052 the diagonal entry even if it is zero. 4053 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4054 submatrix (same value is used for all local rows). 4055 - o_nnz - array containing the number of nonzeros in the various rows of the 4056 OFF-DIAGONAL portion of the local submatrix (possibly different for 4057 each row) or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `o_nz` is used to specify the nonzero 4058 structure. The size of this array is equal to the number 4059 of local rows, i.e 'm'. 4060 4061 Example Usage: 4062 Consider the following 8x8 matrix with 34 non-zero values, that is 4063 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4064 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4065 as follows 4066 4067 .vb 4068 1 2 0 | 0 3 0 | 0 4 4069 Proc0 0 5 6 | 7 0 0 | 8 0 4070 9 0 10 | 11 0 0 | 12 0 4071 ------------------------------------- 4072 13 0 14 | 15 16 17 | 0 0 4073 Proc1 0 18 0 | 19 20 21 | 0 0 4074 0 0 0 | 22 23 0 | 24 0 4075 ------------------------------------- 4076 Proc2 25 26 27 | 0 0 28 | 29 0 4077 30 0 0 | 31 32 33 | 0 34 4078 .ve 4079 4080 This can be represented as a collection of submatrices as 4081 .vb 4082 A B C 4083 D E F 4084 G H I 4085 .ve 4086 4087 Where the submatrices A,B,C are owned by proc0, D,E,F are 4088 owned by proc1, G,H,I are owned by proc2. 4089 4090 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4091 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4092 The 'M','N' parameters are 8,8, and have the same values on all procs. 4093 4094 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4095 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4096 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4097 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4098 part as `MATSEQAIJ` matrices. 
For example, proc1 will store [E] as a `MATSEQAIJ` 4099 matrix, and [DF] as another `MATSEQAIJ` matrix. 4100 4101 When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are 4102 allocated for every row of the local DIAGONAL submatrix, and `o_nz` 4103 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 4104 One way to choose `d_nz` and `o_nz` is to use the maximum number of nonzeros over 4105 the local rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4106 In this case, the values of `d_nz`, `o_nz` are 4107 .vb 4108 proc0 dnz = 2, o_nz = 2 4109 proc1 dnz = 3, o_nz = 2 4110 proc2 dnz = 1, o_nz = 4 4111 .ve 4112 We are allocating `m`*(`d_nz`+`o_nz`) storage locations for every proc. This 4113 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4114 for proc3. i.e we are using 12+15+10=37 storage locations to store 4115 34 values. 4116 4117 When `d_nnz`, `o_nnz` parameters are specified, the storage is specified 4118 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4119 In the above case the values for `d_nnz`, `o_nnz` are 4120 .vb 4121 proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2] 4122 proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1] 4123 proc2 d_nnz = [1,1] and o_nnz = [4,4] 4124 .ve 4125 Here the space allocated is sum of all the above values i.e 34, and 4126 hence pre-allocation is perfect. 4127 4128 Level: intermediate 4129 4130 Notes: 4131 If the *_nnz parameter is given then the *_nz parameter is ignored 4132 4133 The `MATAIJ` format, also called compressed row storage (CSR), is compatible with standard Fortran 4134 storage. The stored row and column indices begin with zero. 4135 See [Sparse Matrices](sec_matsparse) for details. 4136 4137 The parallel matrix is partitioned such that the first m0 rows belong to 4138 process 0, the next m1 rows belong to process 1, the next m2 rows belong 4139 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 
  The DIAGONAL portion of the local submatrix of a processor can be defined
  as the submatrix which is obtained by extracting the part corresponding to
  the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
  first row that belongs to the processor, r2 is the last row belonging to
  this processor, and c1-c2 is the range of indices of the local part of a
  vector suitable for applying the matrix to. This is an mxn matrix. In the
  common case of a square matrix, the row and column ranges are the same and
  the DIAGONAL part is also square. The remaining portion of the local
  submatrix (mxN) constitutes the OFF-DIAGONAL portion.

  If `o_nnz` and `d_nnz` are specified, then `o_nz` and `d_nz` are ignored.

  You can call `MatGetInfo()` to get information on how effective the preallocation was;
  for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
  You can also run with the option `-info` and look for messages with the string
  malloc in them to see if additional memory allocation was needed.

.seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MATMPIAIJ`, `MATAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`,
          `MatGetInfo()`, `PetscSplitOwnership()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
@*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidType(B, 1);
  /* dispatch to the matrix type's composed implementation; a no-op if the type
     does not provide "MatMPIAIJSetPreallocation_C" */
  PetscTryMethod(B, "MatMPIAIJSetPreallocation_C", (Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, d_nz, d_nnz, o_nz, o_nnz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatCreateMPIAIJWithArrays - creates a `MATMPIAIJ` matrix using arrays that contain data in standard
  CSR format for the local rows.
  Collective

  Input Parameters:
+ comm - MPI communicator
. m    - number of local rows (Cannot be `PETSC_DECIDE`)
. n    - This value should be the same as the local size used in creating the
         x vector for the matrix-vector product $ y = Ax$. (or `PETSC_DECIDE` to have
         calculated if `N` is given) For square matrices n is almost always `m`.
. M    - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
. N    - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
. i    - row indices (of length m+1); that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
. j    - global column indices
- a    - optional matrix values

  Output Parameter:
. mat - the matrix

  Level: intermediate

  Notes:
  The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc;
  thus you CANNOT change the matrix entries by changing the values of `a[]` after you have
  called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.

  The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array.

  Once you have created the matrix you can update it with new numerical values using `MatUpdateMPIAIJWithArray()`

  If you do **not** use `MatUpdateMPIAIJWithArray()`, the column indices in `j` do not need to be sorted. If you will use
  `MatUpdateMPIAIJWithArray()`, the column indices **must** be sorted.
  The format which is used for the sparse matrix input is equivalent to a
  row-major ordering, i.e., for the following matrix, the input data expected is
  as shown
.vb
        1 0 0
        2 0 3     P0
       -------
        4 5 6     P1

     Process0 [P0] rows_owned=[0,1]
        i =  {0,1,3}  [size = nrow+1  = 2+1]
        j =  {0,0,2}  [size = 3]
        v =  {1,2,3}  [size = 3]

     Process1 [P1] rows_owned=[2]
        i =  {0,3}    [size = nrow+1  = 1+1]
        j =  {0,1,2}  [size = 3]
        v =  {4,5,6}  [size = 3]
.ve

.seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
          `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
@*/
PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
{
  PetscFunctionBegin;
  /* the row-offset array must describe this rank's rows starting at offset zero */
  PetscCheck(!i || !i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatSetSizes(*mat, m, n, M, N));
  /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */
  PetscCall(MatSetType(*mat, MATMPIAIJ));
  /* copies the local CSR arrays into the matrix (see Notes above: i, j, a ARE copied) */
  PetscCall(MatMPIAIJSetPreallocationCSR(*mat, i, j, a));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatUpdateMPIAIJWithArrays - updates a `MATMPIAIJ` matrix using arrays that contain data in standard
  CSR format for the local rows.
Only the numerical values are updated; the other arrays must be identical to what was passed
  from `MatCreateMPIAIJWithArrays()`

  Deprecated: Use `MatUpdateMPIAIJWithArray()`

  Collective

  Input Parameters:
+ mat - the matrix
. m   - number of local rows (Cannot be `PETSC_DECIDE`)
. n   - This value should be the same as the local size used in creating the
        x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
        calculated if N is given) For square matrices n is almost always m.
. M   - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
. N   - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
. Ii  - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
. J   - column indices
- v   - matrix values

  Level: deprecated

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
          `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
@*/
PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[])
{
  PetscInt        nnz, i;
  PetscBool       nooffprocentries;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)mat->data;
  Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ *)Aij->A->data;
  PetscScalar    *ad, *ao;
  PetscInt        ldi, Iii, md;
  const PetscInt *Adi = Ad->i;
  PetscInt       *ld  = Aij->ld;

  PetscFunctionBegin;
  PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCheck(m == mat->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
  PetscCheck(n == mat->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");

  PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad));
  PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao));

  for (i = 0; i < m; i++) {
    /* in debug mode verify each row's column indices are sorted and unique; the
       split copy below relies on the input ordering matching the stored pattern */
    if (PetscDefined(USE_DEBUG)) {
      for (PetscInt j = Ii[i] + 1; j < Ii[i + 1]; ++j) {
        PetscCheck(J[j] >= J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is not sorted", j - Ii[i], J[j], i);
        PetscCheck(J[j] != J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is identical to previous entry", j - Ii[i], J[j], i);
      }
    }
    nnz = Ii[i + 1] - Ii[i]; /* total entries of row i in the input CSR */
    Iii = Ii[i];             /* offset of row i in v */
    ldi = ld[i];             /* assumed: number of off-diagonal-block entries preceding the diagonal block in row i -- confirm against how Aij->ld is built */
    md  = Adi[i + 1] - Adi[i]; /* entries stored in the diagonal block for row i */
    /* row layout in v (sorted columns): [left off-diagonal | diagonal block | right off-diagonal] */
    PetscCall(PetscArraycpy(ao, v + Iii, ldi));
    PetscCall(PetscArraycpy(ad, v + Iii + ldi, md));
    PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md));
    ad += md;
    ao += nnz - md;
  }
  /* values were written directly into the local blocks, so assembly has no
     off-process entries to communicate; save and restore the user's flag */
  nooffprocentries      = mat->nooffprocentries;
  mat->nooffprocentries = PETSC_TRUE;
  PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad));
  PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao));
  PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
  PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
  mat->nooffprocentries = nooffprocentries;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatUpdateMPIAIJWithArray - updates a `MATMPIAIJ` matrix using an array that contains the
nonzero values

  Collective

  Input Parameters:
+ mat - the matrix
- v   - matrix values, stored by row

  Level: intermediate

  Notes:
  The matrix must have been obtained with `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()`

  The column indices in the call to `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` must have been sorted for this call to work correctly

.seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
          `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
@*/
PetscErrorCode MatUpdateMPIAIJWithArray(Mat mat, const PetscScalar v[])
{
  PetscInt        nnz, i, m;
  PetscBool       nooffprocentries;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)mat->data;
  Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ *)Aij->A->data;
  Mat_SeqAIJ     *Ao  = (Mat_SeqAIJ *)Aij->B->data;
  PetscScalar    *ad, *ao;
  const PetscInt *Adi = Ad->i, *Adj = Ao->i; /* NOTE(review): despite the name, Adj is the row-offset array of the OFF-diagonal block Ao */
  PetscInt        ldi, Iii, md;
  PetscInt       *ld = Aij->ld;

  PetscFunctionBegin;
  m = mat->rmap->n;

  PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad));
  PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao));
  Iii = 0; /* running offset into v, reconstructed from the stored row lengths */
  for (i = 0; i < m; i++) {
    nnz = Adi[i + 1] - Adi[i] + Adj[i + 1] - Adj[i]; /* total stored entries of row i (diagonal + off-diagonal blocks) */
    ldi = ld[i];                                     /* off-diagonal entries preceding the diagonal block in row i */
    md  = Adi[i + 1] - Adi[i];                       /* entries in the diagonal block for row i */
    /* row layout in v (sorted columns): [left off-diagonal | diagonal block | right off-diagonal] */
    PetscCall(PetscArraycpy(ad, v + Iii + ldi, md));
    ad += md;
    if (ao) {
      PetscCall(PetscArraycpy(ao, v + Iii, ldi));
      PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md));
      ao += nnz - md;
    }
    Iii += nnz;
  }
  /* values were written in place; suppress off-process communication during assembly */
  nooffprocentries      = mat->nooffprocentries;
  mat->nooffprocentries = PETSC_TRUE;
  PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad));
  PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao));
  PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
  PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
  mat->nooffprocentries = nooffprocentries;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatCreateAIJ - Creates a sparse parallel matrix in `MATAIJ` format
  (the default parallel PETSc format).  For good matrix assembly performance
  the user should preallocate the matrix storage by setting the parameters
  `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).

  Collective

  Input Parameters:
+ comm  - MPI communicator
. m     - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
          This value should be the same as the local size used in creating the
          y vector for the matrix-vector product y = Ax.
. n     - This value should be the same as the local size used in creating the
          x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
          calculated if N is given) For square matrices n is almost always m.
. M     - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
. N     - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
. d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
          (same value is used for all local rows)
. d_nnz - array containing the number of nonzeros in the various rows of the
          DIAGONAL portion of the local submatrix (possibly different for each row)
          or `NULL`, if `d_nz` is used to specify the nonzero structure.
          The size of this array is equal to the number of local rows, i.e 'm'.
. o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
          submatrix (same value is used for all local rows).
- o_nnz - array containing the number of nonzeros in the various rows of the
          OFF-DIAGONAL portion of the local submatrix (possibly different for
          each row) or `NULL`, if `o_nz` is used to specify the nonzero
          structure. The size of this array is equal to the number
          of local rows, i.e 'm'.

  Output Parameter:
. A - the matrix

  Options Database Keys:
+ -mat_no_inode                     - Do not use inodes
. -mat_inode_limit <limit>          - Sets inode limit (max limit=5)
- -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in `MatMult()` of sparse parallel matrices.
                                      See viewer types in manual of `MatView()`. Of them, ascii_matlab, draw or binary cause the `VecScatter`
                                      to be viewed as a matrix. Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one `MatMult()` call.

  Level: intermediate

  Notes:
  It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
  MatXXXXSetPreallocation() paradigm instead of this routine directly.
  [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]

  If the *_nnz parameter is given then the *_nz parameter is ignored

  The `m`,`n`,`M`,`N` parameters specify the size of the matrix, and its partitioning across
  processors, while `d_nz`,`d_nnz`,`o_nz`,`o_nnz` parameters specify the approximate
  storage requirements for this matrix.

  If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one
  processor then it must be used on all processors that share the object for
  that argument.

  If `m` and `n` are not `PETSC_DECIDE`, then the values determine the `PetscLayout` of the matrix and the ranges returned by
  `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, and `MatGetOwnershipRangesColumn()`.
  The user MUST specify either the local or global matrix dimensions
  (possibly both).

  The parallel matrix is partitioned across processors such that the
  first `m0` rows belong to process 0, the next `m1` rows belong to
  process 1, the next `m2` rows belong to process 2, etc., where
  `m0`, `m1`, `m2`... are the input parameter `m` on each MPI process. I.e., each MPI process stores
  values corresponding to [m x N] submatrix.

  The columns are logically partitioned with the n0 columns belonging
  to the 0th partition, the next n1 columns belonging to the next
  partition etc.. where n0,n1,n2... are the input parameter 'n'.

  The DIAGONAL portion of the local submatrix on any given processor
  is the submatrix corresponding to the rows and columns m,n
  corresponding to the given processor. i.e diagonal matrix on
  process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
  etc. The remaining portion of the local submatrix [m x (N-n)]
  constitutes the OFF-DIAGONAL portion. The example below better
  illustrates this concept. The two matrices, the DIAGONAL portion and
  the OFF-DIAGONAL portion are each stored as `MATSEQAIJ` matrices.

  For a square global matrix we define each processor's diagonal portion
  to be its local rows and the corresponding columns (a square submatrix);
  each processor's off-diagonal portion encompasses the remainder of the
  local matrix (a rectangular submatrix).

  If `o_nnz`, `d_nnz` are specified, then `o_nz`, and `d_nz` are ignored.

  When calling this routine with a single process communicator, a matrix of
  type `MATSEQAIJ` is returned.
If a matrix of type `MATMPIAIJ` is desired for this 4473 type of communicator, use the construction mechanism 4474 .vb 4475 MatCreate(..., &A); 4476 MatSetType(A, MATMPIAIJ); 4477 MatSetSizes(A, m, n, M, N); 4478 MatMPIAIJSetPreallocation(A, ...); 4479 .ve 4480 4481 By default, this format uses inodes (identical nodes) when possible. 4482 We search for consecutive rows with the same nonzero structure, thereby 4483 reusing matrix information to achieve increased efficiency. 4484 4485 Example Usage: 4486 Consider the following 8x8 matrix with 34 non-zero values, that is 4487 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4488 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4489 as follows 4490 4491 .vb 4492 1 2 0 | 0 3 0 | 0 4 4493 Proc0 0 5 6 | 7 0 0 | 8 0 4494 9 0 10 | 11 0 0 | 12 0 4495 ------------------------------------- 4496 13 0 14 | 15 16 17 | 0 0 4497 Proc1 0 18 0 | 19 20 21 | 0 0 4498 0 0 0 | 22 23 0 | 24 0 4499 ------------------------------------- 4500 Proc2 25 26 27 | 0 0 28 | 29 0 4501 30 0 0 | 31 32 33 | 0 34 4502 .ve 4503 4504 This can be represented as a collection of submatrices as 4505 4506 .vb 4507 A B C 4508 D E F 4509 G H I 4510 .ve 4511 4512 Where the submatrices A,B,C are owned by proc0, D,E,F are 4513 owned by proc1, G,H,I are owned by proc2. 4514 4515 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4516 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4517 The 'M','N' parameters are 8,8, and have the same values on all procs. 4518 4519 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4520 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4521 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4522 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4523 part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ` 4524 matrix, and [DF] as another SeqAIJ matrix. 
  When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
  allocated for every row of the local DIAGONAL submatrix, and `o_nz`
  storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
  One way to choose `d_nz` and `o_nz` is to use the maximum number of nonzeros over
  the local rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
  In this case, the values of `d_nz`,`o_nz` are
.vb
     proc0  dnz = 2, o_nz = 2
     proc1  dnz = 3, o_nz = 2
     proc2  dnz = 1, o_nz = 4
.ve
  We are allocating m*(`d_nz`+`o_nz`) storage locations for every proc. This
  translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
  for proc2, i.e., we are using 12+15+10=37 storage locations to store
  34 values.

  When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
  for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
  In the above case the values for `d_nnz`, `o_nnz` are
.vb
     proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2 d_nnz = [1,1]   and o_nnz = [4,4]
.ve
  Here the space allocated is the sum of all the above values, i.e., 34, and
  hence pre-allocation is perfect.
4552 4553 .seealso: [](ch_matrices), `Mat`, [Sparse Matrix Creation](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4554 `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()`, `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, 4555 `MatGetOwnershipRangesColumn()`, `PetscLayout` 4556 @*/ 4557 PetscErrorCode MatCreateAIJ(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A) 4558 { 4559 PetscMPIInt size; 4560 4561 PetscFunctionBegin; 4562 PetscCall(MatCreate(comm, A)); 4563 PetscCall(MatSetSizes(*A, m, n, M, N)); 4564 PetscCallMPI(MPI_Comm_size(comm, &size)); 4565 if (size > 1) { 4566 PetscCall(MatSetType(*A, MATMPIAIJ)); 4567 PetscCall(MatMPIAIJSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz)); 4568 } else { 4569 PetscCall(MatSetType(*A, MATSEQAIJ)); 4570 PetscCall(MatSeqAIJSetPreallocation(*A, d_nz, d_nnz)); 4571 } 4572 PetscFunctionReturn(PETSC_SUCCESS); 4573 } 4574 4575 /*MC 4576 MatMPIAIJGetSeqAIJF90 - Returns the local pieces of this distributed matrix 4577 4578 Synopsis: 4579 MatMPIAIJGetSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr) 4580 4581 Not Collective 4582 4583 Input Parameter: 4584 . A - the `MATMPIAIJ` matrix 4585 4586 Output Parameters: 4587 + Ad - the diagonal portion of the matrix 4588 . Ao - the off-diagonal portion of the matrix 4589 . 
colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
- ierr   - error code

  Level: advanced

  Note:
  Use `MatMPIAIJRestoreSeqAIJF90()` when you no longer need access to the matrices and `colmap`

.seealso: [](ch_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJRestoreSeqAIJF90()`
M*/

/*MC
  MatMPIAIJRestoreSeqAIJF90 - call after `MatMPIAIJGetSeqAIJF90()` when you no longer need access to the matrices and `colmap`

  Synopsis:
  MatMPIAIJRestoreSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr)

  Not Collective

  Input Parameters:
+ A      - the `MATMPIAIJ` matrix
. Ad     - the diagonal portion of the matrix
. Ao     - the off-diagonal portion of the matrix
. colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix
- ierr   - error code

  Level: advanced

.seealso: [](ch_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJGetSeqAIJF90()`
M*/

/*@C
  MatMPIAIJGetSeqAIJ - Returns the local pieces of this distributed matrix

  Not Collective

  Input Parameter:
. A - The `MATMPIAIJ` matrix

  Output Parameters:
+ Ad     - The local diagonal block as a `MATSEQAIJ` matrix
. Ao     - The local off-diagonal block as a `MATSEQAIJ` matrix
- colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix

  Level: intermediate

  Note:
  The rows in `Ad` and `Ao` are in [0, Nr), where Nr is the number of local rows on this process. The columns
  in `Ad` are in [0, Nc) where Nc is the number of local columns. The columns in `Ao` are in [0, Nco), where Nco is
  the number of nonzero columns in the local off-diagonal piece of the matrix `A`.
The array colmap maps these
  local column numbers to global column numbers in the original matrix.

  Fortran Notes:
  `MatMPIAIJGetSeqAIJ()` Fortran binding is deprecated (since PETSc 3.19), use `MatMPIAIJGetSeqAIJF90()`

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJF90()`, `MatMPIAIJRestoreSeqAIJF90()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATSEQAIJ`
@*/
PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[])
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  PetscBool   flg;

  PetscFunctionBegin;
  /* accept any matrix type whose name begins with MATMPIAIJ (derived types included) */
  PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &flg));
  PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIAIJ matrix as input");
  /* all outputs are borrowed references into the matrix; nothing is copied */
  if (Ad) *Ad = a->A;
  if (Ao) *Ao = a->B;
  if (colmap) *colmap = a->garray;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Stacks the rows of each rank's sequential matrix inmat into one parallel matrix
   on comm; n is this rank's local column count (or PETSC_DECIDE to have it split) */
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
{
  PetscInt     m, N, i, rstart, nnz, Ii;
  PetscInt    *indx;
  PetscScalar *values;
  MatType      rootType;

  PetscFunctionBegin;
  PetscCall(MatGetSize(inmat, &m, &N));
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    PetscInt *dnz, *onz, sum, bs, cbs;

    if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N));
    /* Check sum(n) = N */
    PetscCallMPI(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm));
    PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N);

    /* rstart = global index of this rank's first row (exclusive prefix sum of m) */
    PetscCallMPI(MPI_Scan(&m, &rstart, 1, MPIU_INT, MPI_SUM, comm));
    rstart -= m;

    MatPreallocateBegin(comm, m, n, dnz, onz);
    for (i = 0; i < m; i++) {
      PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, NULL));
      PetscCall(MatPreallocateSet(i + rstart, nnz, indx, dnz, onz));
      PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, NULL));
    }

    PetscCall(MatCreate(comm, outmat));
    PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
    PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
    PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
    PetscCall(MatGetRootType_Private(inmat, &rootType));
    PetscCall(MatSetType(*outmat, rootType));
    /* both preallocations are set; only the one matching the actual type takes effect */
    PetscCall(MatSeqAIJSetPreallocation(*outmat, 0, dnz));
    PetscCall(MatMPIAIJSetPreallocation(*outmat, 0, dnz, 0, onz));
    MatPreallocateEnd(dnz, onz);
    PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  }

  /* numeric phase: each rank inserts only its own (local) rows */
  PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));
  for (i = 0; i < m; i++) {
    PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, &values));
    Ii = i + rstart;
    PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES));
    PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, &values));
  }
  PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Destructor for the Mat_Merge_SeqsToMPI context that the symbolic phase attaches
   to the parallel matrix (queried as "MatMergeSeqsToMPI" in the numeric phase) */
static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void **data)
{
  Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)*data;

  PetscFunctionBegin;
  if (!merge) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCall(PetscFree(merge->id_r));
  PetscCall(PetscFree(merge->len_s));
  PetscCall(PetscFree(merge->len_r));
  PetscCall(PetscFree(merge->bi));
  PetscCall(PetscFree(merge->bj));
  PetscCall(PetscFree(merge->buf_ri[0]));
  PetscCall(PetscFree(merge->buf_ri));
  PetscCall(PetscFree(merge->buf_rj[0]));
  PetscCall(PetscFree(merge->buf_rj));
  PetscCall(PetscFree(merge->coi));
  PetscCall(PetscFree(merge->coj));
  PetscCall(PetscFree(merge->owners_co));
  PetscCall(PetscLayoutDestroy(&merge->rowmap));
  PetscCall(PetscFree(merge));
  PetscFunctionReturn(PETSC_SUCCESS);
}

#include <../src/mat/utils/freespace.h>
#include <petscbt.h>

/* Numeric phase: fill mpimat (created by MatCreateMPIAIJSumSeqAIJSymbolic()) by
   summing, row by row, this rank's contribution from seqmat with the contributions
   received from the other ranks */
PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat, Mat mpimat)
{
  MPI_Comm             comm;
  Mat_SeqAIJ          *a = (Mat_SeqAIJ *)seqmat->data;
  PetscMPIInt          size, rank, taga, *len_s;
  PetscInt             N = mpimat->cmap->N, i, j, *owners, *ai = a->i, *aj, m;
  PetscMPIInt          proc, k;
  PetscInt           **buf_ri, **buf_rj;
  PetscInt             anzi, *bj_i, *bi, *bj, arow, bnzi, nextaj;
  PetscInt             nrows, **buf_ri_k, **nextrow, **nextai;
  MPI_Request         *s_waits, *r_waits;
  MPI_Status          *status;
  const MatScalar     *aa, *a_a;
  MatScalar          **abuf_r, *ba_i;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer       container;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mpimat, &comm));
  PetscCall(PetscLogEventBegin(MAT_Seqstompinum, seqmat, 0, 0, 0));

  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));

  /* retrieve the merge context (row map, send/recv lengths, ij-structure) built by the symbolic phase */
  PetscCall(PetscObjectQuery((PetscObject)mpimat, "MatMergeSeqsToMPI", (PetscObject *)&container));
  PetscCheck(container, PetscObjectComm((PetscObject)mpimat), PETSC_ERR_PLIB, "Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic");
  PetscCall(PetscContainerGetPointer(container, (void **)&merge));
  PetscCall(MatSeqAIJGetArrayRead(seqmat, &a_a));
  aa = a_a;

  bi     = merge->bi;
  bj     = merge->bj;
  buf_ri = merge->buf_ri;
  buf_rj = merge->buf_rj;

  PetscCall(PetscMalloc1(size, &status));
  owners = merge->rowmap->range;
  len_s  = merge->len_s;

  /* send and recv matrix values */
  PetscCall(PetscObjectGetNewTag((PetscObject)mpimat, &taga));
  PetscCall(PetscPostIrecvScalar(comm, taga, merge->nrecv, merge->id_r, merge->len_r, &abuf_r, &r_waits));

  PetscCall(PetscMalloc1(merge->nsend + 1, &s_waits));
  for (proc = 0, k = 0; proc < size; proc++) {
    if (!len_s[proc]) continue;
    /* send the values of the rows owned by [proc], taken contiguously from seqmat */
    i = owners[proc];
    PetscCallMPI(MPIU_Isend(aa + ai[i], len_s[proc], MPIU_MATSCALAR, proc, taga, comm, s_waits + k));
    k++;
  }

  if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, r_waits, status));
  if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, s_waits, status));
  PetscCall(PetscFree(status));

  PetscCall(PetscFree(s_waits));
  PetscCall(PetscFree(r_waits));

  /* insert mat values of mpimat */
  PetscCall(PetscMalloc1(N, &ba_i));
  PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai));

  for (k = 0; k < merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */
  }

  /* set values of ba */
  m = merge->rowmap->n;
  for (i = 0; i < m; i++) {
    arow = owners[rank] + i; /* global row index of local row i */
    bj_i = bj + bi[i];       /* col indices of the i-th row of mpimat */
    bnzi = bi[i + 1] - bi[i];
    PetscCall(PetscArrayzero(ba_i, bnzi));

    /* add local non-zero vals of this proc's seqmat into ba */
    anzi   = ai[arow + 1] - ai[arow];
    aj     = a->j + ai[arow];
    aa     = a_a + ai[arow];
    nextaj = 0;
    /* merge: advance j through bj_i, accumulating where the column matches */
    for (j = 0; nextaj < anzi; j++) {
      if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
        ba_i[j] += aa[nextaj++];
      }
    }

    /* add received vals into ba */
    for (k = 0; k < merge->nrecv; k++) { /* k-th received message */
      /* i-th row */
      if (i == *nextrow[k]) {
        anzi   = *(nextai[k] + 1) - *nextai[k];
        aj     = buf_rj[k] + *nextai[k];
        aa     = abuf_r[k] + *nextai[k];
        nextaj = 0;
        for (j = 0; nextaj < anzi; j++) {
          if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */
            ba_i[j] += aa[nextaj++];
          }
        }
        nextrow[k]++;
        nextai[k]++;
      }
    }
    PetscCall(MatSetValues(mpimat, 1, &arow, bnzi, bj_i, ba_i, INSERT_VALUES));
  }
  PetscCall(MatSeqAIJRestoreArrayRead(seqmat, &a_a));
  PetscCall(MatAssemblyBegin(mpimat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mpimat, MAT_FINAL_ASSEMBLY));

  PetscCall(PetscFree(abuf_r[0]));
  PetscCall(PetscFree(abuf_r));
  PetscCall(PetscFree(ba_i));
  PetscCall(PetscFree3(buf_ri_k, nextrow, nextai));
  PetscCall(PetscLogEventEnd(MAT_Seqstompinum, seqmat, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, Mat *mpimat)
{
  Mat                  B_mpi;
  Mat_SeqAIJ          *a = (Mat_SeqAIJ *)seqmat->data;
  PetscMPIInt          size, rank, tagi, tagj, *len_s, *len_si, *len_ri;
  PetscInt           **buf_rj, **buf_ri, **buf_ri_k;
  PetscInt             M = seqmat->rmap->n, N = seqmat->cmap->n, i, *owners, *ai = a->i, *aj = a->j;
  PetscInt             len, *dnz, *onz, bs, cbs;
  PetscInt             k, anzi, *bi, *bj, *lnk, nlnk, arow, bnzi;
  PetscInt             nrows, *buf_s, *buf_si, *buf_si_i, **nextrow, **nextai;
  MPI_Request         *si_waits, *sj_waits, *ri_waits, *rj_waits;
  MPI_Status          *status;
  PetscFreeSpaceList   free_space = NULL, current_space = NULL;
  PetscBT              lnkbt;
  Mat_Merge_SeqsToMPI *merge;
  PetscContainer       container;

  PetscFunctionBegin;
  PetscCall(PetscLogEventBegin(MAT_Seqstompisym, seqmat, 0, 0, 0));

  /* make sure it is a PETSc comm */
  PetscCall(PetscCommDuplicate(comm, &comm, NULL));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));

  PetscCall(PetscNew(&merge));
  PetscCall(PetscMalloc1(size, &status));

  /* determine row ownership */
  PetscCall(PetscLayoutCreate(comm, &merge->rowmap));
  PetscCall(PetscLayoutSetLocalSize(merge->rowmap, m));
PetscCall(PetscLayoutSetSize(merge->rowmap, M)); 4889 PetscCall(PetscLayoutSetBlockSize(merge->rowmap, 1)); 4890 PetscCall(PetscLayoutSetUp(merge->rowmap)); 4891 PetscCall(PetscMalloc1(size, &len_si)); 4892 PetscCall(PetscMalloc1(size, &merge->len_s)); 4893 4894 m = merge->rowmap->n; 4895 owners = merge->rowmap->range; 4896 4897 /* determine the number of messages to send, their lengths */ 4898 len_s = merge->len_s; 4899 4900 len = 0; /* length of buf_si[] */ 4901 merge->nsend = 0; 4902 for (PetscMPIInt proc = 0; proc < size; proc++) { 4903 len_si[proc] = 0; 4904 if (proc == rank) { 4905 len_s[proc] = 0; 4906 } else { 4907 PetscCall(PetscMPIIntCast(owners[proc + 1] - owners[proc] + 1, &len_si[proc])); 4908 PetscCall(PetscMPIIntCast(ai[owners[proc + 1]] - ai[owners[proc]], &len_s[proc])); /* num of rows to be sent to [proc] */ 4909 } 4910 if (len_s[proc]) { 4911 merge->nsend++; 4912 nrows = 0; 4913 for (i = owners[proc]; i < owners[proc + 1]; i++) { 4914 if (ai[i + 1] > ai[i]) nrows++; 4915 } 4916 PetscCall(PetscMPIIntCast(2 * (nrows + 1), &len_si[proc])); 4917 len += len_si[proc]; 4918 } 4919 } 4920 4921 /* determine the number and length of messages to receive for ij-structure */ 4922 PetscCall(PetscGatherNumberOfMessages(comm, NULL, len_s, &merge->nrecv)); 4923 PetscCall(PetscGatherMessageLengths2(comm, merge->nsend, merge->nrecv, len_s, len_si, &merge->id_r, &merge->len_r, &len_ri)); 4924 4925 /* post the Irecv of j-structure */ 4926 PetscCall(PetscCommGetNewTag(comm, &tagj)); 4927 PetscCall(PetscPostIrecvInt(comm, tagj, merge->nrecv, merge->id_r, merge->len_r, &buf_rj, &rj_waits)); 4928 4929 /* post the Isend of j-structure */ 4930 PetscCall(PetscMalloc2(merge->nsend, &si_waits, merge->nsend, &sj_waits)); 4931 4932 for (PetscMPIInt proc = 0, k = 0; proc < size; proc++) { 4933 if (!len_s[proc]) continue; 4934 i = owners[proc]; 4935 PetscCallMPI(MPIU_Isend(aj + ai[i], len_s[proc], MPIU_INT, proc, tagj, comm, sj_waits + k)); 4936 k++; 4937 } 4938 4939 /* receives 
and sends of j-structure are complete */ 4940 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, rj_waits, status)); 4941 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, sj_waits, status)); 4942 4943 /* send and recv i-structure */ 4944 PetscCall(PetscCommGetNewTag(comm, &tagi)); 4945 PetscCall(PetscPostIrecvInt(comm, tagi, merge->nrecv, merge->id_r, len_ri, &buf_ri, &ri_waits)); 4946 4947 PetscCall(PetscMalloc1(len + 1, &buf_s)); 4948 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4949 for (PetscMPIInt proc = 0, k = 0; proc < size; proc++) { 4950 if (!len_s[proc]) continue; 4951 /* form outgoing message for i-structure: 4952 buf_si[0]: nrows to be sent 4953 [1:nrows]: row index (global) 4954 [nrows+1:2*nrows+1]: i-structure index 4955 */ 4956 nrows = len_si[proc] / 2 - 1; 4957 buf_si_i = buf_si + nrows + 1; 4958 buf_si[0] = nrows; 4959 buf_si_i[0] = 0; 4960 nrows = 0; 4961 for (i = owners[proc]; i < owners[proc + 1]; i++) { 4962 anzi = ai[i + 1] - ai[i]; 4963 if (anzi) { 4964 buf_si_i[nrows + 1] = buf_si_i[nrows] + anzi; /* i-structure */ 4965 buf_si[nrows + 1] = i - owners[proc]; /* local row index */ 4966 nrows++; 4967 } 4968 } 4969 PetscCallMPI(MPIU_Isend(buf_si, len_si[proc], MPIU_INT, proc, tagi, comm, si_waits + k)); 4970 k++; 4971 buf_si += len_si[proc]; 4972 } 4973 4974 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, ri_waits, status)); 4975 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, si_waits, status)); 4976 4977 PetscCall(PetscInfo(seqmat, "nsend: %d, nrecv: %d\n", merge->nsend, merge->nrecv)); 4978 for (i = 0; i < merge->nrecv; i++) PetscCall(PetscInfo(seqmat, "recv len_ri=%d, len_rj=%d from [%d]\n", len_ri[i], merge->len_r[i], merge->id_r[i])); 4979 4980 PetscCall(PetscFree(len_si)); 4981 PetscCall(PetscFree(len_ri)); 4982 PetscCall(PetscFree(rj_waits)); 4983 PetscCall(PetscFree2(si_waits, sj_waits)); 4984 PetscCall(PetscFree(ri_waits)); 4985 PetscCall(PetscFree(buf_s)); 4986 
PetscCall(PetscFree(status)); 4987 4988 /* compute a local seq matrix in each processor */ 4989 /* allocate bi array and free space for accumulating nonzero column info */ 4990 PetscCall(PetscMalloc1(m + 1, &bi)); 4991 bi[0] = 0; 4992 4993 /* create and initialize a linked list */ 4994 nlnk = N + 1; 4995 PetscCall(PetscLLCreate(N, N, nlnk, lnk, lnkbt)); 4996 4997 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4998 len = ai[owners[rank + 1]] - ai[owners[rank]]; 4999 PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2, len) + 1, &free_space)); 5000 5001 current_space = free_space; 5002 5003 /* determine symbolic info for each local row */ 5004 PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai)); 5005 5006 for (k = 0; k < merge->nrecv; k++) { 5007 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 5008 nrows = *buf_ri_k[k]; 5009 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 5010 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 5011 } 5012 5013 MatPreallocateBegin(comm, m, n, dnz, onz); 5014 len = 0; 5015 for (i = 0; i < m; i++) { 5016 bnzi = 0; 5017 /* add local non-zero cols of this proc's seqmat into lnk */ 5018 arow = owners[rank] + i; 5019 anzi = ai[arow + 1] - ai[arow]; 5020 aj = a->j + ai[arow]; 5021 PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt)); 5022 bnzi += nlnk; 5023 /* add received col data into lnk */ 5024 for (k = 0; k < merge->nrecv; k++) { /* k-th received message */ 5025 if (i == *nextrow[k]) { /* i-th row */ 5026 anzi = *(nextai[k] + 1) - *nextai[k]; 5027 aj = buf_rj[k] + *nextai[k]; 5028 PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt)); 5029 bnzi += nlnk; 5030 nextrow[k]++; 5031 nextai[k]++; 5032 } 5033 } 5034 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 5035 5036 /* if free space is not available, make more free space */ 5037 if (current_space->local_remaining < bnzi) 
PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi, current_space->total_array_size), ¤t_space)); 5038 /* copy data into free space, then initialize lnk */ 5039 PetscCall(PetscLLClean(N, N, bnzi, lnk, current_space->array, lnkbt)); 5040 PetscCall(MatPreallocateSet(i + owners[rank], bnzi, current_space->array, dnz, onz)); 5041 5042 current_space->array += bnzi; 5043 current_space->local_used += bnzi; 5044 current_space->local_remaining -= bnzi; 5045 5046 bi[i + 1] = bi[i] + bnzi; 5047 } 5048 5049 PetscCall(PetscFree3(buf_ri_k, nextrow, nextai)); 5050 5051 PetscCall(PetscMalloc1(bi[m] + 1, &bj)); 5052 PetscCall(PetscFreeSpaceContiguous(&free_space, bj)); 5053 PetscCall(PetscLLDestroy(lnk, lnkbt)); 5054 5055 /* create symbolic parallel matrix B_mpi */ 5056 PetscCall(MatGetBlockSizes(seqmat, &bs, &cbs)); 5057 PetscCall(MatCreate(comm, &B_mpi)); 5058 if (n == PETSC_DECIDE) { 5059 PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, N)); 5060 } else { 5061 PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 5062 } 5063 PetscCall(MatSetBlockSizes(B_mpi, bs, cbs)); 5064 PetscCall(MatSetType(B_mpi, MATMPIAIJ)); 5065 PetscCall(MatMPIAIJSetPreallocation(B_mpi, 0, dnz, 0, onz)); 5066 MatPreallocateEnd(dnz, onz); 5067 PetscCall(MatSetOption(B_mpi, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE)); 5068 5069 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 5070 B_mpi->assembled = PETSC_FALSE; 5071 merge->bi = bi; 5072 merge->bj = bj; 5073 merge->buf_ri = buf_ri; 5074 merge->buf_rj = buf_rj; 5075 merge->coi = NULL; 5076 merge->coj = NULL; 5077 merge->owners_co = NULL; 5078 5079 PetscCall(PetscCommDestroy(&comm)); 5080 5081 /* attach the supporting struct to B_mpi for reuse */ 5082 PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container)); 5083 PetscCall(PetscContainerSetPointer(container, merge)); 5084 PetscCall(PetscContainerSetCtxDestroy(container, MatDestroy_MPIAIJ_SeqsToMPI)); 5085 
PetscCall(PetscObjectCompose((PetscObject)B_mpi, "MatMergeSeqsToMPI", (PetscObject)container)); 5086 PetscCall(PetscContainerDestroy(&container)); 5087 *mpimat = B_mpi; 5088 5089 PetscCall(PetscLogEventEnd(MAT_Seqstompisym, seqmat, 0, 0, 0)); 5090 PetscFunctionReturn(PETSC_SUCCESS); 5091 } 5092 5093 /*@ 5094 MatCreateMPIAIJSumSeqAIJ - Creates a `MATMPIAIJ` matrix by adding sequential 5095 matrices from each processor 5096 5097 Collective 5098 5099 Input Parameters: 5100 + comm - the communicators the parallel matrix will live on 5101 . seqmat - the input sequential matrices 5102 . m - number of local rows (or `PETSC_DECIDE`) 5103 . n - number of local columns (or `PETSC_DECIDE`) 5104 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5105 5106 Output Parameter: 5107 . mpimat - the parallel matrix generated 5108 5109 Level: advanced 5110 5111 Note: 5112 The dimensions of the sequential matrix in each processor MUST be the same. 5113 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 5114 destroyed when `mpimat` is destroyed. Call `PetscObjectQuery()` to access `seqmat`. 
5115 5116 .seealso: [](ch_matrices), `Mat`, `MatCreateAIJ()` 5117 @*/ 5118 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, MatReuse scall, Mat *mpimat) 5119 { 5120 PetscMPIInt size; 5121 5122 PetscFunctionBegin; 5123 PetscCallMPI(MPI_Comm_size(comm, &size)); 5124 if (size == 1) { 5125 PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0)); 5126 if (scall == MAT_INITIAL_MATRIX) { 5127 PetscCall(MatDuplicate(seqmat, MAT_COPY_VALUES, mpimat)); 5128 } else { 5129 PetscCall(MatCopy(seqmat, *mpimat, SAME_NONZERO_PATTERN)); 5130 } 5131 PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0)); 5132 PetscFunctionReturn(PETSC_SUCCESS); 5133 } 5134 PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0)); 5135 if (scall == MAT_INITIAL_MATRIX) PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm, seqmat, m, n, mpimat)); 5136 PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat, *mpimat)); 5137 PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0)); 5138 PetscFunctionReturn(PETSC_SUCCESS); 5139 } 5140 5141 /*@ 5142 MatAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATAIJ` matrix. 5143 5144 Not Collective 5145 5146 Input Parameter: 5147 . A - the matrix 5148 5149 Output Parameter: 5150 . A_loc - the local sequential matrix generated 5151 5152 Level: developer 5153 5154 Notes: 5155 The matrix is created by taking `A`'s local rows and putting them into a sequential matrix 5156 with `mlocal` rows and `n` columns. Where `mlocal` is obtained with `MatGetLocalSize()` and 5157 `n` is the global column count obtained with `MatGetSize()` 5158 5159 In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix. 5160 5161 For parallel matrices this creates an entirely new matrix. If the matrix is sequential it merely increases the reference count. 
5162 5163 Destroy the matrix with `MatDestroy()` 5164 5165 .seealso: [](ch_matrices), `Mat`, `MatMPIAIJGetLocalMat()` 5166 @*/ 5167 PetscErrorCode MatAIJGetLocalMat(Mat A, Mat *A_loc) 5168 { 5169 PetscBool mpi; 5170 5171 PetscFunctionBegin; 5172 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &mpi)); 5173 if (mpi) { 5174 PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, A_loc)); 5175 } else { 5176 *A_loc = A; 5177 PetscCall(PetscObjectReference((PetscObject)*A_loc)); 5178 } 5179 PetscFunctionReturn(PETSC_SUCCESS); 5180 } 5181 5182 /*@ 5183 MatMPIAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix. 5184 5185 Not Collective 5186 5187 Input Parameters: 5188 + A - the matrix 5189 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5190 5191 Output Parameter: 5192 . A_loc - the local sequential matrix generated 5193 5194 Level: developer 5195 5196 Notes: 5197 The matrix is created by taking all `A`'s local rows and putting them into a sequential 5198 matrix with `mlocal` rows and `n` columns.`mlocal` is the row count obtained with 5199 `MatGetLocalSize()` and `n` is the global column count obtained with `MatGetSize()`. 5200 5201 In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix. 5202 5203 When `A` is sequential and `MAT_INITIAL_MATRIX` is requested, the matrix returned is the diagonal part of `A` (which contains the entire matrix), 5204 with its reference count increased by one. Hence changing values of `A_loc` changes `A`. If `MAT_REUSE_MATRIX` is requested on a sequential matrix 5205 then `MatCopy`(Adiag,*`A_loc`,`SAME_NONZERO_PATTERN`) is called to fill `A_loc`. Thus one can preallocate the appropriate sequential matrix `A_loc` 5206 and then call this routine with `MAT_REUSE_MATRIX`. In this case, one can modify the values of `A_loc` without affecting the original sequential matrix. 

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()`
@*/
PetscErrorCode MatMPIAIJGetLocalMat(Mat A, MatReuse scall, Mat *A_loc)
{
  Mat_MPIAIJ        *mpimat = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ        *mat, *a, *b;
  PetscInt          *ai, *aj, *bi, *bj, *cmap = mpimat->garray;
  const PetscScalar *aa, *ba, *aav, *bav;
  PetscScalar       *ca, *cam;
  PetscMPIInt        size;
  PetscInt           am = A->rmap->n, i, j, k, cstart = A->cmap->rstart;
  PetscInt          *ci, *cj, col, ncols_d, ncols_o, jo;
  PetscBool          match;

  PetscFunctionBegin;
  PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &match));
  PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    /* one rank: the diagonal block already IS the whole matrix */
    if (scall == MAT_INITIAL_MATRIX) {
      PetscCall(PetscObjectReference((PetscObject)mpimat->A));
      *A_loc = mpimat->A;
    } else if (scall == MAT_REUSE_MATRIX) {
      PetscCall(MatCopy(mpimat->A, *A_loc, SAME_NONZERO_PATTERN));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
  a  = (Mat_SeqAIJ *)mpimat->A->data;
  b  = (Mat_SeqAIJ *)mpimat->B->data;
  ai = a->i;
  aj = a->j;
  bi = b->i;
  bj = b->j;
  PetscCall(MatSeqAIJGetArrayRead(mpimat->A, &aav));
  PetscCall(MatSeqAIJGetArrayRead(mpimat->B, &bav));
  /* aa/ba are cursors advanced through the value arrays; aav/bav keep the originals for the Restore calls */
  aa = aav;
  ba = bav;
  if (scall == MAT_INITIAL_MATRIX) {
    /* row i of the result has all of A's diagonal-block entries plus all off-diagonal entries,
       merged in ascending global column order: off-diag cols < cstart, then diag cols, then the rest */
    PetscCall(PetscMalloc1(1 + am, &ci));
    ci[0] = 0;
    for (i = 0; i < am; i++) ci[i + 1] = ci[i] + (ai[i + 1] - ai[i]) + (bi[i + 1] - bi[i]);
    PetscCall(PetscMalloc1(1 + ci[am], &cj));
    PetscCall(PetscMalloc1(1 + ci[am], &ca));
    k = 0;
    for (i = 0; i < am; i++) {
      ncols_o = bi[i + 1] - bi[i];
      ncols_d = ai[i + 1] - ai[i];
      /* off-diagonal portion of A (global columns below the diagonal block) */
      for (jo = 0; jo < ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        cj[k] = col;
        bj++;
        ca[k++] = *ba++;
      }
      /* diagonal portion of A */
      for (j = 0; j < ncols_d; j++) {
        cj[k]   = cstart + *aj++;
        ca[k++] = *aa++;
      }
      /* off-diagonal portion of A (remaining global columns above the diagonal block) */
      for (j = jo; j < ncols_o; j++) {
        cj[k]   = cmap[*bj++];
        ca[k++] = *ba++;
      }
    }
    /* put together the new matrix */
    PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, A->cmap->N, ci, cj, ca, A_loc));
    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    mat          = (Mat_SeqAIJ *)(*A_loc)->data;
    mat->free_a  = PETSC_TRUE;
    mat->free_ij = PETSC_TRUE;
    mat->nonew   = 0;
  } else if (scall == MAT_REUSE_MATRIX) {
    /* pattern already in *A_loc; rewrite only the values in the same merged order as above */
    mat = (Mat_SeqAIJ *)(*A_loc)->data;
    ci  = mat->i;
    cj  = mat->j;
    PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &cam));
    for (i = 0; i < am; i++) {
      /* off-diagonal portion of A */
      ncols_o = bi[i + 1] - bi[i];
      for (jo = 0; jo < ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        *cam++ = *ba++;
        bj++;
      }
      /* diagonal portion of A */
      ncols_d = ai[i + 1] - ai[i];
      for (j = 0; j < ncols_d; j++) *cam++ = *aa++;
      /* off-diagonal portion of A */
      for (j = jo; j < ncols_o; j++) {
        *cam++ = *ba++;
        bj++;
      }
    }
    PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &cam));
  } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
  PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A, &aav));
  PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B, &bav));
  PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMPIAIJGetLocalMatMerge - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by
taking all its local rows and putting them into a sequential matrix with 5317 mlocal rows and n columns. Where n is the sum of the number of columns of the diagonal and off-diagonal part 5318 5319 Not Collective 5320 5321 Input Parameters: 5322 + A - the matrix 5323 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5324 5325 Output Parameters: 5326 + glob - sequential `IS` with global indices associated with the columns of the local sequential matrix generated (can be `NULL`) 5327 - A_loc - the local sequential matrix generated 5328 5329 Level: developer 5330 5331 Note: 5332 This is different from `MatMPIAIJGetLocalMat()` since the first columns in the returning matrix are those associated with the diagonal 5333 part, then those associated with the off-diagonal part (in its local ordering) 5334 5335 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()` 5336 @*/ 5337 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A, MatReuse scall, IS *glob, Mat *A_loc) 5338 { 5339 Mat Ao, Ad; 5340 const PetscInt *cmap; 5341 PetscMPIInt size; 5342 PetscErrorCode (*f)(Mat, MatReuse, IS *, Mat *); 5343 5344 PetscFunctionBegin; 5345 PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &cmap)); 5346 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 5347 if (size == 1) { 5348 if (scall == MAT_INITIAL_MATRIX) { 5349 PetscCall(PetscObjectReference((PetscObject)Ad)); 5350 *A_loc = Ad; 5351 } else if (scall == MAT_REUSE_MATRIX) { 5352 PetscCall(MatCopy(Ad, *A_loc, SAME_NONZERO_PATTERN)); 5353 } 5354 if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad), Ad->cmap->n, Ad->cmap->rstart, 1, glob)); 5355 PetscFunctionReturn(PETSC_SUCCESS); 5356 } 5357 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJGetLocalMatMerge_C", &f)); 5358 PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0)); 5359 if (f) { 5360 PetscCall((*f)(A, scall, glob, A_loc)); 5361 } else { 5362 Mat_SeqAIJ 
*a = (Mat_SeqAIJ *)Ad->data; 5363 Mat_SeqAIJ *b = (Mat_SeqAIJ *)Ao->data; 5364 Mat_SeqAIJ *c; 5365 PetscInt *ai = a->i, *aj = a->j; 5366 PetscInt *bi = b->i, *bj = b->j; 5367 PetscInt *ci, *cj; 5368 const PetscScalar *aa, *ba; 5369 PetscScalar *ca; 5370 PetscInt i, j, am, dn, on; 5371 5372 PetscCall(MatGetLocalSize(Ad, &am, &dn)); 5373 PetscCall(MatGetLocalSize(Ao, NULL, &on)); 5374 PetscCall(MatSeqAIJGetArrayRead(Ad, &aa)); 5375 PetscCall(MatSeqAIJGetArrayRead(Ao, &ba)); 5376 if (scall == MAT_INITIAL_MATRIX) { 5377 PetscInt k; 5378 PetscCall(PetscMalloc1(1 + am, &ci)); 5379 PetscCall(PetscMalloc1(ai[am] + bi[am], &cj)); 5380 PetscCall(PetscMalloc1(ai[am] + bi[am], &ca)); 5381 ci[0] = 0; 5382 for (i = 0, k = 0; i < am; i++) { 5383 const PetscInt ncols_o = bi[i + 1] - bi[i]; 5384 const PetscInt ncols_d = ai[i + 1] - ai[i]; 5385 ci[i + 1] = ci[i] + ncols_o + ncols_d; 5386 /* diagonal portion of A */ 5387 for (j = 0; j < ncols_d; j++, k++) { 5388 cj[k] = *aj++; 5389 ca[k] = *aa++; 5390 } 5391 /* off-diagonal portion of A */ 5392 for (j = 0; j < ncols_o; j++, k++) { 5393 cj[k] = dn + *bj++; 5394 ca[k] = *ba++; 5395 } 5396 } 5397 /* put together the new matrix */ 5398 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, dn + on, ci, cj, ca, A_loc)); 5399 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5400 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5401 c = (Mat_SeqAIJ *)(*A_loc)->data; 5402 c->free_a = PETSC_TRUE; 5403 c->free_ij = PETSC_TRUE; 5404 c->nonew = 0; 5405 PetscCall(MatSetType(*A_loc, ((PetscObject)Ad)->type_name)); 5406 } else if (scall == MAT_REUSE_MATRIX) { 5407 PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &ca)); 5408 for (i = 0; i < am; i++) { 5409 const PetscInt ncols_d = ai[i + 1] - ai[i]; 5410 const PetscInt ncols_o = bi[i + 1] - bi[i]; 5411 /* diagonal portion of A */ 5412 for (j = 0; j < ncols_d; j++) *ca++ = *aa++; 5413 /* off-diagonal portion of A */ 5414 for (j = 0; j < ncols_o; j++) *ca++ = *ba++; 5415 } 5416 PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &ca)); 5417 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall); 5418 PetscCall(MatSeqAIJRestoreArrayRead(Ad, &aa)); 5419 PetscCall(MatSeqAIJRestoreArrayRead(Ao, &aa)); 5420 if (glob) { 5421 PetscInt cst, *gidx; 5422 5423 PetscCall(MatGetOwnershipRangeColumn(A, &cst, NULL)); 5424 PetscCall(PetscMalloc1(dn + on, &gidx)); 5425 for (i = 0; i < dn; i++) gidx[i] = cst + i; 5426 for (i = 0; i < on; i++) gidx[i + dn] = cmap[i]; 5427 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad), dn + on, gidx, PETSC_OWN_POINTER, glob)); 5428 } 5429 } 5430 PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0)); 5431 PetscFunctionReturn(PETSC_SUCCESS); 5432 } 5433 5434 /*@C 5435 MatMPIAIJGetLocalMatCondensed - Creates a `MATSEQAIJ` matrix from an `MATMPIAIJ` matrix by taking all its local rows and NON-ZERO columns 5436 5437 Not Collective 5438 5439 Input Parameters: 5440 + A - the matrix 5441 . scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5442 . row - index set of rows to extract (or `NULL`) 5443 - col - index set of columns to extract (or `NULL`) 5444 5445 Output Parameter: 5446 . 
A_loc - the local sequential matrix generated

  Level: developer

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`
@*/
PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A, MatReuse scall, IS *row, IS *col, Mat *A_loc)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  PetscInt    i, start, end, ncols, nzA, nzB, *cmap, imark, *idx;
  IS          isrowa, iscola;
  Mat        *aloc;
  PetscBool   match;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &match));
  PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
  PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed, A, 0, 0, 0));
  if (!row) {
    /* default row set: all locally owned rows */
    start = A->rmap->rstart;
    end   = A->rmap->rend;
    PetscCall(ISCreateStride(PETSC_COMM_SELF, end - start, start, 1, &isrowa));
  } else {
    isrowa = *row;
  }
  if (!col) {
    /* default column set: the nonzero columns in ascending global order —
       off-diagonal columns below the diagonal block, the diagonal block, then the rest
       (relies on a->garray being sorted) */
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    PetscCall(PetscMalloc1(nzA + nzB, &idx));
    ncols = 0;
    for (i = 0; i < nzB; i++) {
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i = 0; i < nzA; i++) idx[ncols++] = start + i;
    for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i];
    PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &iscola));
  } else {
    iscola = *col;
  }
  if (scall != MAT_INITIAL_MATRIX) {
    /* MatCreateSubMatrices() expects an array of matrices on reuse */
    PetscCall(PetscMalloc1(1, &aloc));
    aloc[0] = *A_loc;
  }
  PetscCall(MatCreateSubMatrices(A, 1, &isrowa, &iscola, scall, &aloc));
  if (!col) { /* attach global id of condensed columns */
    PetscCall(PetscObjectCompose((PetscObject)aloc[0], "_petsc_GetLocalMatCondensed_iscol", (PetscObject)iscola));
  }
  *A_loc = aloc[0];
  PetscCall(PetscFree(aloc));
  if (!row) PetscCall(ISDestroy(&isrowa));
  if (!col) PetscCall(ISDestroy(&iscola)); /* the compose above still holds a reference */
  PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed, A, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
 * Create a sequential AIJ matrix based on row indices. a whole column is extracted once a row is matched.
 * Row could be local or remote.The routine is designed to be scalable in memory so that nothing is based
 * on a global size.
 * */
static PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P, IS rows, Mat *P_oth)
{
  Mat_MPIAIJ             *p  = (Mat_MPIAIJ *)P->data;
  Mat_SeqAIJ             *pd = (Mat_SeqAIJ *)p->A->data, *po = (Mat_SeqAIJ *)p->B->data, *p_oth;
  PetscInt                plocalsize, nrows, *ilocal, *oilocal, i, lidx, *nrcols, *nlcols, ncol;
  PetscMPIInt             owner;
  PetscSFNode            *iremote, *oiremote;
  const PetscInt         *lrowindices;
  PetscSF                 sf, osf;
  PetscInt                pcstart, *roffsets, *loffsets, *pnnz, j;
  PetscInt                ontotalcols, dntotalcols, ntotalcols, nout;
  MPI_Comm                comm;
  ISLocalToGlobalMapping  mapping;
  const PetscScalar      *pd_a, *po_a;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)P, &comm));
  /* plocalsize is the number of roots
   * nrows is the number of leaves
   * */
  PetscCall(MatGetLocalSize(P, &plocalsize, NULL));
  PetscCall(ISGetLocalSize(rows, &nrows));
  PetscCall(PetscCalloc1(nrows, &iremote));
  PetscCall(ISGetIndices(rows, &lrowindices));
  for (i = 0; i < nrows; i++) {
    /* Find a remote index and an owner for a row
     * The row could be local or remote
     * */
    owner = 0;
    lidx  = 0;
    PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, &lidx));
    iremote[i].index = lidx;
    iremote[i].rank  = owner;
  }
  /* Create SF to communicate how many nonzero columns for each row */
  PetscCall(PetscSFCreate(comm, &sf));
  /* SF will figure out the number of nonzero columns for each row, and their
   * offsets
   * */
  PetscCall(PetscSFSetGraph(sf, plocalsize, nrows, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFSetUp(sf));

  PetscCall(PetscCalloc1(2 * (plocalsize + 1), &roffsets));
  PetscCall(PetscCalloc1(2 * plocalsize, &nrcols));
  PetscCall(PetscCalloc1(nrows, &pnnz));
  roffsets[0] = 0;
  roffsets[1] = 0;
  for (i = 0; i < plocalsize; i++) {
    /* diagonal */
    nrcols[i * 2 + 0] = pd->i[i + 1] - pd->i[i];
    /* off-diagonal */
    nrcols[i * 2 + 1] = po->i[i + 1] - po->i[i];
    /* compute offsets so that we relative location for each row */
    roffsets[(i + 1) * 2 + 0] = roffsets[i * 2 + 0] + nrcols[i * 2 + 0];
    roffsets[(i + 1) * 2 + 1] = roffsets[i * 2 + 1] + nrcols[i * 2 + 1];
  }
  PetscCall(PetscCalloc1(2 * nrows, &nlcols));
  PetscCall(PetscCalloc1(2 * nrows, &loffsets));
  /* 'r' means root, and 'l' means leaf */
  PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));
  PetscCall(PetscFree(roffsets));
  PetscCall(PetscFree(nrcols));
  dntotalcols = 0;
  ontotalcols = 0;
  ncol        = 0;
  for (i = 0; i < nrows; i++) {
    pnnz[i] = nlcols[i * 2 + 0] + nlcols[i * 2 + 1];
    ncol    = PetscMax(pnnz[i], ncol);
    /* diagonal */
    dntotalcols += nlcols[i * 2 + 0];
    /* off-diagonal */
    ontotalcols += nlcols[i * 2 + 1];
  }
  /* We do not need to figure the right number of columns
   * since all the calculations will be done by going through the raw data
   * */
  PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, nrows, ncol, 0, pnnz, P_oth));
  PetscCall(MatSetUp(*P_oth));
  PetscCall(PetscFree(pnnz));
  p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
  /* diagonal */
  PetscCall(PetscCalloc1(dntotalcols, &iremote));
  /* off-diagonal */
  PetscCall(PetscCalloc1(ontotalcols, &oiremote));
  /* diagonal */
  PetscCall(PetscCalloc1(dntotalcols, &ilocal));
  /* off-diagonal */
  PetscCall(PetscCalloc1(ontotalcols, &oilocal));
  dntotalcols = 0;
  ontotalcols = 0;
  ntotalcols  = 0;
  /* build per-entry SF graphs: one leaf per nonzero coming from the diag block,
     one per nonzero coming from the off-diag block, interleaved so each row of
     P_oth receives its diag entries followed by its off-diag entries */
  for (i = 0; i < nrows; i++) {
    owner = 0;
    PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, NULL));
    /* Set iremote for diag matrix */
    for (j = 0; j < nlcols[i * 2 + 0]; j++) {
      iremote[dntotalcols].index = loffsets[i * 2 + 0] + j;
      iremote[dntotalcols].rank  = owner;
      /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */
      ilocal[dntotalcols++] = ntotalcols++;
    }
    /* off-diagonal */
    for (j = 0; j < nlcols[i * 2 + 1]; j++) {
      oiremote[ontotalcols].index = loffsets[i * 2 + 1] + j;
      oiremote[ontotalcols].rank  = owner;
      oilocal[ontotalcols++]      = ntotalcols++;
    }
  }
  PetscCall(ISRestoreIndices(rows, &lrowindices));
  PetscCall(PetscFree(loffsets));
  PetscCall(PetscFree(nlcols));
  PetscCall(PetscSFCreate(comm, &sf));
  /* P serves as roots and P_oth is leaves
   * Diag matrix
   * */
  PetscCall(PetscSFSetGraph(sf, pd->i[plocalsize], dntotalcols, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFSetUp(sf));

  PetscCall(PetscSFCreate(comm, &osf));
  /* off-diagonal */
  PetscCall(PetscSFSetGraph(osf, po->i[plocalsize], ontotalcols, oilocal, PETSC_OWN_POINTER, oiremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(osf));
  PetscCall(PetscSFSetUp(osf));
  PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
  PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
  /* operate on the matrix internal data to save memory */
  PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
  PetscCall(MatGetOwnershipRangeColumn(P, &pcstart, NULL));
  /* Convert to global indices for diag matrix
     NOTE: pd->j / po->j are mutated in place and converted back below; the
     exact Begin/End ordering of the overlapped broadcasts is load-bearing */
  for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] += pcstart;
  PetscCall(PetscSFBcastBegin(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
  /* We want P_oth store global indices */
  PetscCall(ISLocalToGlobalMappingCreate(comm, 1, p->B->cmap->n, p->garray, PETSC_COPY_VALUES, &mapping));
  /* Use memory scalable approach */
  PetscCall(ISLocalToGlobalMappingSetType(mapping, ISLOCALTOGLOBALMAPPINGHASH));
  PetscCall(ISLocalToGlobalMappingApply(mapping, po->i[plocalsize], po->j, po->j));
  PetscCall(PetscSFBcastBegin(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
  /* Convert back to local indices */
  for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] -= pcstart;
  PetscCall(PetscSFBcastEnd(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
  nout = 0;
  PetscCall(ISGlobalToLocalMappingApply(mapping, IS_GTOLM_DROP, po->i[plocalsize], po->j, &nout, po->j));
  PetscCheck(nout == po->i[plocalsize], comm, PETSC_ERR_ARG_INCOMP, "n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ", po->i[plocalsize], nout);
  PetscCall(ISLocalToGlobalMappingDestroy(&mapping));
  /* Exchange values */
  PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
  PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
  PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
  /* Stop PETSc from shrinking memory */
  for (i = 0; i < nrows; i++) p_oth->ilen[i] = p_oth->imax[i];
  PetscCall(MatAssemblyBegin(*P_oth, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*P_oth, MAT_FINAL_ASSEMBLY));
  /* Attach PetscSF objects to P_oth so that we can reuse it later */
  PetscCall(PetscObjectCompose((PetscObject)*P_oth, "diagsf", (PetscObject)sf));
  PetscCall(PetscObjectCompose((PetscObject)*P_oth, "offdiagsf", (PetscObject)osf));
  PetscCall(PetscSFDestroy(&sf));
  PetscCall(PetscSFDestroy(&osf));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
 * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
 * This supports MPIAIJ and MAIJ
 * */
PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A, Mat P, PetscInt dof, MatReuse reuse, Mat *P_oth)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *p = (Mat_MPIAIJ *)P->data;
  Mat_SeqAIJ *p_oth;
  IS          rows, map;
  PetscHMapI  hamp;
  PetscInt    i, htsize, *rowindices, off, *mapping, key, count;
  MPI_Comm    comm;
  PetscSF     sf, osf;
  PetscBool   has;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, P, 0, 0));
  /* If it is the first time, create an index set of off-diag nonzero columns of A,
   * and then create a submatrix (that often is an overlapping matrix)
   * */
  if (reuse == MAT_INITIAL_MATRIX) {
    /* Use a hash table to figure out unique keys */
    PetscCall(PetscHMapICreateWithSize(a->B->cmap->n, &hamp));
    PetscCall(PetscCalloc1(a->B->cmap->n, &mapping));
    count = 0;
    /* Assume that a->g is sorted, otherwise the following does not make sense */
    for (i = 0; i < a->B->cmap->n; i++) {
      key = a->garray[i] / dof; /* dof > 1 collapses MAIJ component columns onto one node */
      PetscCall(PetscHMapIHas(hamp, key, &has));
      if (!has) {
        mapping[i] = count;
        PetscCall(PetscHMapISet(hamp, key, count++));
      } else {
        /* Current 'i' has the same value the previous step */
        mapping[i] = count - 1;
      }
    }
    PetscCall(ISCreateGeneral(comm, a->B->cmap->n, mapping, PETSC_OWN_POINTER, &map));
PetscCall(PetscHMapIGetSize(hamp, &htsize)); 5719 PetscCheck(htsize == count, comm, PETSC_ERR_ARG_INCOMP, " Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT, htsize, count); 5720 PetscCall(PetscCalloc1(htsize, &rowindices)); 5721 off = 0; 5722 PetscCall(PetscHMapIGetKeys(hamp, &off, rowindices)); 5723 PetscCall(PetscHMapIDestroy(&hamp)); 5724 PetscCall(PetscSortInt(htsize, rowindices)); 5725 PetscCall(ISCreateGeneral(comm, htsize, rowindices, PETSC_OWN_POINTER, &rows)); 5726 /* In case, the matrix was already created but users want to recreate the matrix */ 5727 PetscCall(MatDestroy(P_oth)); 5728 PetscCall(MatCreateSeqSubMatrixWithRows_Private(P, rows, P_oth)); 5729 PetscCall(PetscObjectCompose((PetscObject)*P_oth, "aoffdiagtopothmapping", (PetscObject)map)); 5730 PetscCall(ISDestroy(&map)); 5731 PetscCall(ISDestroy(&rows)); 5732 } else if (reuse == MAT_REUSE_MATRIX) { 5733 /* If matrix was already created, we simply update values using SF objects 5734 * that as attached to the matrix earlier. 
5735 */ 5736 const PetscScalar *pd_a, *po_a; 5737 5738 PetscCall(PetscObjectQuery((PetscObject)*P_oth, "diagsf", (PetscObject *)&sf)); 5739 PetscCall(PetscObjectQuery((PetscObject)*P_oth, "offdiagsf", (PetscObject *)&osf)); 5740 PetscCheck(sf && osf, comm, PETSC_ERR_ARG_NULL, "Matrix is not initialized yet"); 5741 p_oth = (Mat_SeqAIJ *)(*P_oth)->data; 5742 /* Update values in place */ 5743 PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a)); 5744 PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a)); 5745 PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5746 PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5747 PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5748 PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5749 PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a)); 5750 PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a)); 5751 } else SETERRQ(comm, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown reuse type"); 5752 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, P, 0, 0)); 5753 PetscFunctionReturn(PETSC_SUCCESS); 5754 } 5755 5756 /*@C 5757 MatGetBrowsOfAcols - Returns `IS` that contain rows of `B` that equal to nonzero columns of local `A` 5758 5759 Collective 5760 5761 Input Parameters: 5762 + A - the first matrix in `MATMPIAIJ` format 5763 . B - the second matrix in `MATMPIAIJ` format 5764 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5765 5766 Output Parameters: 5767 + rowb - On input index sets of rows of B to extract (or `NULL`), modified on output 5768 . 
colb - On input index sets of columns of B to extract (or `NULL`), modified on output
- B_seq - the sequential matrix generated

  Level: developer

.seealso: `Mat`, `MATMPIAIJ`, `IS`, `MatReuse`
@*/
PetscErrorCode MatGetBrowsOfAcols(Mat A, Mat B, MatReuse scall, IS *rowb, IS *colb, Mat *B_seq)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  PetscInt   *idx, i, start, ncols, nzA, nzB, *cmap, imark;
  IS          isrowb, iscolb;
  Mat        *bseq = NULL;

  PetscFunctionBegin;
  PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
             A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
  PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols, A, B, 0, 0));

  if (scall == MAT_INITIAL_MATRIX) {
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    /* Gather the wanted global rows in ascending order: off-diagonal columns below the
       diagonal block, then the local (diagonal-block) rows, then the remaining columns */
    PetscCall(PetscMalloc1(nzA + nzB, &idx));
    ncols = 0;
    for (i = 0; i < nzB; i++) { /* row < local row index */
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i = 0; i < nzA; i++) idx[ncols++] = start + i;   /* local rows */
    for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
    PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &isrowb));
    PetscCall(ISCreateStride(PETSC_COMM_SELF, B->cmap->N, 0, 1, &iscolb)); /* all columns of B */
  } else {
    PetscCheck(rowb && colb, PETSC_COMM_SELF, PETSC_ERR_SUP, "IS rowb and colb must be provided for MAT_REUSE_MATRIX");
    isrowb = *rowb;
    iscolb = *colb;
    /* MatCreateSubMatrices() expects an array of Mat on reuse; wrap the existing B_seq */
    PetscCall(PetscMalloc1(1, &bseq));
    bseq[0] = *B_seq;
  }
  PetscCall(MatCreateSubMatrices(B, 1, &isrowb, &iscolb, scall, &bseq));
  *B_seq = bseq[0];
  PetscCall(PetscFree(bseq));
  /* Hand the index sets back to the caller when requested; otherwise clean them up */
  if (!rowb) {
    PetscCall(ISDestroy(&isrowb));
  } else {
    *rowb = isrowb;
  }
  if (!colb) {
    PetscCall(ISDestroy(&iscolb));
  } else {
    *colb = iscolb;
  }
  PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols, A, B, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatGetBrowsOfAoCols_MPIAIJ - Creates a `MATSEQAIJ` matrix by taking rows of B that equal to nonzero columns
  of the OFF-DIAGONAL portion of local A

  Collective

  Input Parameters:
+ A,B - the matrices in `MATMPIAIJ` format
- scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`

  Output Parameter:
+ startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
. startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
. bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
- B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N

  Developer Note:
  This directly accesses information inside the VecScatter associated with the matrix-vector product
  for this matrix. This is not desirable..
5846 5847 Level: developer 5848 5849 */ 5850 5851 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A, Mat B, MatReuse scall, PetscInt **startsj_s, PetscInt **startsj_r, MatScalar **bufa_ptr, Mat *B_oth) 5852 { 5853 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5854 VecScatter ctx; 5855 MPI_Comm comm; 5856 const PetscMPIInt *rprocs, *sprocs; 5857 PetscMPIInt nrecvs, nsends; 5858 const PetscInt *srow, *rstarts, *sstarts; 5859 PetscInt *rowlen, *bufj, *bufJ, ncols = 0, aBn = a->B->cmap->n, row, *b_othi, *b_othj, *rvalues = NULL, *svalues = NULL, *cols, sbs, rbs; 5860 PetscInt i, j, k = 0, l, ll, nrows, *rstartsj = NULL, *sstartsj, len; 5861 PetscScalar *b_otha, *bufa, *bufA, *vals = NULL; 5862 MPI_Request *reqs = NULL, *rwaits = NULL, *swaits = NULL; 5863 PetscMPIInt size, tag, rank, nreqs; 5864 5865 PetscFunctionBegin; 5866 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 5867 PetscCallMPI(MPI_Comm_size(comm, &size)); 5868 5869 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 5870 A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 5871 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, B, 0, 0)); 5872 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 5873 5874 if (size == 1) { 5875 startsj_s = NULL; 5876 bufa_ptr = NULL; 5877 *B_oth = NULL; 5878 PetscFunctionReturn(PETSC_SUCCESS); 5879 } 5880 5881 ctx = a->Mvctx; 5882 tag = ((PetscObject)ctx)->tag; 5883 5884 PetscCall(VecScatterGetRemote_Private(ctx, PETSC_TRUE /*send*/, &nsends, &sstarts, &srow, &sprocs, &sbs)); 5885 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5886 PetscCall(VecScatterGetRemoteOrdered_Private(ctx, PETSC_FALSE /*recv*/, &nrecvs, &rstarts, NULL /*indices not needed*/, &rprocs, &rbs)); 5887 
PetscCall(PetscMPIIntCast(nsends + nrecvs, &nreqs)); 5888 PetscCall(PetscMalloc1(nreqs, &reqs)); 5889 rwaits = reqs; 5890 swaits = PetscSafePointerPlusOffset(reqs, nrecvs); 5891 5892 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5893 if (scall == MAT_INITIAL_MATRIX) { 5894 /* i-array */ 5895 /* post receives */ 5896 if (nrecvs) PetscCall(PetscMalloc1(rbs * (rstarts[nrecvs] - rstarts[0]), &rvalues)); /* rstarts can be NULL when nrecvs=0 */ 5897 for (i = 0; i < nrecvs; i++) { 5898 rowlen = rvalues + rstarts[i] * rbs; 5899 nrows = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of indices to be received */ 5900 PetscCallMPI(MPIU_Irecv(rowlen, nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i)); 5901 } 5902 5903 /* pack the outgoing message */ 5904 PetscCall(PetscMalloc2(nsends + 1, &sstartsj, nrecvs + 1, &rstartsj)); 5905 5906 sstartsj[0] = 0; 5907 rstartsj[0] = 0; 5908 len = 0; /* total length of j or a array to be sent */ 5909 if (nsends) { 5910 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5911 PetscCall(PetscMalloc1(sbs * (sstarts[nsends] - sstarts[0]), &svalues)); 5912 } 5913 for (i = 0; i < nsends; i++) { 5914 rowlen = svalues + (sstarts[i] - sstarts[0]) * sbs; 5915 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5916 for (j = 0; j < nrows; j++) { 5917 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5918 for (l = 0; l < sbs; l++) { 5919 PetscCall(MatGetRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); /* rowlength */ 5920 5921 rowlen[j * sbs + l] = ncols; 5922 5923 len += ncols; 5924 PetscCall(MatRestoreRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); 5925 } 5926 k++; 5927 } 5928 PetscCallMPI(MPIU_Isend(rowlen, nrows * sbs, MPIU_INT, sprocs[i], tag, comm, swaits + i)); 5929 5930 sstartsj[i + 1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5931 } 5932 /* recvs and sends of i-array are completed */ 5933 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5934 
PetscCall(PetscFree(svalues)); 5935 5936 /* allocate buffers for sending j and a arrays */ 5937 PetscCall(PetscMalloc1(len + 1, &bufj)); 5938 PetscCall(PetscMalloc1(len + 1, &bufa)); 5939 5940 /* create i-array of B_oth */ 5941 PetscCall(PetscMalloc1(aBn + 2, &b_othi)); 5942 5943 b_othi[0] = 0; 5944 len = 0; /* total length of j or a array to be received */ 5945 k = 0; 5946 for (i = 0; i < nrecvs; i++) { 5947 rowlen = rvalues + (rstarts[i] - rstarts[0]) * rbs; 5948 nrows = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of rows to be received */ 5949 for (j = 0; j < nrows; j++) { 5950 b_othi[k + 1] = b_othi[k] + rowlen[j]; 5951 PetscCall(PetscIntSumError(rowlen[j], len, &len)); 5952 k++; 5953 } 5954 rstartsj[i + 1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5955 } 5956 PetscCall(PetscFree(rvalues)); 5957 5958 /* allocate space for j and a arrays of B_oth */ 5959 PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_othj)); 5960 PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_otha)); 5961 5962 /* j-array */ 5963 /* post receives of j-array */ 5964 for (i = 0; i < nrecvs; i++) { 5965 nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */ 5966 PetscCallMPI(MPIU_Irecv(b_othj + rstartsj[i], nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i)); 5967 } 5968 5969 /* pack the outgoing message j-array */ 5970 if (nsends) k = sstarts[0]; 5971 for (i = 0; i < nsends; i++) { 5972 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5973 bufJ = bufj + sstartsj[i]; 5974 for (j = 0; j < nrows; j++) { 5975 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5976 for (ll = 0; ll < sbs; ll++) { 5977 PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL)); 5978 for (l = 0; l < ncols; l++) *bufJ++ = cols[l]; 5979 PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL)); 5980 } 5981 } 5982 PetscCallMPI(MPIU_Isend(bufj + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_INT, sprocs[i], tag, comm, swaits + i)); 5983 } 5984 5985 /* 
recvs and sends of j-array are completed */ 5986 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5987 } else if (scall == MAT_REUSE_MATRIX) { 5988 sstartsj = *startsj_s; 5989 rstartsj = *startsj_r; 5990 bufa = *bufa_ptr; 5991 PetscCall(MatSeqAIJGetArrayWrite(*B_oth, &b_otha)); 5992 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container"); 5993 5994 /* a-array */ 5995 /* post receives of a-array */ 5996 for (i = 0; i < nrecvs; i++) { 5997 nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */ 5998 PetscCallMPI(MPIU_Irecv(b_otha + rstartsj[i], nrows, MPIU_SCALAR, rprocs[i], tag, comm, rwaits + i)); 5999 } 6000 6001 /* pack the outgoing message a-array */ 6002 if (nsends) k = sstarts[0]; 6003 for (i = 0; i < nsends; i++) { 6004 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 6005 bufA = bufa + sstartsj[i]; 6006 for (j = 0; j < nrows; j++) { 6007 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 6008 for (ll = 0; ll < sbs; ll++) { 6009 PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals)); 6010 for (l = 0; l < ncols; l++) *bufA++ = vals[l]; 6011 PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals)); 6012 } 6013 } 6014 PetscCallMPI(MPIU_Isend(bufa + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_SCALAR, sprocs[i], tag, comm, swaits + i)); 6015 } 6016 /* recvs and sends of a-array are completed */ 6017 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 6018 PetscCall(PetscFree(reqs)); 6019 6020 if (scall == MAT_INITIAL_MATRIX) { 6021 Mat_SeqAIJ *b_oth; 6022 6023 /* put together the new matrix */ 6024 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, aBn, B->cmap->N, b_othi, b_othj, b_otha, B_oth)); 6025 6026 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 6027 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 6028 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 6029 b_oth->free_a = PETSC_TRUE; 6030 b_oth->free_ij = PETSC_TRUE; 6031 b_oth->nonew = 0; 6032 6033 PetscCall(PetscFree(bufj)); 6034 if (!startsj_s || !bufa_ptr) { 6035 PetscCall(PetscFree2(sstartsj, rstartsj)); 6036 PetscCall(PetscFree(bufa_ptr)); 6037 } else { 6038 *startsj_s = sstartsj; 6039 *startsj_r = rstartsj; 6040 *bufa_ptr = bufa; 6041 } 6042 } else if (scall == MAT_REUSE_MATRIX) { 6043 PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth, &b_otha)); 6044 } 6045 6046 PetscCall(VecScatterRestoreRemote_Private(ctx, PETSC_TRUE, &nsends, &sstarts, &srow, &sprocs, &sbs)); 6047 PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx, PETSC_FALSE, &nrecvs, &rstarts, NULL, &rprocs, &rbs)); 6048 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, B, 0, 0)); 6049 PetscFunctionReturn(PETSC_SUCCESS); 6050 } 6051 6052 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat, MatType, MatReuse, Mat *); 6053 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat, MatType, MatReuse, Mat *); 6054 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat, MatType, MatReuse, Mat *); 6055 #if defined(PETSC_HAVE_MKL_SPARSE) 6056 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat, MatType, MatReuse, Mat *); 6057 #endif 6058 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat, MatType, MatReuse, Mat *); 6059 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *); 6060 #if defined(PETSC_HAVE_ELEMENTAL) 6061 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat, MatType, MatReuse, Mat *); 6062 #endif 6063 #if defined(PETSC_HAVE_SCALAPACK) 6064 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *); 6065 #endif 6066 #if defined(PETSC_HAVE_HYPRE) 6067 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *); 6068 #endif 6069 #if defined(PETSC_HAVE_CUDA) 6070 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat, MatType, 
MatReuse, Mat *);
#endif
#if defined(PETSC_HAVE_HIP)
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJHIPSPARSE(Mat, MatType, MatReuse, Mat *);
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat, MatType, MatReuse, Mat *);
#endif
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat, MatType, MatReuse, Mat *);
PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);

/*
    Computes C = A * B as (B'*A')' since computing B*A directly is untenable

           n                p                  p
        [     ]          [     ]           [       ]
      m [  A  ]  *    n  [  B  ]   =    m  [   C   ]
        [     ]          [     ]           [       ]

*/
static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A, Mat B, Mat C)
{
  Mat At, Bt, Ct;

  PetscFunctionBegin;
  PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &At));
  PetscCall(MatTranspose(B, MAT_INITIAL_MATRIX, &Bt));
  PetscCall(MatMatMult(Bt, At, MAT_INITIAL_MATRIX, PETSC_CURRENT, &Ct));
  PetscCall(MatDestroy(&At));
  PetscCall(MatDestroy(&Bt));
  /* C = Ct^T, reusing C's already-set-up structure */
  PetscCall(MatTransposeSetPrecursor(Ct, C));
  PetscCall(MatTranspose(Ct, MAT_REUSE_MATRIX, &C));
  PetscCall(MatDestroy(&Ct));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Symbolic phase for C = A*B (A dense, B AIJ): size/type C and install the numeric routine.
   The 'fill' estimate is not needed by the transpose-based numeric algorithm above. */
static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A, Mat B, PetscReal fill, Mat C)
{
  PetscBool cisdense;

  PetscFunctionBegin;
  PetscCheck(A->cmap->n == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT, A->cmap->n, B->rmap->n);
  PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N));
  PetscCall(MatSetBlockSizesFromMats(C, A, B));
  /* Keep C's type if it is already some dense flavor; otherwise inherit A's type */
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, MATMPIDENSEHIP, ""));
  if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
  PetscCall(MatSetUp(C));

  C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Wire up the AB product for the MPIDense * MPIAIJ combination */
static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
{
  Mat_Product *product = C->product;
  Mat          A = product->A, B = product->B;

  PetscFunctionBegin;
  PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
             A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
  C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
  C->ops->productsymbolic = MatProductSymbolic_AB;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Dispatch on product type; only MATPRODUCT_AB is supported for MPIDense * MPIAIJ */
PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
{
  Mat_Product *product = C->product;

  PetscFunctionBegin;
  if (product->type == MATPRODUCT_AB) PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix

  Input Parameters:

    j1,rowBegin1,rowEnd1,jmap1: describe the first set of nonzeros (Set1)
    j2,rowBegin2,rowEnd2,jmap2: describe the second set of nonzeros (Set2)

    mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat

    For Set1, j1[] contains column indices of the nonzeros.
    For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k
    respectively (note rowEnd1[k] is not necessarily equal to rowBegin1[k+1]). Indices in this range of j1[] are sorted,
    but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1.

    Similar for Set2.

  This routine merges the two sets of nonzeros row by row and removes repeats.

  Output Parameters: (memory is allocated by the caller)

    i[],j[]: the CSR of the merged matrix, which has m rows.
    imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix.
    imap2[]: similar to imap1[], but for Set2.
    Note we order nonzeros row-by-row and from left to right.
*/
static PetscErrorCode MatMergeEntries_Internal(Mat mat, const PetscInt j1[], const PetscInt j2[], const PetscCount rowBegin1[], const PetscCount rowEnd1[], const PetscCount rowBegin2[], const PetscCount rowEnd2[], const PetscCount jmap1[], const PetscCount jmap2[], PetscCount imap1[], PetscCount imap2[], PetscInt i[], PetscInt j[])
{
  PetscInt   r, m; /* Row index of mat */
  PetscCount t, t1, t2, b1, e1, b2, e2;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(mat, &m, NULL));
  t1 = t2 = t = 0; /* Count unique nonzeros in Set1, Set2 and the merged matrix, respectively */
  i[0] = 0;
  for (r = 0; r < m; r++) { /* Do row by row merging */
    b1 = rowBegin1[r];
    e1 = rowEnd1[r];
    b2 = rowBegin2[r];
    e2 = rowEnd2[r];
    /* Classic two-way merge of the two sorted-with-repeats ranges of row r */
    while (b1 < e1 && b2 < e2) {
      if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */
        j[t]      = j1[b1];
        imap1[t1] = t;
        imap2[t2] = t;
        b1 += jmap1[t1 + 1] - jmap1[t1]; /* Jump to next unique local nonzero */
        b2 += jmap2[t2 + 1] - jmap2[t2]; /* Jump to next unique remote nonzero */
        t1++;
        t2++;
        t++;
      } else if (j1[b1] < j2[b2]) {
        j[t]      = j1[b1];
        imap1[t1] = t;
        b1 += jmap1[t1 + 1] - jmap1[t1];
        t1++;
        t++;
      } else {
        j[t]      = j2[b2];
        imap2[t2] = t;
        b2 += jmap2[t2 + 1] - jmap2[t2];
        t2++;
        t++;
      }
    }
    /* Merge the remaining in either j1[] or j2[] */
    while (b1 < e1) {
      j[t]      = j1[b1];
      imap1[t1] = t;
      b1 += jmap1[t1 + 1] - jmap1[t1];
      t1++;
      t++;
    }
    while (b2 < e2) {
      j[t]      = j2[b2];
      imap2[t2] = t;
      b2 += jmap2[t2 + 1] - jmap2[t2];
      t2++;
      t++;
    }
    PetscCall(PetscIntCast(t, i + r + 1)); /* Close row r of the merged CSR */
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block

  Input Parameters:
    mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m.
    n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[]
      respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n.

      i[] is already sorted, but within a row, j[] is not sorted and might have repeats.
      i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting.

  Output Parameters:
    j[],perm[]: the routine needs to sort j[] within each row along with perm[].
    rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller.
      They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block,
      and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block.

    Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine.
      Atot: number of entries belonging to the diagonal block.
      Annz: number of unique nonzeros belonging to the diagonal block.
      Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count
        repeats (i.e., same 'i,j' pair).
      Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block.
More precisely, Ajmap[t+1] - Ajmap[t]
        is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0.

      Atot: number of entries belonging to the diagonal block
      Annz: number of unique nonzeros belonging to the diagonal block.

    Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block.

    Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1().
*/
static PetscErrorCode MatSplitEntries_Internal(Mat mat, PetscCount n, const PetscInt i[], PetscInt j[], PetscCount perm[], PetscCount rowBegin[], PetscCount rowMid[], PetscCount rowEnd[], PetscCount *Atot_, PetscCount **Aperm_, PetscCount *Annz_, PetscCount **Ajmap_, PetscCount *Btot_, PetscCount **Bperm_, PetscCount *Bnnz_, PetscCount **Bjmap_)
{
  PetscInt    cstart, cend, rstart, rend, row, col;
  PetscCount  Atot = 0, Btot = 0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */
  PetscCount  Annz = 0, Bnnz = 0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */
  PetscCount  k, m, p, q, r, s, mid;
  PetscCount *Aperm, *Bperm, *Ajmap, *Bjmap;

  PetscFunctionBegin;
  PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
  PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
  m = rend - rstart;

  /* Skip negative rows (entries flagged to be ignored come first since i[] is sorted) */
  for (k = 0; k < n; k++)
    if (i[k] >= 0) break;

  /* Process [k,n): sort and partition each local row into diag and offdiag portions,
     fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz.
  */
  while (k < n) {
    row = i[k];
    /* Entries in [k,s) are in one row. Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */
    for (s = k; s < n; s++)
      if (i[s] != row) break;

    /* Shift diag columns to range of [-PETSC_INT_MAX, -1] */
    for (p = k; p < s; p++) {
      if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_INT_MAX;
      /* NOTE(review): the bound uses <= mat->cmap->N; a valid global column index should satisfy
         j[p] < mat->cmap->N — confirm whether N itself is intentionally tolerated here */
      else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index %" PetscInt_FMT " is out of range", j[p]);
    }
    PetscCall(PetscSortIntWithCountArray(s - k, j + k, perm + k));
    PetscCall(PetscSortedIntUpperBound(j, k, s, -1, &mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */
    rowBegin[row - rstart] = k;
    rowMid[row - rstart]   = mid;
    rowEnd[row - rstart]   = s;

    /* Count nonzeros of this diag/offdiag row, which might have repeats */
    Atot += mid - k;
    Btot += s - mid;

    /* Count unique nonzeros of this diag row */
    for (p = k; p < mid;) {
      col = j[p];
      do {
        j[p] += PETSC_INT_MAX; /* Revert the modified diagonal indices */
        p++;
      } while (p < mid && j[p] == col);
      Annz++;
    }

    /* Count unique nonzeros of this offdiag row */
    for (p = mid; p < s;) {
      col = j[p];
      do {
        p++;
      } while (p < s && j[p] == col);
      Bnnz++;
    }
    k = s;
  }

  /* Allocation according to Atot, Btot, Annz, Bnnz */
  PetscCall(PetscMalloc1(Atot, &Aperm));
  PetscCall(PetscMalloc1(Btot, &Bperm));
  PetscCall(PetscMalloc1(Annz + 1, &Ajmap));
  PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap));

  /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */
  Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0; /* Counters restart from zero for the second pass */
  for (r = 0; r < m; r++) {
    k   = rowBegin[r];
    mid = rowMid[r];
    s   = rowEnd[r];
    PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Aperm, Atot), PetscSafePointerPlusOffset(perm, k), mid - k));
    PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Bperm, Btot), PetscSafePointerPlusOffset(perm, mid), s - mid));
    Atot += mid - k;
    Btot += s - mid;

    /* Scan column indices in this row and find out how many repeats each unique nonzero has */
    for (p = k; p < mid;) {
      col = j[p];
      q   = p;
      do {
        p++;
      } while (p < mid && j[p] == col);
      Ajmap[Annz + 1] = Ajmap[Annz] + (p - q);
      Annz++;
    }

    for (p = mid; p < s;) {
      col = j[p];
      q   = p;
      do {
        p++;
      } while (p < s && j[p] == col);
      Bjmap[Bnnz + 1] = Bjmap[Bnnz] + (p - q);
      Bnnz++;
    }
  }
  /* Output */
  *Aperm_ = Aperm;
  *Annz_  = Annz;
  *Atot_  = Atot;
  *Ajmap_ = Ajmap;
  *Bperm_ = Bperm;
  *Bnnz_  = Bnnz;
  *Btot_  = Btot;
  *Bjmap_ = Bjmap;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix

  Input Parameters:
    nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[]
    nnz:  number of unique nonzeros in the merged matrix
    imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix
    jmap[nnz1+1]: i-th nonzero in the set has jmap[i+1] - jmap[i] repeats in the set

  Output Parameter: (memory is allocated by the caller)
    jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set

  Example:
    nnz1 = 4
    nnz  = 6
    imap = [1,3,4,5]
    jmap = [0,3,5,6,7]
  then,
    jmap_new = [0,0,3,3,5,6,7]
*/
static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1, PetscCount nnz, const PetscCount imap[], const PetscCount jmap[], PetscCount jmap_new[])
{
  PetscCount k, p;

  PetscFunctionBegin;
  jmap_new[0] = 0;
  p = nnz;                          /* p loops over jmap_new[] backwards */
  for (k = nnz1 - 1; k >= 0; k--) { /* k loops over imap[] */
    for (; p >
imap[k]; p--) jmap_new[p] = jmap[k + 1];
  }
  /* Positions before the first mapped nonzero get the base offset jmap[0] */
  for (; p >= 0; p--) jmap_new[p] = jmap[0];
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Destructor callback for the MatCOOStruct_MPIAIJ stashed on the matrix in a PetscContainer:
   releases the communication SF and every per-block permutation/offset array */
static PetscErrorCode MatCOOStructDestroy_MPIAIJ(void **data)
{
  MatCOOStruct_MPIAIJ *coo = (MatCOOStruct_MPIAIJ *)*data;

  PetscFunctionBegin;
  PetscCall(PetscSFDestroy(&coo->sf));
  PetscCall(PetscFree(coo->Aperm1));
  PetscCall(PetscFree(coo->Bperm1));
  PetscCall(PetscFree(coo->Ajmap1));
  PetscCall(PetscFree(coo->Bjmap1));
  PetscCall(PetscFree(coo->Aimap2));
  PetscCall(PetscFree(coo->Bimap2));
  PetscCall(PetscFree(coo->Aperm2));
  PetscCall(PetscFree(coo->Bperm2));
  PetscCall(PetscFree(coo->Ajmap2));
  PetscCall(PetscFree(coo->Bjmap2));
  PetscCall(PetscFree(coo->Cperm1));
  PetscCall(PetscFree2(coo->sendbuf, coo->recvbuf));
  PetscCall(PetscFree(coo));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Preallocate for COO assembly: sorts/splits the (coo_i[],coo_j[]) entries, ships off-process
   entries to their owning rank, and caches the resulting structures for MatSetValuesCOO() */
PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, PetscInt coo_i[], PetscInt coo_j[])
{
  MPI_Comm             comm;
  PetscMPIInt          rank, size;
  PetscInt             m, n, M, N, rstart, rend, cstart, cend; /* Sizes, indices of row/col, therefore with type PetscInt */
  PetscCount           k, p, q, rem;                           /* Loop variables over coo arrays */
  Mat_MPIAIJ          *mpiaij = (Mat_MPIAIJ *)mat->data;
  PetscContainer       container;
  MatCOOStruct_MPIAIJ *coo;

  PetscFunctionBegin;
  /* The nonzero pattern is being redefined: drop cached column map, scatter and work vector */
  PetscCall(PetscFree(mpiaij->garray));
  PetscCall(VecDestroy(&mpiaij->lvec));
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&mpiaij->colmap));
#else
  PetscCall(PetscFree(mpiaij->colmap));
#endif
  PetscCall(VecScatterDestroy(&mpiaij->Mvctx));
  mat->assembled     = PETSC_FALSE;
  mat->was_assembled = PETSC_FALSE;

  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
PetscCall(PetscLayoutSetUp(mat->rmap)); 6455 PetscCall(PetscLayoutSetUp(mat->cmap)); 6456 PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend)); 6457 PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend)); 6458 PetscCall(MatGetLocalSize(mat, &m, &n)); 6459 PetscCall(MatGetSize(mat, &M, &N)); 6460 6461 /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */ 6462 /* entries come first, then local rows, then remote rows. */ 6463 PetscCount n1 = coo_n, *perm1; 6464 PetscInt *i1 = coo_i, *j1 = coo_j; 6465 6466 PetscCall(PetscMalloc1(n1, &perm1)); 6467 for (k = 0; k < n1; k++) perm1[k] = k; 6468 6469 /* Manipulate indices so that entries with negative row or col indices will have smallest 6470 row indices, local entries will have greater but negative row indices, and remote entries 6471 will have positive row indices. 6472 */ 6473 for (k = 0; k < n1; k++) { 6474 if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_INT_MIN; /* e.g., -2^31, minimal to move them ahead */ 6475 else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_INT_MAX; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_INT_MAX, -1] */ 6476 else { 6477 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_USER_INPUT, "MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows"); 6478 if (mpiaij->donotstash) i1[k] = PETSC_INT_MIN; /* Ignore offproc entries as if they had negative indices */ 6479 } 6480 } 6481 6482 /* Sort by row; after that, [0,k) have ignored entries, [k,rem) have local rows and [rem,n1) have remote rows */ 6483 PetscCall(PetscSortIntWithIntCountArrayPair(n1, i1, j1, perm1)); 6484 6485 /* Advance k to the first entry we need to take care of */ 6486 for (k = 0; k < n1; k++) 6487 if (i1[k] > PETSC_INT_MIN) break; 6488 PetscCount i1start = k; 6489 6490 PetscCall(PetscSortedIntUpperBound(i1, k, n1, rend - 1 - PETSC_INT_MAX, &rem)); /* rem is upper bound of the last local row */ 6491 for (; k < rem; k++) i1[k] += PETSC_INT_MAX; /* Revert row indices of 
local rows*/ 6492 6493 /* Send remote rows to their owner */ 6494 /* Find which rows should be sent to which remote ranks*/ 6495 PetscInt nsend = 0; /* Number of MPI ranks to send data to */ 6496 PetscMPIInt *sendto; /* [nsend], storing remote ranks */ 6497 PetscInt *nentries; /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */ 6498 const PetscInt *ranges; 6499 PetscInt maxNsend = size >= 128 ? 128 : size; /* Assume max 128 neighbors; realloc when needed */ 6500 6501 PetscCall(PetscLayoutGetRanges(mat->rmap, &ranges)); 6502 PetscCall(PetscMalloc2(maxNsend, &sendto, maxNsend, &nentries)); 6503 for (k = rem; k < n1;) { 6504 PetscMPIInt owner; 6505 PetscInt firstRow, lastRow; 6506 6507 /* Locate a row range */ 6508 firstRow = i1[k]; /* first row of this owner */ 6509 PetscCall(PetscLayoutFindOwner(mat->rmap, firstRow, &owner)); 6510 lastRow = ranges[owner + 1] - 1; /* last row of this owner */ 6511 6512 /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */ 6513 PetscCall(PetscSortedIntUpperBound(i1, k, n1, lastRow, &p)); 6514 6515 /* All entries in [k,p) belong to this remote owner */ 6516 if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */ 6517 PetscMPIInt *sendto2; 6518 PetscInt *nentries2; 6519 PetscInt maxNsend2 = (maxNsend <= size / 2) ? 
maxNsend * 2 : size; 6520 6521 PetscCall(PetscMalloc2(maxNsend2, &sendto2, maxNsend2, &nentries2)); 6522 PetscCall(PetscArraycpy(sendto2, sendto, maxNsend)); 6523 PetscCall(PetscArraycpy(nentries2, nentries2, maxNsend + 1)); 6524 PetscCall(PetscFree2(sendto, nentries2)); 6525 sendto = sendto2; 6526 nentries = nentries2; 6527 maxNsend = maxNsend2; 6528 } 6529 sendto[nsend] = owner; 6530 PetscCall(PetscIntCast(p - k, &nentries[nsend])); 6531 nsend++; 6532 k = p; 6533 } 6534 6535 /* Build 1st SF to know offsets on remote to send data */ 6536 PetscSF sf1; 6537 PetscInt nroots = 1, nroots2 = 0; 6538 PetscInt nleaves = nsend, nleaves2 = 0; 6539 PetscInt *offsets; 6540 PetscSFNode *iremote; 6541 6542 PetscCall(PetscSFCreate(comm, &sf1)); 6543 PetscCall(PetscMalloc1(nsend, &iremote)); 6544 PetscCall(PetscMalloc1(nsend, &offsets)); 6545 for (k = 0; k < nsend; k++) { 6546 iremote[k].rank = sendto[k]; 6547 iremote[k].index = 0; 6548 nleaves2 += nentries[k]; 6549 PetscCheck(nleaves2 >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF leaves is too large for PetscInt"); 6550 } 6551 PetscCall(PetscSFSetGraph(sf1, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 6552 PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1, MPIU_INT, PETSC_MEMTYPE_HOST, &nroots2 /*rootdata*/, PETSC_MEMTYPE_HOST, nentries /*leafdata*/, PETSC_MEMTYPE_HOST, offsets /*leafupdate*/, MPI_SUM)); 6553 PetscCall(PetscSFFetchAndOpEnd(sf1, MPIU_INT, &nroots2, nentries, offsets, MPI_SUM)); /* Would nroots2 overflow, we check offsets[] below */ 6554 PetscCall(PetscSFDestroy(&sf1)); 6555 PetscAssert(nleaves2 == n1 - rem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT, nleaves2, n1 - rem); 6556 6557 /* Build 2nd SF to send remote COOs to their owner */ 6558 PetscSF sf2; 6559 nroots = nroots2; 6560 nleaves = nleaves2; 6561 PetscCall(PetscSFCreate(comm, &sf2)); 6562 PetscCall(PetscSFSetFromOptions(sf2)); 6563 
PetscCall(PetscMalloc1(nleaves, &iremote)); 6564 p = 0; 6565 for (k = 0; k < nsend; k++) { 6566 PetscCheck(offsets[k] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF roots is too large for PetscInt"); 6567 for (q = 0; q < nentries[k]; q++, p++) { 6568 iremote[p].rank = sendto[k]; 6569 PetscCall(PetscIntCast(offsets[k] + q, &iremote[p].index)); 6570 } 6571 } 6572 PetscCall(PetscSFSetGraph(sf2, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 6573 6574 /* Send the remote COOs to their owner */ 6575 PetscInt n2 = nroots, *i2, *j2; /* Buffers for received COOs from other ranks, along with a permutation array */ 6576 PetscCount *perm2; /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */ 6577 PetscCall(PetscMalloc3(n2, &i2, n2, &j2, n2, &perm2)); 6578 PetscAssert(rem == 0 || i1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null"); 6579 PetscAssert(rem == 0 || j1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null"); 6580 PetscInt *i1prem = PetscSafePointerPlusOffset(i1, rem); 6581 PetscInt *j1prem = PetscSafePointerPlusOffset(j1, rem); 6582 PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, i1prem, PETSC_MEMTYPE_HOST, i2, MPI_REPLACE)); 6583 PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, i1prem, i2, MPI_REPLACE)); 6584 PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, j1prem, PETSC_MEMTYPE_HOST, j2, MPI_REPLACE)); 6585 PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, j1prem, j2, MPI_REPLACE)); 6586 6587 PetscCall(PetscFree(offsets)); 6588 PetscCall(PetscFree2(sendto, nentries)); 6589 6590 /* Sort received COOs by row along with the permutation array */ 6591 for (k = 0; k < n2; k++) perm2[k] = k; 6592 PetscCall(PetscSortIntWithIntCountArrayPair(n2, i2, j2, perm2)); 6593 6594 /* sf2 only sends contiguous leafdata to contiguous rootdata. 
We record the permutation which will be used to fill leafdata */ 6595 PetscCount *Cperm1; 6596 PetscAssert(rem == 0 || perm1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null"); 6597 PetscCount *perm1prem = PetscSafePointerPlusOffset(perm1, rem); 6598 PetscCall(PetscMalloc1(nleaves, &Cperm1)); 6599 PetscCall(PetscArraycpy(Cperm1, perm1prem, nleaves)); 6600 6601 /* Support for HYPRE matrices, kind of a hack. 6602 Swap min column with diagonal so that diagonal values will go first */ 6603 PetscBool hypre; 6604 PetscCall(PetscStrcmp("_internal_COO_mat_for_hypre", ((PetscObject)mat)->name, &hypre)); 6605 if (hypre) { 6606 PetscInt *minj; 6607 PetscBT hasdiag; 6608 6609 PetscCall(PetscBTCreate(m, &hasdiag)); 6610 PetscCall(PetscMalloc1(m, &minj)); 6611 for (k = 0; k < m; k++) minj[k] = PETSC_INT_MAX; 6612 for (k = i1start; k < rem; k++) { 6613 if (j1[k] < cstart || j1[k] >= cend) continue; 6614 const PetscInt rindex = i1[k] - rstart; 6615 if ((j1[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex)); 6616 minj[rindex] = PetscMin(minj[rindex], j1[k]); 6617 } 6618 for (k = 0; k < n2; k++) { 6619 if (j2[k] < cstart || j2[k] >= cend) continue; 6620 const PetscInt rindex = i2[k] - rstart; 6621 if ((j2[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex)); 6622 minj[rindex] = PetscMin(minj[rindex], j2[k]); 6623 } 6624 for (k = i1start; k < rem; k++) { 6625 const PetscInt rindex = i1[k] - rstart; 6626 if (j1[k] < cstart || j1[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue; 6627 if (j1[k] == minj[rindex]) j1[k] = i1[k] + (cstart - rstart); 6628 else if ((j1[k] - cstart) == rindex) j1[k] = minj[rindex]; 6629 } 6630 for (k = 0; k < n2; k++) { 6631 const PetscInt rindex = i2[k] - rstart; 6632 if (j2[k] < cstart || j2[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue; 6633 if (j2[k] == minj[rindex]) j2[k] = i2[k] + (cstart - rstart); 6634 else if ((j2[k] - cstart) == rindex) j2[k] = minj[rindex]; 6635 } 6636 
PetscCall(PetscBTDestroy(&hasdiag)); 6637 PetscCall(PetscFree(minj)); 6638 } 6639 6640 /* Split local COOs and received COOs into diag/offdiag portions */ 6641 PetscCount *rowBegin1, *rowMid1, *rowEnd1; 6642 PetscCount *Ajmap1, *Aperm1, *Bjmap1, *Bperm1; 6643 PetscCount Annz1, Bnnz1, Atot1, Btot1; 6644 PetscCount *rowBegin2, *rowMid2, *rowEnd2; 6645 PetscCount *Ajmap2, *Aperm2, *Bjmap2, *Bperm2; 6646 PetscCount Annz2, Bnnz2, Atot2, Btot2; 6647 6648 PetscCall(PetscCalloc3(m, &rowBegin1, m, &rowMid1, m, &rowEnd1)); 6649 PetscCall(PetscCalloc3(m, &rowBegin2, m, &rowMid2, m, &rowEnd2)); 6650 PetscCall(MatSplitEntries_Internal(mat, rem, i1, j1, perm1, rowBegin1, rowMid1, rowEnd1, &Atot1, &Aperm1, &Annz1, &Ajmap1, &Btot1, &Bperm1, &Bnnz1, &Bjmap1)); 6651 PetscCall(MatSplitEntries_Internal(mat, n2, i2, j2, perm2, rowBegin2, rowMid2, rowEnd2, &Atot2, &Aperm2, &Annz2, &Ajmap2, &Btot2, &Bperm2, &Bnnz2, &Bjmap2)); 6652 6653 /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */ 6654 PetscInt *Ai, *Bi; 6655 PetscInt *Aj, *Bj; 6656 6657 PetscCall(PetscMalloc1(m + 1, &Ai)); 6658 PetscCall(PetscMalloc1(m + 1, &Bi)); 6659 PetscCall(PetscMalloc1(Annz1 + Annz2, &Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */ 6660 PetscCall(PetscMalloc1(Bnnz1 + Bnnz2, &Bj)); 6661 6662 PetscCount *Aimap1, *Bimap1, *Aimap2, *Bimap2; 6663 PetscCall(PetscMalloc1(Annz1, &Aimap1)); 6664 PetscCall(PetscMalloc1(Bnnz1, &Bimap1)); 6665 PetscCall(PetscMalloc1(Annz2, &Aimap2)); 6666 PetscCall(PetscMalloc1(Bnnz2, &Bimap2)); 6667 6668 PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowBegin1, rowMid1, rowBegin2, rowMid2, Ajmap1, Ajmap2, Aimap1, Aimap2, Ai, Aj)); 6669 PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowMid1, rowEnd1, rowMid2, rowEnd2, Bjmap1, Bjmap2, Bimap1, Bimap2, Bi, Bj)); 6670 6671 /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we */ 6672 /* expect nonzeros in A/B most likely have local contributing 
entries */ 6673 PetscInt Annz = Ai[m]; 6674 PetscInt Bnnz = Bi[m]; 6675 PetscCount *Ajmap1_new, *Bjmap1_new; 6676 6677 PetscCall(PetscMalloc1(Annz + 1, &Ajmap1_new)); 6678 PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap1_new)); 6679 6680 PetscCall(ExpandJmap_Internal(Annz1, Annz, Aimap1, Ajmap1, Ajmap1_new)); 6681 PetscCall(ExpandJmap_Internal(Bnnz1, Bnnz, Bimap1, Bjmap1, Bjmap1_new)); 6682 6683 PetscCall(PetscFree(Aimap1)); 6684 PetscCall(PetscFree(Ajmap1)); 6685 PetscCall(PetscFree(Bimap1)); 6686 PetscCall(PetscFree(Bjmap1)); 6687 PetscCall(PetscFree3(rowBegin1, rowMid1, rowEnd1)); 6688 PetscCall(PetscFree3(rowBegin2, rowMid2, rowEnd2)); 6689 PetscCall(PetscFree(perm1)); 6690 PetscCall(PetscFree3(i2, j2, perm2)); 6691 6692 Ajmap1 = Ajmap1_new; 6693 Bjmap1 = Bjmap1_new; 6694 6695 /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */ 6696 if (Annz < Annz1 + Annz2) { 6697 PetscInt *Aj_new; 6698 PetscCall(PetscMalloc1(Annz, &Aj_new)); 6699 PetscCall(PetscArraycpy(Aj_new, Aj, Annz)); 6700 PetscCall(PetscFree(Aj)); 6701 Aj = Aj_new; 6702 } 6703 6704 if (Bnnz < Bnnz1 + Bnnz2) { 6705 PetscInt *Bj_new; 6706 PetscCall(PetscMalloc1(Bnnz, &Bj_new)); 6707 PetscCall(PetscArraycpy(Bj_new, Bj, Bnnz)); 6708 PetscCall(PetscFree(Bj)); 6709 Bj = Bj_new; 6710 } 6711 6712 /* Create new submatrices for on-process and off-process coupling */ 6713 PetscScalar *Aa, *Ba; 6714 MatType rtype; 6715 Mat_SeqAIJ *a, *b; 6716 PetscObjectState state; 6717 PetscCall(PetscCalloc1(Annz, &Aa)); /* Zero matrix on device */ 6718 PetscCall(PetscCalloc1(Bnnz, &Ba)); 6719 /* make Aj[] local, i.e, based off the start column of the diagonal portion */ 6720 if (cstart) { 6721 for (k = 0; k < Annz; k++) Aj[k] -= cstart; 6722 } 6723 6724 PetscCall(MatGetRootType_Private(mat, &rtype)); 6725 6726 MatSeqXAIJGetOptions_Private(mpiaij->A); 6727 PetscCall(MatDestroy(&mpiaij->A)); 6728 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, Ai, Aj, Aa, &mpiaij->A)); 6729 
PetscCall(MatSetBlockSizesFromMats(mpiaij->A, mat, mat)); 6730 MatSeqXAIJRestoreOptions_Private(mpiaij->A); 6731 6732 MatSeqXAIJGetOptions_Private(mpiaij->B); 6733 PetscCall(MatDestroy(&mpiaij->B)); 6734 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, mat->cmap->N, Bi, Bj, Ba, &mpiaij->B)); 6735 PetscCall(MatSetBlockSizesFromMats(mpiaij->B, mat, mat)); 6736 MatSeqXAIJRestoreOptions_Private(mpiaij->B); 6737 6738 PetscCall(MatSetUpMultiply_MPIAIJ(mat)); 6739 mat->was_assembled = PETSC_TRUE; // was_assembled in effect means the Mvctx is built; doing so avoids redundant MatSetUpMultiply_MPIAIJ 6740 state = mpiaij->A->nonzerostate + mpiaij->B->nonzerostate; 6741 PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat))); 6742 6743 a = (Mat_SeqAIJ *)mpiaij->A->data; 6744 b = (Mat_SeqAIJ *)mpiaij->B->data; 6745 a->free_a = PETSC_TRUE; 6746 a->free_ij = PETSC_TRUE; 6747 b->free_a = PETSC_TRUE; 6748 b->free_ij = PETSC_TRUE; 6749 a->maxnz = a->nz; 6750 b->maxnz = b->nz; 6751 6752 /* conversion must happen AFTER multiply setup */ 6753 PetscCall(MatConvert(mpiaij->A, rtype, MAT_INPLACE_MATRIX, &mpiaij->A)); 6754 PetscCall(MatConvert(mpiaij->B, rtype, MAT_INPLACE_MATRIX, &mpiaij->B)); 6755 PetscCall(VecDestroy(&mpiaij->lvec)); 6756 PetscCall(MatCreateVecs(mpiaij->B, &mpiaij->lvec, NULL)); 6757 6758 // Put the COO struct in a container and then attach that to the matrix 6759 PetscCall(PetscMalloc1(1, &coo)); 6760 coo->n = coo_n; 6761 coo->sf = sf2; 6762 coo->sendlen = nleaves; 6763 coo->recvlen = nroots; 6764 coo->Annz = Annz; 6765 coo->Bnnz = Bnnz; 6766 coo->Annz2 = Annz2; 6767 coo->Bnnz2 = Bnnz2; 6768 coo->Atot1 = Atot1; 6769 coo->Atot2 = Atot2; 6770 coo->Btot1 = Btot1; 6771 coo->Btot2 = Btot2; 6772 coo->Ajmap1 = Ajmap1; 6773 coo->Aperm1 = Aperm1; 6774 coo->Bjmap1 = Bjmap1; 6775 coo->Bperm1 = Bperm1; 6776 coo->Aimap2 = Aimap2; 6777 coo->Ajmap2 = Ajmap2; 6778 coo->Aperm2 = Aperm2; 6779 coo->Bimap2 = Bimap2; 6780 
coo->Bjmap2 = Bjmap2; 6781 coo->Bperm2 = Bperm2; 6782 coo->Cperm1 = Cperm1; 6783 // Allocate in preallocation. If not used, it has zero cost on host 6784 PetscCall(PetscMalloc2(coo->sendlen, &coo->sendbuf, coo->recvlen, &coo->recvbuf)); 6785 PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container)); 6786 PetscCall(PetscContainerSetPointer(container, coo)); 6787 PetscCall(PetscContainerSetCtxDestroy(container, MatCOOStructDestroy_MPIAIJ)); 6788 PetscCall(PetscObjectCompose((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject)container)); 6789 PetscCall(PetscContainerDestroy(&container)); 6790 PetscFunctionReturn(PETSC_SUCCESS); 6791 } 6792 6793 static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat, const PetscScalar v[], InsertMode imode) 6794 { 6795 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)mat->data; 6796 Mat A = mpiaij->A, B = mpiaij->B; 6797 PetscScalar *Aa, *Ba; 6798 PetscScalar *sendbuf, *recvbuf; 6799 const PetscCount *Ajmap1, *Ajmap2, *Aimap2; 6800 const PetscCount *Bjmap1, *Bjmap2, *Bimap2; 6801 const PetscCount *Aperm1, *Aperm2, *Bperm1, *Bperm2; 6802 const PetscCount *Cperm1; 6803 PetscContainer container; 6804 MatCOOStruct_MPIAIJ *coo; 6805 6806 PetscFunctionBegin; 6807 PetscCall(PetscObjectQuery((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject *)&container)); 6808 PetscCheck(container, PetscObjectComm((PetscObject)mat), PETSC_ERR_PLIB, "Not found MatCOOStruct on this matrix"); 6809 PetscCall(PetscContainerGetPointer(container, (void **)&coo)); 6810 sendbuf = coo->sendbuf; 6811 recvbuf = coo->recvbuf; 6812 Ajmap1 = coo->Ajmap1; 6813 Ajmap2 = coo->Ajmap2; 6814 Aimap2 = coo->Aimap2; 6815 Bjmap1 = coo->Bjmap1; 6816 Bjmap2 = coo->Bjmap2; 6817 Bimap2 = coo->Bimap2; 6818 Aperm1 = coo->Aperm1; 6819 Aperm2 = coo->Aperm2; 6820 Bperm1 = coo->Bperm1; 6821 Bperm2 = coo->Bperm2; 6822 Cperm1 = coo->Cperm1; 6823 6824 PetscCall(MatSeqAIJGetArray(A, &Aa)); /* Might read and write matrix values */ 6825 PetscCall(MatSeqAIJGetArray(B, &Ba)); 6826 6827 /* Pack 
entries to be sent to remote */
  /* Cperm1[] maps the i-th slot of sendbuf to the caller's position in v[] (built in preallocation) */
  for (PetscCount i = 0; i < coo->sendlen; i++) sendbuf[i] = v[Cperm1[i]];

  /* Send remote entries to their owner and overlap the communication with local computation */
  PetscCall(PetscSFReduceWithMemTypeBegin(coo->sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, sendbuf, PETSC_MEMTYPE_HOST, recvbuf, MPI_REPLACE));
  /* Add local entries to A and B */
  for (PetscCount i = 0; i < coo->Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */
    PetscScalar sum = 0.0;                     /* Do partial summation first to improve numerical stability */
    /* Ajmap1[i]..Ajmap1[i+1] is the range of local COO input positions (via Aperm1[]) that hit nonzero i of A */
    for (PetscCount k = Ajmap1[i]; k < Ajmap1[i + 1]; k++) sum += v[Aperm1[k]];
    Aa[i] = (imode == INSERT_VALUES ? 0.0 : Aa[i]) + sum; /* INSERT overwrites, ADD accumulates into the existing value */
  }
  for (PetscCount i = 0; i < coo->Bnnz; i++) {
    PetscScalar sum = 0.0;
    for (PetscCount k = Bjmap1[i]; k < Bjmap1[i + 1]; k++) sum += v[Bperm1[k]];
    Ba[i] = (imode == INSERT_VALUES ? 0.0 : Ba[i]) + sum;
  }
  PetscCall(PetscSFReduceEnd(coo->sf, MPIU_SCALAR, sendbuf, recvbuf, MPI_REPLACE));

  /* Add received remote entries to A and B */
  /* Aimap2[i] is the target nonzero of A for the i-th received unique entry; Ajmap2 delimits its recvbuf
     contributions via Aperm2[]. Remote entries are always added (the INSERT/ADD zeroing happened above). */
  for (PetscCount i = 0; i < coo->Annz2; i++) {
    for (PetscCount k = Ajmap2[i]; k < Ajmap2[i + 1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]];
  }
  for (PetscCount i = 0; i < coo->Bnnz2; i++) {
    for (PetscCount k = Bjmap2[i]; k < Bjmap2[i + 1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]];
  }
  PetscCall(MatSeqAIJRestoreArray(A, &Aa));
  PetscCall(MatSeqAIJRestoreArray(B, &Ba));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
   MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.

   Options Database Keys:
. 
-mat_type mpiaij - sets the matrix type to `MATMPIAIJ` during a call to `MatSetFromOptions()` 6862 6863 Level: beginner 6864 6865 Notes: 6866 `MatSetValues()` may be called for this matrix type with a `NULL` argument for the numerical values, 6867 in this case the values associated with the rows and columns one passes in are set to zero 6868 in the matrix 6869 6870 `MatSetOptions`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this no 6871 space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored 6872 6873 .seealso: [](ch_matrices), `Mat`, `MATSEQAIJ`, `MATAIJ`, `MatCreateAIJ()` 6874 M*/ 6875 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 6876 { 6877 Mat_MPIAIJ *b; 6878 PetscMPIInt size; 6879 6880 PetscFunctionBegin; 6881 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 6882 6883 PetscCall(PetscNew(&b)); 6884 B->data = (void *)b; 6885 B->ops[0] = MatOps_Values; 6886 B->assembled = PETSC_FALSE; 6887 B->insertmode = NOT_SET_VALUES; 6888 b->size = size; 6889 6890 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank)); 6891 6892 /* build cache for off array entries formed */ 6893 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash)); 6894 6895 b->donotstash = PETSC_FALSE; 6896 b->colmap = NULL; 6897 b->garray = NULL; 6898 b->roworiented = PETSC_TRUE; 6899 6900 /* stuff used for matrix vector multiply */ 6901 b->lvec = NULL; 6902 b->Mvctx = NULL; 6903 6904 /* stuff for MatGetRow() */ 6905 b->rowindices = NULL; 6906 b->rowvalues = NULL; 6907 b->getrowactive = PETSC_FALSE; 6908 6909 /* flexible pointer used in CUSPARSE classes */ 6910 b->spptr = NULL; 6911 6912 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetUseScalableIncreaseOverlap_C", MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ)); 6913 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIAIJ)); 6914 
PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIAIJ)); 6915 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_MPIAIJ)); 6916 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocation_C", MatMPIAIJSetPreallocation_MPIAIJ)); 6917 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetPreallocation_C", MatResetPreallocation_MPIAIJ)); 6918 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetHash_C", MatResetHash_MPIAIJ)); 6919 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocationCSR_C", MatMPIAIJSetPreallocationCSR_MPIAIJ)); 6920 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIAIJ)); 6921 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijperm_C", MatConvert_MPIAIJ_MPIAIJPERM)); 6922 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijsell_C", MatConvert_MPIAIJ_MPIAIJSELL)); 6923 #if defined(PETSC_HAVE_CUDA) 6924 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcusparse_C", MatConvert_MPIAIJ_MPIAIJCUSPARSE)); 6925 #endif 6926 #if defined(PETSC_HAVE_HIP) 6927 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijhipsparse_C", MatConvert_MPIAIJ_MPIAIJHIPSPARSE)); 6928 #endif 6929 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 6930 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijkokkos_C", MatConvert_MPIAIJ_MPIAIJKokkos)); 6931 #endif 6932 #if defined(PETSC_HAVE_MKL_SPARSE) 6933 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijmkl_C", MatConvert_MPIAIJ_MPIAIJMKL)); 6934 #endif 6935 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcrl_C", MatConvert_MPIAIJ_MPIAIJCRL)); 6936 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpibaij_C", MatConvert_MPIAIJ_MPIBAIJ)); 
6937 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisbaij_C", MatConvert_MPIAIJ_MPISBAIJ)); 6938 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense)); 6939 #if defined(PETSC_HAVE_ELEMENTAL) 6940 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_elemental_C", MatConvert_MPIAIJ_Elemental)); 6941 #endif 6942 #if defined(PETSC_HAVE_SCALAPACK) 6943 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_scalapack_C", MatConvert_AIJ_ScaLAPACK)); 6944 #endif 6945 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_is_C", MatConvert_XAIJ_IS)); 6946 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisell_C", MatConvert_MPIAIJ_MPISELL)); 6947 #if defined(PETSC_HAVE_HYPRE) 6948 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_hypre_C", MatConvert_AIJ_HYPRE)); 6949 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", MatProductSetFromOptions_Transpose_AIJ_AIJ)); 6950 #endif 6951 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_is_mpiaij_C", MatProductSetFromOptions_IS_XAIJ)); 6952 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpiaij_C", MatProductSetFromOptions_MPIAIJ)); 6953 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetPreallocationCOO_C", MatSetPreallocationCOO_MPIAIJ)); 6954 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetValuesCOO_C", MatSetValuesCOO_MPIAIJ)); 6955 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIAIJ)); 6956 PetscFunctionReturn(PETSC_SUCCESS); 6957 } 6958 6959 /*@ 6960 MatCreateMPIAIJWithSplitArrays - creates a `MATMPIAIJ` matrix using arrays that contain the "diagonal" 6961 and "off-diagonal" part of the matrix in CSR format. 
6962 6963 Collective 6964 6965 Input Parameters: 6966 + comm - MPI communicator 6967 . m - number of local rows (Cannot be `PETSC_DECIDE`) 6968 . n - This value should be the same as the local size used in creating the 6969 x vector for the matrix-vector product $y = Ax$. (or `PETSC_DECIDE` to have 6970 calculated if `N` is given) For square matrices `n` is almost always `m`. 6971 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given) 6972 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given) 6973 . i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 6974 . j - column indices, which must be local, i.e., based off the start column of the diagonal portion 6975 . a - matrix values 6976 . oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix 6977 . oj - column indices, which must be global, representing global columns in the `MATMPIAIJ` matrix 6978 - oa - matrix values 6979 6980 Output Parameter: 6981 . mat - the matrix 6982 6983 Level: advanced 6984 6985 Notes: 6986 The `i`, `j`, and `a` arrays ARE NOT copied by this routine into the internal format used by PETSc (even in Fortran). The user 6987 must free the arrays once the matrix has been destroyed and not before. 6988 6989 The `i` and `j` indices are 0 based 6990 6991 See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix 6992 6993 This sets local rows and cannot be used to set off-processor values. 6994 6995 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 6996 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 6997 not easily support in-place reassembly. 
It is recommended to use MatSetValues() (or a variant thereof) because
  the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
  keep track of the underlying array. Use `MatSetOption`(A,`MAT_NO_OFF_PROC_ENTRIES`,`PETSC_TRUE`) to disable all
  communication if it is known that only local entries will be set.

.seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
          `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()`
@*/
PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt i[], PetscInt j[], PetscScalar a[], PetscInt oi[], PetscInt oj[], PetscScalar oa[], Mat *mat)
{
  Mat_MPIAIJ *maij;

  PetscFunctionBegin;
  /* Only the first entries of i[]/oi[] are validated here; full CSR consistency is the caller's responsibility */
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
  PetscCheck(oi[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "oi (row indices) must start with 0");
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatSetSizes(*mat, m, n, M, N));
  PetscCall(MatSetType(*mat, MATMPIAIJ));
  maij = (Mat_MPIAIJ *)(*mat)->data;

  /* Mark preallocated so MatMPIAIJSetPreallocation machinery is bypassed; A and B are built directly below */
  (*mat)->preallocated = PETSC_TRUE;

  PetscCall(PetscLayoutSetUp((*mat)->rmap));
  PetscCall(PetscLayoutSetUp((*mat)->cmap));

  /* Wrap the user arrays without copying (see the Notes above: the caller retains ownership and must free them
     after the matrix is destroyed) */
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, i, j, a, &maij->A));
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, (*mat)->cmap->N, oi, oj, oa, &maij->B));

  /* Assembly is purely local here, so suppress any off-process communication during it */
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatSetOption(*mat, 
MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE)); 7030 PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 7031 PetscFunctionReturn(PETSC_SUCCESS); 7032 } 7033 7034 typedef struct { 7035 Mat *mp; /* intermediate products */ 7036 PetscBool *mptmp; /* is the intermediate product temporary ? */ 7037 PetscInt cp; /* number of intermediate products */ 7038 7039 /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */ 7040 PetscInt *startsj_s, *startsj_r; 7041 PetscScalar *bufa; 7042 Mat P_oth; 7043 7044 /* may take advantage of merging product->B */ 7045 Mat Bloc; /* B-local by merging diag and off-diag */ 7046 7047 /* cusparse does not have support to split between symbolic and numeric phases. 7048 When api_user is true, we don't need to update the numerical values 7049 of the temporary storage */ 7050 PetscBool reusesym; 7051 7052 /* support for COO values insertion */ 7053 PetscScalar *coo_v, *coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */ 7054 PetscInt **own; /* own[i] points to address of on-process COO indices for Mat mp[i] */ 7055 PetscInt **off; /* off[i] points to address of off-process COO indices for Mat mp[i] */ 7056 PetscBool hasoffproc; /* if true, have off-process values insertion (i.e. 
AtB or PtAP) */
  PetscSF      sf; /* used for non-local values insertion and memory malloc */
  PetscMemType mtype;

  /* customization */
  PetscBool abmerge;
  PetscBool P_oth_bind;
} MatMatMPIAIJBACKEND;

/* Destructor for the MatMatMPIAIJBACKEND product data attached to C->product->data.
   Frees every resource the symbolic phase allocated: scatter buffers, intermediate
   products mp[], the COO index arrays own[]/off[] (whose payloads are the single
   allocations own[0]/off[0]), and finally the struct itself. */
static PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data)
{
  MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND *)data;
  PetscInt             i;

  PetscFunctionBegin;
  PetscCall(PetscFree2(mmdata->startsj_s, mmdata->startsj_r));
  PetscCall(PetscFree(mmdata->bufa));
  /* coo_v/coo_w were allocated through the SF with memtype mtype, so they must be released the same way */
  PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_v));
  PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_w));
  PetscCall(MatDestroy(&mmdata->P_oth));
  PetscCall(MatDestroy(&mmdata->Bloc));
  PetscCall(PetscSFDestroy(&mmdata->sf));
  for (i = 0; i < mmdata->cp; i++) PetscCall(MatDestroy(&mmdata->mp[i]));
  PetscCall(PetscFree2(mmdata->mp, mmdata->mptmp));
  /* own[0]/off[0] hold the contiguous index storage; own[]/off[] are the per-product pointer tables */
  PetscCall(PetscFree(mmdata->own[0]));
  PetscCall(PetscFree(mmdata->own));
  PetscCall(PetscFree(mmdata->off[0]));
  PetscCall(PetscFree(mmdata->off));
  PetscCall(PetscFree(mmdata));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Copy selected n entries with indices in idx[] of A to v[]. 
   If idx is NULL, copy the whole data array of A to v[]
 */
static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[])
{
  PetscErrorCode (*f)(Mat, PetscInt, const PetscInt[], PetscScalar[]);

  PetscFunctionBegin;
  /* Prefer a type-specific (possibly device-side) implementation if the Mat provides one */
  PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJCopySubArray_C", &f));
  if (f) {
    PetscCall((*f)(A, n, idx, v));
  } else {
    const PetscScalar *vv;

    PetscCall(MatSeqAIJGetArrayRead(A, &vv));
    if (n && idx) {
      /* Gather the selected entries one by one */
      PetscScalar    *w  = v;
      const PetscInt *oi = idx;
      PetscInt        j;

      for (j = 0; j < n; j++) *w++ = vv[*oi++];
    } else {
      /* No index array: copy the first n values of A's data array verbatim */
      PetscCall(PetscArraycpy(v, vv, n));
    }
    PetscCall(MatSeqAIJRestoreArrayRead(A, &vv));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Numeric phase of the backend MPIAIJ matrix product: refresh the temporary operands (unless
   the symbolic phase just did), run the numeric kernels of the intermediate products, then
   gather their values into the COO buffers for the final MatSetValuesCOO() on C. */
static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C)
{
  MatMatMPIAIJBACKEND *mmdata;
  PetscInt             i, n_d, n_o;

  PetscFunctionBegin;
  MatCheckProduct(C, 1);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  mmdata = (MatMatMPIAIJBACKEND *)C->product->data;
  if (!mmdata->reusesym) { /* update temporary matrices */
    if (mmdata->P_oth) PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A, C->product->B, MAT_REUSE_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
    if (mmdata->Bloc) PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B, MAT_REUSE_MATRIX, NULL, &mmdata->Bloc));
  }
  /* reusesym only skips the very first numeric call after an api_user symbolic phase */
  mmdata->reusesym = PETSC_FALSE;

  for (i = 0; i < mmdata->cp; i++) {
    PetscCheck(mmdata->mp[i]->ops->productnumeric, PetscObjectComm((PetscObject)mmdata->mp[i]), PETSC_ERR_PLIB, "Missing numeric op for %s", MatProductTypes[mmdata->mp[i]->product->type]);
    PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]));
  }
  /* n_d/n_o track the write offsets into the on-process (coo_v) and off-process (coo_w) buffers */
  for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) {
    PetscInt noff;

PetscCall(PetscIntCast(mmdata->off[i + 1] - mmdata->off[i], &noff)); 7140 if (mmdata->mptmp[i]) continue; 7141 if (noff) { 7142 PetscInt nown; 7143 7144 PetscCall(PetscIntCast(mmdata->own[i + 1] - mmdata->own[i], &nown)); 7145 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], noff, mmdata->off[i], mmdata->coo_w + n_o)); 7146 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], nown, mmdata->own[i], mmdata->coo_v + n_d)); 7147 n_o += noff; 7148 n_d += nown; 7149 } else { 7150 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mmdata->mp[i]->data; 7151 7152 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], mm->nz, NULL, mmdata->coo_v + n_d)); 7153 n_d += mm->nz; 7154 } 7155 } 7156 if (mmdata->hasoffproc) { /* offprocess insertion */ 7157 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d)); 7158 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d)); 7159 } 7160 PetscCall(MatSetValuesCOO(C, mmdata->coo_v, INSERT_VALUES)); 7161 PetscFunctionReturn(PETSC_SUCCESS); 7162 } 7163 7164 /* Support for Pt * A, A * P, or Pt * A * P */ 7165 #define MAX_NUMBER_INTERMEDIATE 4 7166 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C) 7167 { 7168 Mat_Product *product = C->product; 7169 Mat A, P, mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */ 7170 Mat_MPIAIJ *a, *p; 7171 MatMatMPIAIJBACKEND *mmdata; 7172 ISLocalToGlobalMapping P_oth_l2g = NULL; 7173 IS glob = NULL; 7174 const char *prefix; 7175 char pprefix[256]; 7176 const PetscInt *globidx, *P_oth_idx; 7177 PetscInt i, j, cp, m, n, M, N, *coo_i, *coo_j; 7178 PetscCount ncoo, ncoo_d, ncoo_o, ncoo_oown; 7179 PetscInt cmapt[MAX_NUMBER_INTERMEDIATE], rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. 
                                                                                             */
  /* type-0: consecutive, start from 0; type-1: consecutive with */
  /* a base offset; type-2: sparse with a local to global map table */
  const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE], *rmapa[MAX_NUMBER_INTERMEDIATE]; /* col/row local to global map array (table) for type-2 map type */

  MatProductType ptype;
  PetscBool      mptmp[MAX_NUMBER_INTERMEDIATE], hasoffproc = PETSC_FALSE, iscuda, iship, iskokk;
  PetscMPIInt    size;

  PetscFunctionBegin;
  MatCheckProduct(C, 1);
  PetscCheck(!product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
  ptype = product->type;
  /* A symmetric A turns At*B into A*B, which avoids the off-process scatter below */
  if (product->A->symmetric == PETSC_BOOL3_TRUE && ptype == MATPRODUCT_AtB) {
    ptype                                          = MATPRODUCT_AB;
    product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE;
  }
  /* determine C's local/global sizes and whether product values must be scattered to other ranks */
  switch (ptype) {
  case MATPRODUCT_AB:
    A          = product->A;
    P          = product->B;
    m          = A->rmap->n;
    n          = P->cmap->n;
    M          = A->rmap->N;
    N          = P->cmap->N;
    hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */
    break;
  case MATPRODUCT_AtB:
    P          = product->A;
    A          = product->B;
    m          = P->cmap->n;
    n          = A->cmap->n;
    M          = P->cmap->N;
    N          = A->cmap->N;
    hasoffproc = PETSC_TRUE;
    break;
  case MATPRODUCT_PtAP:
    A          = product->A;
    P          = product->B;
    m          = P->cmap->n;
    n          = P->cmap->n;
    M          = P->cmap->N;
    N          = P->cmap->N;
    hasoffproc = PETSC_TRUE;
    break;
  default:
    SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]);
  }
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C), &size));
  if (size == 1) hasoffproc = PETSC_FALSE; /* uniprocessor: everything is local */

  /* defaults */
  for (i = 0; i < MAX_NUMBER_INTERMEDIATE; i++) {
    mp[i]    = NULL;
    mptmp[i] = PETSC_FALSE;
    rmapt[i] = -1;
    cmapt[i] = -1;
    rmapa[i] = NULL;
    cmapa[i] = NULL;
  }

  /* customization */
  PetscCall(PetscNew(&mmdata));
  mmdata->reusesym = product->api_user;
  if (ptype == MATPRODUCT_AB) {
    if (product->api_user) {
      PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatMult", "Mat");
      PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL));
      PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
      PetscOptionsEnd();
    } else {
      PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat");
      PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL));
      PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
      PetscOptionsEnd();
    }
  } else if (ptype == MATPRODUCT_PtAP) {
    if (product->api_user) {
      PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatPtAP", "Mat");
      PetscCall(PetscOptionsBool("-matptap_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
      PetscOptionsEnd();
    } else {
      PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat");
      PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
      PetscOptionsEnd();
    }
  }
  a = (Mat_MPIAIJ *)A->data;
  p = (Mat_MPIAIJ *)P->data;
  PetscCall(MatSetSizes(C, m, n, M, N));
  PetscCall(PetscLayoutSetUp(C->rmap));
  PetscCall(PetscLayoutSetUp(C->cmap));
  PetscCall(MatSetType(C, ((PetscObject)A)->type_name));

  PetscCall(MatGetOptionsPrefix(C, &prefix));

  /* Build the list of sequential intermediate products mp[0..cp-1]. For each one, record
     how its local row/col indices map to global indices of C (rmapt/cmapt, see table above). */
  cp = 0;
  switch (ptype) {
  case MATPRODUCT_AB: /* A * P */
    PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));

    /* A_diag * P_local (merged or not) */
    if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */
      /* P is product->B */
      PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
      PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      PetscCall(ISGetIndices(glob, &globidx));
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    } else { /* A_diag * P_diag and A_diag * P_off */
      PetscCall(MatProductCreate(a->A, p->A, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 1;
      cmapt[cp] = 1;
      mptmp[cp] = PETSC_FALSE;
      cp++;
      PetscCall(MatProductCreate(a->A, p->B, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = p->garray; /* off-diagonal block columns map through P's garray */
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }

    /* A_off * P_other */
    if (mmdata->P_oth) {
      PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); /* make P_oth use local col ids */
      PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx));
      PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)a->B)->type_name));
      PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind));
      PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = P_oth_idx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }
    break;

  case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */
    /* A is product->B */
    PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
    if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */
      PetscCall(MatProductCreate(mmdata->Bloc, mmdata->Bloc, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      PetscCall(ISGetIndices(glob, &globidx));
      rmapt[cp] = 2;
      rmapa[cp] = globidx;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    } else {
      PetscCall(MatProductCreate(p->A, mmdata->Bloc, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      PetscCall(ISGetIndices(glob, &globidx));
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
      PetscCall(MatProductCreate(p->B, mmdata->Bloc, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 2;
      rmapa[cp] = p->garray; /* rows of P_off map through P's garray */
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }
    break;
  case MATPRODUCT_PtAP:
    PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
    /* P is product->B */
    PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
    PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp]));
    PetscCall(MatProductSetType(mp[cp], MATPRODUCT_PtAP));
    PetscCall(MatProductSetFill(mp[cp], product->fill));
    PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
    PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
    PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
    mp[cp]->product->api_user = product->api_user;
    PetscCall(MatProductSetFromOptions(mp[cp]));
    PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
    PetscCall(ISGetIndices(glob, &globidx));
    rmapt[cp] = 2;
    rmapa[cp] = globidx;
    cmapt[cp] = 2;
    cmapa[cp] = globidx;
    mptmp[cp] = PETSC_FALSE;
    cp++;
    if (mmdata->P_oth) {
      PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g));
      PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx));
      PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)a->B)->type_name));
      PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind));
      PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      mptmp[cp] = PETSC_TRUE; /* A_off * P_oth is only an input to the next product */
      cp++;
      PetscCall(MatProductCreate(mmdata->Bloc, mp[1], NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 2;
      rmapa[cp] = globidx;
      cmapt[cp] = 2;
      cmapa[cp] = P_oth_idx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }
    break;
  default:
    SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]);
  }
  /* sanity check */
  if (size > 1)
    for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected offproc map type for product %" PetscInt_FMT, i);

  PetscCall(PetscMalloc2(cp, &mmdata->mp, cp, &mmdata->mptmp));
  for (i = 0; i < cp; i++) {
    mmdata->mp[i]    = mp[i];
    mmdata->mptmp[i] = mptmp[i];
  }
  mmdata->cp             = cp;
  C->product->data       = mmdata;
  C->product->destroy    = MatDestroy_MatMatMPIAIJBACKEND;
  C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND;

  /* memory type */
  mmdata->mtype = PETSC_MEMTYPE_HOST;
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iscuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, ""));
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iship, MATSEQAIJHIPSPARSE, MATMPIAIJHIPSPARSE, ""));
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iskokk, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, ""));
  if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA;
  else if (iship) mmdata->mtype = PETSC_MEMTYPE_HIP;
  else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS;

  /* prepare coo coordinates for values insertion */

  /* count total nonzeros of those intermediate seqaij Mats
     ncoo_d:    # of nonzeros of matrices that do not have offproc entries
     ncoo_o:    # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs
     ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally
  */
  for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) {
    Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data;
    if (mptmp[cp]) continue;
    if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scatter to all processes (might include self) */
      const PetscInt *rmap = rmapa[cp];
      const PetscInt  mr   = mp[cp]->rmap->n;
      const PetscInt  rs   = C->rmap->rstart;
      const PetscInt  re   = C->rmap->rend;
      const PetscInt *ii   = mm->i;
      for (i = 0; i < mr; i++) {
        const PetscInt gr = rmap[i];
        const PetscInt nz = ii[i + 1] - ii[i];
        if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */
        else ncoo_oown += nz;                  /* this row is local */
      }
    } else ncoo_d += mm->nz;
  }

  /*
    ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc

    ncoo = ncoo_d + ncoo_oown + ncoo2, which ncoo2 is number of nonzeros inserted to me by other procs.

    off[0] points to a big index array, which is shared by off[1,2,...]. Similarly, for own[0].

    off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others
    own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally
    so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others.

    coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc.
    Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaining part stores i of nonzeros I will receive.
  */
  PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->off)); /* +1 to make a csr-like data structure */
  PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->own));

  /* gather (i,j) of nonzeros inserted by remote procs */
  if (hasoffproc) {
    PetscSF  msf;
    PetscInt ncoo2, *coo_i2, *coo_j2;

    PetscCall(PetscMalloc1(ncoo_o, &mmdata->off[0]));
    PetscCall(PetscMalloc1(ncoo_oown, &mmdata->own[0]));
    PetscCall(PetscMalloc2(ncoo_o, &coo_i, ncoo_o, &coo_j)); /* to collect (i,j) of entries to be sent to others */

    for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) {
      Mat_SeqAIJ *mm     = (Mat_SeqAIJ *)mp[cp]->data;
      PetscInt   *idxoff = mmdata->off[cp];
      PetscInt   *idxown = mmdata->own[cp];
      if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */
        const PetscInt *rmap = rmapa[cp];
        const PetscInt *cmap = cmapa[cp];
        const PetscInt *ii   = mm->i;
        PetscInt       *coi  = coo_i + ncoo_o;
        PetscInt       *coj  = coo_j + ncoo_o;
        const PetscInt  mr   = mp[cp]->rmap->n;
        const PetscInt  rs   = C->rmap->rstart;
        const PetscInt  re   = C->rmap->rend;
        const PetscInt  cs   = C->cmap->rstart;
        for (i = 0; i < mr; i++) {
          const PetscInt *jj = mm->j + ii[i];
          const PetscInt  gr = rmap[i];
          const PetscInt  nz = ii[i + 1] - ii[i];
          if (gr < rs || gr >= re) { /* this is an offproc row */
            for (j = ii[i]; j < ii[i + 1]; j++) {
              *coi++    = gr;
              *idxoff++ = j;
            }
            if (!cmapt[cp]) { /* already global */
              for (j = 0; j < nz; j++) *coj++ = jj[j];
            } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
              for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
            } else { /* offdiag */
              for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
            }
            ncoo_o += nz;
          } else { /* this is a local row */
            for (j = ii[i]; j < ii[i + 1]; j++) *idxown++ = j;
          }
        }
      }
      /* close this product's segment; off/own then act like CSR row pointers over the index arrays */
      mmdata->off[cp + 1] = idxoff;
      mmdata->own[cp + 1] = idxown;
    }

    PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf));
    PetscInt incoo_o;
    PetscCall(PetscIntCast(ncoo_o, &incoo_o));
    PetscCall(PetscSFSetGraphLayout(mmdata->sf, C->rmap, incoo_o /*nleaves*/, NULL /*ilocal*/, PETSC_OWN_POINTER, coo_i));
    PetscCall(PetscSFGetMultiSF(mmdata->sf, &msf));
    PetscCall(PetscSFGetGraph(msf, &ncoo2 /*nroots*/, NULL, NULL, NULL));
    ncoo = ncoo_d + ncoo_oown + ncoo2;
    PetscCall(PetscMalloc2(ncoo, &coo_i2, ncoo, &coo_j2));
    PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */
    PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown));
    PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown));
    PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown));
    PetscCall(PetscFree2(coo_i, coo_j));
    /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */
    PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo_o * sizeof(PetscScalar), (void **)&mmdata->coo_w));
    coo_i = coo_i2;
    coo_j = coo_j2;
  } else { /* no offproc values insertion */
    ncoo = ncoo_d;
    PetscCall(PetscMalloc2(ncoo, &coo_i, ncoo, &coo_j));

    /* empty SF so the numeric phase can use the same code path */
    PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf));
    PetscCall(PetscSFSetGraph(mmdata->sf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER));
    PetscCall(PetscSFSetUp(mmdata->sf));
  }
  mmdata->hasoffproc = hasoffproc;

  /* gather (i,j) of nonzeros inserted locally */
  for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) {
    Mat_SeqAIJ     *mm   = (Mat_SeqAIJ *)mp[cp]->data;
    PetscInt       *coi  = coo_i + ncoo_d;
    PetscInt       *coj  = coo_j + ncoo_d;
    const PetscInt *jj   = mm->j;
    const PetscInt *ii   = mm->i;
    const PetscInt *cmap = cmapa[cp];
    const PetscInt *rmap = rmapa[cp];
    const PetscInt  mr   = mp[cp]->rmap->n;
    const PetscInt  rs   = C->rmap->rstart;
    const PetscInt  re   = C->rmap->rend;
    const PetscInt  cs   = C->cmap->rstart;

    if (mptmp[cp]) continue;
    if (rmapt[cp] == 1) { /* consecutive rows */
      /* fill coo_i */
      for (i = 0; i < mr; i++) {
        const PetscInt gr = i + rs;
        for (j = ii[i]; j < ii[i + 1]; j++) coi[j] = gr;
      }
      /* fill coo_j */
      if (!cmapt[cp]) { /* type-0, already global */
        PetscCall(PetscArraycpy(coj, jj, mm->nz));
      } else if (cmapt[cp] == 1) { /* type-1, local to global for consecutive columns of C */
        for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */
      } else { /* type-2, local to global for sparse columns */
        for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]];
      }
      ncoo_d += mm->nz;
    } else if (rmapt[cp] == 2) { /* sparse rows */
      for (i = 0; i < mr; i++) {
        const PetscInt *jj = mm->j + ii[i];
        const PetscInt  gr = rmap[i];
        const PetscInt  nz = ii[i + 1] - ii[i];
        if (gr >= rs && gr < re) { /* local rows */
          for (j = ii[i]; j < ii[i + 1]; j++) *coi++ = gr;
          if (!cmapt[cp]) { /* type-0, already global */
            for (j = 0; j < nz; j++) *coj++ = jj[j];
          } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
            for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
          } else { /* type-2, local to global for sparse columns */
            for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
          }
          ncoo_d += nz;
        }
      }
    }
  }
  if (glob) PetscCall(ISRestoreIndices(glob, &globidx));
  PetscCall(ISDestroy(&glob));
  if (P_oth_l2g) PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g, &P_oth_idx));
  PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g));
  /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */
  PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo * sizeof(PetscScalar), (void **)&mmdata->coo_v));

  /* set block sizes */
  A = product->A;
  P = product->B;
  switch (ptype) {
  case MATPRODUCT_PtAP:
    if (P->cmap->bs > 1) PetscCall(MatSetBlockSizes(C, P->cmap->bs, P->cmap->bs));
    break;
  case MATPRODUCT_RARt:
    if (P->rmap->bs > 1) PetscCall(MatSetBlockSizes(C, P->rmap->bs, P->rmap->bs));
    break;
  case MATPRODUCT_ABC:
    PetscCall(MatSetBlockSizesFromMats(C, A, product->C));
    break;
  case MATPRODUCT_AB:
    PetscCall(MatSetBlockSizesFromMats(C, A, P));
    break;
  case MATPRODUCT_AtB:
    if (A->cmap->bs > 1 || P->cmap->bs > 1) PetscCall(MatSetBlockSizes(C, A->cmap->bs, P->cmap->bs));
    break;
  case MATPRODUCT_ABt:
    if (A->rmap->bs > 1 || P->rmap->bs > 1) PetscCall(MatSetBlockSizes(C, A->rmap->bs, P->rmap->bs));
    break;
  default:
    SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for ProductType %s", MatProductTypes[ptype]);
  }

  /* preallocate with COO data */
  PetscCall(MatSetPreallocationCOO(C, ncoo, coo_i, coo_j));
  PetscCall(PetscFree2(coo_i, coo_j));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Select the backend (COO-based) symbolic implementation for AB, AtB and PtAP products
  when A and B live on the same (device) type and the user has not forced the CPU path;
  otherwise fall back to the plain MPIAIJ implementation.
*/
PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat)
{
  Mat_Product *product = mat->product;
#if defined(PETSC_HAVE_DEVICE)
  PetscBool match  = PETSC_FALSE;
  PetscBool usecpu = PETSC_FALSE;
#else
  PetscBool match = PETSC_TRUE;
#endif

  PetscFunctionBegin;
  MatCheckProduct(mat, 1);
#if defined(PETSC_HAVE_DEVICE)
  if (!product->A->boundtocpu && !product->B->boundtocpu) PetscCall(PetscObjectTypeCompare((PetscObject)product->B, ((PetscObject)product->A)->type_name, &match));
  if (match) { /* we can always fallback to the CPU if requested */
    switch (product->type) {
    case MATPRODUCT_AB:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatMatMult", "Mat");
        PetscCall(PetscOptionsBool("-matmatmult_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AB", "Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      }
      break;
    case MATPRODUCT_AtB:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatTransposeMatMult", "Mat");
        PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AtB", "Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      }
      break;
    case MATPRODUCT_PtAP:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatPtAP", "Mat");
        PetscCall(PetscOptionsBool("-matptap_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_PtAP", "Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      }
      break;
    default:
      break;
    }
    match = (PetscBool)!usecpu;
  }
#endif
  if (match) {
    switch (product->type) {
    case MATPRODUCT_AB:
    case MATPRODUCT_AtB:
    case MATPRODUCT_PtAP:
      mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND;
      break;
    default:
      break;
    }
  }
  /* fallback to MPIAIJ ops */
  if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Produces a set of block column indices of the matrix row, one for each block represented in the original row

  n - the number of block indices in cc[]
  cc - the block indices (must be large enough to contain the indices)
*/
static inline PetscErrorCode MatCollapseRow(Mat Amat, PetscInt row, PetscInt bs, PetscInt *n, PetscInt *cc)
{
  /* cnt = -1 so an empty row yields *n = 0 */
  PetscInt        cnt = -1, nidx, j;
  const PetscInt *idx;

  PetscFunctionBegin;
  PetscCall(MatGetRow(Amat, row, &nidx, &idx, NULL));
  if (nidx) {
    cnt     = 0;
    cc[cnt] = idx[0] / bs;
    /* assumes idx[] is sorted ascending (as returned by MatGetRow for AIJ), so each
       new block index is strictly larger than the last one recorded */
    for (j = 1; j < nidx; j++) {
      if (cc[cnt] < idx[j] / bs) cc[++cnt] = idx[j] / bs;
    }
  }
  PetscCall(MatRestoreRow(Amat, row, &nidx, &idx, NULL));
  *n = cnt + 1;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows

  ncollapsed - the number of block indices
  collapsed - the block indices (must be large enough to contain the indices)
*/
static inline PetscErrorCode MatCollapseRows(Mat Amat, PetscInt start, PetscInt bs, PetscInt *w0, PetscInt *w1, PetscInt *w2, PetscInt *ncollapsed, PetscInt **collapsed)
{
  /* w0/w1/w2 are caller-provided scratch arrays; cprev and merged are swapped each
     iteration so the merge result becomes the "previous" set for the next row */
  PetscInt i, nprev, *cprev = w0, ncur = 0, *ccur = w1, *merged = w2, *cprevtmp;

  PetscFunctionBegin;
  PetscCall(MatCollapseRow(Amat, start, bs, &nprev, cprev));
  /* fold the remaining bs-1 rows of the block row into the running merged set */
  for (i = start + 1; i < start + bs; i++) {
    PetscCall(MatCollapseRow(Amat, i, bs, &ncur, ccur));
    PetscCall(PetscMergeIntArray(nprev, cprev, ncur, ccur, &nprev, &merged));
    cprevtmp = cprev;
    cprev    = merged;
    merged   = cprevtmp;
  }
  *ncollapsed = nprev;
7805 if (collapsed) *collapsed = cprev; 7806 PetscFunctionReturn(PETSC_SUCCESS); 7807 } 7808 7809 /* 7810 MatCreateGraph_Simple_AIJ - create simple scalar matrix (graph) from potentially blocked matrix 7811 7812 Input Parameter: 7813 . Amat - matrix 7814 - symmetrize - make the result symmetric 7815 + scale - scale with diagonal 7816 7817 Output Parameter: 7818 . a_Gmat - output scalar graph >= 0 7819 7820 */ 7821 PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, PetscReal filter, PetscInt index_size, PetscInt index[], Mat *a_Gmat) 7822 { 7823 PetscInt Istart, Iend, Ii, jj, kk, ncols, nloc, NN, MM, bs; 7824 MPI_Comm comm; 7825 Mat Gmat; 7826 PetscBool ismpiaij, isseqaij; 7827 Mat a, b, c; 7828 MatType jtype; 7829 7830 PetscFunctionBegin; 7831 PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm)); 7832 PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend)); 7833 PetscCall(MatGetSize(Amat, &MM, &NN)); 7834 PetscCall(MatGetBlockSize(Amat, &bs)); 7835 nloc = (Iend - Istart) / bs; 7836 7837 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATSEQAIJ, &isseqaij)); 7838 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATMPIAIJ, &ismpiaij)); 7839 PetscCheck(isseqaij || ismpiaij, comm, PETSC_ERR_USER, "Require (MPI)AIJ matrix type"); 7840 7841 /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */ 7842 /* A solution consists in providing a new API, MatAIJGetCollapsedAIJ, and each class can provide a fast 7843 implementation */ 7844 if (bs > 1) { 7845 PetscCall(MatGetType(Amat, &jtype)); 7846 PetscCall(MatCreate(comm, &Gmat)); 7847 PetscCall(MatSetType(Gmat, jtype)); 7848 PetscCall(MatSetSizes(Gmat, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE)); 7849 PetscCall(MatSetBlockSizes(Gmat, 1, 1)); 7850 if (isseqaij || ((Mat_MPIAIJ *)Amat->data)->garray) { 7851 PetscInt *d_nnz, *o_nnz; 7852 MatScalar *aa, val, *AA; 7853 PetscInt *aj, *ai, *AJ, nc, nmax = 0; 7854 7855 
if (isseqaij) { 7856 a = Amat; 7857 b = NULL; 7858 } else { 7859 Mat_MPIAIJ *d = (Mat_MPIAIJ *)Amat->data; 7860 a = d->A; 7861 b = d->B; 7862 } 7863 PetscCall(PetscInfo(Amat, "New bs>1 Graph. nloc=%" PetscInt_FMT "\n", nloc)); 7864 PetscCall(PetscMalloc2(nloc, &d_nnz, (isseqaij ? 0 : nloc), &o_nnz)); 7865 for (c = a, kk = 0; c && kk < 2; c = b, kk++) { 7866 PetscInt *nnz = (c == a) ? d_nnz : o_nnz; 7867 const PetscInt *cols1, *cols2; 7868 7869 for (PetscInt brow = 0, nc1, nc2, ok = 1; brow < nloc * bs; brow += bs) { // block rows 7870 PetscCall(MatGetRow(c, brow, &nc2, &cols2, NULL)); 7871 nnz[brow / bs] = nc2 / bs; 7872 if (nc2 % bs) ok = 0; 7873 if (nnz[brow / bs] > nmax) nmax = nnz[brow / bs]; 7874 for (PetscInt ii = 1; ii < bs; ii++) { // check for non-dense blocks 7875 PetscCall(MatGetRow(c, brow + ii, &nc1, &cols1, NULL)); 7876 if (nc1 != nc2) ok = 0; 7877 else { 7878 for (PetscInt jj = 0; jj < nc1 && ok == 1; jj++) { 7879 if (cols1[jj] != cols2[jj]) ok = 0; 7880 if (cols1[jj] % bs != jj % bs) ok = 0; 7881 } 7882 } 7883 PetscCall(MatRestoreRow(c, brow + ii, &nc1, &cols1, NULL)); 7884 } 7885 PetscCall(MatRestoreRow(c, brow, &nc2, &cols2, NULL)); 7886 if (!ok) { 7887 PetscCall(PetscFree2(d_nnz, o_nnz)); 7888 PetscCall(PetscInfo(Amat, "Found sparse blocks - revert to slow method\n")); 7889 goto old_bs; 7890 } 7891 } 7892 } 7893 PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz)); 7894 PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz)); 7895 PetscCall(PetscFree2(d_nnz, o_nnz)); 7896 PetscCall(PetscMalloc2(nmax, &AA, nmax, &AJ)); 7897 // diag 7898 for (PetscInt brow = 0, n, grow; brow < nloc * bs; brow += bs) { // block rows 7899 Mat_SeqAIJ *aseq = (Mat_SeqAIJ *)a->data; 7900 7901 ai = aseq->i; 7902 n = ai[brow + 1] - ai[brow]; 7903 aj = aseq->j + ai[brow]; 7904 for (PetscInt k = 0; k < n; k += bs) { // block columns 7905 AJ[k / bs] = aj[k] / bs + Istart / bs; // diag starts at (Istart,Istart) 7906 val = 0; 7907 if (index_size == 0) { 7908 for 
(PetscInt ii = 0; ii < bs; ii++) { // rows in block 7909 aa = aseq->a + ai[brow + ii] + k; 7910 for (PetscInt jj = 0; jj < bs; jj++) { // columns in block 7911 val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm 7912 } 7913 } 7914 } else { // use (index,index) value if provided 7915 for (PetscInt iii = 0; iii < index_size; iii++) { // rows in block 7916 PetscInt ii = index[iii]; 7917 aa = aseq->a + ai[brow + ii] + k; 7918 for (PetscInt jjj = 0; jjj < index_size; jjj++) { // columns in block 7919 PetscInt jj = index[jjj]; 7920 val += PetscAbs(PetscRealPart(aa[jj])); 7921 } 7922 } 7923 } 7924 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%" PetscInt_FMT ") >= nmax (%" PetscInt_FMT ")", k / bs, nmax); 7925 AA[k / bs] = val; 7926 } 7927 grow = Istart / bs + brow / bs; 7928 PetscCall(MatSetValues(Gmat, 1, &grow, n / bs, AJ, AA, ADD_VALUES)); 7929 } 7930 // off-diag 7931 if (ismpiaij) { 7932 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Amat->data; 7933 const PetscScalar *vals; 7934 const PetscInt *cols, *garray = aij->garray; 7935 7936 PetscCheck(garray, PETSC_COMM_SELF, PETSC_ERR_USER, "No garray ?"); 7937 for (PetscInt brow = 0, grow; brow < nloc * bs; brow += bs) { // block rows 7938 PetscCall(MatGetRow(b, brow, &ncols, &cols, NULL)); 7939 for (PetscInt k = 0, cidx = 0; k < ncols; k += bs, cidx++) { 7940 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs >= nmax"); 7941 AA[k / bs] = 0; 7942 AJ[cidx] = garray[cols[k]] / bs; 7943 } 7944 nc = ncols / bs; 7945 PetscCall(MatRestoreRow(b, brow, &ncols, &cols, NULL)); 7946 if (index_size == 0) { 7947 for (PetscInt ii = 0; ii < bs; ii++) { // rows in block 7948 PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals)); 7949 for (PetscInt k = 0; k < ncols; k += bs) { 7950 for (PetscInt jj = 0; jj < bs; jj++) { // cols in block 7951 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%" PetscInt_FMT ") >= nmax (%" PetscInt_FMT ")", k / bs, nmax); 7952 AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj])); 7953 } 
7954 } 7955 PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals)); 7956 } 7957 } else { // use (index,index) value if provided 7958 for (PetscInt iii = 0; iii < index_size; iii++) { // rows in block 7959 PetscInt ii = index[iii]; 7960 PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals)); 7961 for (PetscInt k = 0; k < ncols; k += bs) { 7962 for (PetscInt jjj = 0; jjj < index_size; jjj++) { // cols in block 7963 PetscInt jj = index[jjj]; 7964 AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj])); 7965 } 7966 } 7967 PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals)); 7968 } 7969 } 7970 grow = Istart / bs + brow / bs; 7971 PetscCall(MatSetValues(Gmat, 1, &grow, nc, AJ, AA, ADD_VALUES)); 7972 } 7973 } 7974 PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY)); 7975 PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY)); 7976 PetscCall(PetscFree2(AA, AJ)); 7977 } else { 7978 const PetscScalar *vals; 7979 const PetscInt *idx; 7980 PetscInt *d_nnz, *o_nnz, *w0, *w1, *w2; 7981 old_bs: 7982 /* 7983 Determine the preallocation needed for the scalar matrix derived from the vector matrix. 7984 */ 7985 PetscCall(PetscInfo(Amat, "OLD bs>1 CreateGraph\n")); 7986 PetscCall(PetscMalloc2(nloc, &d_nnz, (isseqaij ? 
0 : nloc), &o_nnz)); 7987 if (isseqaij) { 7988 PetscInt max_d_nnz; 7989 7990 /* 7991 Determine exact preallocation count for (sequential) scalar matrix 7992 */ 7993 PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat, &max_d_nnz)); 7994 max_d_nnz = PetscMin(nloc, bs * max_d_nnz); 7995 PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2)); 7996 for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) PetscCall(MatCollapseRows(Amat, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL)); 7997 PetscCall(PetscFree3(w0, w1, w2)); 7998 } else if (ismpiaij) { 7999 Mat Daij, Oaij; 8000 const PetscInt *garray; 8001 PetscInt max_d_nnz; 8002 8003 PetscCall(MatMPIAIJGetSeqAIJ(Amat, &Daij, &Oaij, &garray)); 8004 /* 8005 Determine exact preallocation count for diagonal block portion of scalar matrix 8006 */ 8007 PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij, &max_d_nnz)); 8008 max_d_nnz = PetscMin(nloc, bs * max_d_nnz); 8009 PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2)); 8010 for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) PetscCall(MatCollapseRows(Daij, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL)); 8011 PetscCall(PetscFree3(w0, w1, w2)); 8012 /* 8013 Over estimate (usually grossly over), preallocation count for off-diagonal portion of scalar matrix 8014 */ 8015 for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) { 8016 o_nnz[jj] = 0; 8017 for (kk = 0; kk < bs; kk++) { /* rows that get collapsed to a single row */ 8018 PetscCall(MatGetRow(Oaij, Ii + kk, &ncols, NULL, NULL)); 8019 o_nnz[jj] += ncols; 8020 PetscCall(MatRestoreRow(Oaij, Ii + kk, &ncols, NULL, NULL)); 8021 } 8022 if (o_nnz[jj] > (NN / bs - nloc)) o_nnz[jj] = NN / bs - nloc; 8023 } 8024 } else SETERRQ(comm, PETSC_ERR_USER, "Require AIJ matrix type"); 8025 /* get scalar copy (norms) of matrix */ 8026 PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz)); 8027 PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz)); 8028 PetscCall(PetscFree2(d_nnz, o_nnz)); 8029 for (Ii = Istart; Ii < Iend; Ii++) { 
8030 PetscInt dest_row = Ii / bs; 8031 8032 PetscCall(MatGetRow(Amat, Ii, &ncols, &idx, &vals)); 8033 for (jj = 0; jj < ncols; jj++) { 8034 PetscInt dest_col = idx[jj] / bs; 8035 PetscScalar sv = PetscAbs(PetscRealPart(vals[jj])); 8036 8037 PetscCall(MatSetValues(Gmat, 1, &dest_row, 1, &dest_col, &sv, ADD_VALUES)); 8038 } 8039 PetscCall(MatRestoreRow(Amat, Ii, &ncols, &idx, &vals)); 8040 } 8041 PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY)); 8042 PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY)); 8043 } 8044 } else { 8045 if (symmetrize || filter >= 0 || scale) PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat)); 8046 else { 8047 Gmat = Amat; 8048 PetscCall(PetscObjectReference((PetscObject)Gmat)); 8049 } 8050 if (isseqaij) { 8051 a = Gmat; 8052 b = NULL; 8053 } else { 8054 Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data; 8055 a = d->A; 8056 b = d->B; 8057 } 8058 if (filter >= 0 || scale) { 8059 /* take absolute value of each entry */ 8060 for (c = a, kk = 0; c && kk < 2; c = b, kk++) { 8061 MatInfo info; 8062 PetscScalar *avals; 8063 8064 PetscCall(MatGetInfo(c, MAT_LOCAL, &info)); 8065 PetscCall(MatSeqAIJGetArray(c, &avals)); 8066 for (int jj = 0; jj < info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]); 8067 PetscCall(MatSeqAIJRestoreArray(c, &avals)); 8068 } 8069 } 8070 } 8071 if (symmetrize) { 8072 PetscBool isset, issym; 8073 8074 PetscCall(MatIsSymmetricKnown(Amat, &isset, &issym)); 8075 if (!isset || !issym) { 8076 Mat matTrans; 8077 8078 PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans)); 8079 PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric == PETSC_BOOL3_TRUE ? 
SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN)); 8080 PetscCall(MatDestroy(&matTrans)); 8081 } 8082 PetscCall(MatSetOption(Gmat, MAT_SYMMETRIC, PETSC_TRUE)); 8083 } else if (Amat != Gmat) PetscCall(MatPropagateSymmetryOptions(Amat, Gmat)); 8084 if (scale) { 8085 /* scale c for all diagonal values = 1 or -1 */ 8086 Vec diag; 8087 8088 PetscCall(MatCreateVecs(Gmat, &diag, NULL)); 8089 PetscCall(MatGetDiagonal(Gmat, diag)); 8090 PetscCall(VecReciprocal(diag)); 8091 PetscCall(VecSqrtAbs(diag)); 8092 PetscCall(MatDiagonalScale(Gmat, diag, diag)); 8093 PetscCall(VecDestroy(&diag)); 8094 } 8095 PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view")); 8096 if (filter >= 0) { 8097 PetscCall(MatFilter(Gmat, filter, PETSC_TRUE, PETSC_TRUE)); 8098 PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_filter_graph_view")); 8099 } 8100 *a_Gmat = Gmat; 8101 PetscFunctionReturn(PETSC_SUCCESS); 8102 } 8103 8104 /* 8105 Special version for direct calls from Fortran 8106 */ 8107 8108 /* Change these macros so can be used in void function */ 8109 /* Identical to PetscCallVoid, except it assigns to *_ierr */ 8110 #undef PetscCall 8111 #define PetscCall(...) \ 8112 do { \ 8113 PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \ 8114 if (PetscUnlikely(ierr_msv_mpiaij)) { \ 8115 *_ierr = PetscError(PETSC_COMM_SELF, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr_msv_mpiaij, PETSC_ERROR_REPEAT, " "); \ 8116 return; \ 8117 } \ 8118 } while (0) 8119 8120 #undef SETERRQ 8121 #define SETERRQ(comm, ierr, ...) 
  do { \
    *_ierr = PetscError(comm, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr, PETSC_ERROR_INITIAL, __VA_ARGS__); \
    return; \
  } while (0)

/* Map the C symbol to the Fortran-mangled name expected by the linker */
#if defined(PETSC_HAVE_FORTRAN_CAPS)
  #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
  #define matsetvaluesmpiaij_ matsetvaluesmpiaij
#else
#endif

/*
  matsetvaluesmpiaij_ - Fortran direct-call version of MatSetValues() for MATMPIAIJ.

  All scalar arguments arrive as pointers (Fortran pass-by-reference); errors are
  reported through *_ierr instead of a return value, which is why PetscCall() and
  SETERRQ() were redefined above to assign *_ierr and 'return'.

  Input parameters:
+ mmat  - the MPIAIJ matrix
. mm/im - number of rows and their global indices
. mn/in - number of columns and their global indices
. v     - the values (row- or column-oriented per aij->roworiented)
- maddv - INSERT_VALUES or ADD_VALUES

  Output parameter:
. _ierr - error code (0 on success)
*/
PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat, PetscInt *mm, const PetscInt im[], PetscInt *mn, const PetscInt in[], const PetscScalar v[], InsertMode *maddv, PetscErrorCode *_ierr)
{
  Mat         mat  = *mmat;
  PetscInt    m = *mm, n = *mn;
  InsertMode  addv = *maddv;
  Mat_MPIAIJ *aij  = (Mat_MPIAIJ *)mat->data;
  PetscScalar value;

  MatCheckPreallocated(mat, 1);
  /* first call fixes the insert mode; subsequent calls must not mix modes */
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
  else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values");
  {
    PetscInt  i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
    PetscInt  cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
    PetscBool roworiented = aij->roworiented;

    /* Some Variables required in the macro. The exact names below (aimax, ai, rp1, ap1,
       rmax1, nrow1, low1, high1, lastcol1 and their '2'/b counterparts) are referenced
       by the MatSetValues_SeqAIJ_A_Private()/_B_Private() macro expansions — do not rename. */
    Mat         A     = aij->A; /* diagonal-block (local columns) part */
    Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
    PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
    MatScalar  *aa;
    PetscBool   ignorezeroentries = ((a->ignorezeroentries && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
    Mat         B     = aij->B; /* off-diagonal-block part */
    Mat_SeqAIJ *b     = (Mat_SeqAIJ *)B->data;
    PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
    MatScalar  *ba;
    /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
     * cannot use "#if defined" inside a macro. */
    PETSC_UNUSED PetscBool inserted = PETSC_FALSE;

    PetscInt  *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
    PetscInt   nonew = a->nonew;
    MatScalar *ap1, *ap2;

    PetscFunctionBegin;
    PetscCall(MatSeqAIJGetArray(A, &aa));
    PetscCall(MatSeqAIJGetArray(B, &ba));
    for (i = 0; i < m; i++) {
      if (im[i] < 0) continue; /* negative rows are silently ignored (MatSetValues contract) */
      PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
      if (im[i] >= rstart && im[i] < rend) {
        /* locally owned row: set up binary-search state for the A (diag) and B (off-diag) rows */
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j = 0; j < n; j++) {
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          /* never drop an explicit zero on the diagonal, or the nonzero pattern would lose it */
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
          if (in[j] >= cstart && in[j] < cend) {
            col = in[j] - cstart;
            MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
          } else if (in[j] < 0) continue;
          else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
            SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          } else {
            /* off-process column: translate the global index through the colmap */
            if (mat->was_assembled) {
              if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
              PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col));
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              /* col < 0 means a brand-new off-diag column; if new nonzeros are allowed the
                 matrix must be disassembled so B can grow */
              if (col < 0 && !((Mat_SeqAIJ *)aij->A->data)->nonew) {
                PetscCall(MatDisAssemble_MPIAIJ(mat, PETSC_FALSE));
                col = in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
                B        = aij->B;
                b        = (Mat_SeqAIJ *)B->data;
                bimax    = b->imax;
                bi       = b->i;
                bilen    = b->ilen;
                bj       = b->j;
                rp2      = bj + bi[row];
                ap2      = ba + bi[row];
                rmax2    = bimax[row];
                nrow2    = bilen[row];
                low2     = 0;
                high2    = nrow2;
                bm       = aij->B->rmap->n;
                ba       = b->a;
                inserted = PETSC_FALSE;
              }
            } else col = in[j];
            MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
          }
        }
      } else if (!aij->donotstash) {
        /* row owned by another process: stash for communication at assembly time */
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
    PetscCall(MatSeqAIJRestoreArray(A, &aa));
    PetscCall(MatSeqAIJRestoreArray(B, &ba));
  }
  PetscFunctionReturnVoid();
}

/* Undefining these here since they were redefined from their original definition above! No
 * other PETSc functions should be defined past this point, as it is impossible to recover the
 * original definitions */
#undef PetscCall
#undef SETERRQ