#include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
#include <petsc/private/vecimpl.h>
#include <petsc/private/sfimpl.h>
#include <petsc/private/isimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>
#include <petsc/private/hashmapi.h>

/* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and MatAssemblyEnd_MPI_Hash() */
#define TYPE AIJ
#define TYPE_AIJ
#include "../src/mat/impls/aij/mpi/mpihashmat.h"
#undef TYPE
#undef TYPE_AIJ

/*
  Releases all data owned by the Mat_MPIAIJ container: the diagonal (A) and
  off-diagonal (B) sequential blocks, the global-to-local column map, the
  garray of global column ids, the local work vector and scatter context, and
  the MatGetRow()/local-dense work arrays. The Mat header and its data pointer
  are left intact so the object can be set up again (see MatResetHash_MPIAIJ()).
*/
static PetscErrorCode MatReset_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
  PetscCall(MatStashDestroy_Private(&mat->stash));
  PetscCall(VecDestroy(&aij->diag));
  PetscCall(MatDestroy(&aij->A));
  PetscCall(MatDestroy(&aij->B));
  /* colmap is either a hash map or a dense integer array, depending on configuration */
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&aij->colmap));
#else
  PetscCall(PetscFree(aij->colmap));
#endif
  PetscCall(PetscFree(aij->garray));
  PetscCall(VecDestroy(&aij->lvec));
  PetscCall(VecScatterDestroy(&aij->Mvctx));
  PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));
  PetscCall(PetscFree(aij->ld));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Discards the current storage and puts the matrix back into "hash" insertion
  mode (unpreallocated MatSetValues() with hash-table staging).  The nonzero
  states of the component matrices are saved and then incremented because they
  are what determine the nonzero state of mat itself; bumping them forces the
  matrix to report a changed nonzero structure after the reset.
*/
static PetscErrorCode MatResetHash_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  /* Save the nonzero states of the component matrices because those are what are used to determine
     the nonzero state of mat */
  PetscObjectState Astate = aij->A->nonzerostate, Bstate = aij->B->nonzerostate;

  PetscFunctionBegin;
  PetscCall(MatReset_MPIAIJ(mat));
  PetscCall(MatSetUp_MPI_Hash(mat));
  aij->A->nonzerostate = ++Astate, aij->B->nonzerostate = ++Bstate;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Destructor for MATMPIAIJ: frees internal data, then (continued below) clears
   every composed function slot so a subsequent MatSetType() starts clean. */
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  PetscFunctionBegin;
  PetscCall(MatReset_MPIAIJ(mat));

  PetscCall(PetscFree(mat->data));
  /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
  PetscCall(PetscObjectCompose((PetscObject)mat, "MatMergeSeqsToMPI", NULL));

  /* Clear the type name and every type-specific function slot registered by
     MatCreate_MPIAIJ() (and by optional-package converters) so the object can
     be given a new type later without stale dispatch entries. */
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatIsTranspose_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetHash_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocationCSR_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpibaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisbaij_C", NULL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcusparse_C", NULL));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijhipsparse_C", NULL));
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijkokkos_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_elemental_C", NULL));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_scalapack_C", NULL));
#endif
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_hypre_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_is_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetUseScalableIncreaseOverlap_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijperm_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijsell_C", NULL));
#if defined(PETSC_HAVE_MKL_SPARSE)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijmkl_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcrl_C", NULL));
  /* NOTE(review): "MatConvert_mpiaij_is_C" is cleared a second time here (also above);
     harmless, but one of the two could be removed. */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisell_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetPreallocationCOO_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetValuesCOO_C", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Implements MatGetRowIJ() for MPIAIJ by merging the diagonal and off-diagonal
  parts into a single sequential matrix B and delegating to its MatGetRowIJ().
  B is composed on A (which keeps a reference) so that the matching
  MatRestoreRowIJ_MPIAIJ() below can find it again; the local reference is
  dropped right after (continued below).
*/
static PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
{
  Mat B;

  PetscFunctionBegin;
  PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &B));
  PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject)B));
  PetscCall(MatGetRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
  /* drop the local reference; the composition on A keeps B alive until restore */
  PetscCall(MatDestroy(&B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Counterpart of MatGetRowIJ_MPIAIJ(): recovers the merged sequential matrix
  composed on A, restores its ia/ja arrays, and removes the composition
  (destroying the last reference to B).
*/
static PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
{
  Mat B;

  PetscFunctionBegin;
  PetscCall(PetscObjectQuery((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject *)&B));
  PetscCall(MatRestoreRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
  PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to `MATSEQAIJ` when constructed with a single process communicator,
   and `MATMPIAIJ` otherwise. As a result, for single process communicators,
   `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Key:
.  -mat_type aij - sets the matrix type to `MATAIJ` during a call to `MatSetFromOptions()`

   Level: beginner

   Developer Note:
   Subclasses include `MATAIJCUSPARSE`, `MATAIJPERM`, `MATAIJSELL`, `MATAIJMKL`, `MATAIJCRL`, `MATAIJKOKKOS`, and also automatically switches over to use inodes when
   enough exist.

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateAIJ()`, `MatCreateSeqAIJ()`, `MATSEQAIJ`, `MATMPIAIJ`
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to `MATSEQAIJCRL` when constructed with a single process communicator,
   and `MATMPIAIJCRL` otherwise. As a result, for single process communicators,
   `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Key:
.  -mat_type aijcrl - sets the matrix type to `MATMPIAIJCRL` during a call to `MatSetFromOptions()`

   Level: beginner

.seealso: [](ch_matrices), `Mat`, `MatCreateMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL`
M*/

/* Binds (or unbinds) the matrix and all of its sub-objects to the CPU. */
static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A, PetscBool flg)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) || defined(PETSC_HAVE_VIENNACL)
  A->boundtocpu = flg;
#endif
  if (a->A) PetscCall(MatBindToCPU(a->A, flg));
  if (a->B) PetscCall(MatBindToCPU(a->B, flg));

  /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
   * This maybe seems a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
   * to differ from the parent matrix.
   */
  if (a->lvec) PetscCall(VecBindToCPU(a->lvec, flg));
  if (a->diag) PetscCall(VecBindToCPU(a->diag, flg));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Propagates block sizes to the sequential blocks; the off-diagonal block B
   always gets column block size 1 since its columns are the scattered ghost
   columns. */
static PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data;

  PetscFunctionBegin;
  if (mat->A) {
    PetscCall(MatSetBlockSizes(mat->A, rbs, cbs));
    PetscCall(MatSetBlockSizes(mat->B, rbs, 1));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Builds an IS of the global indices of locally owned rows that contain at
  least one stored entry with a nonzero value (in either the diagonal or
  off-diagonal block).  If every row on every process is kept, *keptrows is
  left NULL.  Two passes: the first counts the locally "empty" rows so the
  result array can be sized exactly, the second collects the kept row indices.
*/
static PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M, IS *keptrows)
{
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ *)M->data;
  Mat_SeqAIJ      *a = (Mat_SeqAIJ *)mat->A->data;
  Mat_SeqAIJ      *b = (Mat_SeqAIJ *)mat->B->data;
  const PetscInt  *ia, *ib;
  const MatScalar *aa, *bb, *aav, *bav;
  PetscInt         na, nb, i, j, *rows, cnt = 0, n0rows;
  PetscInt         m = M->rmap->n, rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = NULL;

  ia = a->i;
  ib = b->i;
  PetscCall(MatSeqAIJGetArrayRead(mat->A, &aav));
  PetscCall(MatSeqAIJGetArrayRead(mat->B, &bav));
  /* pass 1: cnt = number of local rows whose stored values are all zero (or empty) */
  for (i = 0; i < m; i++) {
    na = ia[i + 1] - ia[i];
    nb = ib[i + 1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = aav + ia[i];
    for (j = 0; j < na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = PetscSafePointerPlusOffset(bav, ib[i]);
    for (j = 0; j < nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
  ok1:;
  }
  /* if no process found an empty row, leave *keptrows NULL (all rows kept) */
  PetscCallMPI(MPIU_Allreduce(&cnt, &n0rows, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)M)));
  if (!n0rows) {
    PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
    PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  PetscCall(PetscMalloc1(M->rmap->n - cnt, &rows));
  cnt = 0;
  /* pass 2: record the global index of each row that has a nonzero value */
  for (i = 0; i < m; i++) {
    na = ia[i + 1] - ia[i];
    nb = ib[i + 1] - ib[i];
    if (!na && !nb) continue;
    aa = aav + ia[i];
    for (j = 0; j < na; j++) {
      if (aa[j] !=
          0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = PetscSafePointerPlusOffset(bav, ib[i]);
    for (j = 0; j < nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
  ok2:;
  }
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), cnt, rows, PETSC_OWN_POINTER, keptrows));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Sets the diagonal of Y from the vector D.  When the matrix is assembled and
  the row/column layouts are congruent the diagonal lives entirely in the
  local diagonal block, so the operation is delegated to it; otherwise fall
  back to the generic element-by-element implementation.
*/
static PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y, Vec D, InsertMode is)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Y->data;
  PetscBool   cong;

  PetscFunctionBegin;
  PetscCall(MatHasCongruentLayouts(Y, &cong));
  if (Y->assembled && cong) {
    PetscCall(MatDiagonalSet(aij->A, D, is));
  } else {
    PetscCall(MatDiagonalSet_Default(Y, D, is));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Creates an IS of the global indices of locally owned rows whose diagonal
  entry is missing or zero; the search is done on the diagonal block only and
  the local indices are shifted to global by the row ownership offset.
*/
static PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M, IS *zrows)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)M->data;
  PetscInt    i, rstart, nrows, *rows;

  PetscFunctionBegin;
  *zrows = NULL;
  PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A, &nrows, &rows));
  PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
  for (i = 0; i < nrows; i++) rows[i] += rstart;
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), nrows, rows, PETSC_OWN_POINTER, zrows));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Computes a per-column reduction (norm, sum, or mean of real/imaginary parts)
  over the whole matrix.  Each process accumulates the contributions of its
  local entries into a length-N work array indexed by global column (diagonal
  block entries are shifted by cmap->rstart; off-diagonal entries are mapped
  through garray), then a single MPI reduction combines the arrays.
*/
static PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A, PetscInt type, PetscReal *reductions)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)A->data;
  PetscInt           i, m, n, *garray = aij->garray;
  Mat_SeqAIJ        *a_aij = (Mat_SeqAIJ *)aij->A->data;
  Mat_SeqAIJ        *b_aij = (Mat_SeqAIJ *)aij->B->data;
  PetscReal         *work;
  const PetscScalar *dummy;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A, &m, &n));
  PetscCall(PetscCalloc1(n, &work));
  /* NOTE(review): the get/restore pairs below do not use the returned pointer;
     presumably they force any device-side values to be synced to the host
     before a_aij->a / b_aij->a are read directly — confirm */
  PetscCall(MatSeqAIJGetArrayRead(aij->A, &dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &dummy));
  PetscCall(MatSeqAIJGetArrayRead(aij->B, &dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &dummy));
  if (type == NORM_2) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i] * a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i] * b_aij->a[i]);
  } else if (type == NORM_1) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
  } else if (type == NORM_INFINITY) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]), work[garray[b_aij->j[i]]]);
  } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]);
  } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]);
  } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
  /* infinity norm combines with max; every other reduction combines with sum */
  if (type == NORM_INFINITY) {
    PetscCallMPI(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
  } else {
    PetscCallMPI(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscCall(PetscFree(work));
  if (type == NORM_2) {
    for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < n; i++) reductions[i] /= m;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Creates an IS of global row indices that have entries outside the diagonal
  block: the union of the off-block-diagonal rows of the diagonal part and the
  nonzero rows of the off-diagonal part, sorted with duplicates removed, then
  shifted to global numbering.
*/
static PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A, IS *is)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
  IS              sis, gis;
  const PetscInt *isis, *igis;
  PetscInt        n, *iis, nsis, ngis, rstart, i;

  PetscFunctionBegin;
  PetscCall(MatFindOffBlockDiagonalEntries(a->A, &sis));
  PetscCall(MatFindNonzeroRows(a->B, &gis));
  PetscCall(ISGetSize(gis, &ngis));
  PetscCall(ISGetSize(sis, &nsis));
  PetscCall(ISGetIndices(sis, &isis));
  PetscCall(ISGetIndices(gis, &igis));

  PetscCall(PetscMalloc1(ngis + nsis, &iis));
  PetscCall(PetscArraycpy(iis, igis, ngis));
  PetscCall(PetscArraycpy(iis + ngis, isis, nsis));
  n = ngis + nsis;
  PetscCall(PetscSortRemoveDupsInt(&n, iis));
  PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
  for (i = 0; i < n; i++) iis[i] += rstart;
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A), n, iis, PETSC_OWN_POINTER, is));

  PetscCall(ISRestoreIndices(sis, &isis));
  PetscCall(ISRestoreIndices(gis, &igis));
  PetscCall(ISDestroy(&sis));
  PetscCall(ISDestroy(&gis));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it it is not scalable (each processor
  has an order N integer array but is fast to access).
*/
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt    n = aij->B->cmap->n, i;

  PetscFunctionBegin;
  PetscCheck(!n || aij->garray, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MPIAIJ Matrix was assembled but is missing garray");
  /* entries are stored +1 so that 0 (the calloc default / hash-map miss value)
     means "global column not present in the off-diagonal part" */
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapICreateWithSize(n, &aij->colmap));
  for (i = 0; i < n; i++) PetscCall(PetscHMapISet(aij->colmap, aij->garray[i] + 1, i + 1));
#else
  PetscCall(PetscCalloc1(mat->cmap->N + 1, &aij->colmap));
  for (i = 0; i < n; i++) aij->colmap[aij->garray[i]] = i + 1;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Inserts (row,col,value) into the diagonal block A.  Relies on local
  variables of the caller (rp1/ap1/low1/high1/lastcol1/nrow1/...):
  a bisection-plus-scan locates the column in the sorted row; existing entries
  are added to or overwritten, otherwise the entry is inserted (reallocating
  the row through MatSeqXAIJReallocateAIJ when full) unless suppressed by
  nonew or by ignorezeroentries for off-diagonal zeros.
*/
#define MatSetValues_SeqAIJ_A_Private(row, col, value, addv, orow, ocol) \
  do { \
    if (col <= lastcol1) low1 = 0; \
    else high1 = nrow1; \
    lastcol1 = col; \
    while (high1 - low1 > 5) { \
      t = (low1 + high1) / 2; \
      if (rp1[t] > col) high1 = t; \
      else low1 = t; \
    } \
    for (_i = low1; _i < high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap1[_i] += value; \
          /* Not sure LogFlops will slow down the code or not */ \
          (void)PetscLogFlops(1.0); \
        } else ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries && row != col) { \
      low1  = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    if (nonew == 1) { \
      low1  = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A, am, 1, nrow1, row, col, rmax1, aa, ai, aj, rp1, ap1, aimax, nonew, MatScalar); \
    N = nrow1++ - 1; \
    a->nz++; \
    high1++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp1 + _i + 1, rp1 + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap1 + _i + 1, ap1 + _i, N - _i + 1)); \
    rp1[_i] = col; \
    ap1[_i] = value; \
  a_noinsert:; \
    ailen[row] = nrow1; \
  } while (0)

/*
  Same insertion logic as MatSetValues_SeqAIJ_A_Private() but for the
  off-diagonal block B (variables rp2/ap2/low2/high2/lastcol2/nrow2/...).
  Note the ignorezeroentries test here has no row != col exception, since
  off-diagonal entries are never on the diagonal.
*/
#define MatSetValues_SeqAIJ_B_Private(row, col, value, addv, orow, ocol) \
  do { \
    if (col <= lastcol2) low2 = 0; \
    else high2 = nrow2; \
    lastcol2 = col; \
    while (high2 - low2 > 5) { \
      t = (low2 + high2) / 2; \
      if (rp2[t] > col) high2 = t; \
      else low2 = t; \
    } \
    for (_i = low2; _i < high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap2[_i] += value; \
          (void)PetscLogFlops(1.0); \
        } else ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) { \
      low2  = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    if (nonew == 1) { \
      low2  = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B, bm, 1, nrow2, row, col, rmax2, ba, bi, bj, rp2, ap2, bimax, nonew, MatScalar); \
    N = nrow2++ - 1; \
    b->nz++; \
    high2++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp2 + _i + 1, rp2 + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap2 + _i + 1, ap2 + _i, N - _i + 1)); \
    rp2[_i] = col; \
    ap2[_i] = value; \
  b_noinsert:; \
    bilen[row] = nrow2; \
  } while (0)

/*
  Replaces the values of one locally owned (global) row, where v holds the
  row's values in global column order: entries left of the diagonal block,
  then the diagonal block, then entries right of it.  The sparsity pattern
  must already exist — values are copied straight into the stored arrays.
*/
static PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A, PetscInt row, const PetscScalar v[])
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ  *a = (Mat_SeqAIJ *)mat->A->data, *b = (Mat_SeqAIJ *)mat->B->data;
  PetscInt     l, *garray = mat->garray, diag;
  PetscScalar *aa, *ba;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size
     of row to the left of the diagonal part */
  PetscCall(MatGetOwnershipRange(A, &diag, NULL));
  row = row - diag; /* convert to local row index; 'diag' is also the global diagonal column offset */
  for (l = 0; l < b->i[row + 1] - b->i[row]; l++) {
    if (garray[b->j[b->i[row] + l]] > diag) break;
  }
  /* l = number of off-diagonal entries strictly left of the diagonal block */
  if (l) {
    PetscCall(MatSeqAIJGetArray(mat->B, &ba));
    PetscCall(PetscArraycpy(ba + b->i[row], v, l));
    PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
  }

  /* diagonal part */
  if (a->i[row + 1] - a->i[row]) {
    PetscCall(MatSeqAIJGetArray(mat->A, &aa));
    PetscCall(PetscArraycpy(aa + a->i[row], v + l, a->i[row + 1] - a->i[row]));
    PetscCall(MatSeqAIJRestoreArray(mat->A, &aa));
  }

  /* right of diagonal part */
  if (b->i[row + 1] - b->i[row] - l) {
    PetscCall(MatSeqAIJGetArray(mat->B, &ba));
    PetscCall(PetscArraycpy(ba + b->i[row] + l, v + l + a->i[row + 1] - a->i[row], b->i[row + 1] - b->i[row] - l));
    PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Core MatSetValues() implementation for MPIAIJ.  Locally owned rows are
  inserted directly into the diagonal (A) or off-diagonal (B) block via the
  insertion macros above; when a previously assembled matrix receives a column
  that is new to B, the matrix is first "disassembled" (B converted back to
  global column numbering) and the macro's cached variables are refreshed.
  Rows owned by other processes are queued in the stash for communication at
  assembly time (unless donotstash/nooffprocentries is set).
*/
PetscErrorCode MatSetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscScalar value = 0.0;
  PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
  PetscBool   roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat         A = aij->A;
  Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
  PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
  PetscBool   ignorezeroentries = a->ignorezeroentries;
  Mat         B = aij->B;
  Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data;
  PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
  MatScalar  *aa, *ba;
  PetscInt   *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
  PetscInt    nonew;
  MatScalar  *ap1, *ap2;

  PetscFunctionBegin;
  PetscCall(MatSeqAIJGetArray(A, &aa));
  PetscCall(MatSeqAIJGetArray(B, &ba));
  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue; /* negative rows are ignored by convention */
    PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    if (im[i] >= rstart && im[i] < rend) {
      /* locally owned row: cache the row pointers used by the insertion macros */
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = PetscSafePointerPlusOffset(aj, ai[row]);
      ap1      = PetscSafePointerPlusOffset(aa, ai[row]);
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = PetscSafePointerPlusOffset(bj, bi[row]);
      ap2      = PetscSafePointerPlusOffset(ba, bi[row]);
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j = 0; j < n; j++) {
        if (v) value = roworiented ? v[i * n + j] : v[i + j * m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
        if (in[j] >= cstart && in[j] < cend) {
          /* column falls in the diagonal block */
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
        } else if (in[j] < 0) {
          continue;
        } else {
          PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          if (mat->was_assembled) {
            if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); /* map global col ids to local ones */
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ *)aij->B->data)->nonew) { /* col < 0 means in[j] is a new col for B */
              PetscCall(MatDisAssemble_MPIAIJ(mat, PETSC_FALSE));  /* Change aij->B from reduced/local format to expanded/global format */
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ *)B->data;
              bimax = b->imax;
              bi    = b->i;
              bilen = b->ilen;
              bj    = b->j;
              ba    = b->a;
              rp2   = PetscSafePointerPlusOffset(bj, bi[row]);
              ap2   = PetscSafePointerPlusOffset(ba, bi[row]);
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
              if (1 == ((Mat_SeqAIJ *)aij->B->data)->nonew) {
                PetscCall(PetscInfo(mat, "Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n", (double)PetscRealPart(value), im[i], in[j]));
              } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
            }
          } else col = in[j]; /* not yet assembled: B uses global column numbering */
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
        }
      }
    } else {
      /* off-process row: stash for exchange during assembly */
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i * n), (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i), m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
  }
  PetscCall(MatSeqAIJRestoreArray(A, &aa)); /* aa, bb might have been free'd due to reallocation above. But we don't access them here */
  PetscCall(MatSeqAIJRestoreArray(B, &ba));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
  The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
  No off-processor parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  Mat         A = aij->A; /* diagonal part of the matrix */
  Mat         B = aij->B; /* off-diagonal part of the matrix */
  Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, col;
  PetscInt   *ailen = a->ilen, *aj = a->j;
  PetscInt   *bilen = b->ilen, *bj = b->j;
  PetscInt    am = aij->A->rmap->n, j;
  PetscInt    diag_so_far = 0, dnz;
  PetscInt    offd_so_far = 0, onz;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j = 0; j < am; j++) {
    dnz = onz = 0;
    /* Iterate over all non-zero columns of the current row */
    for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[diag_so_far++] = mat_j[col] - cstart; /* diagonal block stores local column ids */
        dnz++;
      } else { /* off-diagonal entries */
        bj[offd_so_far++] = mat_j[col]; /* off-diagonal block keeps global ids until assembly */
        onz++;
      }
    }
    ailen[j] = dnz;
    bilen[j] = onz;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
  The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
  No off-processor parts of the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ.
  Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
  would not be true and the more complex MatSetValues_MPIAIJ has to be used.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[], const PetscScalar mat_a[])
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *)mat->data;
  Mat          A = aij->A; /* diagonal part of the matrix */
  Mat          B = aij->B; /* off-diagonal part of the matrix */
  Mat_SeqAIJ  *aijd = (Mat_SeqAIJ *)aij->A->data, *aijo = (Mat_SeqAIJ *)aij->B->data;
  Mat_SeqAIJ  *a = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJ  *b = (Mat_SeqAIJ *)B->data;
  PetscInt     cstart = mat->cmap->rstart, cend = mat->cmap->rend;
  PetscInt    *ailen = a->ilen, *aj = a->j;
  PetscInt    *bilen = b->ilen, *bj = b->j;
  PetscInt     am = aij->A->rmap->n, j;
  PetscInt    *full_diag_i = aijd->i, *full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
  PetscInt     col, dnz_row, onz_row, rowstart_diag, rowstart_offd;
  PetscScalar *aa = a->a, *ba = b->a;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j = 0; j < am; j++) {
    dnz_row = onz_row = 0;
    rowstart_offd = full_offd_i[j];
    rowstart_diag = full_diag_i[j];
    /* Iterate over all non-zero columns of the current row */
    for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[rowstart_diag + dnz_row] = mat_j[col] - cstart;
        aa[rowstart_diag + dnz_row] = mat_a[col];
        dnz_row++;
      } else { /* off-diagonal entries */
        bj[rowstart_offd + onz_row] = mat_j[col];
        ba[rowstart_offd + onz_row] = mat_a[col];
        onz_row++;
      }
    }
    ailen[j] = dnz_row;
    bilen[j] = onz_row;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Implements MatGetValues() for MPIAIJ.  Only locally owned rows may be
  requested.  Diagonal-block columns are fetched from A; other columns are
  mapped through colmap to the off-diagonal block B, and columns that are not
  stored locally yield 0.0.
*/
static PetscErrorCode MatGetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
    PetscCheck(idxm[i] >= rstart && idxm[i] < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported, row requested %" PetscInt_FMT " range [%" PetscInt_FMT " %" PetscInt_FMT ")", idxm[i], rstart, rend);
    row = idxm[i] - rstart;
    for (j = 0; j < n; j++) {
      if (idxn[j] < 0) continue; /* negative column */
      PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
      if (idxn[j] >= cstart && idxn[j] < cend) {
        col = idxn[j] - cstart;
        PetscCall(MatGetValues(aij->A, 1, &row, 1, &col, v + i * n + j));
      } else {
        if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
        PetscCall(PetscHMapIGetWithDefault(aij->colmap, idxn[j] + 1, 0, &col));
        col--;
#else
        col = aij->colmap[idxn[j]] - 1;
#endif
        /* not stored locally (or stale colmap entry): the value is implicitly zero */
        if ((col < 0) || (aij->garray[col] != idxn[j])) *(v + i * n + j) = 0.0;
        else PetscCall(MatGetValues(aij->B, 1, &row, 1, &col, v + i * n + j));
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Starts communication of the stashed off-process entries; a no-op when
  stashing is disabled or off-process entries were promised not to occur.
*/
static PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt    nstash, reallocs;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
  PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
  PetscCall(PetscInfo(aij->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Finishes assembly: drains the stash and inserts the received off-process
  entries, assembles the sequential blocks, coordinates disassembly across
  ranks, and (on first final assembly) builds the multiply machinery
  (continued below).
*/
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *)mat->data;
  PetscMPIInt  n;
  PetscInt     i, j, rstart, ncols, flg;
  PetscInt    *row, *col;
  PetscBool    other_disassembled;
  PetscScalar *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart)
break; 795 } 796 if (j < n) ncols = j - i; 797 else ncols = n - i; 798 /* Now assemble all these values with a single function call */ 799 PetscCall(MatSetValues_MPIAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode)); 800 i = j; 801 } 802 } 803 PetscCall(MatStashScatterEnd_Private(&mat->stash)); 804 } 805 #if defined(PETSC_HAVE_DEVICE) 806 if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU; 807 /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */ 808 if (mat->boundtocpu) { 809 PetscCall(MatBindToCPU(aij->A, PETSC_TRUE)); 810 PetscCall(MatBindToCPU(aij->B, PETSC_TRUE)); 811 } 812 #endif 813 PetscCall(MatAssemblyBegin(aij->A, mode)); 814 PetscCall(MatAssemblyEnd(aij->A, mode)); 815 816 /* determine if any processor has disassembled, if so we must 817 also disassemble ourself, in order that we may reassemble. */ 818 /* 819 if nonzero structure of submatrix B cannot change then we know that 820 no processor disassembled thus we can skip this stuff 821 */ 822 if (!((Mat_SeqAIJ *)aij->B->data)->nonew) { 823 PetscCallMPI(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 824 if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globally it does not */ 825 PetscCall(MatDisAssemble_MPIAIJ(mat, PETSC_FALSE)); 826 } 827 } 828 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIAIJ(mat)); 829 PetscCall(MatSetOption(aij->B, MAT_USE_INODES, PETSC_FALSE)); 830 #if defined(PETSC_HAVE_DEVICE) 831 if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU; 832 #endif 833 PetscCall(MatAssemblyBegin(aij->B, mode)); 834 PetscCall(MatAssemblyEnd(aij->B, mode)); 835 836 PetscCall(PetscFree2(aij->rowvalues, aij->rowindices)); 
837 838 aij->rowvalues = NULL; 839 840 PetscCall(VecDestroy(&aij->diag)); 841 842 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 843 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ *)aij->A->data)->nonew) { 844 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 845 PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat))); 846 } 847 #if defined(PETSC_HAVE_DEVICE) 848 mat->offloadmask = PETSC_OFFLOAD_BOTH; 849 #endif 850 PetscFunctionReturn(PETSC_SUCCESS); 851 } 852 853 static PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 854 { 855 Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data; 856 857 PetscFunctionBegin; 858 PetscCall(MatZeroEntries(l->A)); 859 PetscCall(MatZeroEntries(l->B)); 860 PetscFunctionReturn(PETSC_SUCCESS); 861 } 862 863 static PetscErrorCode MatZeroRows_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 864 { 865 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 866 PetscInt *lrows; 867 PetscInt r, len; 868 PetscBool cong; 869 870 PetscFunctionBegin; 871 /* get locally owned rows */ 872 PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows)); 873 PetscCall(MatHasCongruentLayouts(A, &cong)); 874 /* fix right-hand side if needed */ 875 if (x && b) { 876 const PetscScalar *xx; 877 PetscScalar *bb; 878 879 PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout"); 880 PetscCall(VecGetArrayRead(x, &xx)); 881 PetscCall(VecGetArray(b, &bb)); 882 for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]]; 883 PetscCall(VecRestoreArrayRead(x, &xx)); 884 PetscCall(VecRestoreArray(b, &bb)); 885 } 886 887 if (diag != 0.0 && cong) { 888 PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL)); 889 PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 890 } else if (diag != 0.0) { /* non-square or non congruent layouts -> if 
keepnonzeropattern is false, we allow for new insertion */ 891 Mat_SeqAIJ *aijA = (Mat_SeqAIJ *)mat->A->data; 892 Mat_SeqAIJ *aijB = (Mat_SeqAIJ *)mat->B->data; 893 PetscInt nnwA, nnwB; 894 PetscBool nnzA, nnzB; 895 896 nnwA = aijA->nonew; 897 nnwB = aijB->nonew; 898 nnzA = aijA->keepnonzeropattern; 899 nnzB = aijB->keepnonzeropattern; 900 if (!nnzA) { 901 PetscCall(PetscInfo(mat->A, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n")); 902 aijA->nonew = 0; 903 } 904 if (!nnzB) { 905 PetscCall(PetscInfo(mat->B, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n")); 906 aijB->nonew = 0; 907 } 908 /* Must zero here before the next loop */ 909 PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL)); 910 PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 911 for (r = 0; r < len; ++r) { 912 const PetscInt row = lrows[r] + A->rmap->rstart; 913 if (row >= A->cmap->N) continue; 914 PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES)); 915 } 916 aijA->nonew = nnwA; 917 aijB->nonew = nnwB; 918 } else { 919 PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL)); 920 PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 921 } 922 PetscCall(PetscFree(lrows)); 923 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 924 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 925 926 /* only change matrix nonzero state if pattern was allowed to be changed */ 927 if (!((Mat_SeqAIJ *)mat->A->data)->keepnonzeropattern || !((Mat_SeqAIJ *)mat->A->data)->nonew) { 928 PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate; 929 PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A))); 930 } 931 PetscFunctionReturn(PETSC_SUCCESS); 932 } 933 934 static PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 
935 { 936 Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data; 937 PetscInt n = A->rmap->n; 938 PetscInt i, j, r, m, len = 0; 939 PetscInt *lrows, *owners = A->rmap->range; 940 PetscMPIInt p = 0; 941 PetscSFNode *rrows; 942 PetscSF sf; 943 const PetscScalar *xx; 944 PetscScalar *bb, *mask, *aij_a; 945 Vec xmask, lmask; 946 Mat_SeqAIJ *aij = (Mat_SeqAIJ *)l->B->data; 947 const PetscInt *aj, *ii, *ridx; 948 PetscScalar *aa; 949 950 PetscFunctionBegin; 951 /* Create SF where leaves are input rows and roots are owned rows */ 952 PetscCall(PetscMalloc1(n, &lrows)); 953 for (r = 0; r < n; ++r) lrows[r] = -1; 954 PetscCall(PetscMalloc1(N, &rrows)); 955 for (r = 0; r < N; ++r) { 956 const PetscInt idx = rows[r]; 957 PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N); 958 if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */ 959 PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p)); 960 } 961 rrows[r].rank = p; 962 rrows[r].index = rows[r] - owners[p]; 963 } 964 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 965 PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER)); 966 /* Collect flags for rows to be zeroed */ 967 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR)); 968 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR)); 969 PetscCall(PetscSFDestroy(&sf)); 970 /* Compress and put in row numbers */ 971 for (r = 0; r < n; ++r) 972 if (lrows[r] >= 0) lrows[len++] = r; 973 /* zero diagonal part of matrix */ 974 PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b)); 975 /* handle off-diagonal part of matrix */ 976 PetscCall(MatCreateVecs(A, &xmask, NULL)); 977 PetscCall(VecDuplicate(l->lvec, &lmask)); 978 PetscCall(VecGetArray(xmask, &bb)); 979 for (i = 0; i < len; i++) bb[lrows[i]] = 1; 980 PetscCall(VecRestoreArray(xmask, 
&bb)); 981 PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD)); 982 PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD)); 983 PetscCall(VecDestroy(&xmask)); 984 if (x && b) { /* this code is buggy when the row and column layout don't match */ 985 PetscBool cong; 986 987 PetscCall(MatHasCongruentLayouts(A, &cong)); 988 PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout"); 989 PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD)); 990 PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD)); 991 PetscCall(VecGetArrayRead(l->lvec, &xx)); 992 PetscCall(VecGetArray(b, &bb)); 993 } 994 PetscCall(VecGetArray(lmask, &mask)); 995 /* remove zeroed rows of off-diagonal matrix */ 996 PetscCall(MatSeqAIJGetArray(l->B, &aij_a)); 997 ii = aij->i; 998 for (i = 0; i < len; i++) PetscCall(PetscArrayzero(PetscSafePointerPlusOffset(aij_a, ii[lrows[i]]), ii[lrows[i] + 1] - ii[lrows[i]])); 999 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1000 if (aij->compressedrow.use) { 1001 m = aij->compressedrow.nrows; 1002 ii = aij->compressedrow.i; 1003 ridx = aij->compressedrow.rindex; 1004 for (i = 0; i < m; i++) { 1005 n = ii[i + 1] - ii[i]; 1006 aj = aij->j + ii[i]; 1007 aa = aij_a + ii[i]; 1008 1009 for (j = 0; j < n; j++) { 1010 if (PetscAbsScalar(mask[*aj])) { 1011 if (b) bb[*ridx] -= *aa * xx[*aj]; 1012 *aa = 0.0; 1013 } 1014 aa++; 1015 aj++; 1016 } 1017 ridx++; 1018 } 1019 } else { /* do not use compressed row format */ 1020 m = l->B->rmap->n; 1021 for (i = 0; i < m; i++) { 1022 n = ii[i + 1] - ii[i]; 1023 aj = aij->j + ii[i]; 1024 aa = aij_a + ii[i]; 1025 for (j = 0; j < n; j++) { 1026 if (PetscAbsScalar(mask[*aj])) { 1027 if (b) bb[i] -= *aa * xx[*aj]; 1028 *aa = 0.0; 1029 } 1030 aa++; 1031 aj++; 1032 } 1033 } 1034 } 1035 if (x && b) { 1036 PetscCall(VecRestoreArray(b, &bb)); 1037 
PetscCall(VecRestoreArrayRead(l->lvec, &xx)); 1038 } 1039 PetscCall(MatSeqAIJRestoreArray(l->B, &aij_a)); 1040 PetscCall(VecRestoreArray(lmask, &mask)); 1041 PetscCall(VecDestroy(&lmask)); 1042 PetscCall(PetscFree(lrows)); 1043 1044 /* only change matrix nonzero state if pattern was allowed to be changed */ 1045 if (!((Mat_SeqAIJ *)l->A->data)->nonew) { 1046 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1047 PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A))); 1048 } 1049 PetscFunctionReturn(PETSC_SUCCESS); 1050 } 1051 1052 static PetscErrorCode MatMult_MPIAIJ(Mat A, Vec xx, Vec yy) 1053 { 1054 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1055 PetscInt nt; 1056 VecScatter Mvctx = a->Mvctx; 1057 1058 PetscFunctionBegin; 1059 PetscCall(VecGetLocalSize(xx, &nt)); 1060 PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")", A->cmap->n, nt); 1061 PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1062 PetscUseTypeMethod(a->A, mult, xx, yy); 1063 PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1064 PetscUseTypeMethod(a->B, multadd, a->lvec, yy, yy); 1065 PetscFunctionReturn(PETSC_SUCCESS); 1066 } 1067 1068 static PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A, Vec bb, Vec xx) 1069 { 1070 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1071 1072 PetscFunctionBegin; 1073 PetscCall(MatMultDiagonalBlock(a->A, bb, xx)); 1074 PetscFunctionReturn(PETSC_SUCCESS); 1075 } 1076 1077 static PetscErrorCode MatMultAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz) 1078 { 1079 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1080 VecScatter Mvctx = a->Mvctx; 1081 1082 PetscFunctionBegin; 1083 PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1084 PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz)); 1085 PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, 
SCATTER_FORWARD)); 1086 PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz)); 1087 PetscFunctionReturn(PETSC_SUCCESS); 1088 } 1089 1090 static PetscErrorCode MatMultTranspose_MPIAIJ(Mat A, Vec xx, Vec yy) 1091 { 1092 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1093 1094 PetscFunctionBegin; 1095 /* do nondiagonal part */ 1096 PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec)); 1097 /* do local part */ 1098 PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy)); 1099 /* add partial results together */ 1100 PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE)); 1101 PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE)); 1102 PetscFunctionReturn(PETSC_SUCCESS); 1103 } 1104 1105 static PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat, Mat Bmat, PetscReal tol, PetscBool *f) 1106 { 1107 MPI_Comm comm; 1108 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)Amat->data, *Bij = (Mat_MPIAIJ *)Bmat->data; 1109 Mat Adia = Aij->A, Bdia = Bij->A, Aoff, Boff, *Aoffs, *Boffs; 1110 IS Me, Notme; 1111 PetscInt M, N, first, last, *notme, i; 1112 PetscBool lf; 1113 PetscMPIInt size; 1114 1115 PetscFunctionBegin; 1116 /* Easy test: symmetric diagonal block */ 1117 PetscCall(MatIsTranspose(Adia, Bdia, tol, &lf)); 1118 PetscCallMPI(MPIU_Allreduce(&lf, f, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)Amat))); 1119 if (!*f) PetscFunctionReturn(PETSC_SUCCESS); 1120 PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm)); 1121 PetscCallMPI(MPI_Comm_size(comm, &size)); 1122 if (size == 1) PetscFunctionReturn(PETSC_SUCCESS); 1123 1124 /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. 
*/ 1125 PetscCall(MatGetSize(Amat, &M, &N)); 1126 PetscCall(MatGetOwnershipRange(Amat, &first, &last)); 1127 PetscCall(PetscMalloc1(N - last + first, ¬me)); 1128 for (i = 0; i < first; i++) notme[i] = i; 1129 for (i = last; i < M; i++) notme[i - last + first] = i; 1130 PetscCall(ISCreateGeneral(MPI_COMM_SELF, N - last + first, notme, PETSC_COPY_VALUES, &Notme)); 1131 PetscCall(ISCreateStride(MPI_COMM_SELF, last - first, first, 1, &Me)); 1132 PetscCall(MatCreateSubMatrices(Amat, 1, &Me, &Notme, MAT_INITIAL_MATRIX, &Aoffs)); 1133 Aoff = Aoffs[0]; 1134 PetscCall(MatCreateSubMatrices(Bmat, 1, &Notme, &Me, MAT_INITIAL_MATRIX, &Boffs)); 1135 Boff = Boffs[0]; 1136 PetscCall(MatIsTranspose(Aoff, Boff, tol, f)); 1137 PetscCall(MatDestroyMatrices(1, &Aoffs)); 1138 PetscCall(MatDestroyMatrices(1, &Boffs)); 1139 PetscCall(ISDestroy(&Me)); 1140 PetscCall(ISDestroy(&Notme)); 1141 PetscCall(PetscFree(notme)); 1142 PetscFunctionReturn(PETSC_SUCCESS); 1143 } 1144 1145 static PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz) 1146 { 1147 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1148 1149 PetscFunctionBegin; 1150 /* do nondiagonal part */ 1151 PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec)); 1152 /* do local part */ 1153 PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz)); 1154 /* add partial results together */ 1155 PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE)); 1156 PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE)); 1157 PetscFunctionReturn(PETSC_SUCCESS); 1158 } 1159 1160 /* 1161 This only works correctly for square matrices where the subblock A->A is the 1162 diagonal block 1163 */ 1164 static PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A, Vec v) 1165 { 1166 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1167 1168 PetscFunctionBegin; 1169 PetscCheck(A->rmap->N == A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block"); 1170 
PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "row partition must equal col partition"); 1171 PetscCall(MatGetDiagonal(a->A, v)); 1172 PetscFunctionReturn(PETSC_SUCCESS); 1173 } 1174 1175 static PetscErrorCode MatScale_MPIAIJ(Mat A, PetscScalar aa) 1176 { 1177 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1178 1179 PetscFunctionBegin; 1180 PetscCall(MatScale(a->A, aa)); 1181 PetscCall(MatScale(a->B, aa)); 1182 PetscFunctionReturn(PETSC_SUCCESS); 1183 } 1184 1185 static PetscErrorCode MatView_MPIAIJ_Binary(Mat mat, PetscViewer viewer) 1186 { 1187 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1188 Mat_SeqAIJ *A = (Mat_SeqAIJ *)aij->A->data; 1189 Mat_SeqAIJ *B = (Mat_SeqAIJ *)aij->B->data; 1190 const PetscInt *garray = aij->garray; 1191 const PetscScalar *aa, *ba; 1192 PetscInt header[4], M, N, m, rs, cs, cnt, i, ja, jb; 1193 PetscInt64 nz, hnz; 1194 PetscInt *rowlens; 1195 PetscInt *colidxs; 1196 PetscScalar *matvals; 1197 PetscMPIInt rank; 1198 1199 PetscFunctionBegin; 1200 PetscCall(PetscViewerSetUp(viewer)); 1201 1202 M = mat->rmap->N; 1203 N = mat->cmap->N; 1204 m = mat->rmap->n; 1205 rs = mat->rmap->rstart; 1206 cs = mat->cmap->rstart; 1207 nz = A->nz + B->nz; 1208 1209 /* write matrix header */ 1210 header[0] = MAT_FILE_CLASSID; 1211 header[1] = M; 1212 header[2] = N; 1213 PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_INT64, MPI_SUM, 0, PetscObjectComm((PetscObject)mat))); 1214 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 1215 if (rank == 0) PetscCall(PetscIntCast(hnz, &header[3])); 1216 PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT)); 1217 1218 /* fill in and store row lengths */ 1219 PetscCall(PetscMalloc1(m, &rowlens)); 1220 for (i = 0; i < m; i++) rowlens[i] = A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i]; 1221 PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT)); 1222 PetscCall(PetscFree(rowlens)); 1223 1224 /* fill in and store column 
indices */ 1225 PetscCall(PetscMalloc1(nz, &colidxs)); 1226 for (cnt = 0, i = 0; i < m; i++) { 1227 for (jb = B->i[i]; jb < B->i[i + 1]; jb++) { 1228 if (garray[B->j[jb]] > cs) break; 1229 colidxs[cnt++] = garray[B->j[jb]]; 1230 } 1231 for (ja = A->i[i]; ja < A->i[i + 1]; ja++) colidxs[cnt++] = A->j[ja] + cs; 1232 for (; jb < B->i[i + 1]; jb++) colidxs[cnt++] = garray[B->j[jb]]; 1233 } 1234 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz); 1235 PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 1236 PetscCall(PetscFree(colidxs)); 1237 1238 /* fill in and store nonzero values */ 1239 PetscCall(MatSeqAIJGetArrayRead(aij->A, &aa)); 1240 PetscCall(MatSeqAIJGetArrayRead(aij->B, &ba)); 1241 PetscCall(PetscMalloc1(nz, &matvals)); 1242 for (cnt = 0, i = 0; i < m; i++) { 1243 for (jb = B->i[i]; jb < B->i[i + 1]; jb++) { 1244 if (garray[B->j[jb]] > cs) break; 1245 matvals[cnt++] = ba[jb]; 1246 } 1247 for (ja = A->i[i]; ja < A->i[i + 1]; ja++) matvals[cnt++] = aa[ja]; 1248 for (; jb < B->i[i + 1]; jb++) matvals[cnt++] = ba[jb]; 1249 } 1250 PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &aa)); 1251 PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &ba)); 1252 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz); 1253 PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 1254 PetscCall(PetscFree(matvals)); 1255 1256 /* write block size option to the viewer's .info file */ 1257 PetscCall(MatView_Binary_BlockSizes(mat, viewer)); 1258 PetscFunctionReturn(PETSC_SUCCESS); 1259 } 1260 1261 #include <petscdraw.h> 1262 static PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer) 1263 { 1264 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1265 PetscMPIInt rank = aij->rank, size = aij->size; 1266 PetscBool 
isdraw, iascii, isbinary; 1267 PetscViewer sviewer; 1268 PetscViewerFormat format; 1269 1270 PetscFunctionBegin; 1271 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1272 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 1273 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1274 if (iascii) { 1275 PetscCall(PetscViewerGetFormat(viewer, &format)); 1276 if (format == PETSC_VIEWER_LOAD_BALANCE) { 1277 PetscInt i, nmax = 0, nmin = PETSC_INT_MAX, navg = 0, *nz, nzlocal = ((Mat_SeqAIJ *)aij->A->data)->nz + ((Mat_SeqAIJ *)aij->B->data)->nz; 1278 PetscCall(PetscMalloc1(size, &nz)); 1279 PetscCallMPI(MPI_Allgather(&nzlocal, 1, MPIU_INT, nz, 1, MPIU_INT, PetscObjectComm((PetscObject)mat))); 1280 for (i = 0; i < size; i++) { 1281 nmax = PetscMax(nmax, nz[i]); 1282 nmin = PetscMin(nmin, nz[i]); 1283 navg += nz[i]; 1284 } 1285 PetscCall(PetscFree(nz)); 1286 navg = navg / size; 1287 PetscCall(PetscViewerASCIIPrintf(viewer, "Load Balance - Nonzeros: Min %" PetscInt_FMT " avg %" PetscInt_FMT " max %" PetscInt_FMT "\n", nmin, navg, nmax)); 1288 PetscFunctionReturn(PETSC_SUCCESS); 1289 } 1290 PetscCall(PetscViewerGetFormat(viewer, &format)); 1291 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1292 MatInfo info; 1293 PetscInt *inodes = NULL; 1294 1295 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 1296 PetscCall(MatGetInfo(mat, MAT_LOCAL, &info)); 1297 PetscCall(MatInodeGetInodeSizes(aij->A, NULL, &inodes, NULL)); 1298 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 1299 if (!inodes) { 1300 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, 1301 info.memory)); 1302 } else { 1303 PetscCall( 1304 PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" 
PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, info.memory)); 1305 } 1306 PetscCall(MatGetInfo(aij->A, MAT_LOCAL, &info)); 1307 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used)); 1308 PetscCall(MatGetInfo(aij->B, MAT_LOCAL, &info)); 1309 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used)); 1310 PetscCall(PetscViewerFlush(viewer)); 1311 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 1312 PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n")); 1313 PetscCall(VecScatterView(aij->Mvctx, viewer)); 1314 PetscFunctionReturn(PETSC_SUCCESS); 1315 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1316 PetscInt inodecount, inodelimit, *inodes; 1317 PetscCall(MatInodeGetInodeSizes(aij->A, &inodecount, &inodes, &inodelimit)); 1318 if (inodes) { 1319 PetscCall(PetscViewerASCIIPrintf(viewer, "using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n", inodecount, inodelimit)); 1320 } else { 1321 PetscCall(PetscViewerASCIIPrintf(viewer, "not using I-node (on process 0) routines\n")); 1322 } 1323 PetscFunctionReturn(PETSC_SUCCESS); 1324 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1325 PetscFunctionReturn(PETSC_SUCCESS); 1326 } 1327 } else if (isbinary) { 1328 if (size == 1) { 1329 PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name)); 1330 PetscCall(MatView(aij->A, viewer)); 1331 } else { 1332 PetscCall(MatView_MPIAIJ_Binary(mat, viewer)); 1333 } 1334 PetscFunctionReturn(PETSC_SUCCESS); 1335 } else if (iascii && size == 1) { 1336 PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name)); 1337 PetscCall(MatView(aij->A, viewer)); 1338 
PetscFunctionReturn(PETSC_SUCCESS); 1339 } else if (isdraw) { 1340 PetscDraw draw; 1341 PetscBool isnull; 1342 PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); 1343 PetscCall(PetscDrawIsNull(draw, &isnull)); 1344 if (isnull) PetscFunctionReturn(PETSC_SUCCESS); 1345 } 1346 1347 { /* assemble the entire matrix onto first processor */ 1348 Mat A = NULL, Av; 1349 IS isrow, iscol; 1350 1351 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->rmap->N : 0, 0, 1, &isrow)); 1352 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->cmap->N : 0, 0, 1, &iscol)); 1353 PetscCall(MatCreateSubMatrix(mat, isrow, iscol, MAT_INITIAL_MATRIX, &A)); 1354 PetscCall(MatMPIAIJGetSeqAIJ(A, &Av, NULL, NULL)); 1355 /* The commented code uses MatCreateSubMatrices instead */ 1356 /* 1357 Mat *AA, A = NULL, Av; 1358 IS isrow,iscol; 1359 1360 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow)); 1361 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? 
mat->cmap->N : 0,0,1,&iscol)); 1362 PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA)); 1363 if (rank == 0) { 1364 PetscCall(PetscObjectReference((PetscObject)AA[0])); 1365 A = AA[0]; 1366 Av = AA[0]; 1367 } 1368 PetscCall(MatDestroySubMatrices(1,&AA)); 1369 */ 1370 PetscCall(ISDestroy(&iscol)); 1371 PetscCall(ISDestroy(&isrow)); 1372 /* 1373 Everyone has to call to draw the matrix since the graphics waits are 1374 synchronized across all processors that share the PetscDraw object 1375 */ 1376 PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 1377 if (rank == 0) { 1378 if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)Av, ((PetscObject)mat)->name)); 1379 PetscCall(MatView_SeqAIJ(Av, sviewer)); 1380 } 1381 PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 1382 PetscCall(MatDestroy(&A)); 1383 } 1384 PetscFunctionReturn(PETSC_SUCCESS); 1385 } 1386 1387 PetscErrorCode MatView_MPIAIJ(Mat mat, PetscViewer viewer) 1388 { 1389 PetscBool iascii, isdraw, issocket, isbinary; 1390 1391 PetscFunctionBegin; 1392 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 1393 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1394 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1395 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket)); 1396 if (iascii || isdraw || isbinary || issocket) PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat, viewer)); 1397 PetscFunctionReturn(PETSC_SUCCESS); 1398 } 1399 1400 static PetscErrorCode MatSOR_MPIAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) 1401 { 1402 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data; 1403 Vec bb1 = NULL; 1404 PetscBool hasop; 1405 1406 PetscFunctionBegin; 1407 if (flag == SOR_APPLY_UPPER) { 1408 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, 
fshift, lits, 1, xx)); 1409 PetscFunctionReturn(PETSC_SUCCESS); 1410 } 1411 1412 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) PetscCall(VecDuplicate(bb, &bb1)); 1413 1414 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1415 if (flag & SOR_ZERO_INITIAL_GUESS) { 1416 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1417 its--; 1418 } 1419 1420 while (its--) { 1421 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1422 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1423 1424 /* update rhs: bb1 = bb - B*x */ 1425 PetscCall(VecScale(mat->lvec, -1.0)); 1426 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 1427 1428 /* local sweep */ 1429 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx)); 1430 } 1431 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1432 if (flag & SOR_ZERO_INITIAL_GUESS) { 1433 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1434 its--; 1435 } 1436 while (its--) { 1437 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1438 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1439 1440 /* update rhs: bb1 = bb - B*x */ 1441 PetscCall(VecScale(mat->lvec, -1.0)); 1442 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 1443 1444 /* local sweep */ 1445 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx)); 1446 } 1447 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1448 if (flag & SOR_ZERO_INITIAL_GUESS) { 1449 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1450 its--; 1451 } 1452 while (its--) { 1453 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1454 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1455 
      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_EISENSTAT) {
    Vec xx1;

    PetscCall(VecDuplicate(bb, &xx1));
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx));

    PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
    if (!mat->diag) {
      /* lazily create and cache the diagonal; reused on subsequent calls */
      PetscCall(MatCreateVecs(matin, &mat->diag, NULL));
      PetscCall(MatGetDiagonal(matin, mat->diag));
    }
    PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop));
    if (hasop) {
      PetscCall(MatMultDiagonalBlock(matin, xx, bb1));
    } else {
      PetscCall(VecPointwiseMult(bb1, mat->diag, xx));
    }
    PetscCall(VecAYPX(bb1, (omega - 2.0) / omega, bb));

    PetscCall(MatMultAdd(mat->B, mat->lvec, bb1, bb1));

    /* local sweep */
    PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1));
    PetscCall(VecAXPY(xx, 1.0, xx1));
    PetscCall(VecDestroy(&xx1));
  } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel SOR not supported");

  PetscCall(VecDestroy(&bb1));

  /* propagate any factorization error detected in the local diagonal block */
  matin->factorerrortype = mat->A->factorerrortype;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Permute rows and columns of the parallel matrix A according to the index sets rowp/colp,
   returning the permuted matrix in B. The inverse permutations and the new nonzero counts
   are computed with PetscSF communication before assembling the result via MatSetValues(). */
static PetscErrorCode MatPermute_MPIAIJ(Mat A, IS rowp, IS colp, Mat *B)
{
  Mat             aA, aB, Aperm;
  const PetscInt *rwant, *cwant, *gcols, *ai, *bi, *aj, *bj;
  PetscScalar    *aa, *ba;
  PetscInt        i, j, m, n, ng, anz, bnz, *dnnz, *onnz, *tdnnz, *tonnz, *rdest, *cdest, *work, *gcdest;
  PetscSF         rowsf, sf;
  IS              parcolp = NULL;
  PetscBool       done;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(A, &m, &n));
  PetscCall(ISGetIndices(rowp, &rwant));
  PetscCall(ISGetIndices(colp, &cwant));
  PetscCall(PetscMalloc3(PetscMax(m, n), &work, m, &rdest, n, &cdest));

  /* Invert row permutation to find out where my rows should go */
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &rowsf));
  PetscCall(PetscSFSetGraphLayout(rowsf, A->rmap, A->rmap->n, NULL, PETSC_OWN_POINTER, rwant));
  PetscCall(PetscSFSetFromOptions(rowsf));
  for (i = 0; i < m; i++) work[i] = A->rmap->rstart + i;
  PetscCall(PetscSFReduceBegin(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));

  /* Invert column permutation to find out where my columns should go */
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraphLayout(sf, A->cmap, A->cmap->n, NULL, PETSC_OWN_POINTER, cwant));
  PetscCall(PetscSFSetFromOptions(sf));
  for (i = 0; i < n; i++) work[i] = A->cmap->rstart + i;
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, work, cdest, MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, work, cdest, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));

  PetscCall(ISRestoreIndices(rowp, &rwant));
  PetscCall(ISRestoreIndices(colp, &cwant));
  PetscCall(MatMPIAIJGetSeqAIJ(A, &aA, &aB, &gcols));

  /* Find out where my gcols (global columns of the off-diagonal part) should go */
  PetscCall(MatGetSize(aB, NULL, &ng));
  PetscCall(PetscMalloc1(ng, &gcdest));
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraphLayout(sf, A->cmap, ng, NULL, PETSC_OWN_POINTER, gcols));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFBcastBegin(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));

  /* Count diagonal/off-diagonal nonzeros of each permuted row for preallocation */
  PetscCall(PetscCalloc4(m, &dnnz, m, &onnz, m, &tdnnz, m, &tonnz));
  PetscCall(MatGetRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
  PetscCall(MatGetRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
  for (i = 0; i < m; i++) {
    PetscInt    row = rdest[i];
    PetscMPIInt rowner;
    PetscCall(PetscLayoutFindOwner(A->rmap, row, &rowner));
    for (j = ai[i]; j < ai[i + 1]; j++) {
      PetscInt    col = cdest[aj[j]];
      PetscMPIInt cowner;
      PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); /* Could build an index for the columns to eliminate this search */
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
    for (j = bi[i]; j < bi[i + 1]; j++) {
      PetscInt    col = gcdest[bj[j]];
      PetscMPIInt cowner;
      PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner));
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
  }
  /* ship the counts to the processes that will own the permuted rows */
  PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&rowsf));

  PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A), A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N, 0, tdnnz, 0, tonnz, &Aperm));
  PetscCall(MatSeqAIJGetArray(aA, &aa));
  PetscCall(MatSeqAIJGetArray(aB, &ba));
  for (i = 0; i < m; i++) {
    PetscInt *acols = dnnz, *bcols = onnz; /* Repurpose now-unneeded arrays */
    PetscInt  j0, rowlen;
    rowlen = ai[i + 1] - ai[i];
    for (j0 = j = 0; j < rowlen; j0 = j) { /* rowlen could be larger than number of rows m, so sum in batches */
      for (; j < PetscMin(rowlen, j0 + m); j++) acols[j - j0] = cdest[aj[ai[i] + j]];
      PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, acols, aa + ai[i] + j0, INSERT_VALUES));
    }
    rowlen = bi[i + 1] - bi[i];
    for (j0 = j = 0; j < rowlen; j0 = j) {
      for (; j < PetscMin(rowlen, j0 + m); j++) bcols[j - j0] = gcdest[bj[bi[i] + j]];
      PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, bcols, ba + bi[i] + j0, INSERT_VALUES));
    }
  }
  PetscCall(MatAssemblyBegin(Aperm, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(Aperm, MAT_FINAL_ASSEMBLY));
  PetscCall(MatRestoreRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
  PetscCall(MatRestoreRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
  PetscCall(MatSeqAIJRestoreArray(aA, &aa));
  PetscCall(MatSeqAIJRestoreArray(aB, &ba));
  PetscCall(PetscFree4(dnnz, onnz, tdnnz, tonnz));
  PetscCall(PetscFree3(work, rdest, cdest));
  PetscCall(PetscFree(gcdest));
  if (parcolp) PetscCall(ISDestroy(&colp));
  *B = Aperm;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Return the number of ghost (off-process) columns and, optionally, their global indices.
   The returned array is owned by the matrix; do not free it. */
static PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatGetSize(aij->B, NULL, nghosts));
  if (ghosts) *ghosts = aij->garray;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Gather matrix statistics (nonzeros, memory, mallocs) from the diagonal (A) and
   off-diagonal (B) blocks; combine locally or reduce over the communicator per 'flag'. */
static PetscErrorCode MatGetInfo_MPIAIJ(Mat matin, MatInfoType flag, MatInfo *info)
{
  Mat_MPIAIJ    *mat = (Mat_MPIAIJ *)matin->data;
  Mat            A = mat->A, B = mat->B;
  PetscLogDouble isend[5], irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;
  PetscCall(MatGetInfo(A, MAT_LOCAL, info));

  isend[0] = info->nz_used;
  isend[1] = info->nz_allocated;
  isend[2] = info->nz_unneeded;
  isend[3] = info->memory;
  isend[4] = info->mallocs;

  PetscCall(MatGetInfo(B, MAT_LOCAL, info));

  /* accumulate the B-block statistics on top of the A-block ones */
  isend[0] += info->nz_used;
  isend[1] += info->nz_allocated;
  isend[2] += info->nz_unneeded;
  isend[3] += info->memory;
  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Set an option on the parallel matrix; most options are forwarded to both the
   diagonal (a->A) and off-diagonal (a->B) sequential blocks. */
PetscErrorCode MatSetOption_MPIAIJ(Mat A, MatOption op, PetscBool flg)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  switch (op) {
  /* options forwarded unchanged to both sequential blocks */
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
  case MAT_FORM_EXPLICIT_TRANSPOSE:
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A, 1);
    a->roworiented = flg; /* also recorded on the parallel wrapper for MatSetValues */

    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_SORTED_FULL:
    PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
  case MAT_SPD:
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
  case MAT_SPD_ETERNAL:
    /* if the diagonal matrix is square it inherits some of the properties above */
    break;
  case MAT_SUBMAT_SINGLEIS:
    A->submat_singleis = flg;
    break;
  case MAT_STRUCTURE_ONLY:
    /* The option is handled directly by MatSetOption() */
    break;
  default:
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Return one locally-owned row of the parallel matrix, merging the diagonal (A) and
   off-diagonal (B) block rows into globally-sorted column order. Must be paired with
   MatRestoreRow_MPIAIJ(); only one row may be active at a time. */
PetscErrorCode MatGetRow_MPIAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)matin->data;
  PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
  PetscInt     i, *cworkA, *cworkB, **pcA, **pcB, cstart = matin->cmap->rstart;
  PetscInt     nztot, nzA, nzB, lrow, rstart = matin->rmap->rstart, rend = matin->rmap->rend;
  PetscInt    *cmap, *idx_p;

  PetscFunctionBegin;
  PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
       allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ *)mat->A->data, *Ba = (Mat_SeqAIJ *)mat->B->data;
    PetscInt    max = 1, tmp;
    for (i = 0; i < matin->rmap->n; i++) {
      tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    PetscCall(PetscMalloc2(max, &mat->rowvalues, max, &mat->rowindices));
  }

  PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows");
  lrow = row - rstart;

  /* only request from the blocks what the caller asked for */
  pvA = &vworkA;
  pcA = &cworkA;
  pvB = &vworkB;
  pcB = &cworkB;
  if (!v) {
    pvA = NULL;
    pvB = NULL;
  }
  if (!idx) {
    pcA = NULL;
    if (!v) pcB = NULL;
  }
  PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1; /* number of B entries whose global column precedes cstart */
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i = 0; i < nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
        for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* split point already computed in the values pass above */
          for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i]];
        } else {
          for (i = 0; i < nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart + cworkA[i];
        for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = NULL;
      if (v) *v = NULL;
    }
  }
  *nz = nztot;
  PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Release the row obtained with MatGetRow_MPIAIJ(); the work buffers are kept for reuse. */
PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCheck(aij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first");
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Compute a matrix norm (Frobenius, 1-norm, or infinity-norm) of the parallel matrix by
   combining the entries of the diagonal and off-diagonal blocks and reducing over the
   communicator. The 2-norm is not supported. */
static PetscErrorCode MatNorm_MPIAIJ(Mat mat, NormType type, PetscReal *norm)
{
  Mat_MPIAIJ      *aij = (Mat_MPIAIJ *)mat->data;
  Mat_SeqAIJ      *amat = (Mat_SeqAIJ *)aij->A->data, *bmat = (Mat_SeqAIJ *)aij->B->data;
  PetscInt         i, j, cstart = mat->cmap->rstart;
  PetscReal        sum = 0.0;
  const MatScalar *v, *amata, *bmata;

  PetscFunctionBegin;
  if (aij->size == 1) {
    /* single process: defer entirely to the sequential implementation */
    PetscCall(MatNorm(aij->A, type, norm));
  } else {
    PetscCall(MatSeqAIJGetArrayRead(aij->A, &amata));
    PetscCall(MatSeqAIJGetArrayRead(aij->B, &bmata));
    if (type == NORM_FROBENIUS) {
      v = amata;
      for (i = 0; i < amat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      v = bmata;
      for (i = 0; i < bmat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      PetscCallMPI(MPIU_Allreduce(&sum, norm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      *norm = PetscSqrtReal(*norm);
      PetscCall(PetscLogFlops(2.0 * amat->nz + 2.0 * bmat->nz));
    } else if (type == NORM_1) { /* max column norm */
      PetscReal *tmp;
      PetscInt  *jj, *garray = aij->garray;
      /* accumulate per-global-column absolute sums, then reduce across processes */
      PetscCall(PetscCalloc1(mat->cmap->N + 1, &tmp));
      *norm = 0.0;
      v  = amata;
      jj = amat->j;
      for (j = 0; j < amat->nz; j++) {
        tmp[cstart + *jj++] += PetscAbsScalar(*v);
        v++;
      }
      v  = bmata;
      jj = bmat->j;
      for (j = 0; j < bmat->nz; j++) {
        tmp[garray[*jj++]] += PetscAbsScalar(*v);
        v++;
      }
      PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, tmp, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      for (j = 0; j < mat->cmap->N; j++) {
        if (tmp[j] > *norm) *norm = tmp[j];
      }
      PetscCall(PetscFree(tmp));
      PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      for (j = 0; j < aij->A->rmap->n; j++) {
        v   = PetscSafePointerPlusOffset(amata, amat->i[j]);
        sum = 0.0;
        for (i = 0; i < amat->i[j + 1] - amat->i[j]; i++) {
          sum += PetscAbsScalar(*v);
          v++;
        }
        v = PetscSafePointerPlusOffset(bmata, bmat->i[j]);
        for (i = 0; i < bmat->i[j + 1] - bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v);
          v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      PetscCallMPI(MPIU_Allreduce(&ntemp, norm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat)));
      PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
    } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for two norm");
    PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &amata));
    PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &bmata));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Transpose the parallel matrix A into *matout (or in place when reuse is MAT_INPLACE_MATRIX).
   The diagonal block is transposed locally; the off-diagonal block is shipped to the new
   owners via MatSetValues(). Preallocation counts are computed with a PetscSF reduction. */
static PetscErrorCode MatTranspose_MPIAIJ(Mat A, MatReuse reuse, Mat *matout)
{
  Mat_MPIAIJ      *a = (Mat_MPIAIJ *)A->data, *b;
  Mat_SeqAIJ      *Aloc = (Mat_SeqAIJ *)a->A->data, *Bloc = (Mat_SeqAIJ *)a->B->data, *sub_B_diag;
  PetscInt         M = A->rmap->N, N = A->cmap->N, ma, na, mb, nb, row, *cols, *cols_tmp, *B_diag_ilen, i, ncol, A_diag_ncol;
  const PetscInt  *ai, *aj, *bi, *bj, *B_diag_i;
  Mat              B, A_diag, *B_diag;
  const MatScalar *pbv, *bv;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
  ma = A->rmap->n;
  na = A->cmap->n;
  mb = a->B->rmap->n;
  nb = a->B->cmap->n;
  ai = Aloc->i;
  aj = Aloc->j;
  bi = Bloc->i;
  bj = Bloc->j;
  if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
    PetscInt            *d_nnz, *g_nnz, *o_nnz;
    PetscSFNode         *oloc;
    PETSC_UNUSED PetscSF sf;

    PetscCall(PetscMalloc4(na, &d_nnz, na, &o_nnz, nb, &g_nnz, nb, &oloc));
    /* compute d_nnz for preallocation */
    PetscCall(PetscArrayzero(d_nnz, na));
    for (i = 0; i < ai[ma]; i++) d_nnz[aj[i]]++;
    /* compute local off-diagonal contributions */
    PetscCall(PetscArrayzero(g_nnz, nb));
    for (i = 0; i < bi[ma]; i++) g_nnz[bj[i]]++;
    /* map those to global */
    PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
    PetscCall(PetscSFSetGraphLayout(sf, A->cmap, nb, NULL, PETSC_USE_POINTER, a->garray));
    PetscCall(PetscSFSetFromOptions(sf));
    PetscCall(PetscArrayzero(o_nnz, na));
    PetscCall(PetscSFReduceBegin(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
    PetscCall(PetscSFReduceEnd(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
    PetscCall(PetscSFDestroy(&sf));

    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
    PetscCall(MatSetBlockSizes(B, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs)));
    PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
    PetscCall(PetscFree4(d_nnz, o_nnz, g_nnz, oloc));
  } else {
    B = *matout;
    PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
  }

  b           = (Mat_MPIAIJ *)B->data;
  A_diag      = a->A;
  B_diag      = &b->A;
  sub_B_diag  = (Mat_SeqAIJ *)(*B_diag)->data;
  A_diag_ncol = A_diag->cmap->N;
  B_diag_ilen = sub_B_diag->ilen;
  B_diag_i    = sub_B_diag->i;

  /* Set ilen for diagonal of B */
  for (i = 0; i < A_diag_ncol; i++) B_diag_ilen[i] = B_diag_i[i + 1] - B_diag_i[i];

  /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done
     very quickly (=without using MatSetValues), because all writes are local. */
  PetscCall(MatTransposeSetPrecursor(A_diag, *B_diag));
  PetscCall(MatTranspose(A_diag, MAT_REUSE_MATRIX, B_diag));

  /* copy over the B part */
  PetscCall(PetscMalloc1(bi[mb], &cols));
  PetscCall(MatSeqAIJGetArrayRead(a->B, &bv));
  pbv = bv;
  row = A->rmap->rstart;
  for (i = 0; i < bi[mb]; i++) cols[i] = a->garray[bj[i]];
  cols_tmp = cols;
  for (i = 0; i < mb; i++) {
    ncol = bi[i + 1] - bi[i];
    /* each source row becomes one column of the transpose */
    PetscCall(MatSetValues(B, ncol, cols_tmp, 1, &row, pbv, INSERT_VALUES));
    row++;
    if (pbv) pbv += ncol;
    if (cols_tmp) cols_tmp += ncol;
  }
  PetscCall(PetscFree(cols));
  PetscCall(MatSeqAIJRestoreArrayRead(a->B, &bv));

  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    /* in-place transpose: fold the result back into A */
    PetscCall(MatHeaderMerge(A, &B));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Scale the matrix rows by ll and columns by rr (either may be NULL). The scatter of rr
   to ghost entries is overlapped with the left scaling and the diagonal-block scaling. */
static PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat, Vec ll, Vec rr)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  Mat         a = aij->A, b = aij->B;
  PetscInt    s1, s2, s3;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(mat, &s2, &s3));
  if (rr) {
    PetscCall(VecGetLocalSize(rr, &s1));
    PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size");
    /* Overlap communication with computation.
     */
    PetscCall(VecScatterBegin(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
  }
  if (ll) {
    PetscCall(VecGetLocalSize(ll, &s1));
    PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size");
    PetscUseTypeMethod(b, diagonalscale, ll, NULL);
  }
  /* scale the diagonal block */
  PetscUseTypeMethod(a, diagonalscale, ll, rr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    PetscCall(VecScatterEnd(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscUseTypeMethod(b, diagonalscale, NULL, aij->lvec);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Clear the factored state; only the diagonal block carries factorization state here. */
static PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatSetUnfactored(a->A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Compare A and B entrywise: both the diagonal and off-diagonal blocks must match on
   every process; the local results are combined with a logical-AND reduction. */
static PetscErrorCode MatEqual_MPIAIJ(Mat A, Mat B, PetscBool *flag)
{
  Mat_MPIAIJ *matB = (Mat_MPIAIJ *)B->data, *matA = (Mat_MPIAIJ *)A->data;
  Mat         a, b, c, d;
  PetscBool   flg;

  PetscFunctionBegin;
  a = matA->A;
  b = matA->B;
  c = matB->A;
  d = matB->B;

  PetscCall(MatEqual(a, c, &flg));
  if (flg) PetscCall(MatEqual(b, d, &flg));
  PetscCallMPI(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Copy the values of A into B; uses the fast blockwise copy only when the nonzero
   patterns match and both matrices share the same copy implementation. */
static PetscErrorCode MatCopy_MPIAIJ(Mat A, Mat B, MatStructure str)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;

  PetscFunctionBegin;
  /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
  if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
    /* because of the column compression in the off-processor part of the matrix a->B,
       the number of columns in a->B and b->B may be different, hence we cannot call
       the MatCopy() directly on the two parts. If need be, we can provide a more
       efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
       then copying the submatrices */
    PetscCall(MatCopy_Basic(A, B, str));
  } else {
    PetscCall(MatCopy(a->A, b->A, str));
    PetscCall(MatCopy(a->B, b->B, str));
  }
  PetscCall(PetscObjectStateIncrease((PetscObject)B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   Computes the number of nonzeros per row needed for preallocation when X and Y
   have different nonzero structure.
*/
PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m, const PetscInt *xi, const PetscInt *xj, const PetscInt *xltog, const PetscInt *yi, const PetscInt *yj, const PetscInt *yltog, PetscInt *nnz)
{
  PetscInt i, j, k, nzx, nzy;

  PetscFunctionBegin;
  /* Set the number of nonzeros in the new matrix: merge-count the sorted column lists
     of each row of X and Y (mapped to global indices), counting duplicates once */
  for (i = 0; i < m; i++) {
    const PetscInt *xjj = PetscSafePointerPlusOffset(xj, xi[i]), *yjj = PetscSafePointerPlusOffset(yj, yi[i]);
    nzx    = xi[i + 1] - xi[i];
    nzy    = yi[i + 1] - yi[i];
    nnz[i] = 0;
    for (j = 0, k = 0; j < nzx; j++) {                                  /* Point in X */
      for (; k < nzy && yltog[yjj[k]] < xltog[xjj[j]]; k++) nnz[i]++;   /* Catch up to X */
      if (k < nzy && yltog[yjj[k]] == xltog[xjj[j]]) k++;               /* Skip duplicate */
      nnz[i]++;
    }
    for (; k < nzy; k++) nnz[i]++; /* remaining Y-only columns */
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz)
{
  PetscInt    m = Y->rmap->N; /* Y is a sequential block here, so N equals the local row count */
  Mat_SeqAIJ *x = (Mat_SeqAIJ *)X->data;
  Mat_SeqAIJ *y = (Mat_SeqAIJ *)Y->data;

  PetscFunctionBegin;
  PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Y = a*X + Y. Fast path for identical patterns; otherwise build a freshly preallocated
   matrix holding the union pattern and merge it back into Y. */
static PetscErrorCode MatAXPY_MPIAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
{
  Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data, *yy = (Mat_MPIAIJ *)Y->data;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscCall(MatAXPY(yy->A, a, xx->A, str));
    PetscCall(MatAXPY(yy->B, a, xx->B, str));
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    PetscCall(MatAXPY_Basic(Y, a, X, str));
  } else {
    Mat       B;
    PetscInt *nnz_d, *nnz_o;

    PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
    PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
    PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
    PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
    PetscCall(MatSetLayouts(B, Y->rmap, Y->cmap));
    PetscCall(MatSetType(B, ((PetscObject)Y)->type_name));
    PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A, xx->A, nnz_d));
    PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
    PetscCall(MatMPIAIJSetPreallocation(B, 0, nnz_d, 0, nnz_o));
    PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
    PetscCall(MatHeaderMerge(Y, &B)); /* B replaces Y's internals; Y keeps its identity */
    PetscCall(PetscFree(nnz_d));
    PetscCall(PetscFree(nnz_o));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat);

/* Complex-conjugate every entry; a no-op for real builds. */
static PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
{
  PetscFunctionBegin;
  if (PetscDefined(USE_COMPLEX)) {
    Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

    PetscCall(MatConjugate_SeqAIJ(aij->A));
    PetscCall(MatConjugate_SeqAIJ(aij->B));
  }
PetscFunctionReturn(PETSC_SUCCESS); 2142 } 2143 2144 static PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2145 { 2146 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2147 2148 PetscFunctionBegin; 2149 PetscCall(MatRealPart(a->A)); 2150 PetscCall(MatRealPart(a->B)); 2151 PetscFunctionReturn(PETSC_SUCCESS); 2152 } 2153 2154 static PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2155 { 2156 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2157 2158 PetscFunctionBegin; 2159 PetscCall(MatImaginaryPart(a->A)); 2160 PetscCall(MatImaginaryPart(a->B)); 2161 PetscFunctionReturn(PETSC_SUCCESS); 2162 } 2163 2164 static PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2165 { 2166 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2167 PetscInt i, *idxb = NULL, m = A->rmap->n; 2168 PetscScalar *va, *vv; 2169 Vec vB, vA; 2170 const PetscScalar *vb; 2171 2172 PetscFunctionBegin; 2173 PetscCall(MatCreateVecs(a->A, NULL, &vA)); 2174 PetscCall(MatGetRowMaxAbs(a->A, vA, idx)); 2175 2176 PetscCall(VecGetArrayWrite(vA, &va)); 2177 if (idx) { 2178 for (i = 0; i < m; i++) { 2179 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2180 } 2181 } 2182 2183 PetscCall(MatCreateVecs(a->B, NULL, &vB)); 2184 PetscCall(PetscMalloc1(m, &idxb)); 2185 PetscCall(MatGetRowMaxAbs(a->B, vB, idxb)); 2186 2187 PetscCall(VecGetArrayWrite(v, &vv)); 2188 PetscCall(VecGetArrayRead(vB, &vb)); 2189 for (i = 0; i < m; i++) { 2190 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2191 vv[i] = vb[i]; 2192 if (idx) idx[i] = a->garray[idxb[i]]; 2193 } else { 2194 vv[i] = va[i]; 2195 if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) idx[i] = a->garray[idxb[i]]; 2196 } 2197 } 2198 PetscCall(VecRestoreArrayWrite(vA, &vv)); 2199 PetscCall(VecRestoreArrayWrite(vA, &va)); 2200 PetscCall(VecRestoreArrayRead(vB, &vb)); 2201 PetscCall(PetscFree(idxb)); 2202 PetscCall(VecDestroy(&vA)); 2203 PetscCall(VecDestroy(&vB)); 2204 PetscFunctionReturn(PETSC_SUCCESS); 2205 } 2206 2207 static 
PetscErrorCode MatGetRowSumAbs_MPIAIJ(Mat A, Vec v) 2208 { 2209 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2210 Vec vB, vA; 2211 2212 PetscFunctionBegin; 2213 PetscCall(MatCreateVecs(a->A, NULL, &vA)); 2214 PetscCall(MatGetRowSumAbs(a->A, vA)); 2215 PetscCall(MatCreateVecs(a->B, NULL, &vB)); 2216 PetscCall(MatGetRowSumAbs(a->B, vB)); 2217 PetscCall(VecAXPY(vA, 1.0, vB)); 2218 PetscCall(VecDestroy(&vB)); 2219 PetscCall(VecCopy(vA, v)); 2220 PetscCall(VecDestroy(&vA)); 2221 PetscFunctionReturn(PETSC_SUCCESS); 2222 } 2223 2224 static PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2225 { 2226 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 2227 PetscInt m = A->rmap->n, n = A->cmap->n; 2228 PetscInt cstart = A->cmap->rstart, cend = A->cmap->rend; 2229 PetscInt *cmap = mat->garray; 2230 PetscInt *diagIdx, *offdiagIdx; 2231 Vec diagV, offdiagV; 2232 PetscScalar *a, *diagA, *offdiagA; 2233 const PetscScalar *ba, *bav; 2234 PetscInt r, j, col, ncols, *bi, *bj; 2235 Mat B = mat->B; 2236 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 2237 2238 PetscFunctionBegin; 2239 /* When a process holds entire A and other processes have no entry */ 2240 if (A->cmap->N == n) { 2241 PetscCall(VecGetArrayWrite(v, &diagA)); 2242 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV)); 2243 PetscCall(MatGetRowMinAbs(mat->A, diagV, idx)); 2244 PetscCall(VecDestroy(&diagV)); 2245 PetscCall(VecRestoreArrayWrite(v, &diagA)); 2246 PetscFunctionReturn(PETSC_SUCCESS); 2247 } else if (n == 0) { 2248 if (m) { 2249 PetscCall(VecGetArrayWrite(v, &a)); 2250 for (r = 0; r < m; r++) { 2251 a[r] = 0.0; 2252 if (idx) idx[r] = -1; 2253 } 2254 PetscCall(VecRestoreArrayWrite(v, &a)); 2255 } 2256 PetscFunctionReturn(PETSC_SUCCESS); 2257 } 2258 2259 PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx)); 2260 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2261 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2262 PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx)); 2263 2264 /* Get 
offdiagIdx[] for implicit 0.0 */ 2265 PetscCall(MatSeqAIJGetArrayRead(B, &bav)); 2266 ba = bav; 2267 bi = b->i; 2268 bj = b->j; 2269 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2270 for (r = 0; r < m; r++) { 2271 ncols = bi[r + 1] - bi[r]; 2272 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2273 offdiagA[r] = *ba; 2274 offdiagIdx[r] = cmap[0]; 2275 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2276 offdiagA[r] = 0.0; 2277 2278 /* Find first hole in the cmap */ 2279 for (j = 0; j < ncols; j++) { 2280 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2281 if (col > j && j < cstart) { 2282 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2283 break; 2284 } else if (col > j + n && j >= cstart) { 2285 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2286 break; 2287 } 2288 } 2289 if (j == ncols && ncols < A->cmap->N - n) { 2290 /* a hole is outside compressed Bcols */ 2291 if (ncols == 0) { 2292 if (cstart) { 2293 offdiagIdx[r] = 0; 2294 } else offdiagIdx[r] = cend; 2295 } else { /* ncols > 0 */ 2296 offdiagIdx[r] = cmap[ncols - 1] + 1; 2297 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2298 } 2299 } 2300 } 2301 2302 for (j = 0; j < ncols; j++) { 2303 if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) { 2304 offdiagA[r] = *ba; 2305 offdiagIdx[r] = cmap[*bj]; 2306 } 2307 ba++; 2308 bj++; 2309 } 2310 } 2311 2312 PetscCall(VecGetArrayWrite(v, &a)); 2313 PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA)); 2314 for (r = 0; r < m; ++r) { 2315 if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) { 2316 a[r] = diagA[r]; 2317 if (idx) idx[r] = cstart + diagIdx[r]; 2318 } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) { 2319 a[r] = diagA[r]; 2320 if (idx) { 2321 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2322 idx[r] = cstart + diagIdx[r]; 2323 } else idx[r] = offdiagIdx[r]; 2324 } 2325 } else { 2326 a[r] = offdiagA[r]; 2327 if (idx) 
      idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* v[r] = minimum value in local row r, taken over the diagonal block (mat->A) and the
   off-diagonal block (mat->B) combined; if idx is non-NULL, idx[r] = global column of a
   minimizing entry. Global columns of B with no stored value contribute an implicit 0.0,
   so the min is never above 0 when the off-diagonal part is not dense. */
static PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
  PetscInt           m = A->rmap->n, n = A->cmap->n;
  PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray; /* maps compressed B column -> global column */
  PetscInt          *diagIdx, *offdiagIdx;
  Vec                diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt           r, j, col, ncols, *bi, *bj;
  Mat                B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* everything is in the diagonal block; delegate directly, writing into v's array */
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMin(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (n == 0) {
    /* this rank owns no columns: rows are empty here, so report min of empty set */
    if (m) {
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = PETSC_MAX_REAL;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  /* NOTE(review): uses PetscCalloc2 while the Max variant uses PetscMalloc2 for the same
     arrays; the zeroing is harmless but the two routines are gratuitously inconsistent */
  PetscCall(PetscCalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMin(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r]   = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse so already KNOW minimum is 0.0 or lower */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    /* sweep the stored entries of this B row; keep the smaller of the stored value and
       the implicit-zero candidate set up above */
    for (j = 0; j < ncols; j++) {
      if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {
        offdiagA[r]   = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  /* merge diagonal-block and off-diagonal-block minima; ties go to the smaller global column */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* v[r] = maximum value in local row r over diagonal (mat->A) and off-diagonal (mat->B)
   blocks; optional idx[r] = global column of a maximizing entry. Implicit zeros of the
   compressed B block count as entries (mirror image of MatGetRowMin_MPIAIJ above). */
static PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
  PetscInt           m = A->rmap->n, n = A->cmap->n;
  PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray;
  PetscInt          *diagIdx, *offdiagIdx;
  Vec                diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt           r, j, col, ncols, *bi, *bj;
  Mat                B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMax(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (n == 0) {
    /* this rank owns no columns: rows are empty here, so report max of empty set */
    if (m) {
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = PETSC_MIN_REAL;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMax(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r]   = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    /* sweep the stored entries of this B row; keep the larger of the stored value and
       the implicit-zero candidate set up above */
    for (j = 0; j < ncols; j++) {
      if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {
        offdiagA[r]   = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  /* merge diagonal-block and off-diagonal-block maxima; ties go to the smaller global column */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Returns in *newmat a sequential matrix holding the (gathered) nonzero structure of mat;
   values are not copied (MAT_DO_NOT_GET_VALUES). Caller owns *newmat. */
PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat, Mat *newmat)
{
  Mat *dummy;

  PetscFunctionBegin;
  PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat, MAT_DO_NOT_GET_VALUES, MAT_INITIAL_MATRIX, &dummy));
  *newmat = *dummy;
  /* free only the one-element Mat array wrapper, not the matrix itself */
  PetscCall(PetscFree(dummy));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Delegates to the local diagonal block; block-diagonal inversion involves no off-process
   entries. Propagates any factorization error flag back to the parallel matrix. */
static PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A, const PetscScalar **values)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatInvertBlockDiagonal(a->A, values));
  A->factorerrortype = a->A->factorerrortype;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Fills the matrix with random values and (re)assembles it. For a not-yet-assembled matrix
   the off-diagonal block is filled skipping the locally owned column range
   [cmap->rstart, cmap->rend) — presumably because B still carries global column indices
   before assembly, so owned columns must not land there; TODO confirm against
   MatSetRandomSkipColumnRange_SeqAIJ_Private. */
static PetscErrorCode MatSetRandom_MPIAIJ(Mat x, PetscRandom rctx)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)x->data;

  PetscFunctionBegin;
  PetscCheck(x->assembled || x->preallocated, PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
  PetscCall(MatSetRandom(aij->A, rctx));
  if (x->assembled) {
    PetscCall(MatSetRandom(aij->B, rctx));
  } else {
    PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B, x->cmap->rstart, x->cmap->rend, rctx));
  }
  PetscCall(MatAssemblyBegin(x, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(x, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Method backing MatMPIAIJSetUseScalableIncreaseOverlap(): swaps the increaseoverlap
   function pointer between the scalable and default implementations. */
static PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A, PetscBool sc)
{
  PetscFunctionBegin;
  if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
  else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMPIAIJGetNumberNonzeros - gets the number of nonzeros in the matrix on this MPI rank

  Not Collective

  Input Parameter:
. A - the matrix

  Output Parameter:
. nz - the number of nonzeros

  Level: advanced

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`
@*/
PetscErrorCode MatMPIAIJGetNumberNonzeros(Mat A, PetscCount *nz)
{
  Mat_MPIAIJ *maij = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ *aaij = (Mat_SeqAIJ *)maij->A->data, *baij = (Mat_SeqAIJ *)maij->B->data;
  PetscBool   isaij;

  PetscFunctionBegin;
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIAIJ, &isaij));
  PetscCheck(isaij, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)A)->type_name);
  /* last entry of each CSR row-pointer array is the local nonzero count of that block */
  *nz = aaij->i[A->rmap->n] + baij->i[A->rmap->n];
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap

  Collective

  Input Parameters:
+ A - the matrix
- sc - `PETSC_TRUE` indicates use the scalable algorithm (default is not to use the scalable algorithm)

  Level: advanced

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`
@*/
PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A, PetscBool sc)
{
  PetscFunctionBegin;
  /* PetscTryMethod: silently a no-op for types that do not provide the method */
  PetscTryMethod(A, "MatMPIAIJSetUseScalableIncreaseOverlap_C", (Mat, PetscBool), (A, sc));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Processes MPIAIJ-specific runtime options (currently only -mat_increase_overlap_scalable). */
PetscErrorCode MatSetFromOptions_MPIAIJ(Mat A, PetscOptionItems *PetscOptionsObject)
{
  PetscBool sc = PETSC_FALSE, flg;

  PetscFunctionBegin;
  PetscOptionsHeadBegin(PetscOptionsObject, "MPIAIJ options");
  /* default reflects the currently installed increaseoverlap implementation */
  if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
  PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable", "Use a scalable algorithm to compute the overlap", "MatIncreaseOverlap", sc, &sc, &flg));
  if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, sc));
  PetscOptionsHeadEnd();
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Y += a*I, ensuring the diagonal of the local diagonal block is preallocated first so the
   shift does not trigger expensive mallocs inside MatShift_Basic(). */
static PetscErrorCode MatShift_MPIAIJ(Mat Y, PetscScalar a)
{
  Mat_MPIAIJ *maij = (Mat_MPIAIJ *)Y->data;
  Mat_SeqAIJ *aij  = (Mat_SeqAIJ *)maij->A->data;

  PetscFunctionBegin;
  if (!Y->preallocated) {
    PetscCall(MatMPIAIJSetPreallocation(Y, 1, NULL, 0, NULL));
  } else if (!aij->nz) { /* It does not matter if diagonals of Y only partially lie in maij->A. We just need an estimated preallocation. */
    PetscInt nonew = aij->nonew; /* preserve the "no new nonzeros" option across re-preallocation */
    PetscCall(MatSeqAIJSetPreallocation(maij->A, 1, NULL));
    aij->nonew = nonew;
  }
  PetscCall(MatShift_Basic(Y, a));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Reports whether the local diagonal block is missing a diagonal entry; *d (if requested)
   is converted from a local to a global row index by adding the row ownership start. */
static PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A, PetscBool *missing, PetscInt *d)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
  PetscCall(MatMissingDiagonal(a->A, missing, d));
  if (d) {
    PetscInt rstart;
    PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
    *d += rstart;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Variable-block-size analogue of MatInvertBlockDiagonal_MPIAIJ; blocks live entirely in
   the local diagonal part, so simply delegate. */
static PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A, PetscInt nblocks, const PetscInt *bsizes, PetscScalar *diag)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatInvertVariableBlockDiagonal(a->A, nblocks, bsizes, diag));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Drops explicitly stored zeros from both blocks; `keep` optionally preserves zero
   diagonal entries in the diagonal block (the off-diagonal block has no diagonal). */
static PetscErrorCode MatEliminateZeros_MPIAIJ(Mat A, PetscBool keep)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatEliminateZeros_SeqAIJ(a->A, keep)); // possibly keep zero diagonal coefficients
  PetscCall(MatEliminateZeros_SeqAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Operation table for MATMPIAIJ; the numeric comments give the slot index in struct _MatOps.
   NULL slots fall back to the defaults/unsupported handling in the Mat dispatch layer. */
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
                                       MatGetRow_MPIAIJ,
                                       MatRestoreRow_MPIAIJ,
                                       MatMult_MPIAIJ,
                                       /* 4*/ MatMultAdd_MPIAIJ,
                                       MatMultTranspose_MPIAIJ,
                                       MatMultTransposeAdd_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*10*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_MPIAIJ,
                                       MatTranspose_MPIAIJ,
                                       /*15*/ MatGetInfo_MPIAIJ,
                                       MatEqual_MPIAIJ,
                                       MatGetDiagonal_MPIAIJ,
                                       MatDiagonalScale_MPIAIJ,
                                       MatNorm_MPIAIJ,
                                       /*20*/ MatAssemblyBegin_MPIAIJ,
                                       MatAssemblyEnd_MPIAIJ,
                                       MatSetOption_MPIAIJ,
                                       MatZeroEntries_MPIAIJ,
                                       /*24*/ MatZeroRows_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*29*/ MatSetUp_MPI_Hash,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIAIJ,
                                       NULL,
                                       /*34*/ MatDuplicate_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*39*/ MatAXPY_MPIAIJ,
                                       MatCreateSubMatrices_MPIAIJ,
                                       MatIncreaseOverlap_MPIAIJ,
                                       MatGetValues_MPIAIJ,
                                       MatCopy_MPIAIJ,
                                       /*44*/ MatGetRowMax_MPIAIJ,
                                       MatScale_MPIAIJ,
                                       MatShift_MPIAIJ,
                                       MatDiagonalSet_MPIAIJ,
                                       MatZeroRowsColumns_MPIAIJ,
                                       /*49*/ MatSetRandom_MPIAIJ,
                                       MatGetRowIJ_MPIAIJ,
                                       MatRestoreRowIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*54*/ MatFDColoringCreate_MPIXAIJ,
                                       NULL,
                                       MatSetUnfactored_MPIAIJ,
                                       MatPermute_MPIAIJ,
                                       NULL,
                                       /*59*/ MatCreateSubMatrix_MPIAIJ,
                                       MatDestroy_MPIAIJ,
                                       MatView_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*64*/ NULL,
                                       MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*69*/ MatGetRowMaxAbs_MPIAIJ,
                                       MatGetRowMinAbs_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*75*/ MatFDColoringApply_AIJ,
                                       MatSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFindZeroDiagonals_MPIAIJ,
                                       /*80*/ NULL,
                                       NULL,
                                       NULL,
                                       /*83*/ MatLoad_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*89*/ NULL,
                                       NULL,
                                       MatMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatBindToCPU_MPIAIJ,
                                       /*99*/ MatProductSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIAIJ,
                                       NULL,
                                       /*104*/ MatSetValuesRow_MPIAIJ,
                                       MatRealPart_MPIAIJ,
                                       MatImaginaryPart_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*109*/ NULL,
                                       NULL,
                                       MatGetRowMin_MPIAIJ,
                                       NULL,
                                       MatMissingDiagonal_MPIAIJ,
                                       /*114*/ MatGetSeqNonzeroStructure_MPIAIJ,
                                       NULL,
                                       MatGetGhosts_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*119*/ MatMultDiagonalBlock_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatGetMultiProcBlock_MPIAIJ,
                                       /*124*/ MatFindNonzeroRows_MPIAIJ,
                                       MatGetColumnReductions_MPIAIJ,
                                       MatInvertBlockDiagonal_MPIAIJ,
                                       MatInvertVariableBlockDiagonal_MPIAIJ,
                                       MatCreateSubMatricesMPI_MPIAIJ,
                                       /*129*/ NULL,
                                       NULL,
                                       NULL,
                                       MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*139*/ MatSetBlockSizes_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFDColoringSetUp_MPIXAIJ,
                                       MatFindOffBlockDiagonalEntries_MPIAIJ,
                                       MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
                                       /*145*/ NULL,
                                       NULL,
                                       NULL,
                                       MatCreateGraph_Simple_AIJ,
                                       NULL,
                                       /*150*/ NULL,
                                       MatEliminateZeros_MPIAIJ,
                                       MatGetRowSumAbs_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*155*/ NULL,
                                       MatCopyHashToXAIJ_MPI_Hash};

/* Saves the current values of both blocks so they can be restored by MatRetrieveValues(). */
static PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatStoreValues(aij->A));
  PetscCall(MatStoreValues(aij->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Restores the values of both blocks previously saved by MatStoreValues(). */
static PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatRetrieveValues(aij->A));
  PetscCall(MatRetrieveValues(aij->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Method backing MatMPIAIJSetPreallocation(): destroys and recreates the sequential
   diagonal (b->A) and off-diagonal (b->B) blocks with the requested per-row nonzero
   estimates, discarding any previous colmap/garray/scatter state. On a single rank the
   off-diagonal block is created with zero columns. */
PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
  PetscMPIInt size;

  PetscFunctionBegin;
  if (B->hash_active) {
    /* leave hash-based insertion mode: restore the ops table saved in b->cops
       (presumably stashed when the hash was activated — confirm in mpihashmat.h) */
    B->ops[0]      = b->cops;
    B->hash_active = PETSC_FALSE;
  }
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));

#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&b->colmap));
#else
  PetscCall(PetscFree(b->colmap));
#endif
  PetscCall(PetscFree(b->garray));
  PetscCall(VecDestroy(&b->lvec));
  PetscCall(VecScatterDestroy(&b->Mvctx));

  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));

  /* preserve user-set SeqAIJ options across the destroy/create cycle */
  MatSeqXAIJGetOptions_Private(b->B);
  PetscCall(MatDestroy(&b->B));
  PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
  PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
  PetscCall(MatSetBlockSizesFromMats(b->B, B, B));
  PetscCall(MatSetType(b->B, MATSEQAIJ));
  MatSeqXAIJRestoreOptions_Private(b->B);

  MatSeqXAIJGetOptions_Private(b->A);
  PetscCall(MatDestroy(&b->A));
  PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
  PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
  PetscCall(MatSetBlockSizesFromMats(b->A, B, B));
  PetscCall(MatSetType(b->A, MATSEQAIJ));
  MatSeqXAIJRestoreOptions_Private(b->A);

  PetscCall(MatSeqAIJSetPreallocation(b->A, d_nz, d_nnz));
  PetscCall(MatSeqAIJSetPreallocation(b->B, o_nz, o_nnz));
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Resets the preallocation of both blocks so the matrix can be refilled from scratch;
   an assembled matrix is first disassembled (off-diagonal indices back to global form). */
static PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
{
  Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  if (B->assembled || B->was_assembled) PetscCall(MatDisAssemble_MPIAIJ(B, PETSC_TRUE));
  else {
    /* never assembled: just drop the column-compression state directly */
#if defined(PETSC_USE_CTABLE)
    PetscCall(PetscHMapIDestroy(&b->colmap));
#else
    PetscCall(PetscFree(b->colmap));
#endif
    PetscCall(PetscFree(b->garray));
    PetscCall(VecDestroy(&b->lvec));
  }
  PetscCall(VecScatterDestroy(&b->Mvctx));

  PetscCall(MatResetPreallocation(b->A));
  PetscCall(MatResetPreallocation(b->B));
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Duplicates an MPIAIJ matrix: layouts are referenced, the colmap/garray are deep-copied,
   the scatter context is shared by reference, and the two sequential blocks are duplicated
   with the requested copy option (values or just structure). */
PetscErrorCode MatDuplicate_MPIAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
{
  Mat         mat;
  Mat_MPIAIJ *a, *oldmat = (Mat_MPIAIJ *)matin->data;

  PetscFunctionBegin;
  *newmat = NULL;
  PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
  PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
  PetscCall(MatSetBlockSizesFromMats(mat, matin, matin));
  PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
  a = (Mat_MPIAIJ *)mat->data;

  mat->factortype = matin->factortype;
  mat->assembled  = matin->assembled;
  mat->insertmode = NOT_SET_VALUES;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = NULL; /* per-matrix MatGetRow() scratch is not copied */
  a->rowvalues    = NULL;
  a->getrowactive = PETSC_FALSE;

  PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
  PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));
  if (matin->hash_active) {
    PetscCall(MatSetUp(mat));
  } else {
    mat->preallocated = matin->preallocated;
    if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
      PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
#else
      PetscCall(PetscMalloc1(mat->cmap->N, &a->colmap));
      PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, mat->cmap->N));
#endif
    } else a->colmap = NULL;
    if (oldmat->garray) {
      PetscInt len;
      len = oldmat->B->cmap->n;
      PetscCall(PetscMalloc1(len + 1, &a->garray)); /* +1 keeps the allocation non-empty when len == 0 */
      if (len) PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
    } else a->garray = NULL;

    /* It may happen MatDuplicate is called with a non-assembled matrix
      In fact, MatDuplicate only requires the matrix to be preallocated
      This may happen inside a DMCreateMatrix_Shell */
    if (oldmat->lvec) PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
    if (oldmat->Mvctx) {
      /* the scatter is immutable for a fixed structure, so share it by reference */
      a->Mvctx = oldmat->Mvctx;
      PetscCall(PetscObjectReference((PetscObject)oldmat->Mvctx));
    }
    PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
    PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
  }
  PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
  *newmat = mat;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Loads an MPIAIJ matrix from a viewer; dispatches on viewer type (binary or HDF5). */
PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
{
  PetscBool isbinary, ishdf5;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
  PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
  /* force binary viewer to load .info file if it has not yet done so */
  PetscCall(PetscViewerSetUp(viewer));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
  if (isbinary) {
    PetscCall(MatLoad_MPIAIJ_Binary(newMat, viewer));
  } else if (ishdf5) {
#if defined(PETSC_HAVE_HDF5)
    PetscCall(MatLoad_AIJ_HDF5(newMat, viewer));
#else
    SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
#endif
  } else {
    SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Reads a matrix in PETSc binary format: header (classid, M, N, nz), per-row lengths,
   then column indices and values; each rank reads its own row slice collectively and
   assembles via MatMPIAIJSetPreallocationCSR(). */
PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
{
  PetscInt     header[4], M, N, m, nz, rows, cols, sum, i;
  PetscInt    *rowidxs, *colidxs;
  PetscScalar *matvals;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  /* read in matrix header */
  PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
  PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
  M  = header[1];
  N  = header[2];
  nz = header[3];
  PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
  PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
  PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIAIJ");

  /* set block sizes from the viewer's .info file */
  PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
  /* set global sizes if not set already */
  if (mat->rmap->N < 0) mat->rmap->N = M;
  if (mat->cmap->N < 0) mat->cmap->N = N;
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));

  /* check if the matrix sizes are correct */
  PetscCall(MatGetSize(mat, &rows, &cols));
  PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);

  /* read in row lengths and build row indices */
  PetscCall(MatGetLocalSize(mat, &m, NULL));
  PetscCall(PetscMalloc1(m + 1, &rowidxs));
  PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT));
  rowidxs[0] = 0;
  /* prefix-sum the row lengths into CSR row offsets */
  for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
  if (nz != PETSC_INT_MAX) {
    /* nz == PETSC_INT_MAX marks an unknown count in the file; otherwise cross-check */
    PetscCallMPI(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer)));
    PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);
  }

  /* read in column indices and matrix values */
  PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals));
  PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
  PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
  /* store matrix indices and values */
  PetscCall(MatMPIAIJSetPreallocationCSR(mat, rowidxs, colidxs, matvals));
  PetscCall(PetscFree(rowidxs));
  PetscCall(PetscFree2(colidxs, matvals));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Not scalable because of ISAllGather() unless getting all columns. */
static PetscErrorCode ISGetSeqIS_Private(Mat mat, IS iscol, IS *isseq)
{
  IS          iscol_local;
  PetscBool   isstride;
  PetscMPIInt lisstride = 0, gisstride;

  PetscFunctionBegin;
  /* check if we are grabbing all columns*/
  PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &isstride));

  if (isstride) {
    PetscInt start, len, mstart, mlen;
    PetscCall(ISStrideGetInfo(iscol, &start, NULL));
    PetscCall(ISGetLocalSize(iscol, &len));
    PetscCall(MatGetOwnershipRangeColumn(mat, &mstart, &mlen));
    /* local stride matches this rank's owned column range exactly */
    if (mstart == start && mlen - mstart == len) lisstride = 1;
  }

  /* all ranks must agree before taking the all-columns fast path (hence MPI_MIN) */
  PetscCallMPI(MPIU_Allreduce(&lisstride, &gisstride, 1, MPI_INT, MPI_MIN, PetscObjectComm((PetscObject)mat)));
  if (gisstride) {
    PetscInt N;
    PetscCall(MatGetSize(mat, NULL, &N));
    PetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &iscol_local));
    PetscCall(ISSetIdentity(iscol_local));
    PetscCall(PetscInfo(mat, "Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n"));
  } else {
    PetscInt cbs;
    PetscCall(ISGetBlockSize(iscol, &cbs));
    PetscCall(ISAllGather(iscol, &iscol_local));
    PetscCall(ISSetBlockSize(iscol_local, cbs));
  }

  *isseq = iscol_local;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3164 3165 Input Parameters: 3166 + mat - matrix 3167 . isrow - parallel row index set; its local indices are a subset of local columns of `mat`, 3168 i.e., mat->rstart <= isrow[i] < mat->rend 3169 - iscol - parallel column index set; its local indices are a subset of local columns of `mat`, 3170 i.e., mat->cstart <= iscol[i] < mat->cend 3171 3172 Output Parameters: 3173 + isrow_d - sequential row index set for retrieving mat->A 3174 . iscol_d - sequential column index set for retrieving mat->A 3175 . iscol_o - sequential column index set for retrieving mat->B 3176 - garray - column map; garray[i] indicates global location of iscol_o[i] in `iscol` 3177 */ 3178 static PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat, IS isrow, IS iscol, IS *isrow_d, IS *iscol_d, IS *iscol_o, PetscInt *garray[]) 3179 { 3180 Vec x, cmap; 3181 const PetscInt *is_idx; 3182 PetscScalar *xarray, *cmaparray; 3183 PetscInt ncols, isstart, *idx, m, rstart, *cmap1, count; 3184 Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data; 3185 Mat B = a->B; 3186 Vec lvec = a->lvec, lcmap; 3187 PetscInt i, cstart, cend, Bn = B->cmap->N; 3188 MPI_Comm comm; 3189 VecScatter Mvctx = a->Mvctx; 3190 3191 PetscFunctionBegin; 3192 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3193 PetscCall(ISGetLocalSize(iscol, &ncols)); 3194 3195 /* (1) iscol is a sub-column vector of mat, pad it with '-1.' 
to form a full vector x */ 3196 PetscCall(MatCreateVecs(mat, &x, NULL)); 3197 PetscCall(VecSet(x, -1.0)); 3198 PetscCall(VecDuplicate(x, &cmap)); 3199 PetscCall(VecSet(cmap, -1.0)); 3200 3201 /* Get start indices */ 3202 PetscCallMPI(MPI_Scan(&ncols, &isstart, 1, MPIU_INT, MPI_SUM, comm)); 3203 isstart -= ncols; 3204 PetscCall(MatGetOwnershipRangeColumn(mat, &cstart, &cend)); 3205 3206 PetscCall(ISGetIndices(iscol, &is_idx)); 3207 PetscCall(VecGetArray(x, &xarray)); 3208 PetscCall(VecGetArray(cmap, &cmaparray)); 3209 PetscCall(PetscMalloc1(ncols, &idx)); 3210 for (i = 0; i < ncols; i++) { 3211 xarray[is_idx[i] - cstart] = (PetscScalar)is_idx[i]; 3212 cmaparray[is_idx[i] - cstart] = i + isstart; /* global index of iscol[i] */ 3213 idx[i] = is_idx[i] - cstart; /* local index of iscol[i] */ 3214 } 3215 PetscCall(VecRestoreArray(x, &xarray)); 3216 PetscCall(VecRestoreArray(cmap, &cmaparray)); 3217 PetscCall(ISRestoreIndices(iscol, &is_idx)); 3218 3219 /* Get iscol_d */ 3220 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, iscol_d)); 3221 PetscCall(ISGetBlockSize(iscol, &i)); 3222 PetscCall(ISSetBlockSize(*iscol_d, i)); 3223 3224 /* Get isrow_d */ 3225 PetscCall(ISGetLocalSize(isrow, &m)); 3226 rstart = mat->rmap->rstart; 3227 PetscCall(PetscMalloc1(m, &idx)); 3228 PetscCall(ISGetIndices(isrow, &is_idx)); 3229 for (i = 0; i < m; i++) idx[i] = is_idx[i] - rstart; 3230 PetscCall(ISRestoreIndices(isrow, &is_idx)); 3231 3232 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idx, PETSC_OWN_POINTER, isrow_d)); 3233 PetscCall(ISGetBlockSize(isrow, &i)); 3234 PetscCall(ISSetBlockSize(*isrow_d, i)); 3235 3236 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3237 PetscCall(VecScatterBegin(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD)); 3238 PetscCall(VecScatterEnd(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD)); 3239 3240 PetscCall(VecDuplicate(lvec, &lcmap)); 3241 3242 PetscCall(VecScatterBegin(Mvctx, 
cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD)); 3243 PetscCall(VecScatterEnd(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD)); 3244 3245 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3246 /* off-process column indices */ 3247 count = 0; 3248 PetscCall(PetscMalloc1(Bn, &idx)); 3249 PetscCall(PetscMalloc1(Bn, &cmap1)); 3250 3251 PetscCall(VecGetArray(lvec, &xarray)); 3252 PetscCall(VecGetArray(lcmap, &cmaparray)); 3253 for (i = 0; i < Bn; i++) { 3254 if (PetscRealPart(xarray[i]) > -1.0) { 3255 idx[count] = i; /* local column index in off-diagonal part B */ 3256 cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3257 count++; 3258 } 3259 } 3260 PetscCall(VecRestoreArray(lvec, &xarray)); 3261 PetscCall(VecRestoreArray(lcmap, &cmaparray)); 3262 3263 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_COPY_VALUES, iscol_o)); 3264 /* cannot ensure iscol_o has same blocksize as iscol! */ 3265 3266 PetscCall(PetscFree(idx)); 3267 *garray = cmap1; 3268 3269 PetscCall(VecDestroy(&x)); 3270 PetscCall(VecDestroy(&cmap)); 3271 PetscCall(VecDestroy(&lcmap)); 3272 PetscFunctionReturn(PETSC_SUCCESS); 3273 } 3274 3275 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3276 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *submat) 3277 { 3278 Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data, *asub; 3279 Mat M = NULL; 3280 MPI_Comm comm; 3281 IS iscol_d, isrow_d, iscol_o; 3282 Mat Asub = NULL, Bsub = NULL; 3283 PetscInt n; 3284 3285 PetscFunctionBegin; 3286 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3287 3288 if (call == MAT_REUSE_MATRIX) { 3289 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3290 PetscCall(PetscObjectQuery((PetscObject)*submat, "isrow_d", (PetscObject *)&isrow_d)); 3291 PetscCheck(isrow_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "isrow_d passed in was not used before, cannot 
reuse"); 3292 3293 PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_d", (PetscObject *)&iscol_d)); 3294 PetscCheck(iscol_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_d passed in was not used before, cannot reuse"); 3295 3296 PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_o", (PetscObject *)&iscol_o)); 3297 PetscCheck(iscol_o, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_o passed in was not used before, cannot reuse"); 3298 3299 /* Update diagonal and off-diagonal portions of submat */ 3300 asub = (Mat_MPIAIJ *)(*submat)->data; 3301 PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->A)); 3302 PetscCall(ISGetLocalSize(iscol_o, &n)); 3303 if (n) PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->B)); 3304 PetscCall(MatAssemblyBegin(*submat, MAT_FINAL_ASSEMBLY)); 3305 PetscCall(MatAssemblyEnd(*submat, MAT_FINAL_ASSEMBLY)); 3306 3307 } else { /* call == MAT_INITIAL_MATRIX) */ 3308 PetscInt *garray; 3309 PetscInt BsubN; 3310 3311 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) 
*/ 3312 PetscCall(ISGetSeqIS_SameColDist_Private(mat, isrow, iscol, &isrow_d, &iscol_d, &iscol_o, &garray)); 3313 3314 /* Create local submatrices Asub and Bsub */ 3315 PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Asub)); 3316 PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Bsub)); 3317 3318 /* Create submatrix M */ 3319 PetscCall(MatCreateMPIAIJWithSeqAIJ(comm, Asub, Bsub, garray, &M)); 3320 3321 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3322 asub = (Mat_MPIAIJ *)M->data; 3323 3324 PetscCall(ISGetLocalSize(iscol_o, &BsubN)); 3325 n = asub->B->cmap->N; 3326 if (BsubN > n) { 3327 /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */ 3328 const PetscInt *idx; 3329 PetscInt i, j, *idx_new, *subgarray = asub->garray; 3330 PetscCall(PetscInfo(M, "submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n", n, BsubN)); 3331 3332 PetscCall(PetscMalloc1(n, &idx_new)); 3333 j = 0; 3334 PetscCall(ISGetIndices(iscol_o, &idx)); 3335 for (i = 0; i < n; i++) { 3336 if (j >= BsubN) break; 3337 while (subgarray[i] > garray[j]) j++; 3338 3339 if (subgarray[i] == garray[j]) { 3340 idx_new[i] = idx[j++]; 3341 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT, i, subgarray[i], j, garray[j]); 3342 } 3343 PetscCall(ISRestoreIndices(iscol_o, &idx)); 3344 3345 PetscCall(ISDestroy(&iscol_o)); 3346 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx_new, PETSC_OWN_POINTER, &iscol_o)); 3347 3348 } else if (BsubN < n) { 3349 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")", BsubN, asub->B->cmap->N); 3350 } 3351 3352 PetscCall(PetscFree(garray)); 3353 *submat = M; 3354 3355 /* Save isrow_d, 
iscol_d and iscol_o used in processor for next request */
    /* Compose the index sets on the submatrix so a later MAT_REUSE_MATRIX call can retrieve them */
    PetscCall(PetscObjectCompose((PetscObject)M, "isrow_d", (PetscObject)isrow_d));
    PetscCall(ISDestroy(&isrow_d)); /* composition took a reference; drop ours */

    PetscCall(PetscObjectCompose((PetscObject)M, "iscol_d", (PetscObject)iscol_d));
    PetscCall(ISDestroy(&iscol_d));

    PetscCall(PetscObjectCompose((PetscObject)M, "iscol_o", (PetscObject)iscol_o));
    PetscCall(ISDestroy(&iscol_o));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   MatCreateSubMatrix_MPIAIJ - front end for extracting a parallel submatrix of an MPIAIJ matrix.

   Dispatches to MatCreateSubMatrix_MPIAIJ_SameRowColDist() or MatCreateSubMatrix_MPIAIJ_SameRowDist()
   when isrow (and possibly iscol) has the same parallel layout as mat, otherwise falls back to the
   general MatCreateSubMatrix_MPIAIJ_nonscalable() path that gathers iscol to a sequential IS.
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
{
  IS        iscol_local = NULL, isrow_d;
  PetscInt  csize;
  PetscInt  n, i, j, start, end;
  PetscBool sameRowDist = PETSC_FALSE, sameDist[2], tsameDist[2];
  MPI_Comm  comm;

  PetscFunctionBegin;
  /* If isrow has same processor distribution as mat,
     call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    /* The distribution properties were determined on the MAT_INITIAL_MATRIX call; recover them
       from the objects composed on the submatrix at that time */
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "isrow_d", (PetscObject *)&isrow_d));
    if (isrow_d) {
      sameRowDist  = PETSC_TRUE;
      tsameDist[1] = PETSC_TRUE; /* sameColDist */
    } else {
      PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_local));
      if (iscol_local) {
        sameRowDist  = PETSC_TRUE;
        tsameDist[1] = PETSC_FALSE; /* !sameColDist */
      }
    }
  } else {
    /* Check if isrow has same processor distribution as mat */
    sameDist[0] = PETSC_FALSE;
    PetscCall(ISGetLocalSize(isrow, &n));
    if (!n) {
      sameDist[0] = PETSC_TRUE;
    } else {
      PetscCall(ISGetMinMax(isrow, &i, &j));
      PetscCall(MatGetOwnershipRange(mat, &start, &end));
      if (i >= start && j < end) sameDist[0] = PETSC_TRUE;
    }

    /* Check if iscol has same processor distribution as mat */
    sameDist[1] = PETSC_FALSE;
    PetscCall(ISGetLocalSize(iscol, &n));
    if (!n) {
      sameDist[1] = PETSC_TRUE;
    } else {
      PetscCall(ISGetMinMax(iscol, &i, &j));
      PetscCall(MatGetOwnershipRangeColumn(mat, &start, &end));
      if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
    }

    /* All ranks must agree (MPI_LAND) before taking a specialized code path */
    PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
    PetscCallMPI(MPIU_Allreduce(&sameDist, &tsameDist, 2, MPIU_BOOL, MPI_LAND, comm));
    sameRowDist = tsameDist[0];
  }

  if (sameRowDist) {
    if (tsameDist[1]) { /* sameRowDist & sameColDist */
      /* isrow and iscol have same processor distribution as mat */
      PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat, isrow, iscol, call, newmat));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else { /* sameRowDist */
      /* isrow has same processor distribution as mat */
      if (call == MAT_INITIAL_MATRIX) {
        PetscBool sorted;
        PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
        PetscCall(ISGetLocalSize(iscol_local, &n)); /* local size of iscol_local = global columns of newmat */
        PetscCall(ISGetSize(iscol, &i));
        PetscCheck(n == i, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT, n, i);

        PetscCall(ISSorted(iscol_local, &sorted));
        if (sorted) {
          /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
          PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, iscol_local, MAT_INITIAL_MATRIX, newmat));
          PetscFunctionReturn(PETSC_SUCCESS);
        }
        /* if unsorted, fall through to the general path below, reusing iscol_local */
      } else { /* call == MAT_REUSE_MATRIX */
        IS iscol_sub;
        PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
        if (iscol_sub) {
          PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, NULL, call, newmat));
          PetscFunctionReturn(PETSC_SUCCESS);
        }
      }
    }
  }

  /* General case: iscol -> iscol_local which has global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
    PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
  } else {
    if (!iscol_local) PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
  }

  PetscCall(ISGetLocalSize(iscol, &csize));
  PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat, isrow, iscol_local, csize, call, newmat));

  if (call == MAT_INITIAL_MATRIX) {
    /* save the gathered column IS on the submatrix for future MAT_REUSE_MATRIX calls */
    PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
    PetscCall(ISDestroy(&iscol_local));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatCreateMPIAIJWithSeqAIJ - creates a `MATMPIAIJ` matrix using `MATSEQAIJ` matrices that contain the "diagonal"
  and "off-diagonal" part of the matrix in CSR format.

  Collective

  Input Parameters:
+ comm   - MPI communicator
. A      - "diagonal" portion of matrix
. B      - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
- garray - global index of `B` columns

  Output Parameter:
. mat - the matrix, with input `A` as its local diagonal matrix

  Level: advanced

  Notes:
  See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix.

  `A` becomes part of output mat, `B` is destroyed by this routine. The user cannot use `A` and `B` anymore.
.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateMPIAIJWithSplitArrays()`
@*/
PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm, Mat A, Mat B, const PetscInt garray[], Mat *mat)
{
  Mat_MPIAIJ        *maij;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data, *bnew;
  PetscInt          *oi = b->i, *oj = b->j, i, nz, col;
  const PetscScalar *oa;
  Mat                Bnew;
  PetscInt           m, n, N;
  MatType            mpi_mat_type;

  PetscFunctionBegin;
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatGetSize(A, &m, &n));
  PetscCheck(m == B->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Am %" PetscInt_FMT " != Bm %" PetscInt_FMT, m, B->rmap->N);
  PetscCheck(PetscAbs(A->rmap->bs) == PetscAbs(B->rmap->bs), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT, A->rmap->bs, B->rmap->bs);
  /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
  /* PetscCheck(A->cmap->bs == B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */

  /* Get global columns of mat */
  PetscCallMPI(MPIU_Allreduce(&n, &N, 1, MPIU_INT, MPI_SUM, comm));

  PetscCall(MatSetSizes(*mat, m, n, PETSC_DECIDE, N));
  /* Determine the type of MPI matrix that should be created from the type of matrix A, which holds the "diagonal" portion. */
  PetscCall(MatGetMPIMatType_Private(A, &mpi_mat_type));
  PetscCall(MatSetType(*mat, mpi_mat_type));

  if (A->rmap->bs > 1 || A->cmap->bs > 1) PetscCall(MatSetBlockSizes(*mat, A->rmap->bs, A->cmap->bs));
  maij = (Mat_MPIAIJ *)(*mat)->data;

  (*mat)->preallocated = PETSC_TRUE;

  PetscCall(PetscLayoutSetUp((*mat)->rmap));
  PetscCall(PetscLayoutSetUp((*mat)->cmap));

  /* Set A as diagonal portion of *mat; ownership of A passes to *mat (no extra reference taken) */
  maij->A = A;

  /* Map B's local column indices to global indices in place, via garray */
  nz = oi[m];
  for (i = 0; i < nz; i++) {
    col   = oj[i];
    oj[i] = garray[col];
  }

  /* Set Bnew as off-diagonal portion of *mat; Bnew shares B's i/j/a arrays */
  PetscCall(MatSeqAIJGetArrayRead(B, &oa));
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, N, oi, oj, (PetscScalar *)oa, &Bnew));
  PetscCall(MatSeqAIJRestoreArrayRead(B, &oa));
  bnew        = (Mat_SeqAIJ *)Bnew->data;
  bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
  maij->B     = Bnew;

  PetscCheck(B->rmap->N == Bnew->rmap->N, PETSC_COMM_SELF, PETSC_ERR_PLIB, "BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT, B->rmap->N, Bnew->rmap->N);

  /* Transfer ownership of the CSR arrays from B to Bnew before destroying B,
     so MatDestroy(&B) does not free the arrays Bnew now holds */
  b->free_a  = PETSC_FALSE;
  b->free_ij = PETSC_FALSE;
  PetscCall(MatDestroy(&B));

  bnew->free_a  = PETSC_TRUE;
  bnew->free_ij = PETSC_TRUE;

  /* condense columns of maij->B */
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
  PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat, PetscInt, const IS[], const IS[], MatReuse, PetscBool, Mat *);

/*
   MatCreateSubMatrix_MPIAIJ_SameRowDist - extract a parallel submatrix when isrow has the same
   row distribution as mat; iscol_local is the sorted, sequential form of iscol (may be NULL on
   a MAT_REUSE_MATRIX call, in which case the saved "SubIScol"/"Subcmap"/"SubMatrix" objects are used).
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat, IS isrow, IS iscol, IS iscol_local, MatReuse call, Mat *newmat)
{
  PetscInt        i, m, n, rstart, row, rend, nz, j, bs, cbs;
  PetscInt       *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ *)mat->data;
  Mat             M, Msub, B = a->B;
  MatScalar      *aa;
  Mat_SeqAIJ     *aij;
  PetscInt       *garray = a->garray, *colsub, Ncols;
  PetscInt        count, Bn = B->cmap->N, cstart = mat->cmap->rstart, cend = mat->cmap->rend;
  IS              iscol_sub, iscmap;
  const PetscInt *is_idx, *cmap;
  PetscBool       allcolumns = PETSC_FALSE;
  MPI_Comm        comm;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  if (call == MAT_REUSE_MATRIX) {
    /* Retrieve the index sets and sequential submatrix saved by the MAT_INITIAL_MATRIX call */
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
    PetscCheck(iscol_sub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "SubIScol passed in was not used before, cannot reuse");
    PetscCall(ISGetLocalSize(iscol_sub, &count));

    PetscCall(PetscObjectQuery((PetscObject)*newmat, "Subcmap", (PetscObject *)&iscmap));
    PetscCheck(iscmap, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Subcmap passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Msub));
    PetscCheck(Msub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");

    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_REUSE_MATRIX, PETSC_FALSE, &Msub));

  } else { /* call == MAT_INITIAL_MATRIX) */
    PetscBool flg;

    PetscCall(ISGetLocalSize(iscol, &n));
    PetscCall(ISGetSize(iscol, &Ncols));

    /* (1) iscol -> nonscalable iscol_local */
    /* Check for special case: each processor gets entire matrix columns */
    PetscCall(ISIdentity(iscol_local, &flg));
    if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
    PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (allcolumns) {
      iscol_sub = iscol_local;
      PetscCall(PetscObjectReference((PetscObject)iscol_local));
      PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &iscmap));

    } else {
      /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
      PetscInt *idx, *cmap1, k;
      PetscCall(PetscMalloc1(Ncols, &idx));
      PetscCall(PetscMalloc1(Ncols, &cmap1));
      PetscCall(ISGetIndices(iscol_local, &is_idx));
      count = 0;
      k     = 0;
      for (i = 0; i < Ncols; i++) {
        j = is_idx[i];
        if (j >= cstart && j < cend) {
          /* diagonal part of mat */
          idx[count]     = j;
          cmap1[count++] = i; /* column index in submat */
        } else if (Bn) {
          /* off-diagonal part of mat; garray is sorted, so scan forward with k */
          if (j == garray[k]) {
            idx[count]     = j;
            cmap1[count++] = i; /* column index in submat */
          } else if (j > garray[k]) {
            while (j > garray[k] && k < Bn - 1) k++;
            if (j == garray[k]) {
              idx[count]     = j;
              cmap1[count++] = i; /* column index in submat */
            }
          }
        }
      }
      PetscCall(ISRestoreIndices(iscol_local, &is_idx));

      /* idx: selected global columns owned/coupled locally; ownership passes to iscol_sub */
      PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_OWN_POINTER, &iscol_sub));
      PetscCall(ISGetBlockSize(iscol, &cbs));
      PetscCall(ISSetBlockSize(iscol_sub, cbs));

      /* iscmap maps local submatrix columns to column indices of the new global matrix */
      PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local), count, cmap1, PETSC_OWN_POINTER, &iscmap));
    }

    /* (3) Create sequential Msub */
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_INITIAL_MATRIX, allcolumns, &Msub));
  }

  PetscCall(ISGetLocalSize(iscol_sub, &count));
  aij = (Mat_SeqAIJ *)Msub->data;
  ii  = aij->i;
  PetscCall(ISGetIndices(iscmap, &cmap));

  /*
      m - number of local rows
      Ncols - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetSize(Msub, &m, NULL));

  if (call == MAT_INITIAL_MATRIX) {
    /* (4) Create parallel newmat */
    PetscMPIInt rank, size;
    PetscInt    csize;

    PetscCallMPI(MPI_Comm_size(comm, &size));
    PetscCallMPI(MPI_Comm_rank(comm, &rank));

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    PetscCall(ISGetLocalSize(iscol, &csize));
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow, &mglobal));
      if (mglobal == Ncols) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = Ncols / size + ((Ncols % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    /* prefix sum of local column counts gives this rank's column ownership range */
    PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
    rstart = rend - nlocal;
    PetscCheck(rank != size - 1 || rend == Ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, Ncols);

    /* next, compute all the lengths */
    jj = aij->j;
    PetscCall(PetscMalloc1(2 * m + 1, &dlens));
    olens = dlens + m; /* olens shares the single allocation with dlens */
    for (i = 0; i < m; i++) {
      jend = ii[i + 1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j = 0; j < jend; j++) {
        /* cmap[] converts Msub's column index to the new global column index */
        if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }

    PetscCall(ISGetBlockSize(isrow, &bs));
    PetscCall(ISGetBlockSize(iscol, &cbs));

    PetscCall(MatCreate(comm, &M));
    PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, Ncols));
    PetscCall(MatSetBlockSizes(M, bs, cbs));
    PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
    PetscCall(PetscFree(dlens));

  } else { /* call == MAT_REUSE_MATRIX */
    M = *newmat;
    PetscCall(MatGetLocalSize(M, &i, NULL));
    PetscCheck(i == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
        The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
        rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }

  /* (5) Set values of Msub to *newmat */
  PetscCall(PetscMalloc1(count, &colsub));
  PetscCall(MatGetOwnershipRange(M, &rstart, NULL));

  /* walk Msub's CSR arrays row by row, translating column indices through cmap */
  jj = aij->j;
  PetscCall(MatSeqAIJGetArrayRead(Msub, (const PetscScalar **)&aa));
  for (i = 0; i < m; i++) {
    row = rstart + i;
    nz  = ii[i + 1] - ii[i];
    for (j = 0; j < nz; j++) colsub[j] = cmap[jj[j]];
    PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, colsub, aa, INSERT_VALUES));
    jj += nz;
    aa += nz;
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Msub, (const PetscScalar **)&aa));
  PetscCall(ISRestoreIndices(iscmap, &cmap));

  PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));

  PetscCall(PetscFree(colsub));

  /* save Msub, iscol_sub and iscmap used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    *newmat = M;
    PetscCall(PetscObjectCompose((PetscObject)*newmat, "SubMatrix", (PetscObject)Msub));
    PetscCall(MatDestroy(&Msub));

    PetscCall(PetscObjectCompose((PetscObject)*newmat, "SubIScol", (PetscObject)iscol_sub));
    PetscCall(ISDestroy(&iscol_sub));

    PetscCall(PetscObjectCompose((PetscObject)*newmat, "Subcmap", (PetscObject)iscmap));
    PetscCall(ISDestroy(&iscmap));

    if (iscol_local) {
      PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
      PetscCall(ISDestroy(&iscol_local));
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
Not great since it makes two copies of the submatrix, first an SeqAIJ
   in local and then by concatenating the local matrices the end result.
   Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()

   This requires a sequential iscol with all indices.
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat)
{
  PetscMPIInt rank, size;
  PetscInt    i, m, n, rstart, row, rend, nz, *cwork, j, bs, cbs;
  PetscInt   *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
  Mat         M, Mreuse;
  MatScalar  *aa, *vwork;
  MPI_Comm    comm;
  Mat_SeqAIJ *aij;
  PetscBool   colflag, allcolumns = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));

  /* Check for special case: each processor gets entire matrix columns */
  PetscCall(ISIdentity(iscol, &colflag));
  PetscCall(ISGetLocalSize(iscol, &n));
  if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
  /* all ranks must agree on the fast path */
  PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));

  if (call == MAT_REUSE_MATRIX) {
    /* the sequential submatrix was saved on *newmat by the MAT_INITIAL_MATRIX call */
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse));
    PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_REUSE_MATRIX, allcolumns, &Mreuse));
  } else {
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_INITIAL_MATRIX, allcolumns, &Mreuse));
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetSize(Mreuse, &m, &n));
  PetscCall(MatGetBlockSizes(Mreuse, &bs, &cbs));
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ *)Mreuse->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow, &mglobal));
      if (mglobal == n) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = n / size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    /* prefix sum of local column counts gives this rank's column ownership range */
    PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
    rstart = rend - nlocal;
    PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n);

    /* next, compute all the lengths */
    PetscCall(PetscMalloc1(2 * m + 1, &dlens));
    olens = dlens + m; /* olens shares the single allocation with dlens */
    for (i = 0; i < m; i++) {
      jend = ii[i + 1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j = 0; j < jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    PetscCall(MatCreate(comm, &M));
    PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, n));
    PetscCall(MatSetBlockSizes(M, bs, cbs));
    PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
    PetscCall(PetscFree(dlens));
  } else {
    PetscInt ml, nl;

    M = *newmat;
    PetscCall(MatGetLocalSize(M, &ml, &nl));
    PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
        The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
        rather than the
slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  PetscCall(MatGetOwnershipRange(M, &rstart, &rend));
  aij = (Mat_SeqAIJ *)Mreuse->data;
  ii  = aij->i;
  jj  = aij->j;

  /* trigger copy to CPU if needed */
  PetscCall(MatSeqAIJGetArrayRead(Mreuse, (const PetscScalar **)&aa));
  for (i = 0; i < m; i++) {
    row   = rstart + i;
    nz    = ii[i + 1] - ii[i];
    cwork = jj;
    jj    = PetscSafePointerPlusOffset(jj, nz);
    vwork = aa;
    aa    = PetscSafePointerPlusOffset(aa, nz);
    PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES));
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Mreuse, (const PetscScalar **)&aa));

  PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse));
    PetscCall(MatDestroy(&Mreuse));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   MatMPIAIJSetPreallocationCSR_MPIAIJ - implementation of MatMPIAIJSetPreallocationCSR() for MPIAIJ.

   Counts diagonal/off-diagonal nonzeros per local row from the CSR input, preallocates, and then
   inserts the values. Ii[] may use a nonzero starting offset (irstart), which is subtracted when
   indexing J and v.
*/
static PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[])
{
  PetscInt        m, cstart, cend, j, nnz, i, d, *ld;
  PetscInt       *d_nnz, *o_nnz, nnz_max = 0, rstart, ii, irstart;
  const PetscInt *JJ;
  PetscBool       nooffprocentries;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)B->data;

  PetscFunctionBegin;
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  m       = B->rmap->n;
  cstart  = B->cmap->rstart;
  cend    = B->cmap->rend;
  rstart  = B->rmap->rstart;
  irstart = Ii[0]; /* offset of the first local row in Ii[]; J/v are indexed relative to it */

  PetscCall(PetscCalloc2(m, &d_nnz, m, &o_nnz));

  if (PetscDefined(USE_DEBUG)) {
    /* validate row lengths and (sorted) column ranges of the input CSR */
    for (i = 0; i < m; i++) {
      nnz = Ii[i + 1] - Ii[i];
      JJ  = PetscSafePointerPlusOffset(J, Ii[i] - irstart);
      PetscCheck(nnz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns", i, nnz);
      PetscCheck(!nnz || !(JJ[0] < 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT, i, JJ[0]);
      PetscCheck(!nnz || !(JJ[nnz - 1] >= B->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")", i, JJ[nnz - 1], B->cmap->N);
    }
  }

  /* count diagonal-block (cstart <= col < cend) vs off-diagonal entries per row */
  for (i = 0; i < m; i++) {
    nnz     = Ii[i + 1] - Ii[i];
    JJ      = PetscSafePointerPlusOffset(J, Ii[i] - irstart);
    nnz_max = PetscMax(nnz_max, nnz);
    d       = 0;
    for (j = 0; j < nnz; j++) {
      if (cstart <= JJ[j] && JJ[j] < cend) d++;
    }
    d_nnz[i] = d;
    o_nnz[i] = nnz - d;
  }
  PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
  PetscCall(PetscFree2(d_nnz, o_nnz));

  for (i = 0; i < m; i++) {
    ii = i + rstart; /* global row number */
    PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, Ii[i + 1] - Ii[i], PetscSafePointerPlusOffset(J, Ii[i] - irstart), PetscSafePointerPlusOffset(v, Ii[i] - irstart), INSERT_VALUES));
  }
  /* all entries are local by construction, so skip the off-process stash machinery */
  nooffprocentries    = B->nooffprocentries;
  B->nooffprocentries = PETSC_TRUE;
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  B->nooffprocentries = nooffprocentries;

  /* count number of entries below block diagonal */
  PetscCall(PetscFree(Aij->ld));
  PetscCall(PetscCalloc1(m, &ld));
  Aij->ld = ld;
  for (i = 0; i < m; i++) {
    nnz = Ii[i + 1] - Ii[i];
    j   = 0;
    while (j < nnz && J[j] < cstart) j++;
    ld[i] = j;
    if (J) J += nnz;
  }

  PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse
parallel matrix in `MATAIJ` format
  (the default parallel PETSc format).

  Collective

  Input Parameters:
+ B - the matrix
. i - the indices into `j` for the start of each local row (indices start with zero)
. j - the column indices for each local row (indices start with zero)
- v - optional values in the matrix

  Level: developer

  Notes:
  The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc;
  thus you CANNOT change the matrix entries by changing the values of `v` after you have
  called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.

  The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array.

  A convenience routine for this functionality is `MatCreateMPIAIJWithArrays()`.

  You can update the matrix with new numerical values using `MatUpdateMPIAIJWithArrays()` after this call if the column indices in `j` are sorted.

  If you do **not** use `MatUpdateMPIAIJWithArrays()`, the column indices in `j` do not need to be sorted. If you will use
  `MatUpdateMPIAIJWithArrays()`, the column indices **must** be sorted.

  The format which is used for the sparse matrix input, is equivalent to a
  row-major ordering, i.e. for the following matrix, the input data expected is
  as shown
.vb
        1 0 0
        2 0 3     P0
       -------
        4 5 6     P1

     Process0 [P0] rows_owned=[0,1]
        i =  {0,1,3}  [size = nrow+1  = 2+1]
        j =  {0,0,2}  [size = 3]
        v =  {1,2,3}  [size = 3]

     Process1 [P1] rows_owned=[2]
        i =  {0,3}    [size = nrow+1  = 1+1]
        j =  {0,1,2}  [size = 3]
        v =  {4,5,6}  [size = 3]
.ve

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`,
          `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()`, `MatCreateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
@*/
PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
{
  PetscFunctionBegin;
  /* Dispatch to the implementation registered for B's type (no-op if the type does not provide one) */
  PetscTryMethod(B, "MatMPIAIJSetPreallocationCSR_C", (Mat, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, i, j, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in `MATMPIAIJ` format
  (the default parallel PETSc format). For good matrix assembly performance
  the user should preallocate the matrix storage by setting the parameters
  `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).

  Collective

  Input Parameters:
+ B - the matrix
. d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
         (same value is used for all local rows)
. d_nnz - array containing the number of nonzeros in the various rows of the
          DIAGONAL portion of the local submatrix (possibly different for each row)
          or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `d_nz` is used to specify the nonzero structure.
          The size of this array is equal to the number of local rows, i.e 'm'.
          For matrices that will be factored, you must leave room for (and set)
          the diagonal entry even if it is zero.
. o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
         submatrix (same value is used for all local rows).
- o_nnz - array containing the number of nonzeros in the various rows of the
          OFF-DIAGONAL portion of the local submatrix (possibly different for
          each row) or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `o_nz` is used to specify the nonzero
          structure. The size of this array is equal to the number
          of local rows, i.e 'm'.

  Example Usage:
  Consider the following 8x8 matrix with 34 non-zero values, that is
  assembled across 3 processors. Lets assume that proc0 owns 3 rows,
  proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
  as follows

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

  This can be represented as a collection of submatrices as
.vb
      A B C
      D E F
      G H I
.ve

  Where the submatrices A,B,C are owned by proc0, D,E,F are
  owned by proc1, G,H,I are owned by proc2.

  The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
  The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
  The 'M','N' parameters are 8,8, and have the same values on all procs.

  The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
  submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
  corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
  Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
  part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ`
  matrix, and [DF] as another `MATSEQAIJ` matrix.

  When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
  allocated for every row of the local DIAGONAL submatrix, and `o_nz`
  storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
  One way to choose `d_nz` and `o_nz` is to use the maximum number of nonzeros over
  the local rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
  In this case, the values of `d_nz`, `o_nz` are
.vb
     proc0  dnz = 2, o_nz = 2
     proc1  dnz = 3, o_nz = 2
     proc2  dnz = 1, o_nz = 4
.ve
  We are allocating `m`*(`d_nz`+`o_nz`) storage locations for every proc. This
  translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
  for proc2. i.e we are using 12+15+10=37 storage locations to store
  34 values.

  When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
  for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
  In the above case the values for `d_nnz`, `o_nnz` are
.vb
     proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2 d_nnz = [1,1]   and o_nnz = [4,4]
.ve
  Here the space allocated is sum of all the above values i.e 34, and
  hence pre-allocation is perfect.

  Level: intermediate

  Notes:
  If the *_nnz parameter is given then the *_nz parameter is ignored

  The `MATAIJ` format, also called compressed row storage (CSR), is compatible with standard Fortran
  storage. The stored row and column indices begin with zero.
  See [Sparse Matrices](sec_matsparse) for details.

  The parallel matrix is partitioned such that the first m0 rows belong to
  process 0, the next m1 rows belong to process 1, the next m2 rows belong
  to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.

  The DIAGONAL portion of the local submatrix of a processor can be defined
  as the submatrix which is obtained by extraction the part corresponding to
  the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
  first row that belongs to the processor, r2 is the last row belonging to
  the this processor, and c1-c2 is range of indices of the local part of a
  vector suitable for applying the matrix to. This is an mxn matrix. In the
  common case of a square matrix, the row and column ranges are the same and
  the DIAGONAL part is also square. The remaining portion of the local
  submatrix (mxN) constitute the OFF-DIAGONAL portion.

  If `o_nnz` and `d_nnz` are specified, then `o_nz` and `d_nz` are ignored.

  You can call `MatGetInfo()` to get information on how effective the preallocation was;
  for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
  You can also run with the option `-info` and look for messages with the string
  malloc in them to see if additional memory allocation was needed.

.seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MATMPIAIJ`, `MATAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`,
          `MatGetInfo()`, `PetscSplitOwnership()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
@*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidType(B, 1);
  /* Dispatch to the implementation registered for B's type (no-op if the type does not provide one) */
  PetscTryMethod(B, "MatMPIAIJSetPreallocation_C", (Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, d_nz, d_nnz, o_nz, o_nnz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatCreateMPIAIJWithArrays - creates a `MATMPIAIJ` matrix using arrays that contain in standard
  CSR format for the local rows.
4172 4173 Collective 4174 4175 Input Parameters: 4176 + comm - MPI communicator 4177 . m - number of local rows (Cannot be `PETSC_DECIDE`) 4178 . n - This value should be the same as the local size used in creating the 4179 x vector for the matrix-vector product $ y = Ax$. (or `PETSC_DECIDE` to have 4180 calculated if `N` is given) For square matrices n is almost always `m`. 4181 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given) 4182 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given) 4183 . i - row indices (of length m+1); that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 4184 . j - global column indices 4185 - a - optional matrix values 4186 4187 Output Parameter: 4188 . mat - the matrix 4189 4190 Level: intermediate 4191 4192 Notes: 4193 The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc; 4194 thus you CANNOT change the matrix entries by changing the values of `a[]` after you have 4195 called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays. 4196 4197 The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array. 4198 4199 Once you have created the matrix you can update it with new numerical values using `MatUpdateMPIAIJWithArray()` 4200 4201 If you do **not** use `MatUpdateMPIAIJWithArray()`, the column indices in `j` do not need to be sorted. If you will use 4202 `MatUpdateMPIAIJWithArrays()`, the column indices **must** be sorted. 
4203 4204 The format which is used for the sparse matrix input, is equivalent to a 4205 row-major ordering, i.e., for the following matrix, the input data expected is 4206 as shown 4207 .vb 4208 1 0 0 4209 2 0 3 P0 4210 ------- 4211 4 5 6 P1 4212 4213 Process0 [P0] rows_owned=[0,1] 4214 i = {0,1,3} [size = nrow+1 = 2+1] 4215 j = {0,0,2} [size = 3] 4216 v = {1,2,3} [size = 3] 4217 4218 Process1 [P1] rows_owned=[2] 4219 i = {0,3} [size = nrow+1 = 1+1] 4220 j = {0,1,2} [size = 3] 4221 v = {4,5,6} [size = 3] 4222 .ve 4223 4224 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4225 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4226 @*/ 4227 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat) 4228 { 4229 PetscFunctionBegin; 4230 PetscCheck(!i || !i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 4231 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4232 PetscCall(MatCreate(comm, mat)); 4233 PetscCall(MatSetSizes(*mat, m, n, M, N)); 4234 /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */ 4235 PetscCall(MatSetType(*mat, MATMPIAIJ)); 4236 PetscCall(MatMPIAIJSetPreallocationCSR(*mat, i, j, a)); 4237 PetscFunctionReturn(PETSC_SUCCESS); 4238 } 4239 4240 /*@ 4241 MatUpdateMPIAIJWithArrays - updates a `MATMPIAIJ` matrix using arrays that contain in standard 4242 CSR format for the local rows. 
Only the numerical values are updated the other arrays must be identical to what was passed 4243 from `MatCreateMPIAIJWithArrays()` 4244 4245 Deprecated: Use `MatUpdateMPIAIJWithArray()` 4246 4247 Collective 4248 4249 Input Parameters: 4250 + mat - the matrix 4251 . m - number of local rows (Cannot be `PETSC_DECIDE`) 4252 . n - This value should be the same as the local size used in creating the 4253 x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have 4254 calculated if N is given) For square matrices n is almost always m. 4255 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 4256 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 4257 . Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix 4258 . J - column indices 4259 - v - matrix values 4260 4261 Level: deprecated 4262 4263 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4264 `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4265 @*/ 4266 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[]) 4267 { 4268 PetscInt nnz, i; 4269 PetscBool nooffprocentries; 4270 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)mat->data; 4271 Mat_SeqAIJ *Ad = (Mat_SeqAIJ *)Aij->A->data; 4272 PetscScalar *ad, *ao; 4273 PetscInt ldi, Iii, md; 4274 const PetscInt *Adi = Ad->i; 4275 PetscInt *ld = Aij->ld; 4276 4277 PetscFunctionBegin; 4278 PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 4279 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4280 PetscCheck(m == mat->rmap->n, 
PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()"); 4281 PetscCheck(n == mat->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()"); 4282 4283 PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad)); 4284 PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao)); 4285 4286 for (i = 0; i < m; i++) { 4287 if (PetscDefined(USE_DEBUG)) { 4288 for (PetscInt j = Ii[i] + 1; j < Ii[i + 1]; ++j) { 4289 PetscCheck(J[j] >= J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is not sorted", j - Ii[i], J[j], i); 4290 PetscCheck(J[j] != J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is identical to previous entry", j - Ii[i], J[j], i); 4291 } 4292 } 4293 nnz = Ii[i + 1] - Ii[i]; 4294 Iii = Ii[i]; 4295 ldi = ld[i]; 4296 md = Adi[i + 1] - Adi[i]; 4297 PetscCall(PetscArraycpy(ao, v + Iii, ldi)); 4298 PetscCall(PetscArraycpy(ad, v + Iii + ldi, md)); 4299 PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md)); 4300 ad += md; 4301 ao += nnz - md; 4302 } 4303 nooffprocentries = mat->nooffprocentries; 4304 mat->nooffprocentries = PETSC_TRUE; 4305 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad)); 4306 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao)); 4307 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A)); 4308 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B)); 4309 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 4310 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 4311 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 4312 mat->nooffprocentries = nooffprocentries; 4313 PetscFunctionReturn(PETSC_SUCCESS); 4314 } 4315 4316 /*@ 4317 MatUpdateMPIAIJWithArray - updates an `MATMPIAIJ` matrix using an array that contains the 
nonzero values 4318 4319 Collective 4320 4321 Input Parameters: 4322 + mat - the matrix 4323 - v - matrix values, stored by row 4324 4325 Level: intermediate 4326 4327 Notes: 4328 The matrix must have been obtained with `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` 4329 4330 The column indices in the call to `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` must have been sorted for this call to work correctly 4331 4332 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4333 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4334 @*/ 4335 PetscErrorCode MatUpdateMPIAIJWithArray(Mat mat, const PetscScalar v[]) 4336 { 4337 PetscInt nnz, i, m; 4338 PetscBool nooffprocentries; 4339 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)mat->data; 4340 Mat_SeqAIJ *Ad = (Mat_SeqAIJ *)Aij->A->data; 4341 Mat_SeqAIJ *Ao = (Mat_SeqAIJ *)Aij->B->data; 4342 PetscScalar *ad, *ao; 4343 const PetscInt *Adi = Ad->i, *Adj = Ao->i; 4344 PetscInt ldi, Iii, md; 4345 PetscInt *ld = Aij->ld; 4346 4347 PetscFunctionBegin; 4348 m = mat->rmap->n; 4349 4350 PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad)); 4351 PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao)); 4352 Iii = 0; 4353 for (i = 0; i < m; i++) { 4354 nnz = Adi[i + 1] - Adi[i] + Adj[i + 1] - Adj[i]; 4355 ldi = ld[i]; 4356 md = Adi[i + 1] - Adi[i]; 4357 PetscCall(PetscArraycpy(ad, v + Iii + ldi, md)); 4358 ad += md; 4359 if (ao) { 4360 PetscCall(PetscArraycpy(ao, v + Iii, ldi)); 4361 PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md)); 4362 ao += nnz - md; 4363 } 4364 Iii += nnz; 4365 } 4366 nooffprocentries = mat->nooffprocentries; 4367 mat->nooffprocentries = PETSC_TRUE; 4368 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad)); 4369 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao)); 4370 
PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A)); 4371 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B)); 4372 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 4373 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 4374 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 4375 mat->nooffprocentries = nooffprocentries; 4376 PetscFunctionReturn(PETSC_SUCCESS); 4377 } 4378 4379 /*@ 4380 MatCreateAIJ - Creates a sparse parallel matrix in `MATAIJ` format 4381 (the default parallel PETSc format). For good matrix assembly performance 4382 the user should preallocate the matrix storage by setting the parameters 4383 `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). 4384 4385 Collective 4386 4387 Input Parameters: 4388 + comm - MPI communicator 4389 . m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given) 4390 This value should be the same as the local size used in creating the 4391 y vector for the matrix-vector product y = Ax. 4392 . n - This value should be the same as the local size used in creating the 4393 x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have 4394 calculated if N is given) For square matrices n is almost always m. 4395 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 4396 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 4397 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4398 (same value is used for all local rows) 4399 . d_nnz - array containing the number of nonzeros in the various rows of the 4400 DIAGONAL portion of the local submatrix (possibly different for each row) 4401 or `NULL`, if `d_nz` is used to specify the nonzero structure. 4402 The size of this array is equal to the number of local rows, i.e 'm'. 4403 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4404 submatrix (same value is used for all local rows). 
4405 - o_nnz - array containing the number of nonzeros in the various rows of the 4406 OFF-DIAGONAL portion of the local submatrix (possibly different for 4407 each row) or `NULL`, if `o_nz` is used to specify the nonzero 4408 structure. The size of this array is equal to the number 4409 of local rows, i.e 'm'. 4410 4411 Output Parameter: 4412 . A - the matrix 4413 4414 Options Database Keys: 4415 + -mat_no_inode - Do not use inodes 4416 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4417 - -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in `MatMult()` of sparse parallel matrices. 4418 See viewer types in manual of `MatView()`. Of them, ascii_matlab, draw or binary cause the `VecScatter` 4419 to be viewed as a matrix. Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one `MatMult()` call. 4420 4421 Level: intermediate 4422 4423 Notes: 4424 It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, 4425 MatXXXXSetPreallocation() paradigm instead of this routine directly. 4426 [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`] 4427 4428 If the *_nnz parameter is given then the *_nz parameter is ignored 4429 4430 The `m`,`n`,`M`,`N` parameters specify the size of the matrix, and its partitioning across 4431 processors, while `d_nz`,`d_nnz`,`o_nz`,`o_nnz` parameters specify the approximate 4432 storage requirements for this matrix. 4433 4434 If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one 4435 processor than it must be used on all processors that share the object for 4436 that argument. 4437 4438 If `m` and `n` are not `PETSC_DECIDE`, then the values determine the `PetscLayout` of the matrix and the ranges returned by 4439 `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, and `MatGetOwnershipRangesColumn()`. 
4440 4441 The user MUST specify either the local or global matrix dimensions 4442 (possibly both). 4443 4444 The parallel matrix is partitioned across processors such that the 4445 first `m0` rows belong to process 0, the next `m1` rows belong to 4446 process 1, the next `m2` rows belong to process 2, etc., where 4447 `m0`, `m1`, `m2`... are the input parameter `m` on each MPI process. I.e., each MPI process stores 4448 values corresponding to [m x N] submatrix. 4449 4450 The columns are logically partitioned with the n0 columns belonging 4451 to 0th partition, the next n1 columns belonging to the next 4452 partition etc.. where n0,n1,n2... are the input parameter 'n'. 4453 4454 The DIAGONAL portion of the local submatrix on any given processor 4455 is the submatrix corresponding to the rows and columns m,n 4456 corresponding to the given processor. i.e diagonal matrix on 4457 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4458 etc. The remaining portion of the local submatrix [m x (N-n)] 4459 constitute the OFF-DIAGONAL portion. The example below better 4460 illustrates this concept. The two matrices, the DIAGONAL portion and 4461 the OFF-DIAGONAL portion are each stored as `MATSEQAIJ` matrices. 4462 4463 For a square global matrix we define each processor's diagonal portion 4464 to be its local rows and the corresponding columns (a square submatrix); 4465 each processor's off-diagonal portion encompasses the remainder of the 4466 local matrix (a rectangular submatrix). 4467 4468 If `o_nnz`, `d_nnz` are specified, then `o_nz`, and `d_nz` are ignored. 4469 4470 When calling this routine with a single process communicator, a matrix of 4471 type `MATSEQAIJ` is returned. 
If a matrix of type `MATMPIAIJ` is desired for this 4472 type of communicator, use the construction mechanism 4473 .vb 4474 MatCreate(..., &A); 4475 MatSetType(A, MATMPIAIJ); 4476 MatSetSizes(A, m, n, M, N); 4477 MatMPIAIJSetPreallocation(A, ...); 4478 .ve 4479 4480 By default, this format uses inodes (identical nodes) when possible. 4481 We search for consecutive rows with the same nonzero structure, thereby 4482 reusing matrix information to achieve increased efficiency. 4483 4484 Example Usage: 4485 Consider the following 8x8 matrix with 34 non-zero values, that is 4486 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4487 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4488 as follows 4489 4490 .vb 4491 1 2 0 | 0 3 0 | 0 4 4492 Proc0 0 5 6 | 7 0 0 | 8 0 4493 9 0 10 | 11 0 0 | 12 0 4494 ------------------------------------- 4495 13 0 14 | 15 16 17 | 0 0 4496 Proc1 0 18 0 | 19 20 21 | 0 0 4497 0 0 0 | 22 23 0 | 24 0 4498 ------------------------------------- 4499 Proc2 25 26 27 | 0 0 28 | 29 0 4500 30 0 0 | 31 32 33 | 0 34 4501 .ve 4502 4503 This can be represented as a collection of submatrices as 4504 4505 .vb 4506 A B C 4507 D E F 4508 G H I 4509 .ve 4510 4511 Where the submatrices A,B,C are owned by proc0, D,E,F are 4512 owned by proc1, G,H,I are owned by proc2. 4513 4514 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4515 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4516 The 'M','N' parameters are 8,8, and have the same values on all procs. 4517 4518 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4519 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4520 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4521 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4522 part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ` 4523 matrix, and [DF] as another SeqAIJ matrix. 

  When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
  allocated for every row of the local DIAGONAL submatrix, and `o_nz`
  storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
  One way to choose `d_nz` and `o_nz` is to use the maximum number of nonzeros over
  the local rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
  In this case, the values of `d_nz`,`o_nz` are
.vb
     proc0  dnz = 2, o_nz = 2
     proc1  dnz = 3, o_nz = 2
     proc2  dnz = 1, o_nz = 4
.ve
  We are allocating m*(`d_nz`+`o_nz`) storage locations for every proc. This
  translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
  for proc2. i.e we are using 12+15+10=37 storage locations to store
  34 values.

  When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
  for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
  In the above case the values for `d_nnz`, `o_nnz` are
.vb
     proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2 d_nnz = [1,1] and o_nnz = [4,4]
.ve
  Here the space allocated is sum of all the above values i.e 34, and
  hence pre-allocation is perfect.
4551 4552 .seealso: [](ch_matrices), `Mat`, [Sparse Matrix Creation](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4553 `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()`, `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, 4554 `MatGetOwnershipRangesColumn()`, `PetscLayout` 4555 @*/ 4556 PetscErrorCode MatCreateAIJ(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A) 4557 { 4558 PetscMPIInt size; 4559 4560 PetscFunctionBegin; 4561 PetscCall(MatCreate(comm, A)); 4562 PetscCall(MatSetSizes(*A, m, n, M, N)); 4563 PetscCallMPI(MPI_Comm_size(comm, &size)); 4564 if (size > 1) { 4565 PetscCall(MatSetType(*A, MATMPIAIJ)); 4566 PetscCall(MatMPIAIJSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz)); 4567 } else { 4568 PetscCall(MatSetType(*A, MATSEQAIJ)); 4569 PetscCall(MatSeqAIJSetPreallocation(*A, d_nz, d_nnz)); 4570 } 4571 PetscFunctionReturn(PETSC_SUCCESS); 4572 } 4573 4574 /*MC 4575 MatMPIAIJGetSeqAIJF90 - Returns the local pieces of this distributed matrix 4576 4577 Synopsis: 4578 MatMPIAIJGetSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr) 4579 4580 Not Collective 4581 4582 Input Parameter: 4583 . A - the `MATMPIAIJ` matrix 4584 4585 Output Parameters: 4586 + Ad - the diagonal portion of the matrix 4587 . Ao - the off-diagonal portion of the matrix 4588 . 
colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix 4589 - ierr - error code 4590 4591 Level: advanced 4592 4593 Note: 4594 Use `MatMPIAIJRestoreSeqAIJF90()` when you no longer need access to the matrices and `colmap` 4595 4596 .seealso: [](ch_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJRestoreSeqAIJF90()` 4597 M*/ 4598 4599 /*MC 4600 MatMPIAIJRestoreSeqAIJF90 - call after `MatMPIAIJGetSeqAIJF90()` when you no longer need access to the matrices and `colmap` 4601 4602 Synopsis: 4603 MatMPIAIJRestoreSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr) 4604 4605 Not Collective 4606 4607 Input Parameters: 4608 + A - the `MATMPIAIJ` matrix 4609 . Ad - the diagonal portion of the matrix 4610 . Ao - the off-diagonal portion of the matrix 4611 . colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix 4612 - ierr - error code 4613 4614 Level: advanced 4615 4616 .seealso: [](ch_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJGetSeqAIJF90()` 4617 M*/ 4618 4619 /*@C 4620 MatMPIAIJGetSeqAIJ - Returns the local pieces of this distributed matrix 4621 4622 Not Collective 4623 4624 Input Parameter: 4625 . A - The `MATMPIAIJ` matrix 4626 4627 Output Parameters: 4628 + Ad - The local diagonal block as a `MATSEQAIJ` matrix 4629 . Ao - The local off-diagonal block as a `MATSEQAIJ` matrix 4630 - colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix 4631 4632 Level: intermediate 4633 4634 Note: 4635 The rows in `Ad` and `Ao` are in [0, Nr), where Nr is the number of local rows on this process. The columns 4636 in `Ad` are in [0, Nc) where Nc is the number of local columns. The columns are `Ao` are in [0, Nco), where Nco is 4637 the number of nonzero columns in the local off-diagonal piece of the matrix `A`. 
The array colmap maps these 4638 local column numbers to global column numbers in the original matrix. 4639 4640 Fortran Notes: 4641 `MatMPIAIJGetSeqAIJ()` Fortran binding is deprecated (since PETSc 3.19), use `MatMPIAIJGetSeqAIJF90()` 4642 4643 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJF90()`, `MatMPIAIJRestoreSeqAIJF90()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATSEQAIJ` 4644 @*/ 4645 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[]) 4646 { 4647 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 4648 PetscBool flg; 4649 4650 PetscFunctionBegin; 4651 PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &flg)); 4652 PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIAIJ matrix as input"); 4653 if (Ad) *Ad = a->A; 4654 if (Ao) *Ao = a->B; 4655 if (colmap) *colmap = a->garray; 4656 PetscFunctionReturn(PETSC_SUCCESS); 4657 } 4658 4659 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 4660 { 4661 PetscInt m, N, i, rstart, nnz, Ii; 4662 PetscInt *indx; 4663 PetscScalar *values; 4664 MatType rootType; 4665 4666 PetscFunctionBegin; 4667 PetscCall(MatGetSize(inmat, &m, &N)); 4668 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4669 PetscInt *dnz, *onz, sum, bs, cbs; 4670 4671 if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N)); 4672 /* Check sum(n) = N */ 4673 PetscCallMPI(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm)); 4674 PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N); 4675 4676 PetscCallMPI(MPI_Scan(&m, &rstart, 1, MPIU_INT, MPI_SUM, comm)); 4677 rstart -= m; 4678 4679 MatPreallocateBegin(comm, m, n, dnz, onz); 4680 for (i = 0; i < m; i++) { 4681 PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, NULL)); 4682 
PetscCall(MatPreallocateSet(i + rstart, nnz, indx, dnz, onz)); 4683 PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, NULL)); 4684 } 4685 4686 PetscCall(MatCreate(comm, outmat)); 4687 PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 4688 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 4689 PetscCall(MatSetBlockSizes(*outmat, bs, cbs)); 4690 PetscCall(MatGetRootType_Private(inmat, &rootType)); 4691 PetscCall(MatSetType(*outmat, rootType)); 4692 PetscCall(MatSeqAIJSetPreallocation(*outmat, 0, dnz)); 4693 PetscCall(MatMPIAIJSetPreallocation(*outmat, 0, dnz, 0, onz)); 4694 MatPreallocateEnd(dnz, onz); 4695 PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 4696 } 4697 4698 /* numeric phase */ 4699 PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL)); 4700 for (i = 0; i < m; i++) { 4701 PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, &values)); 4702 Ii = i + rstart; 4703 PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES)); 4704 PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, &values)); 4705 } 4706 PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY)); 4707 PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY)); 4708 PetscFunctionReturn(PETSC_SUCCESS); 4709 } 4710 4711 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void **data) 4712 { 4713 Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)*data; 4714 4715 PetscFunctionBegin; 4716 if (!merge) PetscFunctionReturn(PETSC_SUCCESS); 4717 PetscCall(PetscFree(merge->id_r)); 4718 PetscCall(PetscFree(merge->len_s)); 4719 PetscCall(PetscFree(merge->len_r)); 4720 PetscCall(PetscFree(merge->bi)); 4721 PetscCall(PetscFree(merge->bj)); 4722 PetscCall(PetscFree(merge->buf_ri[0])); 4723 PetscCall(PetscFree(merge->buf_ri)); 4724 PetscCall(PetscFree(merge->buf_rj[0])); 4725 PetscCall(PetscFree(merge->buf_rj)); 4726 PetscCall(PetscFree(merge->coi)); 4727 PetscCall(PetscFree(merge->coj)); 4728 PetscCall(PetscFree(merge->owners_co)); 4729 
PetscCall(PetscLayoutDestroy(&merge->rowmap)); 4730 PetscCall(PetscFree(merge)); 4731 PetscFunctionReturn(PETSC_SUCCESS); 4732 } 4733 4734 #include <../src/mat/utils/freespace.h> 4735 #include <petscbt.h> 4736 4737 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat, Mat mpimat) 4738 { 4739 MPI_Comm comm; 4740 Mat_SeqAIJ *a = (Mat_SeqAIJ *)seqmat->data; 4741 PetscMPIInt size, rank, taga, *len_s; 4742 PetscInt N = mpimat->cmap->N, i, j, *owners, *ai = a->i, *aj, m; 4743 PetscMPIInt proc, k; 4744 PetscInt **buf_ri, **buf_rj; 4745 PetscInt anzi, *bj_i, *bi, *bj, arow, bnzi, nextaj; 4746 PetscInt nrows, **buf_ri_k, **nextrow, **nextai; 4747 MPI_Request *s_waits, *r_waits; 4748 MPI_Status *status; 4749 const MatScalar *aa, *a_a; 4750 MatScalar **abuf_r, *ba_i; 4751 Mat_Merge_SeqsToMPI *merge; 4752 PetscContainer container; 4753 4754 PetscFunctionBegin; 4755 PetscCall(PetscObjectGetComm((PetscObject)mpimat, &comm)); 4756 PetscCall(PetscLogEventBegin(MAT_Seqstompinum, seqmat, 0, 0, 0)); 4757 4758 PetscCallMPI(MPI_Comm_size(comm, &size)); 4759 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 4760 4761 PetscCall(PetscObjectQuery((PetscObject)mpimat, "MatMergeSeqsToMPI", (PetscObject *)&container)); 4762 PetscCheck(container, PetscObjectComm((PetscObject)mpimat), PETSC_ERR_PLIB, "Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic"); 4763 PetscCall(PetscContainerGetPointer(container, (void **)&merge)); 4764 PetscCall(MatSeqAIJGetArrayRead(seqmat, &a_a)); 4765 aa = a_a; 4766 4767 bi = merge->bi; 4768 bj = merge->bj; 4769 buf_ri = merge->buf_ri; 4770 buf_rj = merge->buf_rj; 4771 4772 PetscCall(PetscMalloc1(size, &status)); 4773 owners = merge->rowmap->range; 4774 len_s = merge->len_s; 4775 4776 /* send and recv matrix values */ 4777 PetscCall(PetscObjectGetNewTag((PetscObject)mpimat, &taga)); 4778 PetscCall(PetscPostIrecvScalar(comm, taga, merge->nrecv, merge->id_r, merge->len_r, &abuf_r, &r_waits)); 4779 4780 PetscCall(PetscMalloc1(merge->nsend + 1, &s_waits)); 4781 for 
(proc = 0, k = 0; proc < size; proc++) { 4782 if (!len_s[proc]) continue; 4783 i = owners[proc]; 4784 PetscCallMPI(MPIU_Isend(aa + ai[i], len_s[proc], MPIU_MATSCALAR, proc, taga, comm, s_waits + k)); 4785 k++; 4786 } 4787 4788 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, r_waits, status)); 4789 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, s_waits, status)); 4790 PetscCall(PetscFree(status)); 4791 4792 PetscCall(PetscFree(s_waits)); 4793 PetscCall(PetscFree(r_waits)); 4794 4795 /* insert mat values of mpimat */ 4796 PetscCall(PetscMalloc1(N, &ba_i)); 4797 PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai)); 4798 4799 for (k = 0; k < merge->nrecv; k++) { 4800 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4801 nrows = *buf_ri_k[k]; 4802 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4803 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4804 } 4805 4806 /* set values of ba */ 4807 m = merge->rowmap->n; 4808 for (i = 0; i < m; i++) { 4809 arow = owners[rank] + i; 4810 bj_i = bj + bi[i]; /* col indices of the i-th row of mpimat */ 4811 bnzi = bi[i + 1] - bi[i]; 4812 PetscCall(PetscArrayzero(ba_i, bnzi)); 4813 4814 /* add local non-zero vals of this proc's seqmat into ba */ 4815 anzi = ai[arow + 1] - ai[arow]; 4816 aj = a->j + ai[arow]; 4817 aa = a_a + ai[arow]; 4818 nextaj = 0; 4819 for (j = 0; nextaj < anzi; j++) { 4820 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4821 ba_i[j] += aa[nextaj++]; 4822 } 4823 } 4824 4825 /* add received vals into ba */ 4826 for (k = 0; k < merge->nrecv; k++) { /* k-th received message */ 4827 /* i-th row */ 4828 if (i == *nextrow[k]) { 4829 anzi = *(nextai[k] + 1) - *nextai[k]; 4830 aj = buf_rj[k] + *nextai[k]; 4831 aa = abuf_r[k] + *nextai[k]; 4832 nextaj = 0; 4833 for (j = 0; nextaj < anzi; j++) { 4834 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4835 
ba_i[j] += aa[nextaj++]; 4836 } 4837 } 4838 nextrow[k]++; 4839 nextai[k]++; 4840 } 4841 } 4842 PetscCall(MatSetValues(mpimat, 1, &arow, bnzi, bj_i, ba_i, INSERT_VALUES)); 4843 } 4844 PetscCall(MatSeqAIJRestoreArrayRead(seqmat, &a_a)); 4845 PetscCall(MatAssemblyBegin(mpimat, MAT_FINAL_ASSEMBLY)); 4846 PetscCall(MatAssemblyEnd(mpimat, MAT_FINAL_ASSEMBLY)); 4847 4848 PetscCall(PetscFree(abuf_r[0])); 4849 PetscCall(PetscFree(abuf_r)); 4850 PetscCall(PetscFree(ba_i)); 4851 PetscCall(PetscFree3(buf_ri_k, nextrow, nextai)); 4852 PetscCall(PetscLogEventEnd(MAT_Seqstompinum, seqmat, 0, 0, 0)); 4853 PetscFunctionReturn(PETSC_SUCCESS); 4854 } 4855 4856 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, Mat *mpimat) 4857 { 4858 Mat B_mpi; 4859 Mat_SeqAIJ *a = (Mat_SeqAIJ *)seqmat->data; 4860 PetscMPIInt size, rank, tagi, tagj, *len_s, *len_si, *len_ri; 4861 PetscInt **buf_rj, **buf_ri, **buf_ri_k; 4862 PetscInt M = seqmat->rmap->n, N = seqmat->cmap->n, i, *owners, *ai = a->i, *aj = a->j; 4863 PetscInt len, *dnz, *onz, bs, cbs; 4864 PetscInt k, anzi, *bi, *bj, *lnk, nlnk, arow, bnzi; 4865 PetscInt nrows, *buf_s, *buf_si, *buf_si_i, **nextrow, **nextai; 4866 MPI_Request *si_waits, *sj_waits, *ri_waits, *rj_waits; 4867 MPI_Status *status; 4868 PetscFreeSpaceList free_space = NULL, current_space = NULL; 4869 PetscBT lnkbt; 4870 Mat_Merge_SeqsToMPI *merge; 4871 PetscContainer container; 4872 4873 PetscFunctionBegin; 4874 PetscCall(PetscLogEventBegin(MAT_Seqstompisym, seqmat, 0, 0, 0)); 4875 4876 /* make sure it is a PETSc comm */ 4877 PetscCall(PetscCommDuplicate(comm, &comm, NULL)); 4878 PetscCallMPI(MPI_Comm_size(comm, &size)); 4879 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 4880 4881 PetscCall(PetscNew(&merge)); 4882 PetscCall(PetscMalloc1(size, &status)); 4883 4884 /* determine row ownership */ 4885 PetscCall(PetscLayoutCreate(comm, &merge->rowmap)); 4886 PetscCall(PetscLayoutSetLocalSize(merge->rowmap, m)); 4887 
PetscCall(PetscLayoutSetSize(merge->rowmap, M)); 4888 PetscCall(PetscLayoutSetBlockSize(merge->rowmap, 1)); 4889 PetscCall(PetscLayoutSetUp(merge->rowmap)); 4890 PetscCall(PetscMalloc1(size, &len_si)); 4891 PetscCall(PetscMalloc1(size, &merge->len_s)); 4892 4893 m = merge->rowmap->n; 4894 owners = merge->rowmap->range; 4895 4896 /* determine the number of messages to send, their lengths */ 4897 len_s = merge->len_s; 4898 4899 len = 0; /* length of buf_si[] */ 4900 merge->nsend = 0; 4901 for (PetscMPIInt proc = 0; proc < size; proc++) { 4902 len_si[proc] = 0; 4903 if (proc == rank) { 4904 len_s[proc] = 0; 4905 } else { 4906 PetscCall(PetscMPIIntCast(owners[proc + 1] - owners[proc] + 1, &len_si[proc])); 4907 PetscCall(PetscMPIIntCast(ai[owners[proc + 1]] - ai[owners[proc]], &len_s[proc])); /* num of rows to be sent to [proc] */ 4908 } 4909 if (len_s[proc]) { 4910 merge->nsend++; 4911 nrows = 0; 4912 for (i = owners[proc]; i < owners[proc + 1]; i++) { 4913 if (ai[i + 1] > ai[i]) nrows++; 4914 } 4915 PetscCall(PetscMPIIntCast(2 * (nrows + 1), &len_si[proc])); 4916 len += len_si[proc]; 4917 } 4918 } 4919 4920 /* determine the number and length of messages to receive for ij-structure */ 4921 PetscCall(PetscGatherNumberOfMessages(comm, NULL, len_s, &merge->nrecv)); 4922 PetscCall(PetscGatherMessageLengths2(comm, merge->nsend, merge->nrecv, len_s, len_si, &merge->id_r, &merge->len_r, &len_ri)); 4923 4924 /* post the Irecv of j-structure */ 4925 PetscCall(PetscCommGetNewTag(comm, &tagj)); 4926 PetscCall(PetscPostIrecvInt(comm, tagj, merge->nrecv, merge->id_r, merge->len_r, &buf_rj, &rj_waits)); 4927 4928 /* post the Isend of j-structure */ 4929 PetscCall(PetscMalloc2(merge->nsend, &si_waits, merge->nsend, &sj_waits)); 4930 4931 for (PetscMPIInt proc = 0, k = 0; proc < size; proc++) { 4932 if (!len_s[proc]) continue; 4933 i = owners[proc]; 4934 PetscCallMPI(MPIU_Isend(aj + ai[i], len_s[proc], MPIU_INT, proc, tagj, comm, sj_waits + k)); 4935 k++; 4936 } 4937 4938 /* receives 
and sends of j-structure are complete */ 4939 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, rj_waits, status)); 4940 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, sj_waits, status)); 4941 4942 /* send and recv i-structure */ 4943 PetscCall(PetscCommGetNewTag(comm, &tagi)); 4944 PetscCall(PetscPostIrecvInt(comm, tagi, merge->nrecv, merge->id_r, len_ri, &buf_ri, &ri_waits)); 4945 4946 PetscCall(PetscMalloc1(len + 1, &buf_s)); 4947 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4948 for (PetscMPIInt proc = 0, k = 0; proc < size; proc++) { 4949 if (!len_s[proc]) continue; 4950 /* form outgoing message for i-structure: 4951 buf_si[0]: nrows to be sent 4952 [1:nrows]: row index (global) 4953 [nrows+1:2*nrows+1]: i-structure index 4954 */ 4955 nrows = len_si[proc] / 2 - 1; 4956 buf_si_i = buf_si + nrows + 1; 4957 buf_si[0] = nrows; 4958 buf_si_i[0] = 0; 4959 nrows = 0; 4960 for (i = owners[proc]; i < owners[proc + 1]; i++) { 4961 anzi = ai[i + 1] - ai[i]; 4962 if (anzi) { 4963 buf_si_i[nrows + 1] = buf_si_i[nrows] + anzi; /* i-structure */ 4964 buf_si[nrows + 1] = i - owners[proc]; /* local row index */ 4965 nrows++; 4966 } 4967 } 4968 PetscCallMPI(MPIU_Isend(buf_si, len_si[proc], MPIU_INT, proc, tagi, comm, si_waits + k)); 4969 k++; 4970 buf_si += len_si[proc]; 4971 } 4972 4973 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, ri_waits, status)); 4974 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, si_waits, status)); 4975 4976 PetscCall(PetscInfo(seqmat, "nsend: %d, nrecv: %d\n", merge->nsend, merge->nrecv)); 4977 for (i = 0; i < merge->nrecv; i++) PetscCall(PetscInfo(seqmat, "recv len_ri=%d, len_rj=%d from [%d]\n", len_ri[i], merge->len_r[i], merge->id_r[i])); 4978 4979 PetscCall(PetscFree(len_si)); 4980 PetscCall(PetscFree(len_ri)); 4981 PetscCall(PetscFree(rj_waits)); 4982 PetscCall(PetscFree2(si_waits, sj_waits)); 4983 PetscCall(PetscFree(ri_waits)); 4984 PetscCall(PetscFree(buf_s)); 4985 
PetscCall(PetscFree(status)); 4986 4987 /* compute a local seq matrix in each processor */ 4988 /* allocate bi array and free space for accumulating nonzero column info */ 4989 PetscCall(PetscMalloc1(m + 1, &bi)); 4990 bi[0] = 0; 4991 4992 /* create and initialize a linked list */ 4993 nlnk = N + 1; 4994 PetscCall(PetscLLCreate(N, N, nlnk, lnk, lnkbt)); 4995 4996 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4997 len = ai[owners[rank + 1]] - ai[owners[rank]]; 4998 PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2, len) + 1, &free_space)); 4999 5000 current_space = free_space; 5001 5002 /* determine symbolic info for each local row */ 5003 PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai)); 5004 5005 for (k = 0; k < merge->nrecv; k++) { 5006 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 5007 nrows = *buf_ri_k[k]; 5008 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 5009 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 5010 } 5011 5012 MatPreallocateBegin(comm, m, n, dnz, onz); 5013 len = 0; 5014 for (i = 0; i < m; i++) { 5015 bnzi = 0; 5016 /* add local non-zero cols of this proc's seqmat into lnk */ 5017 arow = owners[rank] + i; 5018 anzi = ai[arow + 1] - ai[arow]; 5019 aj = a->j + ai[arow]; 5020 PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt)); 5021 bnzi += nlnk; 5022 /* add received col data into lnk */ 5023 for (k = 0; k < merge->nrecv; k++) { /* k-th received message */ 5024 if (i == *nextrow[k]) { /* i-th row */ 5025 anzi = *(nextai[k] + 1) - *nextai[k]; 5026 aj = buf_rj[k] + *nextai[k]; 5027 PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt)); 5028 bnzi += nlnk; 5029 nextrow[k]++; 5030 nextai[k]++; 5031 } 5032 } 5033 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 5034 5035 /* if free space is not available, make more free space */ 5036 if (current_space->local_remaining < bnzi) 
PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi, current_space->total_array_size), ¤t_space)); 5037 /* copy data into free space, then initialize lnk */ 5038 PetscCall(PetscLLClean(N, N, bnzi, lnk, current_space->array, lnkbt)); 5039 PetscCall(MatPreallocateSet(i + owners[rank], bnzi, current_space->array, dnz, onz)); 5040 5041 current_space->array += bnzi; 5042 current_space->local_used += bnzi; 5043 current_space->local_remaining -= bnzi; 5044 5045 bi[i + 1] = bi[i] + bnzi; 5046 } 5047 5048 PetscCall(PetscFree3(buf_ri_k, nextrow, nextai)); 5049 5050 PetscCall(PetscMalloc1(bi[m] + 1, &bj)); 5051 PetscCall(PetscFreeSpaceContiguous(&free_space, bj)); 5052 PetscCall(PetscLLDestroy(lnk, lnkbt)); 5053 5054 /* create symbolic parallel matrix B_mpi */ 5055 PetscCall(MatGetBlockSizes(seqmat, &bs, &cbs)); 5056 PetscCall(MatCreate(comm, &B_mpi)); 5057 if (n == PETSC_DECIDE) { 5058 PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, N)); 5059 } else { 5060 PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 5061 } 5062 PetscCall(MatSetBlockSizes(B_mpi, bs, cbs)); 5063 PetscCall(MatSetType(B_mpi, MATMPIAIJ)); 5064 PetscCall(MatMPIAIJSetPreallocation(B_mpi, 0, dnz, 0, onz)); 5065 MatPreallocateEnd(dnz, onz); 5066 PetscCall(MatSetOption(B_mpi, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE)); 5067 5068 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 5069 B_mpi->assembled = PETSC_FALSE; 5070 merge->bi = bi; 5071 merge->bj = bj; 5072 merge->buf_ri = buf_ri; 5073 merge->buf_rj = buf_rj; 5074 merge->coi = NULL; 5075 merge->coj = NULL; 5076 merge->owners_co = NULL; 5077 5078 PetscCall(PetscCommDestroy(&comm)); 5079 5080 /* attach the supporting struct to B_mpi for reuse */ 5081 PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container)); 5082 PetscCall(PetscContainerSetPointer(container, merge)); 5083 PetscCall(PetscContainerSetCtxDestroy(container, MatDestroy_MPIAIJ_SeqsToMPI)); 5084 
PetscCall(PetscObjectCompose((PetscObject)B_mpi, "MatMergeSeqsToMPI", (PetscObject)container)); 5085 PetscCall(PetscContainerDestroy(&container)); 5086 *mpimat = B_mpi; 5087 5088 PetscCall(PetscLogEventEnd(MAT_Seqstompisym, seqmat, 0, 0, 0)); 5089 PetscFunctionReturn(PETSC_SUCCESS); 5090 } 5091 5092 /*@ 5093 MatCreateMPIAIJSumSeqAIJ - Creates a `MATMPIAIJ` matrix by adding sequential 5094 matrices from each processor 5095 5096 Collective 5097 5098 Input Parameters: 5099 + comm - the communicators the parallel matrix will live on 5100 . seqmat - the input sequential matrices 5101 . m - number of local rows (or `PETSC_DECIDE`) 5102 . n - number of local columns (or `PETSC_DECIDE`) 5103 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5104 5105 Output Parameter: 5106 . mpimat - the parallel matrix generated 5107 5108 Level: advanced 5109 5110 Note: 5111 The dimensions of the sequential matrix in each processor MUST be the same. 5112 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 5113 destroyed when `mpimat` is destroyed. Call `PetscObjectQuery()` to access `seqmat`. 
5114 5115 .seealso: [](ch_matrices), `Mat`, `MatCreateAIJ()` 5116 @*/ 5117 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, MatReuse scall, Mat *mpimat) 5118 { 5119 PetscMPIInt size; 5120 5121 PetscFunctionBegin; 5122 PetscCallMPI(MPI_Comm_size(comm, &size)); 5123 if (size == 1) { 5124 PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0)); 5125 if (scall == MAT_INITIAL_MATRIX) { 5126 PetscCall(MatDuplicate(seqmat, MAT_COPY_VALUES, mpimat)); 5127 } else { 5128 PetscCall(MatCopy(seqmat, *mpimat, SAME_NONZERO_PATTERN)); 5129 } 5130 PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0)); 5131 PetscFunctionReturn(PETSC_SUCCESS); 5132 } 5133 PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0)); 5134 if (scall == MAT_INITIAL_MATRIX) PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm, seqmat, m, n, mpimat)); 5135 PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat, *mpimat)); 5136 PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0)); 5137 PetscFunctionReturn(PETSC_SUCCESS); 5138 } 5139 5140 /*@ 5141 MatAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATAIJ` matrix. 5142 5143 Not Collective 5144 5145 Input Parameter: 5146 . A - the matrix 5147 5148 Output Parameter: 5149 . A_loc - the local sequential matrix generated 5150 5151 Level: developer 5152 5153 Notes: 5154 The matrix is created by taking `A`'s local rows and putting them into a sequential matrix 5155 with `mlocal` rows and `n` columns. Where `mlocal` is obtained with `MatGetLocalSize()` and 5156 `n` is the global column count obtained with `MatGetSize()` 5157 5158 In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix. 5159 5160 For parallel matrices this creates an entirely new matrix. If the matrix is sequential it merely increases the reference count. 
5161 5162 Destroy the matrix with `MatDestroy()` 5163 5164 .seealso: [](ch_matrices), `Mat`, `MatMPIAIJGetLocalMat()` 5165 @*/ 5166 PetscErrorCode MatAIJGetLocalMat(Mat A, Mat *A_loc) 5167 { 5168 PetscBool mpi; 5169 5170 PetscFunctionBegin; 5171 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &mpi)); 5172 if (mpi) { 5173 PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, A_loc)); 5174 } else { 5175 *A_loc = A; 5176 PetscCall(PetscObjectReference((PetscObject)*A_loc)); 5177 } 5178 PetscFunctionReturn(PETSC_SUCCESS); 5179 } 5180 5181 /*@ 5182 MatMPIAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix. 5183 5184 Not Collective 5185 5186 Input Parameters: 5187 + A - the matrix 5188 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5189 5190 Output Parameter: 5191 . A_loc - the local sequential matrix generated 5192 5193 Level: developer 5194 5195 Notes: 5196 The matrix is created by taking all `A`'s local rows and putting them into a sequential 5197 matrix with `mlocal` rows and `n` columns.`mlocal` is the row count obtained with 5198 `MatGetLocalSize()` and `n` is the global column count obtained with `MatGetSize()`. 5199 5200 In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix. 5201 5202 When `A` is sequential and `MAT_INITIAL_MATRIX` is requested, the matrix returned is the diagonal part of `A` (which contains the entire matrix), 5203 with its reference count increased by one. Hence changing values of `A_loc` changes `A`. If `MAT_REUSE_MATRIX` is requested on a sequential matrix 5204 then `MatCopy`(Adiag,*`A_loc`,`SAME_NONZERO_PATTERN`) is called to fill `A_loc`. Thus one can preallocate the appropriate sequential matrix `A_loc` 5205 and then call this routine with `MAT_REUSE_MATRIX`. In this case, one can modify the values of `A_loc` without affecting the original sequential matrix. 
.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()`
@*/
PetscErrorCode MatMPIAIJGetLocalMat(Mat A, MatReuse scall, Mat *A_loc)
{
  Mat_MPIAIJ        *mpimat = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ        *mat, *a, *b;
  PetscInt          *ai, *aj, *bi, *bj, *cmap = mpimat->garray;
  const PetscScalar *aa, *ba, *aav, *bav;
  PetscScalar       *ca, *cam;
  PetscMPIInt        size;
  PetscInt           am = A->rmap->n, i, j, k, cstart = A->cmap->rstart;
  PetscInt          *ci, *cj, col, ncols_d, ncols_o, jo;
  PetscBool          match;

  PetscFunctionBegin;
  /* accept MATMPIAIJ and its derived types (type-name prefix match) */
  PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &match));
  PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    /* one process: the diagonal block already IS the whole matrix */
    if (scall == MAT_INITIAL_MATRIX) {
      PetscCall(PetscObjectReference((PetscObject)mpimat->A));
      *A_loc = mpimat->A;
    } else if (scall == MAT_REUSE_MATRIX) {
      PetscCall(MatCopy(mpimat->A, *A_loc, SAME_NONZERO_PATTERN));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
  a  = (Mat_SeqAIJ *)mpimat->A->data;
  b  = (Mat_SeqAIJ *)mpimat->B->data;
  ai = a->i;
  aj = a->j;
  bi = b->i;
  bj = b->j;
  PetscCall(MatSeqAIJGetArrayRead(mpimat->A, &aav));
  PetscCall(MatSeqAIJGetArrayRead(mpimat->B, &bav));
  /* aa/ba are walked (incremented) below; aav/bav keep the originals for Restore */
  aa = aav;
  ba = bav;
  if (scall == MAT_INITIAL_MATRIX) {
    /* row i of the result merges, in global column order: off-diagonal entries
       left of the diagonal block, the diagonal block, then the remaining
       off-diagonal entries.  aj/bj/aa/ba are advanced across rows, so the three
       loops below must stay in exactly this order. */
    PetscCall(PetscMalloc1(1 + am, &ci));
    ci[0] = 0;
    for (i = 0; i < am; i++) ci[i + 1] = ci[i] + (ai[i + 1] - ai[i]) + (bi[i + 1] - bi[i]);
    PetscCall(PetscMalloc1(1 + ci[am], &cj));
    PetscCall(PetscMalloc1(1 + ci[am], &ca));
    k = 0;
    for (i = 0; i < am; i++) {
      ncols_o = bi[i + 1] - bi[i];
      ncols_d = ai[i + 1] - ai[i];
      /* off-diagonal portion of A: columns with global index below cstart */
      for (jo = 0; jo < ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        cj[k] = col;
        bj++;
        ca[k++] = *ba++;
      }
      /* diagonal portion of A */
      for (j = 0; j < ncols_d; j++) {
        cj[k]   = cstart + *aj++;
        ca[k++] = *aa++;
      }
      /* off-diagonal portion of A: remaining columns (global index >= cstart + diag width) */
      for (j = jo; j < ncols_o; j++) {
        cj[k]   = cmap[*bj++];
        ca[k++] = *ba++;
      }
    }
    /* put together the new matrix */
    PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, A->cmap->N, ci, cj, ca, A_loc));
    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    mat          = (Mat_SeqAIJ *)(*A_loc)->data;
    mat->free_a  = PETSC_TRUE;
    mat->free_ij = PETSC_TRUE;
    mat->nonew   = 0;
  } else if (scall == MAT_REUSE_MATRIX) {
    /* same three-way walk as above, but only values are copied; the structure
       (ci/cj) in *A_loc is assumed to match what MAT_INITIAL_MATRIX produced */
    mat = (Mat_SeqAIJ *)(*A_loc)->data;
    ci  = mat->i;
    cj  = mat->j;
    PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &cam));
    for (i = 0; i < am; i++) {
      /* off-diagonal portion of A */
      ncols_o = bi[i + 1] - bi[i];
      for (jo = 0; jo < ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        *cam++ = *ba++;
        bj++;
      }
      /* diagonal portion of A */
      ncols_d = ai[i + 1] - ai[i];
      for (j = 0; j < ncols_d; j++) *cam++ = *aa++;
      /* off-diagonal portion of A */
      for (j = jo; j < ncols_o; j++) {
        *cam++ = *ba++;
        bj++;
      }
    }
    PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &cam));
  } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
  PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A, &aav));
  PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B, &bav));
  PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMPIAIJGetLocalMatMerge - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by
taking all its local rows and putting them into a sequential matrix with 5316 mlocal rows and n columns. Where n is the sum of the number of columns of the diagonal and off-diagonal part 5317 5318 Not Collective 5319 5320 Input Parameters: 5321 + A - the matrix 5322 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5323 5324 Output Parameters: 5325 + glob - sequential `IS` with global indices associated with the columns of the local sequential matrix generated (can be `NULL`) 5326 - A_loc - the local sequential matrix generated 5327 5328 Level: developer 5329 5330 Note: 5331 This is different from `MatMPIAIJGetLocalMat()` since the first columns in the returning matrix are those associated with the diagonal 5332 part, then those associated with the off-diagonal part (in its local ordering) 5333 5334 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()` 5335 @*/ 5336 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A, MatReuse scall, IS *glob, Mat *A_loc) 5337 { 5338 Mat Ao, Ad; 5339 const PetscInt *cmap; 5340 PetscMPIInt size; 5341 PetscErrorCode (*f)(Mat, MatReuse, IS *, Mat *); 5342 5343 PetscFunctionBegin; 5344 PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &cmap)); 5345 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 5346 if (size == 1) { 5347 if (scall == MAT_INITIAL_MATRIX) { 5348 PetscCall(PetscObjectReference((PetscObject)Ad)); 5349 *A_loc = Ad; 5350 } else if (scall == MAT_REUSE_MATRIX) { 5351 PetscCall(MatCopy(Ad, *A_loc, SAME_NONZERO_PATTERN)); 5352 } 5353 if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad), Ad->cmap->n, Ad->cmap->rstart, 1, glob)); 5354 PetscFunctionReturn(PETSC_SUCCESS); 5355 } 5356 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJGetLocalMatMerge_C", &f)); 5357 PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0)); 5358 if (f) { 5359 PetscCall((*f)(A, scall, glob, A_loc)); 5360 } else { 5361 Mat_SeqAIJ 
*a = (Mat_SeqAIJ *)Ad->data; 5362 Mat_SeqAIJ *b = (Mat_SeqAIJ *)Ao->data; 5363 Mat_SeqAIJ *c; 5364 PetscInt *ai = a->i, *aj = a->j; 5365 PetscInt *bi = b->i, *bj = b->j; 5366 PetscInt *ci, *cj; 5367 const PetscScalar *aa, *ba; 5368 PetscScalar *ca; 5369 PetscInt i, j, am, dn, on; 5370 5371 PetscCall(MatGetLocalSize(Ad, &am, &dn)); 5372 PetscCall(MatGetLocalSize(Ao, NULL, &on)); 5373 PetscCall(MatSeqAIJGetArrayRead(Ad, &aa)); 5374 PetscCall(MatSeqAIJGetArrayRead(Ao, &ba)); 5375 if (scall == MAT_INITIAL_MATRIX) { 5376 PetscInt k; 5377 PetscCall(PetscMalloc1(1 + am, &ci)); 5378 PetscCall(PetscMalloc1(ai[am] + bi[am], &cj)); 5379 PetscCall(PetscMalloc1(ai[am] + bi[am], &ca)); 5380 ci[0] = 0; 5381 for (i = 0, k = 0; i < am; i++) { 5382 const PetscInt ncols_o = bi[i + 1] - bi[i]; 5383 const PetscInt ncols_d = ai[i + 1] - ai[i]; 5384 ci[i + 1] = ci[i] + ncols_o + ncols_d; 5385 /* diagonal portion of A */ 5386 for (j = 0; j < ncols_d; j++, k++) { 5387 cj[k] = *aj++; 5388 ca[k] = *aa++; 5389 } 5390 /* off-diagonal portion of A */ 5391 for (j = 0; j < ncols_o; j++, k++) { 5392 cj[k] = dn + *bj++; 5393 ca[k] = *ba++; 5394 } 5395 } 5396 /* put together the new matrix */ 5397 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, dn + on, ci, cj, ca, A_loc)); 5398 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5399 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5400 c = (Mat_SeqAIJ *)(*A_loc)->data; 5401 c->free_a = PETSC_TRUE; 5402 c->free_ij = PETSC_TRUE; 5403 c->nonew = 0; 5404 PetscCall(MatSetType(*A_loc, ((PetscObject)Ad)->type_name)); 5405 } else if (scall == MAT_REUSE_MATRIX) { 5406 PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &ca)); 5407 for (i = 0; i < am; i++) { 5408 const PetscInt ncols_d = ai[i + 1] - ai[i]; 5409 const PetscInt ncols_o = bi[i + 1] - bi[i]; 5410 /* diagonal portion of A */ 5411 for (j = 0; j < ncols_d; j++) *ca++ = *aa++; 5412 /* off-diagonal portion of A */ 5413 for (j = 0; j < ncols_o; j++) *ca++ = *ba++; 5414 } 5415 PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &ca)); 5416 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall); 5417 PetscCall(MatSeqAIJRestoreArrayRead(Ad, &aa)); 5418 PetscCall(MatSeqAIJRestoreArrayRead(Ao, &aa)); 5419 if (glob) { 5420 PetscInt cst, *gidx; 5421 5422 PetscCall(MatGetOwnershipRangeColumn(A, &cst, NULL)); 5423 PetscCall(PetscMalloc1(dn + on, &gidx)); 5424 for (i = 0; i < dn; i++) gidx[i] = cst + i; 5425 for (i = 0; i < on; i++) gidx[i + dn] = cmap[i]; 5426 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad), dn + on, gidx, PETSC_OWN_POINTER, glob)); 5427 } 5428 } 5429 PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0)); 5430 PetscFunctionReturn(PETSC_SUCCESS); 5431 } 5432 5433 /*@C 5434 MatMPIAIJGetLocalMatCondensed - Creates a `MATSEQAIJ` matrix from an `MATMPIAIJ` matrix by taking all its local rows and NON-ZERO columns 5435 5436 Not Collective 5437 5438 Input Parameters: 5439 + A - the matrix 5440 . scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5441 . row - index set of rows to extract (or `NULL`) 5442 - col - index set of columns to extract (or `NULL`) 5443 5444 Output Parameter: 5445 . 
A_loc - the local sequential matrix generated

  Level: developer

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`
@*/
PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A, MatReuse scall, IS *row, IS *col, Mat *A_loc)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  PetscInt    i, start, end, ncols, nzA, nzB, *cmap, imark, *idx;
  IS          isrowa, iscola;
  Mat        *aloc;
  PetscBool   match;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &match));
  PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
  PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed, A, 0, 0, 0));
  if (!row) {
    /* default rows: this process's ownership range */
    start = A->rmap->rstart;
    end   = A->rmap->rend;
    PetscCall(ISCreateStride(PETSC_COMM_SELF, end - start, start, 1, &isrowa));
  } else {
    isrowa = *row; /* borrowed; not destroyed below */
  }
  if (!col) {
    /* default cols: the globally-sorted merge of off-diagonal columns (garray)
       and this process's own column range */
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    PetscCall(PetscMalloc1(nzA + nzB, &idx));
    ncols = 0;
    /* garray is sorted, so off-diag columns below `start` come first */
    for (i = 0; i < nzB; i++) {
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i = 0; i < nzA; i++) idx[ncols++] = start + i;
    for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i];
    PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &iscola));
  } else {
    iscola = *col; /* borrowed; not destroyed below */
  }
  if (scall != MAT_INITIAL_MATRIX) {
    /* MatCreateSubMatrices expects an array of matrices on reuse */
    PetscCall(PetscMalloc1(1, &aloc));
    aloc[0] = *A_loc;
  }
  PetscCall(MatCreateSubMatrices(A, 1, &isrowa, &iscola, scall, &aloc));
  if (!col) { /* attach global id of condensed columns */
    PetscCall(PetscObjectCompose((PetscObject)aloc[0], "_petsc_GetLocalMatCondensed_iscol", (PetscObject)iscola));
  }
  *A_loc = aloc[0];
  PetscCall(PetscFree(aloc));
  /* destroy only the index sets created here, never the caller's */
  if (!row) PetscCall(ISDestroy(&isrowa));
  if (!col) PetscCall(ISDestroy(&iscola));
  PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed, A, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
 * Create a sequential AIJ matrix based on row indices. a whole column is extracted once a row is matched.
 * Row could be local or remote.The routine is designed to be scalable in memory so that nothing is based
 * on a global size.
 * */
static PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P, IS rows, Mat *P_oth)
{
  Mat_MPIAIJ            *p  = (Mat_MPIAIJ *)P->data;
  Mat_SeqAIJ            *pd = (Mat_SeqAIJ *)p->A->data, *po = (Mat_SeqAIJ *)p->B->data, *p_oth;
  PetscInt               plocalsize, nrows, *ilocal, *oilocal, i, lidx, *nrcols, *nlcols, ncol;
  PetscMPIInt            owner;
  PetscSFNode           *iremote, *oiremote;
  const PetscInt        *lrowindices;
  PetscSF                sf, osf;
  PetscInt               pcstart, *roffsets, *loffsets, *pnnz, j;
  PetscInt               ontotalcols, dntotalcols, ntotalcols, nout;
  MPI_Comm               comm;
  ISLocalToGlobalMapping mapping;
  const PetscScalar     *pd_a, *po_a;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)P, &comm));
  /* plocalsize is the number of roots
   * nrows is the number of leaves
   * */
  PetscCall(MatGetLocalSize(P, &plocalsize, NULL));
  PetscCall(ISGetLocalSize(rows, &nrows));
  PetscCall(PetscCalloc1(nrows, &iremote));
  PetscCall(ISGetIndices(rows, &lrowindices));
  for (i = 0; i < nrows; i++) {
    /* Find a remote index and an owner for a row
     * The row could be local or remote
     * */
    owner = 0;
    lidx  = 0;
    PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, &lidx));
    iremote[i].index = lidx;
    iremote[i].rank  = owner;
  }
  /* Create SF to communicate how many nonzero columns for each row */
  PetscCall(PetscSFCreate(comm, &sf));
  /* SF will figure out the number of nonzero columns for each row, and their
   * offsets
   * */
  PetscCall(PetscSFSetGraph(sf, plocalsize, nrows, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFSetUp(sf));

  /* per-root (row) counts/offsets, interleaved as (diag, offdiag) pairs */
  PetscCall(PetscCalloc1(2 * (plocalsize + 1), &roffsets));
  PetscCall(PetscCalloc1(2 * plocalsize, &nrcols));
  PetscCall(PetscCalloc1(nrows, &pnnz));
  roffsets[0] = 0;
  roffsets[1] = 0;
  for (i = 0; i < plocalsize; i++) {
    /* diagonal */
    nrcols[i * 2 + 0] = pd->i[i + 1] - pd->i[i];
    /* off-diagonal */
    nrcols[i * 2 + 1] = po->i[i + 1] - po->i[i];
    /* compute offsets so that we relative location for each row */
    roffsets[(i + 1) * 2 + 0] = roffsets[i * 2 + 0] + nrcols[i * 2 + 0];
    roffsets[(i + 1) * 2 + 1] = roffsets[i * 2 + 1] + nrcols[i * 2 + 1];
  }
  PetscCall(PetscCalloc1(2 * nrows, &nlcols));
  PetscCall(PetscCalloc1(2 * nrows, &loffsets));
  /* 'r' means root, and 'l' means leaf */
  PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));
  PetscCall(PetscFree(roffsets));
  PetscCall(PetscFree(nrcols));
  dntotalcols = 0;
  ontotalcols = 0;
  ncol        = 0;
  for (i = 0; i < nrows; i++) {
    pnnz[i] = nlcols[i * 2 + 0] + nlcols[i * 2 + 1];
    ncol    = PetscMax(pnnz[i], ncol);
    /* diagonal */
    dntotalcols += nlcols[i * 2 + 0];
    /* off-diagonal */
    ontotalcols += nlcols[i * 2 + 1];
  }
  /* We do not need to figure the right number of columns
   * since all the calculations will be done by going through the raw data
   * */
  PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, nrows, ncol, 0, pnnz, P_oth));
  PetscCall(MatSetUp(*P_oth));
  PetscCall(PetscFree(pnnz));
  p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
  /* two SF graphs: one pulls diag-block entries, one pulls offdiag entries;
     both scatter into P_oth's single contiguous a/j arrays via ilocal/oilocal */
  /* diagonal */
  PetscCall(PetscCalloc1(dntotalcols, &iremote));
  /* off-diagonal */
  PetscCall(PetscCalloc1(ontotalcols, &oiremote));
  /* diagonal */
  PetscCall(PetscCalloc1(dntotalcols, &ilocal));
  /* off-diagonal */
  PetscCall(PetscCalloc1(ontotalcols, &oilocal));
  dntotalcols = 0;
  ontotalcols = 0;
  ntotalcols  = 0;
  for (i = 0; i < nrows; i++) {
    owner = 0;
    PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, NULL));
    /* Set iremote for diag matrix */
    for (j = 0; j < nlcols[i * 2 + 0]; j++) {
      iremote[dntotalcols].index = loffsets[i * 2 + 0] + j;
      iremote[dntotalcols].rank  = owner;
      /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */
      ilocal[dntotalcols++] = ntotalcols++;
    }
    /* off-diagonal */
    for (j = 0; j < nlcols[i * 2 + 1]; j++) {
      oiremote[ontotalcols].index = loffsets[i * 2 + 1] + j;
      oiremote[ontotalcols].rank  = owner;
      oilocal[ontotalcols++] = ntotalcols++;
    }
  }
  PetscCall(ISRestoreIndices(rows, &lrowindices));
  PetscCall(PetscFree(loffsets));
  PetscCall(PetscFree(nlcols));
  PetscCall(PetscSFCreate(comm, &sf));
  /* P serves as roots and P_oth is leaves
   * Diag matrix
   * */
  PetscCall(PetscSFSetGraph(sf, pd->i[plocalsize], dntotalcols, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFSetUp(sf));

  PetscCall(PetscSFCreate(comm, &osf));
  /* off-diagonal */
  PetscCall(PetscSFSetGraph(osf, po->i[plocalsize], ontotalcols, oilocal, PETSC_OWN_POINTER, oiremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(osf));
  PetscCall(PetscSFSetUp(osf));
  PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
  PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
  /* operate on the matrix internal data to save memory */
  PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
  PetscCall(MatGetOwnershipRangeColumn(P, &pcstart, NULL));
  /* Convert to global indices for diag matrix */
  /* NOTE: pd->j and po->j are mutated in place here and restored below; the
     Bcast begin/end pairs deliberately overlap this work, so statement order
     is load-bearing */
  for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] += pcstart;
  PetscCall(PetscSFBcastBegin(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
  /* We want P_oth store global indices */
  PetscCall(ISLocalToGlobalMappingCreate(comm, 1, p->B->cmap->n, p->garray, PETSC_COPY_VALUES, &mapping));
  /* Use memory scalable approach */
  PetscCall(ISLocalToGlobalMappingSetType(mapping, ISLOCALTOGLOBALMAPPINGHASH));
  PetscCall(ISLocalToGlobalMappingApply(mapping, po->i[plocalsize], po->j, po->j));
  PetscCall(PetscSFBcastBegin(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
  /* Convert back to local indices */
  for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] -= pcstart;
  PetscCall(PetscSFBcastEnd(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
  nout = 0;
  /* restore po->j to local indices; every global index must map back */
  PetscCall(ISGlobalToLocalMappingApply(mapping, IS_GTOLM_DROP, po->i[plocalsize], po->j, &nout, po->j));
  PetscCheck(nout == po->i[plocalsize], comm, PETSC_ERR_ARG_INCOMP, "n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ", po->i[plocalsize], nout);
  PetscCall(ISLocalToGlobalMappingDestroy(&mapping));
  /* Exchange values */
  PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
  PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
  PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
  /* Stop PETSc from shrinking memory */
  for (i = 0; i < nrows; i++) p_oth->ilen[i] = p_oth->imax[i];
  PetscCall(MatAssemblyBegin(*P_oth, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*P_oth, MAT_FINAL_ASSEMBLY));
  /* Attach PetscSF objects to P_oth so that we can reuse it later */
  PetscCall(PetscObjectCompose((PetscObject)*P_oth, "diagsf", (PetscObject)sf));
  PetscCall(PetscObjectCompose((PetscObject)*P_oth, "offdiagsf", (PetscObject)osf));
  PetscCall(PetscSFDestroy(&sf));
  PetscCall(PetscSFDestroy(&osf));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
 * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
 * This supports MPIAIJ and MAIJ
 * */
PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A, Mat P, PetscInt dof, MatReuse reuse, Mat *P_oth)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *p = (Mat_MPIAIJ *)P->data;
  Mat_SeqAIJ *p_oth;
  IS          rows, map;
  PetscHMapI  hamp;
  PetscInt    i, htsize, *rowindices, off, *mapping, key, count;
  MPI_Comm    comm;
  PetscSF     sf, osf;
  PetscBool   has;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, P, 0, 0));
  /* If it is the first time, create an index set of off-diag nonzero columns of A,
   * and then create a submatrix (that often is an overlapping matrix)
   * */
  if (reuse == MAT_INITIAL_MATRIX) {
    /* Use a hash table to figure out unique keys */
    PetscCall(PetscHMapICreateWithSize(a->B->cmap->n, &hamp));
    PetscCall(PetscCalloc1(a->B->cmap->n, &mapping));
    count = 0;
    /* Assume that a->g is sorted, otherwise the following does not make sense */
    for (i = 0; i < a->B->cmap->n; i++) {
      /* dof > 1 collapses consecutive garray entries onto one key (MAIJ case) */
      key = a->garray[i] / dof;
      PetscCall(PetscHMapIHas(hamp, key, &has));
      if (!has) {
        mapping[i] = count;
        PetscCall(PetscHMapISet(hamp, key, count++));
      } else {
        /* Current 'i' has the same value the previous step */
        mapping[i] = count - 1;
      }
    }
    PetscCall(ISCreateGeneral(comm, a->B->cmap->n, mapping, PETSC_OWN_POINTER, &map));
    PetscCall(PetscHMapIGetSize(hamp, &htsize));
    PetscCheck(htsize == count, comm, PETSC_ERR_ARG_INCOMP, " Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT, htsize, count);
    PetscCall(PetscCalloc1(htsize, &rowindices));
    off = 0;
    PetscCall(PetscHMapIGetKeys(hamp, &off, rowindices));
    PetscCall(PetscHMapIDestroy(&hamp));
    PetscCall(PetscSortInt(htsize, rowindices));
    PetscCall(ISCreateGeneral(comm, htsize, rowindices, PETSC_OWN_POINTER, &rows));
    /* In case, the matrix was already created but users want to recreate the matrix */
    PetscCall(MatDestroy(P_oth));
    PetscCall(MatCreateSeqSubMatrixWithRows_Private(P, rows, P_oth));
    PetscCall(PetscObjectCompose((PetscObject)*P_oth, "aoffdiagtopothmapping", (PetscObject)map));
    PetscCall(ISDestroy(&map));
    PetscCall(ISDestroy(&rows));
  } else if (reuse == MAT_REUSE_MATRIX) {
    /* If matrix was already created, we simply update values using SF objects
     * that as attached to the matrix earlier.
     */
    const PetscScalar *pd_a, *po_a;

    PetscCall(PetscObjectQuery((PetscObject)*P_oth, "diagsf", (PetscObject *)&sf));
    PetscCall(PetscObjectQuery((PetscObject)*P_oth, "offdiagsf", (PetscObject *)&osf));
    PetscCheck(sf && osf, comm, PETSC_ERR_ARG_NULL, "Matrix is not initialized yet");
    p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
    /* Update values in place */
    PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
    PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
    PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
    PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
    PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
    PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
    PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
  } else SETERRQ(comm, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown reuse type");
  PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, P, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatGetBrowsOfAcols - Returns `IS` that contain rows of `B` that equal to nonzero columns of local `A`

  Collective

  Input Parameters:
+ A     - the first matrix in `MATMPIAIJ` format
. B     - the second matrix in `MATMPIAIJ` format
- scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`

  Output Parameters:
+ rowb - On input index sets of rows of B to extract (or `NULL`), modified on output
.
colb - On input index sets of columns of B to extract (or `NULL`), modified on output 5768 - B_seq - the sequential matrix generated 5769 5770 Level: developer 5771 5772 .seealso: `Mat`, `MATMPIAIJ`, `IS`, `MatReuse` 5773 @*/ 5774 PetscErrorCode MatGetBrowsOfAcols(Mat A, Mat B, MatReuse scall, IS *rowb, IS *colb, Mat *B_seq) 5775 { 5776 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5777 PetscInt *idx, i, start, ncols, nzA, nzB, *cmap, imark; 5778 IS isrowb, iscolb; 5779 Mat *bseq = NULL; 5780 5781 PetscFunctionBegin; 5782 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 5783 A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 5784 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols, A, B, 0, 0)); 5785 5786 if (scall == MAT_INITIAL_MATRIX) { 5787 start = A->cmap->rstart; 5788 cmap = a->garray; 5789 nzA = a->A->cmap->n; 5790 nzB = a->B->cmap->n; 5791 PetscCall(PetscMalloc1(nzA + nzB, &idx)); 5792 ncols = 0; 5793 for (i = 0; i < nzB; i++) { /* row < local row index */ 5794 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5795 else break; 5796 } 5797 imark = i; 5798 for (i = 0; i < nzA; i++) idx[ncols++] = start + i; /* local rows */ 5799 for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5800 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &isrowb)); 5801 PetscCall(ISCreateStride(PETSC_COMM_SELF, B->cmap->N, 0, 1, &iscolb)); 5802 } else { 5803 PetscCheck(rowb && colb, PETSC_COMM_SELF, PETSC_ERR_SUP, "IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5804 isrowb = *rowb; 5805 iscolb = *colb; 5806 PetscCall(PetscMalloc1(1, &bseq)); 5807 bseq[0] = *B_seq; 5808 } 5809 PetscCall(MatCreateSubMatrices(B, 1, &isrowb, &iscolb, scall, &bseq)); 5810 *B_seq = bseq[0]; 5811 PetscCall(PetscFree(bseq)); 5812 if (!rowb) { 5813 
PetscCall(ISDestroy(&isrowb)); 5814 } else { 5815 *rowb = isrowb; 5816 } 5817 if (!colb) { 5818 PetscCall(ISDestroy(&iscolb)); 5819 } else { 5820 *colb = iscolb; 5821 } 5822 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols, A, B, 0, 0)); 5823 PetscFunctionReturn(PETSC_SUCCESS); 5824 } 5825 5826 /* 5827 MatGetBrowsOfAoCols_MPIAIJ - Creates a `MATSEQAIJ` matrix by taking rows of B that equal to nonzero columns 5828 of the OFF-DIAGONAL portion of local A 5829 5830 Collective 5831 5832 Input Parameters: 5833 + A,B - the matrices in `MATMPIAIJ` format 5834 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5835 5836 Output Parameter: 5837 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5838 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5839 . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5840 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5841 5842 Developer Note: 5843 This directly accesses information inside the VecScatter associated with the matrix-vector product 5844 for this matrix. This is not desirable.. 
5845 5846 Level: developer 5847 5848 */ 5849 5850 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A, Mat B, MatReuse scall, PetscInt **startsj_s, PetscInt **startsj_r, MatScalar **bufa_ptr, Mat *B_oth) 5851 { 5852 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5853 VecScatter ctx; 5854 MPI_Comm comm; 5855 const PetscMPIInt *rprocs, *sprocs; 5856 PetscMPIInt nrecvs, nsends; 5857 const PetscInt *srow, *rstarts, *sstarts; 5858 PetscInt *rowlen, *bufj, *bufJ, ncols = 0, aBn = a->B->cmap->n, row, *b_othi, *b_othj, *rvalues = NULL, *svalues = NULL, *cols, sbs, rbs; 5859 PetscInt i, j, k = 0, l, ll, nrows, *rstartsj = NULL, *sstartsj, len; 5860 PetscScalar *b_otha, *bufa, *bufA, *vals = NULL; 5861 MPI_Request *reqs = NULL, *rwaits = NULL, *swaits = NULL; 5862 PetscMPIInt size, tag, rank, nreqs; 5863 5864 PetscFunctionBegin; 5865 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 5866 PetscCallMPI(MPI_Comm_size(comm, &size)); 5867 5868 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 5869 A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 5870 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, B, 0, 0)); 5871 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 5872 5873 if (size == 1) { 5874 startsj_s = NULL; 5875 bufa_ptr = NULL; 5876 *B_oth = NULL; 5877 PetscFunctionReturn(PETSC_SUCCESS); 5878 } 5879 5880 ctx = a->Mvctx; 5881 tag = ((PetscObject)ctx)->tag; 5882 5883 PetscCall(VecScatterGetRemote_Private(ctx, PETSC_TRUE /*send*/, &nsends, &sstarts, &srow, &sprocs, &sbs)); 5884 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5885 PetscCall(VecScatterGetRemoteOrdered_Private(ctx, PETSC_FALSE /*recv*/, &nrecvs, &rstarts, NULL /*indices not needed*/, &rprocs, &rbs)); 5886 
PetscCall(PetscMPIIntCast(nsends + nrecvs, &nreqs)); 5887 PetscCall(PetscMalloc1(nreqs, &reqs)); 5888 rwaits = reqs; 5889 swaits = PetscSafePointerPlusOffset(reqs, nrecvs); 5890 5891 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5892 if (scall == MAT_INITIAL_MATRIX) { 5893 /* i-array */ 5894 /* post receives */ 5895 if (nrecvs) PetscCall(PetscMalloc1(rbs * (rstarts[nrecvs] - rstarts[0]), &rvalues)); /* rstarts can be NULL when nrecvs=0 */ 5896 for (i = 0; i < nrecvs; i++) { 5897 rowlen = rvalues + rstarts[i] * rbs; 5898 nrows = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of indices to be received */ 5899 PetscCallMPI(MPIU_Irecv(rowlen, nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i)); 5900 } 5901 5902 /* pack the outgoing message */ 5903 PetscCall(PetscMalloc2(nsends + 1, &sstartsj, nrecvs + 1, &rstartsj)); 5904 5905 sstartsj[0] = 0; 5906 rstartsj[0] = 0; 5907 len = 0; /* total length of j or a array to be sent */ 5908 if (nsends) { 5909 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5910 PetscCall(PetscMalloc1(sbs * (sstarts[nsends] - sstarts[0]), &svalues)); 5911 } 5912 for (i = 0; i < nsends; i++) { 5913 rowlen = svalues + (sstarts[i] - sstarts[0]) * sbs; 5914 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5915 for (j = 0; j < nrows; j++) { 5916 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5917 for (l = 0; l < sbs; l++) { 5918 PetscCall(MatGetRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); /* rowlength */ 5919 5920 rowlen[j * sbs + l] = ncols; 5921 5922 len += ncols; 5923 PetscCall(MatRestoreRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); 5924 } 5925 k++; 5926 } 5927 PetscCallMPI(MPIU_Isend(rowlen, nrows * sbs, MPIU_INT, sprocs[i], tag, comm, swaits + i)); 5928 5929 sstartsj[i + 1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5930 } 5931 /* recvs and sends of i-array are completed */ 5932 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5933 
PetscCall(PetscFree(svalues)); 5934 5935 /* allocate buffers for sending j and a arrays */ 5936 PetscCall(PetscMalloc1(len + 1, &bufj)); 5937 PetscCall(PetscMalloc1(len + 1, &bufa)); 5938 5939 /* create i-array of B_oth */ 5940 PetscCall(PetscMalloc1(aBn + 2, &b_othi)); 5941 5942 b_othi[0] = 0; 5943 len = 0; /* total length of j or a array to be received */ 5944 k = 0; 5945 for (i = 0; i < nrecvs; i++) { 5946 rowlen = rvalues + (rstarts[i] - rstarts[0]) * rbs; 5947 nrows = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of rows to be received */ 5948 for (j = 0; j < nrows; j++) { 5949 b_othi[k + 1] = b_othi[k] + rowlen[j]; 5950 PetscCall(PetscIntSumError(rowlen[j], len, &len)); 5951 k++; 5952 } 5953 rstartsj[i + 1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5954 } 5955 PetscCall(PetscFree(rvalues)); 5956 5957 /* allocate space for j and a arrays of B_oth */ 5958 PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_othj)); 5959 PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_otha)); 5960 5961 /* j-array */ 5962 /* post receives of j-array */ 5963 for (i = 0; i < nrecvs; i++) { 5964 nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */ 5965 PetscCallMPI(MPIU_Irecv(b_othj + rstartsj[i], nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i)); 5966 } 5967 5968 /* pack the outgoing message j-array */ 5969 if (nsends) k = sstarts[0]; 5970 for (i = 0; i < nsends; i++) { 5971 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5972 bufJ = bufj + sstartsj[i]; 5973 for (j = 0; j < nrows; j++) { 5974 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5975 for (ll = 0; ll < sbs; ll++) { 5976 PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL)); 5977 for (l = 0; l < ncols; l++) *bufJ++ = cols[l]; 5978 PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL)); 5979 } 5980 } 5981 PetscCallMPI(MPIU_Isend(bufj + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_INT, sprocs[i], tag, comm, swaits + i)); 5982 } 5983 5984 /* 
recvs and sends of j-array are completed */ 5985 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5986 } else if (scall == MAT_REUSE_MATRIX) { 5987 sstartsj = *startsj_s; 5988 rstartsj = *startsj_r; 5989 bufa = *bufa_ptr; 5990 PetscCall(MatSeqAIJGetArrayWrite(*B_oth, &b_otha)); 5991 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container"); 5992 5993 /* a-array */ 5994 /* post receives of a-array */ 5995 for (i = 0; i < nrecvs; i++) { 5996 nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */ 5997 PetscCallMPI(MPIU_Irecv(b_otha + rstartsj[i], nrows, MPIU_SCALAR, rprocs[i], tag, comm, rwaits + i)); 5998 } 5999 6000 /* pack the outgoing message a-array */ 6001 if (nsends) k = sstarts[0]; 6002 for (i = 0; i < nsends; i++) { 6003 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 6004 bufA = bufa + sstartsj[i]; 6005 for (j = 0; j < nrows; j++) { 6006 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 6007 for (ll = 0; ll < sbs; ll++) { 6008 PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals)); 6009 for (l = 0; l < ncols; l++) *bufA++ = vals[l]; 6010 PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals)); 6011 } 6012 } 6013 PetscCallMPI(MPIU_Isend(bufa + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_SCALAR, sprocs[i], tag, comm, swaits + i)); 6014 } 6015 /* recvs and sends of a-array are completed */ 6016 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 6017 PetscCall(PetscFree(reqs)); 6018 6019 if (scall == MAT_INITIAL_MATRIX) { 6020 Mat_SeqAIJ *b_oth; 6021 6022 /* put together the new matrix */ 6023 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, aBn, B->cmap->N, b_othi, b_othj, b_otha, B_oth)); 6024 6025 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 6026 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 6027 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 6028 b_oth->free_a = PETSC_TRUE; 6029 b_oth->free_ij = PETSC_TRUE; 6030 b_oth->nonew = 0; 6031 6032 PetscCall(PetscFree(bufj)); 6033 if (!startsj_s || !bufa_ptr) { 6034 PetscCall(PetscFree2(sstartsj, rstartsj)); 6035 PetscCall(PetscFree(bufa_ptr)); 6036 } else { 6037 *startsj_s = sstartsj; 6038 *startsj_r = rstartsj; 6039 *bufa_ptr = bufa; 6040 } 6041 } else if (scall == MAT_REUSE_MATRIX) { 6042 PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth, &b_otha)); 6043 } 6044 6045 PetscCall(VecScatterRestoreRemote_Private(ctx, PETSC_TRUE, &nsends, &sstarts, &srow, &sprocs, &sbs)); 6046 PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx, PETSC_FALSE, &nrecvs, &rstarts, NULL, &rprocs, &rbs)); 6047 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, B, 0, 0)); 6048 PetscFunctionReturn(PETSC_SUCCESS); 6049 } 6050 6051 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat, MatType, MatReuse, Mat *); 6052 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat, MatType, MatReuse, Mat *); 6053 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat, MatType, MatReuse, Mat *); 6054 #if defined(PETSC_HAVE_MKL_SPARSE) 6055 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat, MatType, MatReuse, Mat *); 6056 #endif 6057 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat, MatType, MatReuse, Mat *); 6058 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *); 6059 #if defined(PETSC_HAVE_ELEMENTAL) 6060 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat, MatType, MatReuse, Mat *); 6061 #endif 6062 #if defined(PETSC_HAVE_SCALAPACK) 6063 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *); 6064 #endif 6065 #if defined(PETSC_HAVE_HYPRE) 6066 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *); 6067 #endif 6068 #if defined(PETSC_HAVE_CUDA) 6069 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat, MatType, 
MatReuse, Mat *); 6070 #endif 6071 #if defined(PETSC_HAVE_HIP) 6072 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJHIPSPARSE(Mat, MatType, MatReuse, Mat *); 6073 #endif 6074 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 6075 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat, MatType, MatReuse, Mat *); 6076 #endif 6077 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat, MatType, MatReuse, Mat *); 6078 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *); 6079 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat); 6080 6081 /* 6082 Computes (B'*A')' since computing B*A directly is untenable 6083 6084 n p p 6085 [ ] [ ] [ ] 6086 m [ A ] * n [ B ] = m [ C ] 6087 [ ] [ ] [ ] 6088 6089 */ 6090 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A, Mat B, Mat C) 6091 { 6092 Mat At, Bt, Ct; 6093 6094 PetscFunctionBegin; 6095 PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &At)); 6096 PetscCall(MatTranspose(B, MAT_INITIAL_MATRIX, &Bt)); 6097 PetscCall(MatMatMult(Bt, At, MAT_INITIAL_MATRIX, PETSC_CURRENT, &Ct)); 6098 PetscCall(MatDestroy(&At)); 6099 PetscCall(MatDestroy(&Bt)); 6100 PetscCall(MatTransposeSetPrecursor(Ct, C)); 6101 PetscCall(MatTranspose(Ct, MAT_REUSE_MATRIX, &C)); 6102 PetscCall(MatDestroy(&Ct)); 6103 PetscFunctionReturn(PETSC_SUCCESS); 6104 } 6105 6106 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A, Mat B, PetscReal fill, Mat C) 6107 { 6108 PetscBool cisdense; 6109 6110 PetscFunctionBegin; 6111 PetscCheck(A->cmap->n == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT, A->cmap->n, B->rmap->n); 6112 PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N)); 6113 PetscCall(MatSetBlockSizesFromMats(C, A, B)); 6114 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, MATMPIDENSEHIP, "")); 6115 if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 6116 
PetscCall(MatSetUp(C)); 6117 6118 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 6119 PetscFunctionReturn(PETSC_SUCCESS); 6120 } 6121 6122 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C) 6123 { 6124 Mat_Product *product = C->product; 6125 Mat A = product->A, B = product->B; 6126 6127 PetscFunctionBegin; 6128 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 6129 A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 6130 C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ; 6131 C->ops->productsymbolic = MatProductSymbolic_AB; 6132 PetscFunctionReturn(PETSC_SUCCESS); 6133 } 6134 6135 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C) 6136 { 6137 Mat_Product *product = C->product; 6138 6139 PetscFunctionBegin; 6140 if (product->type == MATPRODUCT_AB) PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C)); 6141 PetscFunctionReturn(PETSC_SUCCESS); 6142 } 6143 6144 /* 6145 Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix 6146 6147 Input Parameters: 6148 6149 j1,rowBegin1,rowEnd1,jmap1: describe the first set of nonzeros (Set1) 6150 j2,rowBegin2,rowEnd2,jmap2: describe the second set of nonzeros (Set2) 6151 6152 mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat 6153 6154 For Set1, j1[] contains column indices of the nonzeros. 6155 For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k 6156 respectively (note rowEnd1[k] is not necessarily equal to rwoBegin1[k+1]). Indices in this range of j1[] are sorted, 6157 but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1. 6158 6159 Similar for Set2. 

    This routine merges the two sets of nonzeros row by row and removes repeats.

    Output Parameters: (memory is allocated by the caller)

    i[],j[]: the CSR of the merged matrix, which has m rows.
    imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix.
    imap2[]: similar to imap1[], but for Set2.
    Note we order nonzeros row-by-row and from left to right.
*/
static PetscErrorCode MatMergeEntries_Internal(Mat mat, const PetscInt j1[], const PetscInt j2[], const PetscCount rowBegin1[], const PetscCount rowEnd1[], const PetscCount rowBegin2[], const PetscCount rowEnd2[], const PetscCount jmap1[], const PetscCount jmap2[], PetscCount imap1[], PetscCount imap2[], PetscInt i[], PetscInt j[])
{
  PetscInt   r, m; /* Row index of mat */
  PetscCount t, t1, t2, b1, e1, b2, e2;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(mat, &m, NULL));
  t1 = t2 = t = 0; /* Counts of unique nonzeros in Set1, Set2 and the merged set, respectively */
  i[0] = 0;
  for (r = 0; r < m; r++) { /* Do row by row merging */
    b1 = rowBegin1[r];
    e1 = rowEnd1[r];
    b2 = rowBegin2[r];
    e2 = rowEnd2[r];
    /* Classic two-pointer merge of the two sorted (possibly repeated) column lists; within
       a run of repeats, jmap tells how far to jump to reach the next unique column */
    while (b1 < e1 && b2 < e2) {
      if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */
        j[t]      = j1[b1];
        imap1[t1] = t;
        imap2[t2] = t;
        b1 += jmap1[t1 + 1] - jmap1[t1]; /* Jump to next unique local nonzero */
        b2 += jmap2[t2 + 1] - jmap2[t2]; /* Jump to next unique remote nonzero */
        t1++;
        t2++;
        t++;
      } else if (j1[b1] < j2[b2]) {
        j[t]      = j1[b1];
        imap1[t1] = t;
        b1 += jmap1[t1 + 1] - jmap1[t1];
        t1++;
        t++;
      } else {
        j[t]      = j2[b2];
        imap2[t2] = t;
        b2 += jmap2[t2 + 1] - jmap2[t2];
        t2++;
        t++;
      }
    }
    /* Merge the remaining in either j1[] or j2[] */
    while (b1 < e1) {
      j[t]      = j1[b1];
      imap1[t1] = t;
      b1 += jmap1[t1 + 1] - jmap1[t1];
      t1++;
      t++;
    }
    while (b2 < e2) {
      j[t]      = j2[b2];
      imap2[t2] = t;
      b2 += jmap2[t2 + 1] - jmap2[t2];
      t2++;
      t++;
    }
    /* Row pointer of the merged CSR; PetscIntCast errors if the unique count overflows PetscInt */
    PetscCall(PetscIntCast(t, i + r + 1));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block

  Input Parameters:
    mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m.
    n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[]
      respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n.

      i[] is already sorted, but within a row, j[] is not sorted and might have repeats.
      i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting.

  Output Parameters:
    j[],perm[]: the routine needs to sort j[] within each row along with perm[].
    rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller.
      They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block,
      and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block.

    Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine.
      Atot: number of entries belonging to the diagonal block.
      Annz: number of unique nonzeros belonging to the diagonal block.
      Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count
        repeats (i.e., same 'i,j' pair).
      Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block.
More precisely, Ajmap[t+1] - Ajmap[t]
        is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0.

    Atot: number of entries belonging to the diagonal block
    Annz: number of unique nonzeros belonging to the diagonal block.

    Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block.

    Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1().
*/
static PetscErrorCode MatSplitEntries_Internal(Mat mat, PetscCount n, const PetscInt i[], PetscInt j[], PetscCount perm[], PetscCount rowBegin[], PetscCount rowMid[], PetscCount rowEnd[], PetscCount *Atot_, PetscCount **Aperm_, PetscCount *Annz_, PetscCount **Ajmap_, PetscCount *Btot_, PetscCount **Bperm_, PetscCount *Bnnz_, PetscCount **Bjmap_)
{
  PetscInt    cstart, cend, rstart, rend, row, col;
  PetscCount  Atot = 0, Btot = 0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */
  PetscCount  Annz = 0, Bnnz = 0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */
  PetscCount  k, m, p, q, r, s, mid;
  PetscCount *Aperm, *Bperm, *Ajmap, *Bjmap;

  PetscFunctionBegin;
  PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
  PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
  m = rend - rstart;

  /* Skip negative rows */
  for (k = 0; k < n; k++)
    if (i[k] >= 0) break;

  /* Process [k,n): sort and partition each local row into diag and offdiag portions,
     fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz.
  */
  while (k < n) {
    row = i[k];
    /* Entries in [k,s) are in one row. Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */
    for (s = k; s < n; s++)
      if (i[s] != row) break;

    /* Shift diag columns to range of [-PETSC_INT_MAX, -1] */
    for (p = k; p < s; p++) {
      if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_INT_MAX;
      /* NOTE(review): '<=' admits col == N; strictly valid 0-based columns are < mat->cmap->N — confirm intended bound */
      else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index %" PetscInt_FMT " is out of range", j[p]);
    }
    PetscCall(PetscSortIntWithCountArray(s - k, j + k, perm + k));
    PetscCall(PetscSortedIntUpperBound(j, k, s, -1, &mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */
    rowBegin[row - rstart] = k;
    rowMid[row - rstart]   = mid;
    rowEnd[row - rstart]   = s;

    /* Count nonzeros of this diag/offdiag row, which might have repeats */
    Atot += mid - k;
    Btot += s - mid;

    /* Count unique nonzeros of this diag row */
    for (p = k; p < mid;) {
      col = j[p];
      do {
        j[p] += PETSC_INT_MAX; /* Revert the modified diagonal indices */
        p++;
      } while (p < mid && j[p] == col);
      Annz++;
    }

    /* Count unique nonzeros of this offdiag row */
    for (p = mid; p < s;) {
      col = j[p];
      do {
        p++;
      } while (p < s && j[p] == col);
      Bnnz++;
    }
    k = s;
  }

  /* Allocation according to Atot, Btot, Annz, Bnnz */
  PetscCall(PetscMalloc1(Atot, &Aperm));
  PetscCall(PetscMalloc1(Btot, &Bperm));
  PetscCall(PetscMalloc1(Annz + 1, &Ajmap));
  PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap));

  /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */
  Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0; /* counters are reused as running offsets in this pass */
  for (r = 0; r < m; r++) {
    k   = rowBegin[r];
    mid = rowMid[r];
    s   = rowEnd[r];
    PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Aperm, Atot), PetscSafePointerPlusOffset(perm, k), mid - k));
    PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Bperm, Btot), PetscSafePointerPlusOffset(perm, mid), s - mid));
    Atot += mid - k;
    Btot += s - mid;

    /* Scan column indices in this row and find out how many repeats each unique nonzero has */
    for (p = k; p < mid;) {
      col = j[p];
      q   = p;
      do {
        p++;
      } while (p < mid && j[p] == col);
      Ajmap[Annz + 1] = Ajmap[Annz] + (p - q);
      Annz++;
    }

    for (p = mid; p < s;) {
      col = j[p];
      q   = p;
      do {
        p++;
      } while (p < s && j[p] == col);
      Bjmap[Bnnz + 1] = Bjmap[Bnnz] + (p - q);
      Bnnz++;
    }
  }
  /* Output */
  *Aperm_ = Aperm;
  *Annz_  = Annz;
  *Atot_  = Atot;
  *Ajmap_ = Ajmap;
  *Bperm_ = Bperm;
  *Bnnz_  = Bnnz;
  *Btot_  = Btot;
  *Bjmap_ = Bjmap;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix

  Input Parameters:
    nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[]
    nnz: number of unique nonzeros in the merged matrix
    imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix
    jmap[nnz1+1]: i-th nonzero in the set has jmap[i+1] - jmap[i] repeats in the set

  Output Parameter: (memory is allocated by the caller)
    jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set

  Example:
    nnz1 = 4
    nnz  = 6
    imap = [1,3,4,5]
    jmap = [0,3,5,6,7]
  then,
    jmap_new = [0,0,3,3,5,6,7]
*/
static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1, PetscCount nnz, const PetscCount imap[], const PetscCount jmap[], PetscCount jmap_new[])
{
  PetscCount k, p;

  PetscFunctionBegin;
  jmap_new[0] = 0;
  p = nnz;                          /* p loops over jmap_new[] backwards */
  for (k = nnz1 - 1; k >= 0; k--) { /* k loops over imap[]; PetscCount is signed, so k >= 0 terminates correctly */
    for (; p > imap[k]; p--) jmap_new[p] = jmap[k + 1];
  }
  for (; p >= 0; p--) jmap_new[p] = jmap[0];
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Destructor for the COO assembly structure attached to an MPIAIJ matrix; frees the SF and all index/permutation arrays */
static PetscErrorCode MatCOOStructDestroy_MPIAIJ(void **data)
{
  MatCOOStruct_MPIAIJ *coo = (MatCOOStruct_MPIAIJ *)*data;

  PetscFunctionBegin;
  PetscCall(PetscSFDestroy(&coo->sf));
  PetscCall(PetscFree(coo->Aperm1));
  PetscCall(PetscFree(coo->Bperm1));
  PetscCall(PetscFree(coo->Ajmap1));
  PetscCall(PetscFree(coo->Bjmap1));
  PetscCall(PetscFree(coo->Aimap2));
  PetscCall(PetscFree(coo->Bimap2));
  PetscCall(PetscFree(coo->Aperm2));
  PetscCall(PetscFree(coo->Bperm2));
  PetscCall(PetscFree(coo->Ajmap2));
  PetscCall(PetscFree(coo->Bjmap2));
  PetscCall(PetscFree(coo->Cperm1));
  PetscCall(PetscFree2(coo->sendbuf, coo->recvbuf));
  PetscCall(PetscFree(coo));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, PetscInt coo_i[], PetscInt coo_j[])
{
  MPI_Comm             comm;
  PetscMPIInt          rank, size;
  PetscInt             m, n, M, N, rstart, rend, cstart, cend; /* Sizes, indices of row/col, therefore with type PetscInt */
  PetscCount           k, p, q, rem; /* Loop variables over coo arrays */
  Mat_MPIAIJ          *mpiaij = (Mat_MPIAIJ *)mat->data;
  PetscContainer       container;
  MatCOOStruct_MPIAIJ *coo;

  PetscFunctionBegin;
  /* Preallocation invalidates any previously-built off-diagonal machinery; tear it down */
  PetscCall(PetscFree(mpiaij->garray));
  PetscCall(VecDestroy(&mpiaij->lvec));
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&mpiaij->colmap));
#else
  PetscCall(PetscFree(mpiaij->colmap));
#endif
  PetscCall(VecScatterDestroy(&mpiaij->Mvctx));
  mat->assembled     = PETSC_FALSE;
  mat->was_assembled = PETSC_FALSE;

  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
PetscCall(PetscLayoutSetUp(mat->rmap)); 6454 PetscCall(PetscLayoutSetUp(mat->cmap)); 6455 PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend)); 6456 PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend)); 6457 PetscCall(MatGetLocalSize(mat, &m, &n)); 6458 PetscCall(MatGetSize(mat, &M, &N)); 6459 6460 /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */ 6461 /* entries come first, then local rows, then remote rows. */ 6462 PetscCount n1 = coo_n, *perm1; 6463 PetscInt *i1 = coo_i, *j1 = coo_j; 6464 6465 PetscCall(PetscMalloc1(n1, &perm1)); 6466 for (k = 0; k < n1; k++) perm1[k] = k; 6467 6468 /* Manipulate indices so that entries with negative row or col indices will have smallest 6469 row indices, local entries will have greater but negative row indices, and remote entries 6470 will have positive row indices. 6471 */ 6472 for (k = 0; k < n1; k++) { 6473 if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_INT_MIN; /* e.g., -2^31, minimal to move them ahead */ 6474 else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_INT_MAX; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_INT_MAX, -1] */ 6475 else { 6476 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_USER_INPUT, "MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows"); 6477 if (mpiaij->donotstash) i1[k] = PETSC_INT_MIN; /* Ignore offproc entries as if they had negative indices */ 6478 } 6479 } 6480 6481 /* Sort by row; after that, [0,k) have ignored entries, [k,rem) have local rows and [rem,n1) have remote rows */ 6482 PetscCall(PetscSortIntWithIntCountArrayPair(n1, i1, j1, perm1)); 6483 6484 /* Advance k to the first entry we need to take care of */ 6485 for (k = 0; k < n1; k++) 6486 if (i1[k] > PETSC_INT_MIN) break; 6487 PetscCount i1start = k; 6488 6489 PetscCall(PetscSortedIntUpperBound(i1, k, n1, rend - 1 - PETSC_INT_MAX, &rem)); /* rem is upper bound of the last local row */ 6490 for (; k < rem; k++) i1[k] += PETSC_INT_MAX; /* Revert row indices of 
local rows*/ 6491 6492 /* Send remote rows to their owner */ 6493 /* Find which rows should be sent to which remote ranks*/ 6494 PetscInt nsend = 0; /* Number of MPI ranks to send data to */ 6495 PetscMPIInt *sendto; /* [nsend], storing remote ranks */ 6496 PetscInt *nentries; /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */ 6497 const PetscInt *ranges; 6498 PetscInt maxNsend = size >= 128 ? 128 : size; /* Assume max 128 neighbors; realloc when needed */ 6499 6500 PetscCall(PetscLayoutGetRanges(mat->rmap, &ranges)); 6501 PetscCall(PetscMalloc2(maxNsend, &sendto, maxNsend, &nentries)); 6502 for (k = rem; k < n1;) { 6503 PetscMPIInt owner; 6504 PetscInt firstRow, lastRow; 6505 6506 /* Locate a row range */ 6507 firstRow = i1[k]; /* first row of this owner */ 6508 PetscCall(PetscLayoutFindOwner(mat->rmap, firstRow, &owner)); 6509 lastRow = ranges[owner + 1] - 1; /* last row of this owner */ 6510 6511 /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */ 6512 PetscCall(PetscSortedIntUpperBound(i1, k, n1, lastRow, &p)); 6513 6514 /* All entries in [k,p) belong to this remote owner */ 6515 if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */ 6516 PetscMPIInt *sendto2; 6517 PetscInt *nentries2; 6518 PetscInt maxNsend2 = (maxNsend <= size / 2) ? 
maxNsend * 2 : size; 6519 6520 PetscCall(PetscMalloc2(maxNsend2, &sendto2, maxNsend2, &nentries2)); 6521 PetscCall(PetscArraycpy(sendto2, sendto, maxNsend)); 6522 PetscCall(PetscArraycpy(nentries2, nentries2, maxNsend + 1)); 6523 PetscCall(PetscFree2(sendto, nentries2)); 6524 sendto = sendto2; 6525 nentries = nentries2; 6526 maxNsend = maxNsend2; 6527 } 6528 sendto[nsend] = owner; 6529 PetscCall(PetscIntCast(p - k, &nentries[nsend])); 6530 nsend++; 6531 k = p; 6532 } 6533 6534 /* Build 1st SF to know offsets on remote to send data */ 6535 PetscSF sf1; 6536 PetscInt nroots = 1, nroots2 = 0; 6537 PetscInt nleaves = nsend, nleaves2 = 0; 6538 PetscInt *offsets; 6539 PetscSFNode *iremote; 6540 6541 PetscCall(PetscSFCreate(comm, &sf1)); 6542 PetscCall(PetscMalloc1(nsend, &iremote)); 6543 PetscCall(PetscMalloc1(nsend, &offsets)); 6544 for (k = 0; k < nsend; k++) { 6545 iremote[k].rank = sendto[k]; 6546 iremote[k].index = 0; 6547 nleaves2 += nentries[k]; 6548 PetscCheck(nleaves2 >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF leaves is too large for PetscInt"); 6549 } 6550 PetscCall(PetscSFSetGraph(sf1, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 6551 PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1, MPIU_INT, PETSC_MEMTYPE_HOST, &nroots2 /*rootdata*/, PETSC_MEMTYPE_HOST, nentries /*leafdata*/, PETSC_MEMTYPE_HOST, offsets /*leafupdate*/, MPI_SUM)); 6552 PetscCall(PetscSFFetchAndOpEnd(sf1, MPIU_INT, &nroots2, nentries, offsets, MPI_SUM)); /* Would nroots2 overflow, we check offsets[] below */ 6553 PetscCall(PetscSFDestroy(&sf1)); 6554 PetscAssert(nleaves2 == n1 - rem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT, nleaves2, n1 - rem); 6555 6556 /* Build 2nd SF to send remote COOs to their owner */ 6557 PetscSF sf2; 6558 nroots = nroots2; 6559 nleaves = nleaves2; 6560 PetscCall(PetscSFCreate(comm, &sf2)); 6561 PetscCall(PetscSFSetFromOptions(sf2)); 6562 
PetscCall(PetscMalloc1(nleaves, &iremote)); 6563 p = 0; 6564 for (k = 0; k < nsend; k++) { 6565 PetscCheck(offsets[k] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF roots is too large for PetscInt"); 6566 for (q = 0; q < nentries[k]; q++, p++) { 6567 iremote[p].rank = sendto[k]; 6568 PetscCall(PetscIntCast(offsets[k] + q, &iremote[p].index)); 6569 } 6570 } 6571 PetscCall(PetscSFSetGraph(sf2, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 6572 6573 /* Send the remote COOs to their owner */ 6574 PetscInt n2 = nroots, *i2, *j2; /* Buffers for received COOs from other ranks, along with a permutation array */ 6575 PetscCount *perm2; /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */ 6576 PetscCall(PetscMalloc3(n2, &i2, n2, &j2, n2, &perm2)); 6577 PetscAssert(rem == 0 || i1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null"); 6578 PetscAssert(rem == 0 || j1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null"); 6579 PetscInt *i1prem = PetscSafePointerPlusOffset(i1, rem); 6580 PetscInt *j1prem = PetscSafePointerPlusOffset(j1, rem); 6581 PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, i1prem, PETSC_MEMTYPE_HOST, i2, MPI_REPLACE)); 6582 PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, i1prem, i2, MPI_REPLACE)); 6583 PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, j1prem, PETSC_MEMTYPE_HOST, j2, MPI_REPLACE)); 6584 PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, j1prem, j2, MPI_REPLACE)); 6585 6586 PetscCall(PetscFree(offsets)); 6587 PetscCall(PetscFree2(sendto, nentries)); 6588 6589 /* Sort received COOs by row along with the permutation array */ 6590 for (k = 0; k < n2; k++) perm2[k] = k; 6591 PetscCall(PetscSortIntWithIntCountArrayPair(n2, i2, j2, perm2)); 6592 6593 /* sf2 only sends contiguous leafdata to contiguous rootdata. 
We record the permutation which will be used to fill leafdata */ 6594 PetscCount *Cperm1; 6595 PetscAssert(rem == 0 || perm1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null"); 6596 PetscCount *perm1prem = PetscSafePointerPlusOffset(perm1, rem); 6597 PetscCall(PetscMalloc1(nleaves, &Cperm1)); 6598 PetscCall(PetscArraycpy(Cperm1, perm1prem, nleaves)); 6599 6600 /* Support for HYPRE matrices, kind of a hack. 6601 Swap min column with diagonal so that diagonal values will go first */ 6602 PetscBool hypre; 6603 PetscCall(PetscStrcmp("_internal_COO_mat_for_hypre", ((PetscObject)mat)->name, &hypre)); 6604 if (hypre) { 6605 PetscInt *minj; 6606 PetscBT hasdiag; 6607 6608 PetscCall(PetscBTCreate(m, &hasdiag)); 6609 PetscCall(PetscMalloc1(m, &minj)); 6610 for (k = 0; k < m; k++) minj[k] = PETSC_INT_MAX; 6611 for (k = i1start; k < rem; k++) { 6612 if (j1[k] < cstart || j1[k] >= cend) continue; 6613 const PetscInt rindex = i1[k] - rstart; 6614 if ((j1[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex)); 6615 minj[rindex] = PetscMin(minj[rindex], j1[k]); 6616 } 6617 for (k = 0; k < n2; k++) { 6618 if (j2[k] < cstart || j2[k] >= cend) continue; 6619 const PetscInt rindex = i2[k] - rstart; 6620 if ((j2[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex)); 6621 minj[rindex] = PetscMin(minj[rindex], j2[k]); 6622 } 6623 for (k = i1start; k < rem; k++) { 6624 const PetscInt rindex = i1[k] - rstart; 6625 if (j1[k] < cstart || j1[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue; 6626 if (j1[k] == minj[rindex]) j1[k] = i1[k] + (cstart - rstart); 6627 else if ((j1[k] - cstart) == rindex) j1[k] = minj[rindex]; 6628 } 6629 for (k = 0; k < n2; k++) { 6630 const PetscInt rindex = i2[k] - rstart; 6631 if (j2[k] < cstart || j2[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue; 6632 if (j2[k] == minj[rindex]) j2[k] = i2[k] + (cstart - rstart); 6633 else if ((j2[k] - cstart) == rindex) j2[k] = minj[rindex]; 6634 } 6635 
PetscCall(PetscBTDestroy(&hasdiag)); 6636 PetscCall(PetscFree(minj)); 6637 } 6638 6639 /* Split local COOs and received COOs into diag/offdiag portions */ 6640 PetscCount *rowBegin1, *rowMid1, *rowEnd1; 6641 PetscCount *Ajmap1, *Aperm1, *Bjmap1, *Bperm1; 6642 PetscCount Annz1, Bnnz1, Atot1, Btot1; 6643 PetscCount *rowBegin2, *rowMid2, *rowEnd2; 6644 PetscCount *Ajmap2, *Aperm2, *Bjmap2, *Bperm2; 6645 PetscCount Annz2, Bnnz2, Atot2, Btot2; 6646 6647 PetscCall(PetscCalloc3(m, &rowBegin1, m, &rowMid1, m, &rowEnd1)); 6648 PetscCall(PetscCalloc3(m, &rowBegin2, m, &rowMid2, m, &rowEnd2)); 6649 PetscCall(MatSplitEntries_Internal(mat, rem, i1, j1, perm1, rowBegin1, rowMid1, rowEnd1, &Atot1, &Aperm1, &Annz1, &Ajmap1, &Btot1, &Bperm1, &Bnnz1, &Bjmap1)); 6650 PetscCall(MatSplitEntries_Internal(mat, n2, i2, j2, perm2, rowBegin2, rowMid2, rowEnd2, &Atot2, &Aperm2, &Annz2, &Ajmap2, &Btot2, &Bperm2, &Bnnz2, &Bjmap2)); 6651 6652 /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */ 6653 PetscInt *Ai, *Bi; 6654 PetscInt *Aj, *Bj; 6655 6656 PetscCall(PetscMalloc1(m + 1, &Ai)); 6657 PetscCall(PetscMalloc1(m + 1, &Bi)); 6658 PetscCall(PetscMalloc1(Annz1 + Annz2, &Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */ 6659 PetscCall(PetscMalloc1(Bnnz1 + Bnnz2, &Bj)); 6660 6661 PetscCount *Aimap1, *Bimap1, *Aimap2, *Bimap2; 6662 PetscCall(PetscMalloc1(Annz1, &Aimap1)); 6663 PetscCall(PetscMalloc1(Bnnz1, &Bimap1)); 6664 PetscCall(PetscMalloc1(Annz2, &Aimap2)); 6665 PetscCall(PetscMalloc1(Bnnz2, &Bimap2)); 6666 6667 PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowBegin1, rowMid1, rowBegin2, rowMid2, Ajmap1, Ajmap2, Aimap1, Aimap2, Ai, Aj)); 6668 PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowMid1, rowEnd1, rowMid2, rowEnd2, Bjmap1, Bjmap2, Bimap1, Bimap2, Bi, Bj)); 6669 6670 /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we */ 6671 /* expect nonzeros in A/B most likely have local contributing 
entries */ 6672 PetscInt Annz = Ai[m]; 6673 PetscInt Bnnz = Bi[m]; 6674 PetscCount *Ajmap1_new, *Bjmap1_new; 6675 6676 PetscCall(PetscMalloc1(Annz + 1, &Ajmap1_new)); 6677 PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap1_new)); 6678 6679 PetscCall(ExpandJmap_Internal(Annz1, Annz, Aimap1, Ajmap1, Ajmap1_new)); 6680 PetscCall(ExpandJmap_Internal(Bnnz1, Bnnz, Bimap1, Bjmap1, Bjmap1_new)); 6681 6682 PetscCall(PetscFree(Aimap1)); 6683 PetscCall(PetscFree(Ajmap1)); 6684 PetscCall(PetscFree(Bimap1)); 6685 PetscCall(PetscFree(Bjmap1)); 6686 PetscCall(PetscFree3(rowBegin1, rowMid1, rowEnd1)); 6687 PetscCall(PetscFree3(rowBegin2, rowMid2, rowEnd2)); 6688 PetscCall(PetscFree(perm1)); 6689 PetscCall(PetscFree3(i2, j2, perm2)); 6690 6691 Ajmap1 = Ajmap1_new; 6692 Bjmap1 = Bjmap1_new; 6693 6694 /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */ 6695 if (Annz < Annz1 + Annz2) { 6696 PetscInt *Aj_new; 6697 PetscCall(PetscMalloc1(Annz, &Aj_new)); 6698 PetscCall(PetscArraycpy(Aj_new, Aj, Annz)); 6699 PetscCall(PetscFree(Aj)); 6700 Aj = Aj_new; 6701 } 6702 6703 if (Bnnz < Bnnz1 + Bnnz2) { 6704 PetscInt *Bj_new; 6705 PetscCall(PetscMalloc1(Bnnz, &Bj_new)); 6706 PetscCall(PetscArraycpy(Bj_new, Bj, Bnnz)); 6707 PetscCall(PetscFree(Bj)); 6708 Bj = Bj_new; 6709 } 6710 6711 /* Create new submatrices for on-process and off-process coupling */ 6712 PetscScalar *Aa, *Ba; 6713 MatType rtype; 6714 Mat_SeqAIJ *a, *b; 6715 PetscObjectState state; 6716 PetscCall(PetscCalloc1(Annz, &Aa)); /* Zero matrix on device */ 6717 PetscCall(PetscCalloc1(Bnnz, &Ba)); 6718 /* make Aj[] local, i.e, based off the start column of the diagonal portion */ 6719 if (cstart) { 6720 for (k = 0; k < Annz; k++) Aj[k] -= cstart; 6721 } 6722 6723 PetscCall(MatGetRootType_Private(mat, &rtype)); 6724 6725 MatSeqXAIJGetOptions_Private(mpiaij->A); 6726 PetscCall(MatDestroy(&mpiaij->A)); 6727 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, Ai, Aj, Aa, &mpiaij->A)); 6728 
PetscCall(MatSetBlockSizesFromMats(mpiaij->A, mat, mat)); 6729 MatSeqXAIJRestoreOptions_Private(mpiaij->A); 6730 6731 MatSeqXAIJGetOptions_Private(mpiaij->B); 6732 PetscCall(MatDestroy(&mpiaij->B)); 6733 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, mat->cmap->N, Bi, Bj, Ba, &mpiaij->B)); 6734 PetscCall(MatSetBlockSizesFromMats(mpiaij->B, mat, mat)); 6735 MatSeqXAIJRestoreOptions_Private(mpiaij->B); 6736 6737 PetscCall(MatSetUpMultiply_MPIAIJ(mat)); 6738 mat->was_assembled = PETSC_TRUE; // was_assembled in effect means the Mvctx is built; doing so avoids redundant MatSetUpMultiply_MPIAIJ 6739 state = mpiaij->A->nonzerostate + mpiaij->B->nonzerostate; 6740 PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat))); 6741 6742 a = (Mat_SeqAIJ *)mpiaij->A->data; 6743 b = (Mat_SeqAIJ *)mpiaij->B->data; 6744 a->free_a = PETSC_TRUE; 6745 a->free_ij = PETSC_TRUE; 6746 b->free_a = PETSC_TRUE; 6747 b->free_ij = PETSC_TRUE; 6748 a->maxnz = a->nz; 6749 b->maxnz = b->nz; 6750 6751 /* conversion must happen AFTER multiply setup */ 6752 PetscCall(MatConvert(mpiaij->A, rtype, MAT_INPLACE_MATRIX, &mpiaij->A)); 6753 PetscCall(MatConvert(mpiaij->B, rtype, MAT_INPLACE_MATRIX, &mpiaij->B)); 6754 PetscCall(VecDestroy(&mpiaij->lvec)); 6755 PetscCall(MatCreateVecs(mpiaij->B, &mpiaij->lvec, NULL)); 6756 6757 // Put the COO struct in a container and then attach that to the matrix 6758 PetscCall(PetscMalloc1(1, &coo)); 6759 coo->n = coo_n; 6760 coo->sf = sf2; 6761 coo->sendlen = nleaves; 6762 coo->recvlen = nroots; 6763 coo->Annz = Annz; 6764 coo->Bnnz = Bnnz; 6765 coo->Annz2 = Annz2; 6766 coo->Bnnz2 = Bnnz2; 6767 coo->Atot1 = Atot1; 6768 coo->Atot2 = Atot2; 6769 coo->Btot1 = Btot1; 6770 coo->Btot2 = Btot2; 6771 coo->Ajmap1 = Ajmap1; 6772 coo->Aperm1 = Aperm1; 6773 coo->Bjmap1 = Bjmap1; 6774 coo->Bperm1 = Bperm1; 6775 coo->Aimap2 = Aimap2; 6776 coo->Ajmap2 = Ajmap2; 6777 coo->Aperm2 = Aperm2; 6778 coo->Bimap2 = Bimap2; 6779 
coo->Bjmap2 = Bjmap2;
  coo->Bperm2 = Bperm2;
  coo->Cperm1 = Cperm1;
  // Allocate in preallocation. If not used, it has zero cost on host
  PetscCall(PetscMalloc2(coo->sendlen, &coo->sendbuf, coo->recvlen, &coo->recvbuf));
  PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container));
  PetscCall(PetscContainerSetPointer(container, coo));
  PetscCall(PetscContainerSetCtxDestroy(container, MatCOOStructDestroy_MPIAIJ));
  PetscCall(PetscObjectCompose((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject)container));
  PetscCall(PetscContainerDestroy(&container));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Insert values into the matrix at the COO locations recorded at preallocation time.

   v[] is indexed in the user's original COO order; the permutation (perm) and jmap arrays stored
   in the MatCOOStruct composed on the matrix map those values onto the nonzeros of the diagonal (A)
   and off-diagonal (B) sequential blocks. Off-process entries are shipped to their owner with the
   PetscSF built during preallocation. */
static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat, const PetscScalar v[], InsertMode imode)
{
  Mat_MPIAIJ          *mpiaij = (Mat_MPIAIJ *)mat->data;
  Mat                  A = mpiaij->A, B = mpiaij->B;
  PetscScalar         *Aa, *Ba;
  PetscScalar         *sendbuf, *recvbuf;
  const PetscCount    *Ajmap1, *Ajmap2, *Aimap2;
  const PetscCount    *Bjmap1, *Bjmap2, *Bimap2;
  const PetscCount    *Aperm1, *Aperm2, *Bperm1, *Bperm2;
  const PetscCount    *Cperm1;
  PetscContainer       container;
  MatCOOStruct_MPIAIJ *coo;

  PetscFunctionBegin;
  /* Retrieve the COO metadata attached by MatSetPreallocationCOO_MPIAIJ() */
  PetscCall(PetscObjectQuery((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject *)&container));
  PetscCheck(container, PetscObjectComm((PetscObject)mat), PETSC_ERR_PLIB, "Not found MatCOOStruct on this matrix");
  PetscCall(PetscContainerGetPointer(container, (void **)&coo));
  sendbuf = coo->sendbuf;
  recvbuf = coo->recvbuf;
  Ajmap1  = coo->Ajmap1;
  Ajmap2  = coo->Ajmap2;
  Aimap2  = coo->Aimap2;
  Bjmap1  = coo->Bjmap1;
  Bjmap2  = coo->Bjmap2;
  Bimap2  = coo->Bimap2;
  Aperm1  = coo->Aperm1;
  Aperm2  = coo->Aperm2;
  Bperm1  = coo->Bperm1;
  Bperm2  = coo->Bperm2;
  Cperm1  = coo->Cperm1;

  PetscCall(MatSeqAIJGetArray(A, &Aa)); /* Might read and write matrix values */
  PetscCall(MatSeqAIJGetArray(B, &Ba));

  /* Pack entries to be sent to remote */
  for (PetscCount i = 0; i < coo->sendlen; i++) sendbuf[i] = v[Cperm1[i]];

  /* Send remote entries to their owner and overlap the communication with local computation */
  PetscCall(PetscSFReduceWithMemTypeBegin(coo->sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, sendbuf, PETSC_MEMTYPE_HOST, recvbuf, MPI_REPLACE));
  /* Add local entries to A and B */
  for (PetscCount i = 0; i < coo->Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */
    PetscScalar sum = 0.0;                     /* Do partial summation first to improve numerical stability */
    for (PetscCount k = Ajmap1[i]; k < Ajmap1[i + 1]; k++) sum += v[Aperm1[k]];
    Aa[i] = (imode == INSERT_VALUES ? 0.0 : Aa[i]) + sum;
  }
  for (PetscCount i = 0; i < coo->Bnnz; i++) {
    PetscScalar sum = 0.0;
    for (PetscCount k = Bjmap1[i]; k < Bjmap1[i + 1]; k++) sum += v[Bperm1[k]];
    Ba[i] = (imode == INSERT_VALUES ? 0.0 : Ba[i]) + sum;
  }
  PetscCall(PetscSFReduceEnd(coo->sf, MPIU_SCALAR, sendbuf, recvbuf, MPI_REPLACE));

  /* Add received remote entries to A and B; Aimap2/Bimap2 give the target nonzero for each
     received run, Ajmap2/Bjmap2 delimit the runs in the receive buffer */
  for (PetscCount i = 0; i < coo->Annz2; i++) {
    for (PetscCount k = Ajmap2[i]; k < Ajmap2[i + 1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]];
  }
  for (PetscCount i = 0; i < coo->Bnnz2; i++) {
    for (PetscCount k = Bjmap2[i]; k < Bjmap2[i + 1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]];
  }
  PetscCall(MatSeqAIJRestoreArray(A, &Aa));
  PetscCall(MatSeqAIJRestoreArray(B, &Ba));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
   MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.

   Options Database Keys:
.
-mat_type mpiaij - sets the matrix type to `MATMPIAIJ` during a call to `MatSetFromOptions()`

   Level: beginner

   Notes:
   `MatSetValues()` may be called for this matrix type with a `NULL` argument for the numerical values,
   in this case the values associated with the rows and columns one passes in are set to zero
   in the matrix

   `MatSetOptions`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this case no
   space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored

.seealso: [](ch_matrices), `Mat`, `MATSEQAIJ`, `MATAIJ`, `MatCreateAIJ()`
M*/
/* Type constructor registered for MATMPIAIJ: installs the MatOps function table and composes all
   type-specific methods, conversion routines, and product implementations on the object. */
PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ *b;
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));

  PetscCall(PetscNew(&b));
  B->data       = (void *)b;
  B->ops[0]     = MatOps_Values; /* struct copy of the whole MATMPIAIJ function table */
  B->assembled  = PETSC_FALSE;
  B->insertmode = NOT_SET_VALUES;
  b->size       = size;

  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));

  /* build cache for off array entries formed */
  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL;
  b->garray      = NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = NULL;
  b->rowvalues    = NULL;
  b->getrowactive = PETSC_FALSE;

  /* flexible pointer used in CUSPARSE classes */
  b->spptr = NULL;

  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetUseScalableIncreaseOverlap_C", MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocation_C", MatMPIAIJSetPreallocation_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetPreallocation_C", MatResetPreallocation_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetHash_C", MatResetHash_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocationCSR_C", MatMPIAIJSetPreallocationCSR_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijperm_C", MatConvert_MPIAIJ_MPIAIJPERM));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijsell_C", MatConvert_MPIAIJ_MPIAIJSELL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcusparse_C", MatConvert_MPIAIJ_MPIAIJCUSPARSE));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijhipsparse_C", MatConvert_MPIAIJ_MPIAIJHIPSPARSE));
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijkokkos_C", MatConvert_MPIAIJ_MPIAIJKokkos));
#endif
#if defined(PETSC_HAVE_MKL_SPARSE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijmkl_C", MatConvert_MPIAIJ_MPIAIJMKL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcrl_C", MatConvert_MPIAIJ_MPIAIJCRL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpibaij_C", MatConvert_MPIAIJ_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisbaij_C", MatConvert_MPIAIJ_MPISBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_elemental_C", MatConvert_MPIAIJ_Elemental));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_scalapack_C", MatConvert_AIJ_ScaLAPACK));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_is_C", MatConvert_XAIJ_IS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisell_C", MatConvert_MPIAIJ_MPISELL));
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_hypre_C", MatConvert_AIJ_HYPRE));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", MatProductSetFromOptions_Transpose_AIJ_AIJ));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_is_mpiaij_C", MatProductSetFromOptions_IS_XAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpiaij_C", MatProductSetFromOptions_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetPreallocationCOO_C", MatSetPreallocationCOO_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetValuesCOO_C", MatSetValuesCOO_MPIAIJ));
  PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIAIJ));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatCreateMPIAIJWithSplitArrays - creates a `MATMPIAIJ` matrix using arrays that contain the "diagonal"
  and "off-diagonal" part of the matrix in CSR format.
6961 6962 Collective 6963 6964 Input Parameters: 6965 + comm - MPI communicator 6966 . m - number of local rows (Cannot be `PETSC_DECIDE`) 6967 . n - This value should be the same as the local size used in creating the 6968 x vector for the matrix-vector product $y = Ax$. (or `PETSC_DECIDE` to have 6969 calculated if `N` is given) For square matrices `n` is almost always `m`. 6970 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given) 6971 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given) 6972 . i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 6973 . j - column indices, which must be local, i.e., based off the start column of the diagonal portion 6974 . a - matrix values 6975 . oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix 6976 . oj - column indices, which must be global, representing global columns in the `MATMPIAIJ` matrix 6977 - oa - matrix values 6978 6979 Output Parameter: 6980 . mat - the matrix 6981 6982 Level: advanced 6983 6984 Notes: 6985 The `i`, `j`, and `a` arrays ARE NOT copied by this routine into the internal format used by PETSc (even in Fortran). The user 6986 must free the arrays once the matrix has been destroyed and not before. 6987 6988 The `i` and `j` indices are 0 based 6989 6990 See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix 6991 6992 This sets local rows and cannot be used to set off-processor values. 6993 6994 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 6995 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 6996 not easily support in-place reassembly. 
It is recommended to use MatSetValues() (or a variant thereof) because
  the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
  keep track of the underlying array. Use `MatSetOption`(A,`MAT_NO_OFF_PROC_ENTRIES`,`PETSC_TRUE`) to disable all
  communication if it is known that only local entries will be set.

.seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
          `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()`
@*/
PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt i[], PetscInt j[], PetscScalar a[], PetscInt oi[], PetscInt oj[], PetscScalar oa[], Mat *mat)
{
  Mat_MPIAIJ *maij;

  PetscFunctionBegin;
  /* The local row count must be known up front because the user supplies per-rank CSR data */
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
  PetscCheck(oi[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "oi (row indices) must start with 0");
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatSetSizes(*mat, m, n, M, N));
  PetscCall(MatSetType(*mat, MATMPIAIJ));
  maij = (Mat_MPIAIJ *)(*mat)->data;

  (*mat)->preallocated = PETSC_TRUE;

  PetscCall(PetscLayoutSetUp((*mat)->rmap));
  PetscCall(PetscLayoutSetUp((*mat)->cmap));

  /* Wrap the user arrays directly (no copy) as the diagonal (A) and off-diagonal (B) sequential blocks */
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, i, j, a, &maij->A));
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, (*mat)->cmap->N, oi, oj, oa, &maij->B));

  /* All entries are local by construction, so assembly needs no off-process communication */
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
  PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Product data attached to C->product->data by the backend (device-capable) MatProduct implementation */
typedef struct {
  Mat       *mp;    /* intermediate products */
  PetscBool *mptmp; /* is the intermediate product temporary ? */
  PetscInt   cp;    /* number of intermediate products */

  /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */
  PetscInt    *startsj_s, *startsj_r;
  PetscScalar *bufa;
  Mat          P_oth;

  /* may take advantage of merging product->B */
  Mat Bloc; /* B-local by merging diag and off-diag */

  /* cusparse does not have support to split between symbolic and numeric phases.
     When api_user is true, we don't need to update the numerical values
     of the temporary storage */
  PetscBool reusesym;

  /* support for COO values insertion */
  PetscScalar *coo_v, *coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */
  PetscInt   **own;           /* own[i] points to address of on-process COO indices for Mat mp[i] */
  PetscInt   **off;           /* off[i] points to address of off-process COO indices for Mat mp[i] */
  PetscBool    hasoffproc;    /* if true, have off-process values insertion (i.e.
AtB or PtAP) */
  PetscSF      sf; /* used for non-local values insertion and memory malloc */
  PetscMemType mtype;

  /* customization */
  PetscBool abmerge;
  PetscBool P_oth_bind;
} MatMatMPIAIJBACKEND;

/* Destructor for the MatMatMPIAIJBACKEND context stored in C->product->data */
static PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data)
{
  MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND *)data;
  PetscInt             i;

  PetscFunctionBegin;
  PetscCall(PetscFree2(mmdata->startsj_s, mmdata->startsj_r));
  PetscCall(PetscFree(mmdata->bufa));
  /* coo_v/coo_w live on mmdata->mtype memory, so release them through the SF allocator */
  PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_v));
  PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_w));
  PetscCall(MatDestroy(&mmdata->P_oth));
  PetscCall(MatDestroy(&mmdata->Bloc));
  PetscCall(PetscSFDestroy(&mmdata->sf));
  for (i = 0; i < mmdata->cp; i++) PetscCall(MatDestroy(&mmdata->mp[i]));
  PetscCall(PetscFree2(mmdata->mp, mmdata->mptmp));
  /* own[0]/off[0] hold the single backing buffers shared by all per-product index arrays */
  PetscCall(PetscFree(mmdata->own[0]));
  PetscCall(PetscFree(mmdata->own));
  PetscCall(PetscFree(mmdata->off[0]));
  PetscCall(PetscFree(mmdata->off));
  PetscCall(PetscFree(mmdata));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Copy selected n entries with indices in idx[] of A to v[].
   If idx is NULL, copy the whole data array of A to v[]
*/
static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[])
{
  PetscErrorCode (*f)(Mat, PetscInt, const PetscInt[], PetscScalar[]);

  PetscFunctionBegin;
  /* Prefer a type-specific (e.g. device-aware) implementation if one is composed on A */
  PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJCopySubArray_C", &f));
  if (f) {
    PetscCall((*f)(A, n, idx, v));
  } else {
    const PetscScalar *vv;

    PetscCall(MatSeqAIJGetArrayRead(A, &vv));
    if (n && idx) {
      PetscScalar    *w  = v;
      const PetscInt *oi = idx;
      PetscInt        j;

      for (j = 0; j < n; j++) *w++ = vv[*oi++];
    } else {
      /* NULL idx: bulk copy of the first n values of A's data array */
      PetscCall(PetscArraycpy(v, vv, n));
    }
    PetscCall(MatSeqAIJRestoreArrayRead(A, &vv));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Numeric phase of the backend MatProduct: refresh temporaries, run each intermediate
   (sequential) product, then scatter the results into C via COO values insertion */
static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C)
{
  MatMatMPIAIJBACKEND *mmdata;
  PetscInt             i, n_d, n_o;

  PetscFunctionBegin;
  MatCheckProduct(C, 1);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  mmdata = (MatMatMPIAIJBACKEND *)C->product->data;
  if (!mmdata->reusesym) { /* update temporary matrices */
    if (mmdata->P_oth) PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A, C->product->B, MAT_REUSE_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
    if (mmdata->Bloc) PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B, MAT_REUSE_MATRIX, NULL, &mmdata->Bloc));
  }
  mmdata->reusesym = PETSC_FALSE;

  /* Run the numeric phase of every intermediate product */
  for (i = 0; i < mmdata->cp; i++) {
    PetscCheck(mmdata->mp[i]->ops->productnumeric, PetscObjectComm((PetscObject)mmdata->mp[i]), PETSC_ERR_PLIB, "Missing numeric op for %s", MatProductTypes[mmdata->mp[i]->product->type]);
    PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]));
  }
  /* Gather computed values into the COO buffers, splitting on-process (coo_v) and off-process (coo_w) contributions */
  for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) {
    PetscInt noff;
PetscCall(PetscIntCast(mmdata->off[i + 1] - mmdata->off[i], &noff)); 7139 if (mmdata->mptmp[i]) continue; 7140 if (noff) { 7141 PetscInt nown; 7142 7143 PetscCall(PetscIntCast(mmdata->own[i + 1] - mmdata->own[i], &nown)); 7144 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], noff, mmdata->off[i], mmdata->coo_w + n_o)); 7145 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], nown, mmdata->own[i], mmdata->coo_v + n_d)); 7146 n_o += noff; 7147 n_d += nown; 7148 } else { 7149 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mmdata->mp[i]->data; 7150 7151 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], mm->nz, NULL, mmdata->coo_v + n_d)); 7152 n_d += mm->nz; 7153 } 7154 } 7155 if (mmdata->hasoffproc) { /* offprocess insertion */ 7156 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d)); 7157 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d)); 7158 } 7159 PetscCall(MatSetValuesCOO(C, mmdata->coo_v, INSERT_VALUES)); 7160 PetscFunctionReturn(PETSC_SUCCESS); 7161 } 7162 7163 /* Support for Pt * A, A * P, or Pt * A * P */ 7164 #define MAX_NUMBER_INTERMEDIATE 4 7165 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C) 7166 { 7167 Mat_Product *product = C->product; 7168 Mat A, P, mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */ 7169 Mat_MPIAIJ *a, *p; 7170 MatMatMPIAIJBACKEND *mmdata; 7171 ISLocalToGlobalMapping P_oth_l2g = NULL; 7172 IS glob = NULL; 7173 const char *prefix; 7174 char pprefix[256]; 7175 const PetscInt *globidx, *P_oth_idx; 7176 PetscInt i, j, cp, m, n, M, N, *coo_i, *coo_j; 7177 PetscCount ncoo, ncoo_d, ncoo_o, ncoo_oown; 7178 PetscInt cmapt[MAX_NUMBER_INTERMEDIATE], rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. 
*/ 7179 /* type-0: consecutive, start from 0; type-1: consecutive with */ 7180 /* a base offset; type-2: sparse with a local to global map table */ 7181 const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE], *rmapa[MAX_NUMBER_INTERMEDIATE]; /* col/row local to global map array (table) for type-2 map type */ 7182 7183 MatProductType ptype; 7184 PetscBool mptmp[MAX_NUMBER_INTERMEDIATE], hasoffproc = PETSC_FALSE, iscuda, iship, iskokk; 7185 PetscMPIInt size; 7186 7187 PetscFunctionBegin; 7188 MatCheckProduct(C, 1); 7189 PetscCheck(!product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 7190 ptype = product->type; 7191 if (product->A->symmetric == PETSC_BOOL3_TRUE && ptype == MATPRODUCT_AtB) { 7192 ptype = MATPRODUCT_AB; 7193 product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE; 7194 } 7195 switch (ptype) { 7196 case MATPRODUCT_AB: 7197 A = product->A; 7198 P = product->B; 7199 m = A->rmap->n; 7200 n = P->cmap->n; 7201 M = A->rmap->N; 7202 N = P->cmap->N; 7203 hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */ 7204 break; 7205 case MATPRODUCT_AtB: 7206 P = product->A; 7207 A = product->B; 7208 m = P->cmap->n; 7209 n = A->cmap->n; 7210 M = P->cmap->N; 7211 N = A->cmap->N; 7212 hasoffproc = PETSC_TRUE; 7213 break; 7214 case MATPRODUCT_PtAP: 7215 A = product->A; 7216 P = product->B; 7217 m = P->cmap->n; 7218 n = P->cmap->n; 7219 M = P->cmap->N; 7220 N = P->cmap->N; 7221 hasoffproc = PETSC_TRUE; 7222 break; 7223 default: 7224 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]); 7225 } 7226 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C), &size)); 7227 if (size == 1) hasoffproc = PETSC_FALSE; 7228 7229 /* defaults */ 7230 for (i = 0; i < MAX_NUMBER_INTERMEDIATE; i++) { 7231 mp[i] = NULL; 7232 mptmp[i] = PETSC_FALSE; 7233 rmapt[i] = -1; 7234 cmapt[i] = -1; 7235 rmapa[i] = NULL; 7236 cmapa[i] = NULL; 7237 } 7238 7239 /* customization */ 
7240 PetscCall(PetscNew(&mmdata)); 7241 mmdata->reusesym = product->api_user; 7242 if (ptype == MATPRODUCT_AB) { 7243 if (product->api_user) { 7244 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatMult", "Mat"); 7245 PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL)); 7246 PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7247 PetscOptionsEnd(); 7248 } else { 7249 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat"); 7250 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL)); 7251 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7252 PetscOptionsEnd(); 7253 } 7254 } else if (ptype == MATPRODUCT_PtAP) { 7255 if (product->api_user) { 7256 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatPtAP", "Mat"); 7257 PetscCall(PetscOptionsBool("-matptap_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7258 PetscOptionsEnd(); 7259 } else { 7260 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat"); 7261 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7262 PetscOptionsEnd(); 7263 } 7264 } 7265 a = (Mat_MPIAIJ *)A->data; 7266 p = (Mat_MPIAIJ *)P->data; 7267 PetscCall(MatSetSizes(C, m, n, M, N)); 7268 PetscCall(PetscLayoutSetUp(C->rmap)); 7269 PetscCall(PetscLayoutSetUp(C->cmap)); 7270 PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 7271 
PetscCall(MatGetOptionsPrefix(C, &prefix)); 7272 7273 cp = 0; 7274 switch (ptype) { 7275 case MATPRODUCT_AB: /* A * P */ 7276 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7277 7278 /* A_diag * P_local (merged or not) */ 7279 if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */ 7280 /* P is product->B */ 7281 PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7282 PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp])); 7283 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7284 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7285 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7286 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7287 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7288 mp[cp]->product->api_user = product->api_user; 7289 PetscCall(MatProductSetFromOptions(mp[cp])); 7290 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7291 PetscCall(ISGetIndices(glob, &globidx)); 7292 rmapt[cp] = 1; 7293 cmapt[cp] = 2; 7294 cmapa[cp] = globidx; 7295 mptmp[cp] = PETSC_FALSE; 7296 cp++; 7297 } else { /* A_diag * P_diag and A_diag * P_off */ 7298 PetscCall(MatProductCreate(a->A, p->A, NULL, &mp[cp])); 7299 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7300 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7301 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7302 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7303 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7304 mp[cp]->product->api_user = product->api_user; 7305 PetscCall(MatProductSetFromOptions(mp[cp])); 7306 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7307 rmapt[cp] = 1; 7308 cmapt[cp] = 1; 7309 mptmp[cp] = PETSC_FALSE; 7310 cp++; 7311 PetscCall(MatProductCreate(a->A, p->B, NULL, &mp[cp])); 7312 
PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7313 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7314 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7315 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7316 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7317 mp[cp]->product->api_user = product->api_user; 7318 PetscCall(MatProductSetFromOptions(mp[cp])); 7319 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7320 rmapt[cp] = 1; 7321 cmapt[cp] = 2; 7322 cmapa[cp] = p->garray; 7323 mptmp[cp] = PETSC_FALSE; 7324 cp++; 7325 } 7326 7327 /* A_off * P_other */ 7328 if (mmdata->P_oth) { 7329 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); /* make P_oth use local col ids */ 7330 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx)); 7331 PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)a->B)->type_name)); 7332 PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind)); 7333 PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp])); 7334 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7335 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7336 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7337 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7338 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7339 mp[cp]->product->api_user = product->api_user; 7340 PetscCall(MatProductSetFromOptions(mp[cp])); 7341 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7342 rmapt[cp] = 1; 7343 cmapt[cp] = 2; 7344 cmapa[cp] = P_oth_idx; 7345 mptmp[cp] = PETSC_FALSE; 7346 cp++; 7347 } 7348 break; 7349 7350 case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */ 7351 /* A is product->B */ 7352 PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7353 if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */ 7354 PetscCall(MatProductCreate(mmdata->Bloc, mmdata->Bloc, 
NULL, &mp[cp])); 7355 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7356 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7357 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7358 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7359 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7360 mp[cp]->product->api_user = product->api_user; 7361 PetscCall(MatProductSetFromOptions(mp[cp])); 7362 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7363 PetscCall(ISGetIndices(glob, &globidx)); 7364 rmapt[cp] = 2; 7365 rmapa[cp] = globidx; 7366 cmapt[cp] = 2; 7367 cmapa[cp] = globidx; 7368 mptmp[cp] = PETSC_FALSE; 7369 cp++; 7370 } else { 7371 PetscCall(MatProductCreate(p->A, mmdata->Bloc, NULL, &mp[cp])); 7372 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7373 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7374 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7375 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7376 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7377 mp[cp]->product->api_user = product->api_user; 7378 PetscCall(MatProductSetFromOptions(mp[cp])); 7379 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7380 PetscCall(ISGetIndices(glob, &globidx)); 7381 rmapt[cp] = 1; 7382 cmapt[cp] = 2; 7383 cmapa[cp] = globidx; 7384 mptmp[cp] = PETSC_FALSE; 7385 cp++; 7386 PetscCall(MatProductCreate(p->B, mmdata->Bloc, NULL, &mp[cp])); 7387 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7388 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7389 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7390 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7391 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7392 mp[cp]->product->api_user = product->api_user; 7393 PetscCall(MatProductSetFromOptions(mp[cp])); 7394 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7395 rmapt[cp] = 2; 7396 rmapa[cp] = p->garray; 7397 cmapt[cp] = 
2; 7398 cmapa[cp] = globidx; 7399 mptmp[cp] = PETSC_FALSE; 7400 cp++; 7401 } 7402 break; 7403 case MATPRODUCT_PtAP: 7404 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7405 /* P is product->B */ 7406 PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7407 PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp])); 7408 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_PtAP)); 7409 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7410 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7411 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7412 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7413 mp[cp]->product->api_user = product->api_user; 7414 PetscCall(MatProductSetFromOptions(mp[cp])); 7415 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7416 PetscCall(ISGetIndices(glob, &globidx)); 7417 rmapt[cp] = 2; 7418 rmapa[cp] = globidx; 7419 cmapt[cp] = 2; 7420 cmapa[cp] = globidx; 7421 mptmp[cp] = PETSC_FALSE; 7422 cp++; 7423 if (mmdata->P_oth) { 7424 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); 7425 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx)); 7426 PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)a->B)->type_name)); 7427 PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind)); 7428 PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp])); 7429 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7430 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7431 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7432 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7433 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7434 mp[cp]->product->api_user = product->api_user; 7435 PetscCall(MatProductSetFromOptions(mp[cp])); 7436 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7437 mptmp[cp] = PETSC_TRUE; 
7438 cp++; 7439 PetscCall(MatProductCreate(mmdata->Bloc, mp[1], NULL, &mp[cp])); 7440 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7441 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7442 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7443 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7444 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7445 mp[cp]->product->api_user = product->api_user; 7446 PetscCall(MatProductSetFromOptions(mp[cp])); 7447 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7448 rmapt[cp] = 2; 7449 rmapa[cp] = globidx; 7450 cmapt[cp] = 2; 7451 cmapa[cp] = P_oth_idx; 7452 mptmp[cp] = PETSC_FALSE; 7453 cp++; 7454 } 7455 break; 7456 default: 7457 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]); 7458 } 7459 /* sanity check */ 7460 if (size > 1) 7461 for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected offproc map type for product %" PetscInt_FMT, i); 7462 7463 PetscCall(PetscMalloc2(cp, &mmdata->mp, cp, &mmdata->mptmp)); 7464 for (i = 0; i < cp; i++) { 7465 mmdata->mp[i] = mp[i]; 7466 mmdata->mptmp[i] = mptmp[i]; 7467 } 7468 mmdata->cp = cp; 7469 C->product->data = mmdata; 7470 C->product->destroy = MatDestroy_MatMatMPIAIJBACKEND; 7471 C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND; 7472 7473 /* memory type */ 7474 mmdata->mtype = PETSC_MEMTYPE_HOST; 7475 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iscuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, "")); 7476 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iship, MATSEQAIJHIPSPARSE, MATMPIAIJHIPSPARSE, "")); 7477 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iskokk, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, "")); 7478 if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA; 7479 else if (iship) mmdata->mtype = PETSC_MEMTYPE_HIP; 7480 else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS; 7481 7482 /* prepare coo 
coordinates for values insertion */ 7483 7484 /* count total nonzeros of those intermediate seqaij Mats 7485 ncoo_d: # of nonzeros of matrices that do not have offproc entries 7486 ncoo_o: # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs 7487 ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally 7488 */ 7489 for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) { 7490 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7491 if (mptmp[cp]) continue; 7492 if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scatter to all processes (might include self) */ 7493 const PetscInt *rmap = rmapa[cp]; 7494 const PetscInt mr = mp[cp]->rmap->n; 7495 const PetscInt rs = C->rmap->rstart; 7496 const PetscInt re = C->rmap->rend; 7497 const PetscInt *ii = mm->i; 7498 for (i = 0; i < mr; i++) { 7499 const PetscInt gr = rmap[i]; 7500 const PetscInt nz = ii[i + 1] - ii[i]; 7501 if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */ 7502 else ncoo_oown += nz; /* this row is local */ 7503 } 7504 } else ncoo_d += mm->nz; 7505 } 7506 7507 /* 7508 ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc 7509 7510 ncoo = ncoo_d + ncoo_oown + ncoo2, which ncoo2 is number of nonzeros inserted to me by other procs. 7511 7512 off[0] points to a big index array, which is shared by off[1,2,...]. Similarly, for own[0]. 7513 7514 off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others 7515 own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally 7516 so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others. 7517 7518 coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc. 7519 Ex. 
coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaining part stores i of nonzeros I will receive. 7520 */ 7521 PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->off)); /* +1 to make a csr-like data structure */ 7522 PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->own)); 7523 7524 /* gather (i,j) of nonzeros inserted by remote procs */ 7525 if (hasoffproc) { 7526 PetscSF msf; 7527 PetscInt ncoo2, *coo_i2, *coo_j2; 7528 7529 PetscCall(PetscMalloc1(ncoo_o, &mmdata->off[0])); 7530 PetscCall(PetscMalloc1(ncoo_oown, &mmdata->own[0])); 7531 PetscCall(PetscMalloc2(ncoo_o, &coo_i, ncoo_o, &coo_j)); /* to collect (i,j) of entries to be sent to others */ 7532 7533 for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) { 7534 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7535 PetscInt *idxoff = mmdata->off[cp]; 7536 PetscInt *idxown = mmdata->own[cp]; 7537 if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */ 7538 const PetscInt *rmap = rmapa[cp]; 7539 const PetscInt *cmap = cmapa[cp]; 7540 const PetscInt *ii = mm->i; 7541 PetscInt *coi = coo_i + ncoo_o; 7542 PetscInt *coj = coo_j + ncoo_o; 7543 const PetscInt mr = mp[cp]->rmap->n; 7544 const PetscInt rs = C->rmap->rstart; 7545 const PetscInt re = C->rmap->rend; 7546 const PetscInt cs = C->cmap->rstart; 7547 for (i = 0; i < mr; i++) { 7548 const PetscInt *jj = mm->j + ii[i]; 7549 const PetscInt gr = rmap[i]; 7550 const PetscInt nz = ii[i + 1] - ii[i]; 7551 if (gr < rs || gr >= re) { /* this is an offproc row */ 7552 for (j = ii[i]; j < ii[i + 1]; j++) { 7553 *coi++ = gr; 7554 *idxoff++ = j; 7555 } 7556 if (!cmapt[cp]) { /* already global */ 7557 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7558 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7559 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7560 } else { /* offdiag */ 7561 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7562 } 7563 ncoo_o += nz; 7564 } else { /* this is a local row */ 7565 for (j = ii[i]; j < 
ii[i + 1]; j++) *idxown++ = j; 7566 } 7567 } 7568 } 7569 mmdata->off[cp + 1] = idxoff; 7570 mmdata->own[cp + 1] = idxown; 7571 } 7572 7573 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf)); 7574 PetscInt incoo_o; 7575 PetscCall(PetscIntCast(ncoo_o, &incoo_o)); 7576 PetscCall(PetscSFSetGraphLayout(mmdata->sf, C->rmap, incoo_o /*nleaves*/, NULL /*ilocal*/, PETSC_OWN_POINTER, coo_i)); 7577 PetscCall(PetscSFGetMultiSF(mmdata->sf, &msf)); 7578 PetscCall(PetscSFGetGraph(msf, &ncoo2 /*nroots*/, NULL, NULL, NULL)); 7579 ncoo = ncoo_d + ncoo_oown + ncoo2; 7580 PetscCall(PetscMalloc2(ncoo, &coo_i2, ncoo, &coo_j2)); 7581 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */ 7582 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); 7583 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown)); 7584 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown)); 7585 PetscCall(PetscFree2(coo_i, coo_j)); 7586 /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */ 7587 PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo_o * sizeof(PetscScalar), (void **)&mmdata->coo_w)); 7588 coo_i = coo_i2; 7589 coo_j = coo_j2; 7590 } else { /* no offproc values insertion */ 7591 ncoo = ncoo_d; 7592 PetscCall(PetscMalloc2(ncoo, &coo_i, ncoo, &coo_j)); 7593 7594 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf)); 7595 PetscCall(PetscSFSetGraph(mmdata->sf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER)); 7596 PetscCall(PetscSFSetUp(mmdata->sf)); 7597 } 7598 mmdata->hasoffproc = hasoffproc; 7599 7600 /* gather (i,j) of nonzeros inserted locally */ 7601 for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) { 7602 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7603 PetscInt *coi = coo_i + ncoo_d; 7604 PetscInt *coj = coo_j + ncoo_d; 7605 const PetscInt *jj = 
mm->j; 7606 const PetscInt *ii = mm->i; 7607 const PetscInt *cmap = cmapa[cp]; 7608 const PetscInt *rmap = rmapa[cp]; 7609 const PetscInt mr = mp[cp]->rmap->n; 7610 const PetscInt rs = C->rmap->rstart; 7611 const PetscInt re = C->rmap->rend; 7612 const PetscInt cs = C->cmap->rstart; 7613 7614 if (mptmp[cp]) continue; 7615 if (rmapt[cp] == 1) { /* consecutive rows */ 7616 /* fill coo_i */ 7617 for (i = 0; i < mr; i++) { 7618 const PetscInt gr = i + rs; 7619 for (j = ii[i]; j < ii[i + 1]; j++) coi[j] = gr; 7620 } 7621 /* fill coo_j */ 7622 if (!cmapt[cp]) { /* type-0, already global */ 7623 PetscCall(PetscArraycpy(coj, jj, mm->nz)); 7624 } else if (cmapt[cp] == 1) { /* type-1, local to global for consecutive columns of C */ 7625 for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */ 7626 } else { /* type-2, local to global for sparse columns */ 7627 for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]]; 7628 } 7629 ncoo_d += mm->nz; 7630 } else if (rmapt[cp] == 2) { /* sparse rows */ 7631 for (i = 0; i < mr; i++) { 7632 const PetscInt *jj = mm->j + ii[i]; 7633 const PetscInt gr = rmap[i]; 7634 const PetscInt nz = ii[i + 1] - ii[i]; 7635 if (gr >= rs && gr < re) { /* local rows */ 7636 for (j = ii[i]; j < ii[i + 1]; j++) *coi++ = gr; 7637 if (!cmapt[cp]) { /* type-0, already global */ 7638 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7639 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7640 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7641 } else { /* type-2, local to global for sparse columns */ 7642 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7643 } 7644 ncoo_d += nz; 7645 } 7646 } 7647 } 7648 } 7649 if (glob) PetscCall(ISRestoreIndices(glob, &globidx)); 7650 PetscCall(ISDestroy(&glob)); 7651 if (P_oth_l2g) PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g, &P_oth_idx)); 7652 PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g)); 7653 /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to 
this proc */ 7654 PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo * sizeof(PetscScalar), (void **)&mmdata->coo_v)); 7655 7656 /* preallocate with COO data */ 7657 PetscCall(MatSetPreallocationCOO(C, ncoo, coo_i, coo_j)); 7658 PetscCall(PetscFree2(coo_i, coo_j)); 7659 PetscFunctionReturn(PETSC_SUCCESS); 7660 } 7661 7662 PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat) 7663 { 7664 Mat_Product *product = mat->product; 7665 #if defined(PETSC_HAVE_DEVICE) 7666 PetscBool match = PETSC_FALSE; 7667 PetscBool usecpu = PETSC_FALSE; 7668 #else 7669 PetscBool match = PETSC_TRUE; 7670 #endif 7671 7672 PetscFunctionBegin; 7673 MatCheckProduct(mat, 1); 7674 #if defined(PETSC_HAVE_DEVICE) 7675 if (!product->A->boundtocpu && !product->B->boundtocpu) PetscCall(PetscObjectTypeCompare((PetscObject)product->B, ((PetscObject)product->A)->type_name, &match)); 7676 if (match) { /* we can always fallback to the CPU if requested */ 7677 switch (product->type) { 7678 case MATPRODUCT_AB: 7679 if (product->api_user) { 7680 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatMatMult", "Mat"); 7681 PetscCall(PetscOptionsBool("-matmatmult_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL)); 7682 PetscOptionsEnd(); 7683 } else { 7684 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AB", "Mat"); 7685 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL)); 7686 PetscOptionsEnd(); 7687 } 7688 break; 7689 case MATPRODUCT_AtB: 7690 if (product->api_user) { 7691 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatTransposeMatMult", "Mat"); 7692 PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL)); 7693 PetscOptionsEnd(); 7694 } else { 7695 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, 
"MatProduct_AtB", "Mat"); 7696 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL)); 7697 PetscOptionsEnd(); 7698 } 7699 break; 7700 case MATPRODUCT_PtAP: 7701 if (product->api_user) { 7702 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatPtAP", "Mat"); 7703 PetscCall(PetscOptionsBool("-matptap_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL)); 7704 PetscOptionsEnd(); 7705 } else { 7706 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_PtAP", "Mat"); 7707 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL)); 7708 PetscOptionsEnd(); 7709 } 7710 break; 7711 default: 7712 break; 7713 } 7714 match = (PetscBool)!usecpu; 7715 } 7716 #endif 7717 if (match) { 7718 switch (product->type) { 7719 case MATPRODUCT_AB: 7720 case MATPRODUCT_AtB: 7721 case MATPRODUCT_PtAP: 7722 mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND; 7723 break; 7724 default: 7725 break; 7726 } 7727 } 7728 /* fallback to MPIAIJ ops */ 7729 if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat)); 7730 PetscFunctionReturn(PETSC_SUCCESS); 7731 } 7732 7733 /* 7734 Produces a set of block column indices of the matrix row, one for each block represented in the original row 7735 7736 n - the number of block indices in cc[] 7737 cc - the block indices (must be large enough to contain the indices) 7738 */ 7739 static inline PetscErrorCode MatCollapseRow(Mat Amat, PetscInt row, PetscInt bs, PetscInt *n, PetscInt *cc) 7740 { 7741 PetscInt cnt = -1, nidx, j; 7742 const PetscInt *idx; 7743 7744 PetscFunctionBegin; 7745 PetscCall(MatGetRow(Amat, row, &nidx, &idx, NULL)); 7746 if (nidx) { 7747 cnt = 0; 7748 cc[cnt] = idx[0] / bs; 7749 for (j = 1; j < nidx; j++) { 7750 if (cc[cnt] < idx[j] / bs) cc[++cnt] = idx[j] / bs; 7751 } 7752 } 7753 
PetscCall(MatRestoreRow(Amat, row, &nidx, &idx, NULL)); 7754 *n = cnt + 1; 7755 PetscFunctionReturn(PETSC_SUCCESS); 7756 } 7757 7758 /* 7759 Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows 7760 7761 ncollapsed - the number of block indices 7762 collapsed - the block indices (must be large enough to contain the indices) 7763 */ 7764 static inline PetscErrorCode MatCollapseRows(Mat Amat, PetscInt start, PetscInt bs, PetscInt *w0, PetscInt *w1, PetscInt *w2, PetscInt *ncollapsed, PetscInt **collapsed) 7765 { 7766 PetscInt i, nprev, *cprev = w0, ncur = 0, *ccur = w1, *merged = w2, *cprevtmp; 7767 7768 PetscFunctionBegin; 7769 PetscCall(MatCollapseRow(Amat, start, bs, &nprev, cprev)); 7770 for (i = start + 1; i < start + bs; i++) { 7771 PetscCall(MatCollapseRow(Amat, i, bs, &ncur, ccur)); 7772 PetscCall(PetscMergeIntArray(nprev, cprev, ncur, ccur, &nprev, &merged)); 7773 cprevtmp = cprev; 7774 cprev = merged; 7775 merged = cprevtmp; 7776 } 7777 *ncollapsed = nprev; 7778 if (collapsed) *collapsed = cprev; 7779 PetscFunctionReturn(PETSC_SUCCESS); 7780 } 7781 7782 /* 7783 MatCreateGraph_Simple_AIJ - create simple scalar matrix (graph) from potentially blocked matrix 7784 7785 Input Parameter: 7786 . Amat - matrix 7787 - symmetrize - make the result symmetric 7788 + scale - scale with diagonal 7789 7790 Output Parameter: 7791 . 
a_Gmat - output scalar graph >= 0 7792 7793 */ 7794 PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, PetscReal filter, PetscInt index_size, PetscInt index[], Mat *a_Gmat) 7795 { 7796 PetscInt Istart, Iend, Ii, jj, kk, ncols, nloc, NN, MM, bs; 7797 MPI_Comm comm; 7798 Mat Gmat; 7799 PetscBool ismpiaij, isseqaij; 7800 Mat a, b, c; 7801 MatType jtype; 7802 7803 PetscFunctionBegin; 7804 PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm)); 7805 PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend)); 7806 PetscCall(MatGetSize(Amat, &MM, &NN)); 7807 PetscCall(MatGetBlockSize(Amat, &bs)); 7808 nloc = (Iend - Istart) / bs; 7809 7810 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATSEQAIJ, &isseqaij)); 7811 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATMPIAIJ, &ismpiaij)); 7812 PetscCheck(isseqaij || ismpiaij, comm, PETSC_ERR_USER, "Require (MPI)AIJ matrix type"); 7813 7814 /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */ 7815 /* A solution consists in providing a new API, MatAIJGetCollapsedAIJ, and each class can provide a fast 7816 implementation */ 7817 if (bs > 1) { 7818 PetscCall(MatGetType(Amat, &jtype)); 7819 PetscCall(MatCreate(comm, &Gmat)); 7820 PetscCall(MatSetType(Gmat, jtype)); 7821 PetscCall(MatSetSizes(Gmat, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE)); 7822 PetscCall(MatSetBlockSizes(Gmat, 1, 1)); 7823 if (isseqaij || ((Mat_MPIAIJ *)Amat->data)->garray) { 7824 PetscInt *d_nnz, *o_nnz; 7825 MatScalar *aa, val, *AA; 7826 PetscInt *aj, *ai, *AJ, nc, nmax = 0; 7827 7828 if (isseqaij) { 7829 a = Amat; 7830 b = NULL; 7831 } else { 7832 Mat_MPIAIJ *d = (Mat_MPIAIJ *)Amat->data; 7833 a = d->A; 7834 b = d->B; 7835 } 7836 PetscCall(PetscInfo(Amat, "New bs>1 Graph. nloc=%" PetscInt_FMT "\n", nloc)); 7837 PetscCall(PetscMalloc2(nloc, &d_nnz, (isseqaij ? 
0 : nloc), &o_nnz)); 7838 for (c = a, kk = 0; c && kk < 2; c = b, kk++) { 7839 PetscInt *nnz = (c == a) ? d_nnz : o_nnz; 7840 const PetscInt *cols1, *cols2; 7841 7842 for (PetscInt brow = 0, nc1, nc2, ok = 1; brow < nloc * bs; brow += bs) { // block rows 7843 PetscCall(MatGetRow(c, brow, &nc2, &cols2, NULL)); 7844 nnz[brow / bs] = nc2 / bs; 7845 if (nc2 % bs) ok = 0; 7846 if (nnz[brow / bs] > nmax) nmax = nnz[brow / bs]; 7847 for (PetscInt ii = 1; ii < bs; ii++) { // check for non-dense blocks 7848 PetscCall(MatGetRow(c, brow + ii, &nc1, &cols1, NULL)); 7849 if (nc1 != nc2) ok = 0; 7850 else { 7851 for (PetscInt jj = 0; jj < nc1 && ok == 1; jj++) { 7852 if (cols1[jj] != cols2[jj]) ok = 0; 7853 if (cols1[jj] % bs != jj % bs) ok = 0; 7854 } 7855 } 7856 PetscCall(MatRestoreRow(c, brow + ii, &nc1, &cols1, NULL)); 7857 } 7858 PetscCall(MatRestoreRow(c, brow, &nc2, &cols2, NULL)); 7859 if (!ok) { 7860 PetscCall(PetscFree2(d_nnz, o_nnz)); 7861 PetscCall(PetscInfo(Amat, "Found sparse blocks - revert to slow method\n")); 7862 goto old_bs; 7863 } 7864 } 7865 } 7866 PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz)); 7867 PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz)); 7868 PetscCall(PetscFree2(d_nnz, o_nnz)); 7869 PetscCall(PetscMalloc2(nmax, &AA, nmax, &AJ)); 7870 // diag 7871 for (PetscInt brow = 0, n, grow; brow < nloc * bs; brow += bs) { // block rows 7872 Mat_SeqAIJ *aseq = (Mat_SeqAIJ *)a->data; 7873 7874 ai = aseq->i; 7875 n = ai[brow + 1] - ai[brow]; 7876 aj = aseq->j + ai[brow]; 7877 for (PetscInt k = 0; k < n; k += bs) { // block columns 7878 AJ[k / bs] = aj[k] / bs + Istart / bs; // diag starts at (Istart,Istart) 7879 val = 0; 7880 if (index_size == 0) { 7881 for (PetscInt ii = 0; ii < bs; ii++) { // rows in block 7882 aa = aseq->a + ai[brow + ii] + k; 7883 for (PetscInt jj = 0; jj < bs; jj++) { // columns in block 7884 val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm 7885 } 7886 } 7887 } else { // use (index,index) value if provided 
7888 for (PetscInt iii = 0; iii < index_size; iii++) { // rows in block 7889 PetscInt ii = index[iii]; 7890 aa = aseq->a + ai[brow + ii] + k; 7891 for (PetscInt jjj = 0; jjj < index_size; jjj++) { // columns in block 7892 PetscInt jj = index[jjj]; 7893 val += PetscAbs(PetscRealPart(aa[jj])); 7894 } 7895 } 7896 } 7897 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%" PetscInt_FMT ") >= nmax (%" PetscInt_FMT ")", k / bs, nmax); 7898 AA[k / bs] = val; 7899 } 7900 grow = Istart / bs + brow / bs; 7901 PetscCall(MatSetValues(Gmat, 1, &grow, n / bs, AJ, AA, ADD_VALUES)); 7902 } 7903 // off-diag 7904 if (ismpiaij) { 7905 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Amat->data; 7906 const PetscScalar *vals; 7907 const PetscInt *cols, *garray = aij->garray; 7908 7909 PetscCheck(garray, PETSC_COMM_SELF, PETSC_ERR_USER, "No garray ?"); 7910 for (PetscInt brow = 0, grow; brow < nloc * bs; brow += bs) { // block rows 7911 PetscCall(MatGetRow(b, brow, &ncols, &cols, NULL)); 7912 for (PetscInt k = 0, cidx = 0; k < ncols; k += bs, cidx++) { 7913 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs >= nmax"); 7914 AA[k / bs] = 0; 7915 AJ[cidx] = garray[cols[k]] / bs; 7916 } 7917 nc = ncols / bs; 7918 PetscCall(MatRestoreRow(b, brow, &ncols, &cols, NULL)); 7919 if (index_size == 0) { 7920 for (PetscInt ii = 0; ii < bs; ii++) { // rows in block 7921 PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals)); 7922 for (PetscInt k = 0; k < ncols; k += bs) { 7923 for (PetscInt jj = 0; jj < bs; jj++) { // cols in block 7924 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%" PetscInt_FMT ") >= nmax (%" PetscInt_FMT ")", k / bs, nmax); 7925 AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj])); 7926 } 7927 } 7928 PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals)); 7929 } 7930 } else { // use (index,index) value if provided 7931 for (PetscInt iii = 0; iii < index_size; iii++) { // rows in block 7932 PetscInt ii = index[iii]; 7933 PetscCall(MatGetRow(b, brow + ii, &ncols, 
&cols, &vals)); 7934 for (PetscInt k = 0; k < ncols; k += bs) { 7935 for (PetscInt jjj = 0; jjj < index_size; jjj++) { // cols in block 7936 PetscInt jj = index[jjj]; 7937 AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj])); 7938 } 7939 } 7940 PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals)); 7941 } 7942 } 7943 grow = Istart / bs + brow / bs; 7944 PetscCall(MatSetValues(Gmat, 1, &grow, nc, AJ, AA, ADD_VALUES)); 7945 } 7946 } 7947 PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY)); 7948 PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY)); 7949 PetscCall(PetscFree2(AA, AJ)); 7950 } else { 7951 const PetscScalar *vals; 7952 const PetscInt *idx; 7953 PetscInt *d_nnz, *o_nnz, *w0, *w1, *w2; 7954 old_bs: 7955 /* 7956 Determine the preallocation needed for the scalar matrix derived from the vector matrix. 7957 */ 7958 PetscCall(PetscInfo(Amat, "OLD bs>1 CreateGraph\n")); 7959 PetscCall(PetscMalloc2(nloc, &d_nnz, (isseqaij ? 0 : nloc), &o_nnz)); 7960 if (isseqaij) { 7961 PetscInt max_d_nnz; 7962 7963 /* 7964 Determine exact preallocation count for (sequential) scalar matrix 7965 */ 7966 PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat, &max_d_nnz)); 7967 max_d_nnz = PetscMin(nloc, bs * max_d_nnz); 7968 PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2)); 7969 for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) PetscCall(MatCollapseRows(Amat, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL)); 7970 PetscCall(PetscFree3(w0, w1, w2)); 7971 } else if (ismpiaij) { 7972 Mat Daij, Oaij; 7973 const PetscInt *garray; 7974 PetscInt max_d_nnz; 7975 7976 PetscCall(MatMPIAIJGetSeqAIJ(Amat, &Daij, &Oaij, &garray)); 7977 /* 7978 Determine exact preallocation count for diagonal block portion of scalar matrix 7979 */ 7980 PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij, &max_d_nnz)); 7981 max_d_nnz = PetscMin(nloc, bs * max_d_nnz); 7982 PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2)); 7983 for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) 
PetscCall(MatCollapseRows(Daij, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL)); 7984 PetscCall(PetscFree3(w0, w1, w2)); 7985 /* 7986 Over estimate (usually grossly over), preallocation count for off-diagonal portion of scalar matrix 7987 */ 7988 for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) { 7989 o_nnz[jj] = 0; 7990 for (kk = 0; kk < bs; kk++) { /* rows that get collapsed to a single row */ 7991 PetscCall(MatGetRow(Oaij, Ii + kk, &ncols, NULL, NULL)); 7992 o_nnz[jj] += ncols; 7993 PetscCall(MatRestoreRow(Oaij, Ii + kk, &ncols, NULL, NULL)); 7994 } 7995 if (o_nnz[jj] > (NN / bs - nloc)) o_nnz[jj] = NN / bs - nloc; 7996 } 7997 } else SETERRQ(comm, PETSC_ERR_USER, "Require AIJ matrix type"); 7998 /* get scalar copy (norms) of matrix */ 7999 PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz)); 8000 PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz)); 8001 PetscCall(PetscFree2(d_nnz, o_nnz)); 8002 for (Ii = Istart; Ii < Iend; Ii++) { 8003 PetscInt dest_row = Ii / bs; 8004 8005 PetscCall(MatGetRow(Amat, Ii, &ncols, &idx, &vals)); 8006 for (jj = 0; jj < ncols; jj++) { 8007 PetscInt dest_col = idx[jj] / bs; 8008 PetscScalar sv = PetscAbs(PetscRealPart(vals[jj])); 8009 8010 PetscCall(MatSetValues(Gmat, 1, &dest_row, 1, &dest_col, &sv, ADD_VALUES)); 8011 } 8012 PetscCall(MatRestoreRow(Amat, Ii, &ncols, &idx, &vals)); 8013 } 8014 PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY)); 8015 PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY)); 8016 } 8017 } else { 8018 if (symmetrize || filter >= 0 || scale) PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat)); 8019 else { 8020 Gmat = Amat; 8021 PetscCall(PetscObjectReference((PetscObject)Gmat)); 8022 } 8023 if (isseqaij) { 8024 a = Gmat; 8025 b = NULL; 8026 } else { 8027 Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data; 8028 a = d->A; 8029 b = d->B; 8030 } 8031 if (filter >= 0 || scale) { 8032 /* take absolute value of each entry */ 8033 for (c = a, kk = 0; c && kk < 2; c = b, kk++) { 8034 MatInfo info; 8035 
PetscScalar *avals; 8036 8037 PetscCall(MatGetInfo(c, MAT_LOCAL, &info)); 8038 PetscCall(MatSeqAIJGetArray(c, &avals)); 8039 for (int jj = 0; jj < info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]); 8040 PetscCall(MatSeqAIJRestoreArray(c, &avals)); 8041 } 8042 } 8043 } 8044 if (symmetrize) { 8045 PetscBool isset, issym; 8046 8047 PetscCall(MatIsSymmetricKnown(Amat, &isset, &issym)); 8048 if (!isset || !issym) { 8049 Mat matTrans; 8050 8051 PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans)); 8052 PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric == PETSC_BOOL3_TRUE ? SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN)); 8053 PetscCall(MatDestroy(&matTrans)); 8054 } 8055 PetscCall(MatSetOption(Gmat, MAT_SYMMETRIC, PETSC_TRUE)); 8056 } else if (Amat != Gmat) PetscCall(MatPropagateSymmetryOptions(Amat, Gmat)); 8057 if (scale) { 8058 /* scale c for all diagonal values = 1 or -1 */ 8059 Vec diag; 8060 8061 PetscCall(MatCreateVecs(Gmat, &diag, NULL)); 8062 PetscCall(MatGetDiagonal(Gmat, diag)); 8063 PetscCall(VecReciprocal(diag)); 8064 PetscCall(VecSqrtAbs(diag)); 8065 PetscCall(MatDiagonalScale(Gmat, diag, diag)); 8066 PetscCall(VecDestroy(&diag)); 8067 } 8068 PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view")); 8069 if (filter >= 0) { 8070 PetscCall(MatFilter(Gmat, filter, PETSC_TRUE, PETSC_TRUE)); 8071 PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_filter_graph_view")); 8072 } 8073 *a_Gmat = Gmat; 8074 PetscFunctionReturn(PETSC_SUCCESS); 8075 } 8076 8077 /* 8078 Special version for direct calls from Fortran 8079 */ 8080 8081 /* Change these macros so can be used in void function */ 8082 /* Identical to PetscCallVoid, except it assigns to *_ierr */ 8083 #undef PetscCall 8084 #define PetscCall(...) 
\ 8085 do { \ 8086 PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \ 8087 if (PetscUnlikely(ierr_msv_mpiaij)) { \ 8088 *_ierr = PetscError(PETSC_COMM_SELF, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr_msv_mpiaij, PETSC_ERROR_REPEAT, " "); \ 8089 return; \ 8090 } \ 8091 } while (0) 8092 8093 #undef SETERRQ 8094 #define SETERRQ(comm, ierr, ...) \ 8095 do { \ 8096 *_ierr = PetscError(comm, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr, PETSC_ERROR_INITIAL, __VA_ARGS__); \ 8097 return; \ 8098 } while (0) 8099 8100 #if defined(PETSC_HAVE_FORTRAN_CAPS) 8101 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 8102 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 8103 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 8104 #else 8105 #endif 8106 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat, PetscInt *mm, const PetscInt im[], PetscInt *mn, const PetscInt in[], const PetscScalar v[], InsertMode *maddv, PetscErrorCode *_ierr) 8107 { 8108 Mat mat = *mmat; 8109 PetscInt m = *mm, n = *mn; 8110 InsertMode addv = *maddv; 8111 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 8112 PetscScalar value; 8113 8114 MatCheckPreallocated(mat, 1); 8115 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 8116 else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values"); 8117 { 8118 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend; 8119 PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col; 8120 PetscBool roworiented = aij->roworiented; 8121 8122 /* Some Variables required in the macro */ 8123 Mat A = aij->A; 8124 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 8125 PetscInt *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j; 8126 MatScalar *aa; 8127 PetscBool ignorezeroentries = ((a->ignorezeroentries && (addv == ADD_VALUES)) ? 
PETSC_TRUE : PETSC_FALSE); 8128 Mat B = aij->B; 8129 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 8130 PetscInt *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n; 8131 MatScalar *ba; 8132 /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we 8133 * cannot use "#if defined" inside a macro. */ 8134 PETSC_UNUSED PetscBool inserted = PETSC_FALSE; 8135 8136 PetscInt *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2; 8137 PetscInt nonew = a->nonew; 8138 MatScalar *ap1, *ap2; 8139 8140 PetscFunctionBegin; 8141 PetscCall(MatSeqAIJGetArray(A, &aa)); 8142 PetscCall(MatSeqAIJGetArray(B, &ba)); 8143 for (i = 0; i < m; i++) { 8144 if (im[i] < 0) continue; 8145 PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1); 8146 if (im[i] >= rstart && im[i] < rend) { 8147 row = im[i] - rstart; 8148 lastcol1 = -1; 8149 rp1 = aj + ai[row]; 8150 ap1 = aa + ai[row]; 8151 rmax1 = aimax[row]; 8152 nrow1 = ailen[row]; 8153 low1 = 0; 8154 high1 = nrow1; 8155 lastcol2 = -1; 8156 rp2 = bj + bi[row]; 8157 ap2 = ba + bi[row]; 8158 rmax2 = bimax[row]; 8159 nrow2 = bilen[row]; 8160 low2 = 0; 8161 high2 = nrow2; 8162 8163 for (j = 0; j < n; j++) { 8164 if (roworiented) value = v[i * n + j]; 8165 else value = v[i + j * m]; 8166 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 8167 if (in[j] >= cstart && in[j] < cend) { 8168 col = in[j] - cstart; 8169 MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]); 8170 } else if (in[j] < 0) continue; 8171 else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) { 8172 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1); 8173 } else { 8174 if 
(mat->was_assembled) { 8175 if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 8176 #if defined(PETSC_USE_CTABLE) 8177 PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); 8178 col--; 8179 #else 8180 col = aij->colmap[in[j]] - 1; 8181 #endif 8182 if (col < 0 && !((Mat_SeqAIJ *)aij->A->data)->nonew) { 8183 PetscCall(MatDisAssemble_MPIAIJ(mat, PETSC_FALSE)); 8184 col = in[j]; 8185 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 8186 B = aij->B; 8187 b = (Mat_SeqAIJ *)B->data; 8188 bimax = b->imax; 8189 bi = b->i; 8190 bilen = b->ilen; 8191 bj = b->j; 8192 rp2 = bj + bi[row]; 8193 ap2 = ba + bi[row]; 8194 rmax2 = bimax[row]; 8195 nrow2 = bilen[row]; 8196 low2 = 0; 8197 high2 = nrow2; 8198 bm = aij->B->rmap->n; 8199 ba = b->a; 8200 inserted = PETSC_FALSE; 8201 } 8202 } else col = in[j]; 8203 MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]); 8204 } 8205 } 8206 } else if (!aij->donotstash) { 8207 if (roworiented) { 8208 PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 8209 } else { 8210 PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 8211 } 8212 } 8213 } 8214 PetscCall(MatSeqAIJRestoreArray(A, &aa)); 8215 PetscCall(MatSeqAIJRestoreArray(B, &ba)); 8216 } 8217 PetscFunctionReturnVoid(); 8218 } 8219 8220 /* Undefining these here since they were redefined from their original definition above! No 8221 * other PETSc functions should be defined past this point, as it is impossible to recover the 8222 * original definitions */ 8223 #undef PetscCall 8224 #undef SETERRQ 8225