#include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
#include <petsc/private/vecimpl.h>
#include <petsc/private/sfimpl.h>
#include <petsc/private/isimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>
#include <petsc/private/hashmapi.h>

/* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and MatAssemblyEnd_MPI_Hash() */
#define TYPE AIJ
#define TYPE_AIJ
#include "../src/mat/impls/aij/mpi/mpihashmat.h"
#undef TYPE
#undef TYPE_AIJ

/* Free all data owned by an MPIAIJ matrix (stash, diagonal block A, off-diagonal block B,
   column map, garray, communication vector/scatter, row workspace) WITHOUT freeing
   mat->data itself.  Shared by MatDestroy_MPIAIJ() and MatResetHash_MPIAIJ(). */
static PetscErrorCode MatReset_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
  PetscCall(MatStashDestroy_Private(&mat->stash));
  PetscCall(VecDestroy(&aij->diag));
  PetscCall(MatDestroy(&aij->A));
  PetscCall(MatDestroy(&aij->B));
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&aij->colmap));
#else
  PetscCall(PetscFree(aij->colmap));
#endif
  PetscCall(PetscFree(aij->garray));
  PetscCall(VecDestroy(&aij->lvec));
  PetscCall(VecScatterDestroy(&aij->Mvctx));
  PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));
  PetscCall(PetscFree(aij->ld));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Discard the current assembled data and return the matrix to the hash-based
   (unpreallocated) insertion mode supplied by mpihashmat.h */
static PetscErrorCode MatResetHash_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  /* Save the nonzero states of the component matrices because those are what are used to determine
     the nonzero state of mat */
  PetscObjectState Astate = aij->A->nonzerostate, Bstate = aij->B->nonzerostate;

  PetscFunctionBegin;
  PetscCall(MatReset_MPIAIJ(mat));
  PetscCall(MatSetUp_MPI_Hash(mat));
  /* bump the saved states so the reset registers as a nonzero-structure change */
  aij->A->nonzerostate = ++Astate, aij->B->nonzerostate = ++Bstate;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Destroy an MPIAIJ matrix: release the component data, then clear every composed
   object and method implementation registered on the Mat (continued below). */
PetscErrorCode MatDestroy_MPIAIJ(Mat mat)
{
  PetscFunctionBegin;
  PetscCall(MatReset_MPIAIJ(mat));

  PetscCall(PetscFree(mat->data));
  /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */
  PetscCall(PetscObjectCompose((PetscObject)mat, "MatMergeSeqsToMPI", NULL));

  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
  /* clear every method implementation composed on this object so the type can be rebuilt;
     NOTE(review): "MatConvert_mpiaij_is_C" is cleared twice in this list (here via the
     block below and again later) — redundant but harmless */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatIsTranspose_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetHash_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocationCSR_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpibaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisbaij_C", NULL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcusparse_C", NULL));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijhipsparse_C", NULL));
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijkokkos_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_elemental_C", NULL));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_scalapack_C", NULL));
#endif
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_hypre_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_is_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpiaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetUseScalableIncreaseOverlap_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijperm_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijsell_C", NULL));
#if defined(PETSC_HAVE_MKL_SPARSE)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijmkl_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcrl_C", NULL));
  /* NOTE(review): "MatConvert_mpiaij_is_C" was already cleared above — duplicate, harmless */
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisell_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetPreallocationCOO_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetValuesCOO_C", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Implement MatGetRowIJ() for MPIAIJ: build a sequential matrix B holding this rank's
   rows via MatMPIAIJGetLocalMat(), delegate to its MatGetRowIJ(), and stash B on A
   (under the key "MatGetRowIJ_MPIAIJ") so MatRestoreRowIJ_MPIAIJ() can find it. */
static PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
{
  Mat B;

  PetscFunctionBegin;
  PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &B));
  PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject)B));
  PetscCall(MatGetRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));

  /* drop our reference; B stays alive through the composition on A until restore
     (presumably via the composed reference — MatRestoreRowIJ_MPIAIJ queries and clears it) */
  PetscCall(MatDestroy(&B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Counterpart of MatGetRowIJ_MPIAIJ(): recover the stashed sequential matrix,
   restore its ia/ja arrays, and remove the composition (releasing B). */
static PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done)
{
  Mat B;

  PetscFunctionBegin;
  PetscCall(PetscObjectQuery((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject *)&B));
  PetscCall(MatRestoreRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
  PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to `MATSEQAIJ` when constructed with a single process communicator,
   and `MATMPIAIJ` otherwise. As a result, for single process communicators,
   `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Key:
.  -mat_type aij - sets the matrix type to `MATAIJ` during a call to `MatSetFromOptions()`

   Level: beginner

   Developer Note:
   Subclasses include `MATAIJCUSPARSE`, `MATAIJPERM`, `MATAIJSELL`, `MATAIJMKL`, `MATAIJCRL`, `MATAIJKOKKOS`, and also automatically switches over to use inodes when
   enough exist.

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateAIJ()`, `MatCreateSeqAIJ()`, `MATSEQAIJ`, `MATMPIAIJ`
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to `MATSEQAIJCRL` when constructed with a single process communicator,
   and `MATMPIAIJCRL` otherwise. As a result, for single process communicators,
   `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Key:
.  -mat_type aijcrl - sets the matrix type to `MATMPIAIJCRL` during a call to `MatSetFromOptions()`

   Level: beginner

.seealso: [](ch_matrices), `Mat`, `MatCreateMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL`
M*/

/* Bind (or unbind) the matrix and all of its parts to the CPU. */
static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A, PetscBool flg)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) || defined(PETSC_HAVE_VIENNACL)
  A->boundtocpu = flg;
#endif
  if (a->A) PetscCall(MatBindToCPU(a->A, flg));
  if (a->B) PetscCall(MatBindToCPU(a->B, flg));

  /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
   * This may seem a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
   * to differ from the parent matrix.
   */
  if (a->lvec) PetscCall(VecBindToCPU(a->lvec, flg));
  if (a->diag) PetscCall(VecBindToCPU(a->diag, flg));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Propagate block sizes to the diagonal block A; the off-diagonal block B keeps a
   column block size of 1 (its columns are a scattered subset of the global columns). */
static PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data;

  PetscFunctionBegin;
  if (mat->A) {
    PetscCall(MatSetBlockSizes(mat->A, rbs, cbs));
    PetscCall(MatSetBlockSizes(mat->B, rbs, 1));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Build an IS (global numbering) of the locally owned rows that contain at least one
   numerically nonzero stored entry.  *keptrows is left NULL when no process has a
   fully-zero row (i.e. every row is kept). */
static PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M, IS *keptrows)
{
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ *)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ *)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ *)mat->B->data;
  const PetscInt  *ia, *ib;
  const MatScalar *aa, *bb, *aav, *bav;
  PetscInt         na, nb, i, j, *rows, cnt = 0, n0rows;
  PetscInt         m = M->rmap->n, rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = NULL;

  ia = a->i;
  ib = b->i;
  PetscCall(MatSeqAIJGetArrayRead(mat->A, &aav));
  PetscCall(MatSeqAIJGetArrayRead(mat->B, &bav));
  /* first pass: cnt = number of locally owned rows whose stored entries are all zero */
  for (i = 0; i < m; i++) {
    na = ia[i + 1] - ia[i];
    nb = ib[i + 1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = aav + ia[i];
    for (j = 0; j < na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = PetscSafePointerPlusOffset(bav, ib[i]);
    for (j = 0; j < nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
  ok1:;
  }
  PetscCallMPI(MPIU_Allreduce(&cnt, &n0rows, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)M)));
  if (!n0rows) {
    /* no zero rows anywhere: leave *keptrows = NULL */
    PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
    PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  /* second pass: collect the global indices of the rows that have a nonzero entry */
  PetscCall(PetscMalloc1(M->rmap->n - cnt, &rows));
  cnt = 0;
  for (i = 0; i < m; i++) {
    na = ia[i + 1] - ia[i];
    nb = ib[i + 1] - ib[i];
    if (!na && !nb) continue;
    aa = aav + ia[i];
    for (j = 0; j < na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = PetscSafePointerPlusOffset(bav, ib[i]);
    for (j = 0; j < nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
  ok2:;
  }
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), cnt, rows, PETSC_OWN_POINTER, keptrows));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Set the diagonal of Y from D: fast path through the diagonal block A when the matrix
   is assembled with congruent row/column layouts, otherwise the generic fallback. */
static PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y, Vec D, InsertMode is)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Y->data;
  PetscBool   cong;

  PetscFunctionBegin;
  PetscCall(MatHasCongruentLayouts(Y, &cong));
  if (Y->assembled && cong) {
    PetscCall(MatDiagonalSet(aij->A, D, is));
  } else {
    PetscCall(MatDiagonalSet_Default(Y, D, is));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Return an IS of the locally owned rows whose diagonal entry is (structurally or
   numerically) zero; the per-rank search is delegated to the SeqAIJ helper on A. */
static PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M, IS *zrows)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)M->data;
  PetscInt    i, rstart, nrows, *rows;

  PetscFunctionBegin;
  *zrows = NULL;
  PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A, &nrows, &rows));
  PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
  for (i = 0; i < nrows; i++) rows[i] += rstart; /* shift to global row numbering */
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), nrows, rows, PETSC_OWN_POINTER, zrows));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Compute a per-column reduction (norms, sums or means of real/imaginary parts) over
   the whole matrix; results (length = global number of columns) are reduced over the
   communicator with MAX (for NORM_INFINITY) or SUM (everything else). */
static PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A, PetscInt type, PetscReal *reductions)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)A->data;
  PetscInt           i, m, n, *garray = aij->garray;
  Mat_SeqAIJ        *a_aij = (Mat_SeqAIJ *)aij->A->data;
  Mat_SeqAIJ        *b_aij = (Mat_SeqAIJ *)aij->B->data;
  PetscReal         *work;
  const PetscScalar *dummy;
  PetscMPIInt        in;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A, &m, &n));
  PetscCall(PetscCalloc1(n, &work));
  /* get/restore of the arrays before touching a_aij->a / b_aij->a directly —
     NOTE(review): presumably to force a device-to-host sync; confirm against the
     MatSeqAIJGetArrayRead() contract */
  PetscCall(MatSeqAIJGetArrayRead(aij->A, &dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &dummy));
  PetscCall(MatSeqAIJGetArrayRead(aij->B, &dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &dummy));
  if (type == NORM_2) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i] * a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i] * b_aij->a[i]);
  } else if (type == NORM_1) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
  } else if (type == NORM_INFINITY) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]), work[garray[b_aij->j[i]]]);
  } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]);
  } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]);
  } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
  PetscCall(PetscMPIIntCast(n, &in));
  if (type == NORM_INFINITY) {
    PetscCallMPI(MPIU_Allreduce(work, reductions, in, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
  } else {
    PetscCallMPI(MPIU_Allreduce(work, reductions, in, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscCall(PetscFree(work));
  if (type == NORM_2) {
    for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < n; i++) reductions[i] /= m; /* turn the sum into a mean over the global row count */
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Return an IS of locally owned rows that have entries outside the block diagonal:
   the union of A's off-block-diagonal rows and every row with an entry in B. */
static PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A, IS *is)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
  IS              sis, gis;
  const PetscInt *isis, *igis;
  PetscInt        n, *iis, nsis, ngis, rstart, i;

  PetscFunctionBegin;
  PetscCall(MatFindOffBlockDiagonalEntries(a->A, &sis));
  PetscCall(MatFindNonzeroRows(a->B, &gis));
  PetscCall(ISGetSize(gis, &ngis));
  PetscCall(ISGetSize(sis, &nsis));
  PetscCall(ISGetIndices(sis, &isis));
  PetscCall(ISGetIndices(gis, &igis));

  /* merge both (local) index lists, sort, drop duplicates, then shift to global numbering */
  PetscCall(PetscMalloc1(ngis + nsis, &iis));
  PetscCall(PetscArraycpy(iis, igis, ngis));
  PetscCall(PetscArraycpy(iis + ngis, isis, nsis));
  n = ngis + nsis;
  PetscCall(PetscSortRemoveDupsInt(&n, iis));
  PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
  for (i = 0; i < n; i++) iis[i] += rstart;
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A), n, iis, PETSC_OWN_POINTER, is));

  PetscCall(ISRestoreIndices(sis, &isis));
  PetscCall(ISRestoreIndices(gis, &igis));
  PetscCall(ISDestroy(&sis));
  PetscCall(ISDestroy(&gis));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it it is not scalable (each processor
  has an order N integer array but is fast to access.
*/
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt    n = aij->B->cmap->n, i;

  PetscFunctionBegin;
  PetscCheck(!n || aij->garray, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  /* both key and value are stored shifted by +1 so that 0 can mean "absent" */
  PetscCall(PetscHMapICreateWithSize(n, &aij->colmap));
  for (i = 0; i < n; i++) PetscCall(PetscHMapISet(aij->colmap, aij->garray[i] + 1, i + 1));
#else
  /* dense array of length (global columns + 1); entry 0 means "column not present in B" */
  PetscCall(PetscCalloc1(mat->cmap->N + 1, &aij->colmap));
  for (i = 0; i < n; i++) aij->colmap[aij->garray[i]] = i + 1;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Insert/add a single value at (row,col) of the diagonal block A, assuming the caller
   set up rp1/ap1/nrow1/low1/high1/lastcol1 for the current row; performs a bounded
   binary + linear search, reallocates via MatSeqXAIJReallocateAIJ when a new nonzero
   must be inserted, and honors nonew/ignorezeroentries. */
#define MatSetValues_SeqAIJ_A_Private(row, col, value, addv, orow, ocol) \
  do { \
    if (col <= lastcol1) low1 = 0; \
    else high1 = nrow1; \
    lastcol1 = col; \
    while (high1 - low1 > 5) { \
      t = (low1 + high1) / 2; \
      if (rp1[t] > col) high1 = t; \
      else low1 = t; \
    } \
    for (_i = low1; _i < high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap1[_i] += value; \
          /* Not sure LogFlops will slow down the code or not */ \
          (void)PetscLogFlops(1.0); \
        } else ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries && row != col) { \
      low1  = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    if (nonew == 1) { \
      low1  = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A, am, 1, nrow1, row, col, rmax1, aa, ai, aj, rp1, ap1, aimax, nonew, MatScalar); \
    N = nrow1++ - 1; \
    a->nz++; \
    high1++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp1 + _i + 1, rp1 + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap1 + _i + 1, ap1 + _i, N - _i + 1)); \
    rp1[_i] = col; \
    ap1[_i] = value; \
  a_noinsert:; \
    ailen[row] = nrow1; \
  } while (0)

/* Same as MatSetValues_SeqAIJ_A_Private() but for the off-diagonal block B
   (rp2/ap2/nrow2/low2/high2/lastcol2 state, b_noinsert label). */
#define MatSetValues_SeqAIJ_B_Private(row, col, value, addv, orow, ocol) \
  do { \
    if (col <= lastcol2) low2 = 0; \
    else high2 = nrow2; \
    lastcol2 = col; \
    while (high2 - low2 > 5) { \
      t = (low2 + high2) / 2; \
      if (rp2[t] > col) high2 = t; \
      else low2 = t; \
    } \
    for (_i = low2; _i < high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap2[_i] += value; \
          (void)PetscLogFlops(1.0); \
        } else ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) { \
      low2  = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    if (nonew == 1) { \
      low2  = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B, bm, 1, nrow2, row, col, rmax2, ba, bi, bj, rp2, ap2, bimax, nonew, MatScalar); \
    N = nrow2++ - 1; \
    b->nz++; \
    high2++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp2 + _i + 1, rp2 + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap2 + _i + 1, ap2 + _i, N - _i + 1)); \
    rp2[_i] = col; \
    ap2[_i] = value; \
  b_noinsert:; \
    bilen[row] = nrow2; \
  } while (0)

/* Overwrite one full locally-owned row whose values v[] are ordered by global column:
   off-diagonal entries left of the diagonal block, then the diagonal block, then
   off-diagonal entries to the right. */
static PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A, PetscInt row, const PetscScalar v[])
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ  *a = (Mat_SeqAIJ *)mat->A->data, *b = (Mat_SeqAIJ *)mat->B->data;
  PetscInt     l, *garray = mat->garray, diag;
  PetscScalar *aa, *ba;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size
     of row to the left of the diagonal part */
  PetscCall(MatGetOwnershipRange(A, &diag, NULL));
  row = row - diag; /* convert to local row index */
  /* l = number of off-diagonal entries whose global column precedes the owned range */
  for (l = 0; l < b->i[row + 1] - b->i[row]; l++) {
    if (garray[b->j[b->i[row] + l]] > diag) break;
  }
  if (l) {
    PetscCall(MatSeqAIJGetArray(mat->B, &ba));
    PetscCall(PetscArraycpy(ba + b->i[row], v, l));
    PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
  }

  /* diagonal part */
  if (a->i[row + 1] - a->i[row]) {
    PetscCall(MatSeqAIJGetArray(mat->A, &aa));
    PetscCall(PetscArraycpy(aa + a->i[row], v + l, a->i[row + 1] - a->i[row]));
    PetscCall(MatSeqAIJRestoreArray(mat->A, &aa));
  }

  /* right of diagonal part */
  if (b->i[row + 1] - b->i[row] - l) {
    PetscCall(MatSeqAIJGetArray(mat->B, &ba));
    PetscCall(PetscArraycpy(ba + b->i[row] + l, v + l + a->i[row + 1] - a->i[row], b->i[row + 1] - b->i[row] - l));
    PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatSetValues() for MPIAIJ: locally owned rows are inserted directly into the
   diagonal (A) or off-diagonal (B) SeqAIJ blocks through the *_Private macros above;
   off-process rows are queued in the stash for communication at assembly time. */
PetscErrorCode MatSetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIAIJ *aij         = (Mat_MPIAIJ *)mat->data;
  PetscScalar value       = 0.0;
  PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
  PetscBool   roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat         A     = aij->A;
  Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
  PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
  PetscBool   ignorezeroentries = a->ignorezeroentries;
  Mat         B     = aij->B;
  Mat_SeqAIJ *b     = (Mat_SeqAIJ *)B->data;
  PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
  MatScalar  *aa, *ba;
  PetscInt   *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
  PetscInt    nonew;
  MatScalar  *ap1, *ap2;

  PetscFunctionBegin;
  PetscCall(MatSeqAIJGetArray(A, &aa));
  PetscCall(MatSeqAIJGetArray(B, &ba));
  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue; /* negative rows are silently skipped */
    PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    if (im[i] >= rstart && im[i] < rend) {
      /* locally owned row: set up the search state for the A/B insertion macros */
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = PetscSafePointerPlusOffset(aj, ai[row]);
      ap1      = PetscSafePointerPlusOffset(aa, ai[row]);
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = PetscSafePointerPlusOffset(bj, bi[row]);
      ap2      = PetscSafePointerPlusOffset(ba, bi[row]);
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j = 0; j < n; j++) {
        if (v) value = roworiented ? v[i * n + j] : v[i + j * m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
        if (in[j] >= cstart && in[j] < cend) {
          /* column falls in the diagonal block */
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
        } else if (in[j] < 0) {
          continue;
        } else {
          PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          if (mat->was_assembled) {
            if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); /* map global col ids to local ones */
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ *)aij->B->data)->nonew) { /* col < 0 means in[j] is a new col for B */
              PetscCall(MatDisAssemble_MPIAIJ(mat, PETSC_FALSE));  /* Change aij->B from reduced/local format to expanded/global format */
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ *)B->data;
              bimax = b->imax;
              bi    = b->i;
              bilen = b->ilen;
              bj    = b->j;
              ba    = b->a;
              rp2   = PetscSafePointerPlusOffset(bj, bi[row]);
              ap2   = PetscSafePointerPlusOffset(ba, bi[row]);
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
              if (1 == ((Mat_SeqAIJ *)aij->B->data)->nonew) {
                PetscCall(PetscInfo(mat, "Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n", (double)PetscRealPart(value), im[i], in[j]));
              } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
            }
          } else col = in[j]; /* not yet assembled: B still uses global column indices */
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
        }
      }
    } else {
      /* off-process row: stash it for MatAssemblyBegin/End communication */
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i * n), (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i), m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
  }
  PetscCall(MatSeqAIJRestoreArray(A, &aa)); /* aa, bb might have been free'd due to reallocation above.
                                               But we don't access them here */
  PetscCall(MatSeqAIJRestoreArray(B, &ba));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
  The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
  No off-processor parts off the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  Mat         A   = aij->A; /* diagonal part of the matrix */
  Mat         B   = aij->B; /* off-diagonal part of the matrix */
  Mat_SeqAIJ *a   = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJ *b   = (Mat_SeqAIJ *)B->data;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, col;
  PetscInt   *ailen = a->ilen, *aj = a->j;
  PetscInt   *bilen = b->ilen, *bj = b->j;
  PetscInt    am = aij->A->rmap->n, j;
  PetscInt    diag_so_far = 0, dnz;
  PetscInt    offd_so_far = 0, onz;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j = 0; j < am; j++) {
    dnz = onz = 0;
    /* Iterate over all non-zero columns of the current row */
    for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[diag_so_far++] = mat_j[col] - cstart; /* A stores local column numbering */
        dnz++;
      } else { /* off-diagonal entries */
        bj[offd_so_far++] = mat_j[col]; /* B keeps global column numbering here */
        onz++;
      }
    }
    ailen[j] = dnz;
    bilen[j] = onz;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
  The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
  No off-processor parts off the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ.
  Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
  would not be true and the more complex MatSetValues_MPIAIJ has to be used.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[], const PetscScalar mat_a[])
{
  Mat_MPIAIJ *aij  = (Mat_MPIAIJ *)mat->data;
  Mat         A    = aij->A; /* diagonal part of the matrix */
  Mat         B    = aij->B; /* off-diagonal part of the matrix */
  Mat_SeqAIJ *aijd = (Mat_SeqAIJ *)aij->A->data, *aijo = (Mat_SeqAIJ *)aij->B->data;
  Mat_SeqAIJ *a    = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJ *b    = (Mat_SeqAIJ *)B->data;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend;
  PetscInt   *ailen = a->ilen, *aj = a->j;
  PetscInt   *bilen = b->ilen, *bj = b->j;
  PetscInt    am = aij->A->rmap->n, j;
  PetscInt   *full_diag_i = aijd->i, *full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point.
                                                               */
  PetscInt     col, dnz_row, onz_row, rowstart_diag, rowstart_offd;
  PetscScalar *aa = a->a, *ba = b->a;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j = 0; j < am; j++) {
    dnz_row = onz_row = 0;
    rowstart_offd = full_offd_i[j];
    rowstart_diag = full_diag_i[j];
    /* Iterate over all non-zero columns of the current row */
    for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[rowstart_diag + dnz_row] = mat_j[col] - cstart;
        aa[rowstart_diag + dnz_row] = mat_a[col];
        dnz_row++;
      } else { /* off-diagonal entries */
        bj[rowstart_offd + onz_row] = mat_j[col];
        ba[rowstart_offd + onz_row] = mat_a[col];
        onz_row++;
      }
    }
    ailen[j] = dnz_row;
    bilen[j] = onz_row;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatGetValues() for MPIAIJ: only locally owned rows may be queried; off-diagonal
   columns are translated through the colmap and entries not stored in B read as 0. */
static PetscErrorCode MatGetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
    PetscCheck(idxm[i] >= rstart && idxm[i] < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported, row requested %" PetscInt_FMT " range [%" PetscInt_FMT " %" PetscInt_FMT ")", idxm[i], rstart, rend);
    row = idxm[i] - rstart;
    for (j = 0; j < n; j++) {
      if (idxn[j] < 0) continue; /* negative column */
      PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
      if (idxn[j] >= cstart && idxn[j] < cend) {
        col = idxn[j] - cstart;
        PetscCall(MatGetValues(aij->A, 1, &row, 1, &col, v + i * n + j));
      } else {
        if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
        PetscCall(PetscHMapIGetWithDefault(aij->colmap, idxn[j] + 1, 0, &col));
        col--;
#else
        col = aij->colmap[idxn[j]] - 1;
#endif
        /* not a stored column of B => the entry is an (unstored) zero */
        if ((col < 0) || (aij->garray[col] != idxn[j])) *(v + i * n + j) = 0.0;
        else PetscCall(MatGetValues(aij->B, 1, &row, 1, &col, v + i * n + j));
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Kick off assembly: start scattering the stashed off-process entries. */
static PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt    nstash, reallocs;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
  PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
  PetscCall(PetscInfo(aij->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Finish assembly: drain the stash into local blocks, assemble A and B, handle
   collective disassembly, and (on first final assembly) build the multiply machinery. */
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *)mat->data;
  PetscMPIInt  n;
  PetscInt     i, j, rstart, ncols, flg;
  PetscInt    *row, *col;
  PetscBool    other_disassembled;
  PetscScalar *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
  /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
  if (mat->boundtocpu) {
    PetscCall(MatBindToCPU(aij->A, PETSC_TRUE));
    PetscCall(MatBindToCPU(aij->B, PETSC_TRUE));
  }
#endif
  PetscCall(MatAssemblyBegin(aij->A, mode));
  PetscCall(MatAssemblyEnd(aij->A, mode));

  /* determine if any processor has disassembled, if so we must
     also disassemble ourself, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ *)aij->B->data)->nonew) {
    PetscCallMPI(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globally it does not */
      PetscCall(MatDisAssemble_MPIAIJ(mat, PETSC_FALSE));
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIAIJ(mat));
  PetscCall(MatSetOption(aij->B, MAT_USE_INODES, PETSC_FALSE));
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscCall(MatAssemblyBegin(aij->B, mode));
  PetscCall(MatAssemblyEnd(aij->B, mode));

  PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));
839 840 aij->rowvalues = NULL; 841 842 PetscCall(VecDestroy(&aij->diag)); 843 844 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 845 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ *)aij->A->data)->nonew) { 846 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 847 PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat))); 848 } 849 #if defined(PETSC_HAVE_DEVICE) 850 mat->offloadmask = PETSC_OFFLOAD_BOTH; 851 #endif 852 PetscFunctionReturn(PETSC_SUCCESS); 853 } 854 855 static PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 856 { 857 Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data; 858 859 PetscFunctionBegin; 860 PetscCall(MatZeroEntries(l->A)); 861 PetscCall(MatZeroEntries(l->B)); 862 PetscFunctionReturn(PETSC_SUCCESS); 863 } 864 865 static PetscErrorCode MatZeroRows_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 866 { 867 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 868 PetscInt *lrows; 869 PetscInt r, len; 870 PetscBool cong; 871 872 PetscFunctionBegin; 873 /* get locally owned rows */ 874 PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows)); 875 PetscCall(MatHasCongruentLayouts(A, &cong)); 876 /* fix right-hand side if needed */ 877 if (x && b) { 878 const PetscScalar *xx; 879 PetscScalar *bb; 880 881 PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout"); 882 PetscCall(VecGetArrayRead(x, &xx)); 883 PetscCall(VecGetArray(b, &bb)); 884 for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]]; 885 PetscCall(VecRestoreArrayRead(x, &xx)); 886 PetscCall(VecRestoreArray(b, &bb)); 887 } 888 889 if (diag != 0.0 && cong) { 890 PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL)); 891 PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 892 } else if (diag != 0.0) { /* non-square or non congruent layouts -> if 
keepnonzeropattern is false, we allow for new insertion */ 893 Mat_SeqAIJ *aijA = (Mat_SeqAIJ *)mat->A->data; 894 Mat_SeqAIJ *aijB = (Mat_SeqAIJ *)mat->B->data; 895 PetscInt nnwA, nnwB; 896 PetscBool nnzA, nnzB; 897 898 nnwA = aijA->nonew; 899 nnwB = aijB->nonew; 900 nnzA = aijA->keepnonzeropattern; 901 nnzB = aijB->keepnonzeropattern; 902 if (!nnzA) { 903 PetscCall(PetscInfo(mat->A, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n")); 904 aijA->nonew = 0; 905 } 906 if (!nnzB) { 907 PetscCall(PetscInfo(mat->B, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n")); 908 aijB->nonew = 0; 909 } 910 /* Must zero here before the next loop */ 911 PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL)); 912 PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 913 for (r = 0; r < len; ++r) { 914 const PetscInt row = lrows[r] + A->rmap->rstart; 915 if (row >= A->cmap->N) continue; 916 PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES)); 917 } 918 aijA->nonew = nnwA; 919 aijB->nonew = nnwB; 920 } else { 921 PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL)); 922 PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 923 } 924 PetscCall(PetscFree(lrows)); 925 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 926 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 927 928 /* only change matrix nonzero state if pattern was allowed to be changed */ 929 if (!((Mat_SeqAIJ *)mat->A->data)->keepnonzeropattern || !((Mat_SeqAIJ *)mat->A->data)->nonew) { 930 PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate; 931 PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A))); 932 } 933 PetscFunctionReturn(PETSC_SUCCESS); 934 } 935 936 static PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 
937 { 938 Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data; 939 PetscInt n = A->rmap->n; 940 PetscInt i, j, r, m, len = 0; 941 PetscInt *lrows, *owners = A->rmap->range; 942 PetscMPIInt p = 0; 943 PetscSFNode *rrows; 944 PetscSF sf; 945 const PetscScalar *xx; 946 PetscScalar *bb, *mask, *aij_a; 947 Vec xmask, lmask; 948 Mat_SeqAIJ *aij = (Mat_SeqAIJ *)l->B->data; 949 const PetscInt *aj, *ii, *ridx; 950 PetscScalar *aa; 951 952 PetscFunctionBegin; 953 /* Create SF where leaves are input rows and roots are owned rows */ 954 PetscCall(PetscMalloc1(n, &lrows)); 955 for (r = 0; r < n; ++r) lrows[r] = -1; 956 PetscCall(PetscMalloc1(N, &rrows)); 957 for (r = 0; r < N; ++r) { 958 const PetscInt idx = rows[r]; 959 PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N); 960 if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */ 961 PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p)); 962 } 963 rrows[r].rank = p; 964 rrows[r].index = rows[r] - owners[p]; 965 } 966 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 967 PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER)); 968 /* Collect flags for rows to be zeroed */ 969 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR)); 970 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR)); 971 PetscCall(PetscSFDestroy(&sf)); 972 /* Compress and put in row numbers */ 973 for (r = 0; r < n; ++r) 974 if (lrows[r] >= 0) lrows[len++] = r; 975 /* zero diagonal part of matrix */ 976 PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b)); 977 /* handle off-diagonal part of matrix */ 978 PetscCall(MatCreateVecs(A, &xmask, NULL)); 979 PetscCall(VecDuplicate(l->lvec, &lmask)); 980 PetscCall(VecGetArray(xmask, &bb)); 981 for (i = 0; i < len; i++) bb[lrows[i]] = 1; 982 PetscCall(VecRestoreArray(xmask, 
&bb)); 983 PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD)); 984 PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD)); 985 PetscCall(VecDestroy(&xmask)); 986 if (x && b) { /* this code is buggy when the row and column layout don't match */ 987 PetscBool cong; 988 989 PetscCall(MatHasCongruentLayouts(A, &cong)); 990 PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout"); 991 PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD)); 992 PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD)); 993 PetscCall(VecGetArrayRead(l->lvec, &xx)); 994 PetscCall(VecGetArray(b, &bb)); 995 } 996 PetscCall(VecGetArray(lmask, &mask)); 997 /* remove zeroed rows of off-diagonal matrix */ 998 PetscCall(MatSeqAIJGetArray(l->B, &aij_a)); 999 ii = aij->i; 1000 for (i = 0; i < len; i++) PetscCall(PetscArrayzero(PetscSafePointerPlusOffset(aij_a, ii[lrows[i]]), ii[lrows[i] + 1] - ii[lrows[i]])); 1001 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1002 if (aij->compressedrow.use) { 1003 m = aij->compressedrow.nrows; 1004 ii = aij->compressedrow.i; 1005 ridx = aij->compressedrow.rindex; 1006 for (i = 0; i < m; i++) { 1007 n = ii[i + 1] - ii[i]; 1008 aj = aij->j + ii[i]; 1009 aa = aij_a + ii[i]; 1010 1011 for (j = 0; j < n; j++) { 1012 if (PetscAbsScalar(mask[*aj])) { 1013 if (b) bb[*ridx] -= *aa * xx[*aj]; 1014 *aa = 0.0; 1015 } 1016 aa++; 1017 aj++; 1018 } 1019 ridx++; 1020 } 1021 } else { /* do not use compressed row format */ 1022 m = l->B->rmap->n; 1023 for (i = 0; i < m; i++) { 1024 n = ii[i + 1] - ii[i]; 1025 aj = aij->j + ii[i]; 1026 aa = aij_a + ii[i]; 1027 for (j = 0; j < n; j++) { 1028 if (PetscAbsScalar(mask[*aj])) { 1029 if (b) bb[i] -= *aa * xx[*aj]; 1030 *aa = 0.0; 1031 } 1032 aa++; 1033 aj++; 1034 } 1035 } 1036 } 1037 if (x && b) { 1038 PetscCall(VecRestoreArray(b, &bb)); 1039 
PetscCall(VecRestoreArrayRead(l->lvec, &xx)); 1040 } 1041 PetscCall(MatSeqAIJRestoreArray(l->B, &aij_a)); 1042 PetscCall(VecRestoreArray(lmask, &mask)); 1043 PetscCall(VecDestroy(&lmask)); 1044 PetscCall(PetscFree(lrows)); 1045 1046 /* only change matrix nonzero state if pattern was allowed to be changed */ 1047 if (!((Mat_SeqAIJ *)l->A->data)->nonew) { 1048 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1049 PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A))); 1050 } 1051 PetscFunctionReturn(PETSC_SUCCESS); 1052 } 1053 1054 static PetscErrorCode MatMult_MPIAIJ(Mat A, Vec xx, Vec yy) 1055 { 1056 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1057 PetscInt nt; 1058 VecScatter Mvctx = a->Mvctx; 1059 1060 PetscFunctionBegin; 1061 PetscCall(VecGetLocalSize(xx, &nt)); 1062 PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")", A->cmap->n, nt); 1063 PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1064 PetscUseTypeMethod(a->A, mult, xx, yy); 1065 PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1066 PetscUseTypeMethod(a->B, multadd, a->lvec, yy, yy); 1067 PetscFunctionReturn(PETSC_SUCCESS); 1068 } 1069 1070 static PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A, Vec bb, Vec xx) 1071 { 1072 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1073 1074 PetscFunctionBegin; 1075 PetscCall(MatMultDiagonalBlock(a->A, bb, xx)); 1076 PetscFunctionReturn(PETSC_SUCCESS); 1077 } 1078 1079 static PetscErrorCode MatMultAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz) 1080 { 1081 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1082 VecScatter Mvctx = a->Mvctx; 1083 1084 PetscFunctionBegin; 1085 PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1086 PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz)); 1087 PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, 
SCATTER_FORWARD)); 1088 PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz)); 1089 PetscFunctionReturn(PETSC_SUCCESS); 1090 } 1091 1092 static PetscErrorCode MatMultTranspose_MPIAIJ(Mat A, Vec xx, Vec yy) 1093 { 1094 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1095 1096 PetscFunctionBegin; 1097 /* do nondiagonal part */ 1098 PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec)); 1099 /* do local part */ 1100 PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy)); 1101 /* add partial results together */ 1102 PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE)); 1103 PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE)); 1104 PetscFunctionReturn(PETSC_SUCCESS); 1105 } 1106 1107 static PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat, Mat Bmat, PetscReal tol, PetscBool *f) 1108 { 1109 MPI_Comm comm; 1110 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)Amat->data, *Bij = (Mat_MPIAIJ *)Bmat->data; 1111 Mat Adia = Aij->A, Bdia = Bij->A, Aoff, Boff, *Aoffs, *Boffs; 1112 IS Me, Notme; 1113 PetscInt M, N, first, last, *notme, i; 1114 PetscBool lf; 1115 PetscMPIInt size; 1116 1117 PetscFunctionBegin; 1118 /* Easy test: symmetric diagonal block */ 1119 PetscCall(MatIsTranspose(Adia, Bdia, tol, &lf)); 1120 PetscCallMPI(MPIU_Allreduce(&lf, f, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)Amat))); 1121 if (!*f) PetscFunctionReturn(PETSC_SUCCESS); 1122 PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm)); 1123 PetscCallMPI(MPI_Comm_size(comm, &size)); 1124 if (size == 1) PetscFunctionReturn(PETSC_SUCCESS); 1125 1126 /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. 
*/ 1127 PetscCall(MatGetSize(Amat, &M, &N)); 1128 PetscCall(MatGetOwnershipRange(Amat, &first, &last)); 1129 PetscCall(PetscMalloc1(N - last + first, ¬me)); 1130 for (i = 0; i < first; i++) notme[i] = i; 1131 for (i = last; i < M; i++) notme[i - last + first] = i; 1132 PetscCall(ISCreateGeneral(MPI_COMM_SELF, N - last + first, notme, PETSC_COPY_VALUES, &Notme)); 1133 PetscCall(ISCreateStride(MPI_COMM_SELF, last - first, first, 1, &Me)); 1134 PetscCall(MatCreateSubMatrices(Amat, 1, &Me, &Notme, MAT_INITIAL_MATRIX, &Aoffs)); 1135 Aoff = Aoffs[0]; 1136 PetscCall(MatCreateSubMatrices(Bmat, 1, &Notme, &Me, MAT_INITIAL_MATRIX, &Boffs)); 1137 Boff = Boffs[0]; 1138 PetscCall(MatIsTranspose(Aoff, Boff, tol, f)); 1139 PetscCall(MatDestroyMatrices(1, &Aoffs)); 1140 PetscCall(MatDestroyMatrices(1, &Boffs)); 1141 PetscCall(ISDestroy(&Me)); 1142 PetscCall(ISDestroy(&Notme)); 1143 PetscCall(PetscFree(notme)); 1144 PetscFunctionReturn(PETSC_SUCCESS); 1145 } 1146 1147 static PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz) 1148 { 1149 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1150 1151 PetscFunctionBegin; 1152 /* do nondiagonal part */ 1153 PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec)); 1154 /* do local part */ 1155 PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz)); 1156 /* add partial results together */ 1157 PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE)); 1158 PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE)); 1159 PetscFunctionReturn(PETSC_SUCCESS); 1160 } 1161 1162 /* 1163 This only works correctly for square matrices where the subblock A->A is the 1164 diagonal block 1165 */ 1166 static PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A, Vec v) 1167 { 1168 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1169 1170 PetscFunctionBegin; 1171 PetscCheck(A->rmap->N == A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block"); 1172 
PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "row partition must equal col partition"); 1173 PetscCall(MatGetDiagonal(a->A, v)); 1174 PetscFunctionReturn(PETSC_SUCCESS); 1175 } 1176 1177 static PetscErrorCode MatScale_MPIAIJ(Mat A, PetscScalar aa) 1178 { 1179 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1180 1181 PetscFunctionBegin; 1182 PetscCall(MatScale(a->A, aa)); 1183 PetscCall(MatScale(a->B, aa)); 1184 PetscFunctionReturn(PETSC_SUCCESS); 1185 } 1186 1187 static PetscErrorCode MatView_MPIAIJ_Binary(Mat mat, PetscViewer viewer) 1188 { 1189 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1190 Mat_SeqAIJ *A = (Mat_SeqAIJ *)aij->A->data; 1191 Mat_SeqAIJ *B = (Mat_SeqAIJ *)aij->B->data; 1192 const PetscInt *garray = aij->garray; 1193 const PetscScalar *aa, *ba; 1194 PetscInt header[4], M, N, m, rs, cs, cnt, i, ja, jb; 1195 PetscInt64 nz, hnz; 1196 PetscInt *rowlens; 1197 PetscInt *colidxs; 1198 PetscScalar *matvals; 1199 PetscMPIInt rank; 1200 1201 PetscFunctionBegin; 1202 PetscCall(PetscViewerSetUp(viewer)); 1203 1204 M = mat->rmap->N; 1205 N = mat->cmap->N; 1206 m = mat->rmap->n; 1207 rs = mat->rmap->rstart; 1208 cs = mat->cmap->rstart; 1209 nz = A->nz + B->nz; 1210 1211 /* write matrix header */ 1212 header[0] = MAT_FILE_CLASSID; 1213 header[1] = M; 1214 header[2] = N; 1215 PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_INT64, MPI_SUM, 0, PetscObjectComm((PetscObject)mat))); 1216 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 1217 if (rank == 0) PetscCall(PetscIntCast(hnz, &header[3])); 1218 PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT)); 1219 1220 /* fill in and store row lengths */ 1221 PetscCall(PetscMalloc1(m, &rowlens)); 1222 for (i = 0; i < m; i++) rowlens[i] = A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i]; 1223 PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT)); 1224 PetscCall(PetscFree(rowlens)); 1225 1226 /* fill in and store column 
indices */ 1227 PetscCall(PetscMalloc1(nz, &colidxs)); 1228 for (cnt = 0, i = 0; i < m; i++) { 1229 for (jb = B->i[i]; jb < B->i[i + 1]; jb++) { 1230 if (garray[B->j[jb]] > cs) break; 1231 colidxs[cnt++] = garray[B->j[jb]]; 1232 } 1233 for (ja = A->i[i]; ja < A->i[i + 1]; ja++) colidxs[cnt++] = A->j[ja] + cs; 1234 for (; jb < B->i[i + 1]; jb++) colidxs[cnt++] = garray[B->j[jb]]; 1235 } 1236 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz); 1237 PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 1238 PetscCall(PetscFree(colidxs)); 1239 1240 /* fill in and store nonzero values */ 1241 PetscCall(MatSeqAIJGetArrayRead(aij->A, &aa)); 1242 PetscCall(MatSeqAIJGetArrayRead(aij->B, &ba)); 1243 PetscCall(PetscMalloc1(nz, &matvals)); 1244 for (cnt = 0, i = 0; i < m; i++) { 1245 for (jb = B->i[i]; jb < B->i[i + 1]; jb++) { 1246 if (garray[B->j[jb]] > cs) break; 1247 matvals[cnt++] = ba[jb]; 1248 } 1249 for (ja = A->i[i]; ja < A->i[i + 1]; ja++) matvals[cnt++] = aa[ja]; 1250 for (; jb < B->i[i + 1]; jb++) matvals[cnt++] = ba[jb]; 1251 } 1252 PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &aa)); 1253 PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &ba)); 1254 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz); 1255 PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 1256 PetscCall(PetscFree(matvals)); 1257 1258 /* write block size option to the viewer's .info file */ 1259 PetscCall(MatView_Binary_BlockSizes(mat, viewer)); 1260 PetscFunctionReturn(PETSC_SUCCESS); 1261 } 1262 1263 #include <petscdraw.h> 1264 static PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer) 1265 { 1266 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1267 PetscMPIInt rank = aij->rank, size = aij->size; 1268 PetscBool 
isdraw, iascii, isbinary;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  if (iascii) {
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_LOAD_BALANCE) {
      /* gather per-rank nonzero counts and report min/avg/max */
      PetscInt i, nmax = 0, nmin = PETSC_INT_MAX, navg = 0, *nz, nzlocal = ((Mat_SeqAIJ *)aij->A->data)->nz + ((Mat_SeqAIJ *)aij->B->data)->nz;
      PetscCall(PetscMalloc1(size, &nz));
      PetscCallMPI(MPI_Allgather(&nzlocal, 1, MPIU_INT, nz, 1, MPIU_INT, PetscObjectComm((PetscObject)mat)));
      for (i = 0; i < size; i++) {
        nmax = PetscMax(nmax, nz[i]);
        nmin = PetscMin(nmin, nz[i]);
        navg += nz[i];
      }
      PetscCall(PetscFree(nz));
      navg = navg / size;
      PetscCall(PetscViewerASCIIPrintf(viewer, "Load Balance - Nonzeros: Min %" PetscInt_FMT "  avg %" PetscInt_FMT "  max %" PetscInt_FMT "\n", nmin, navg, nmax));
      PetscFunctionReturn(PETSC_SUCCESS);
    }
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo   info;
      PetscInt *inodes = NULL;

      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(MatInodeGetInodeSizes(aij->A, NULL, &inodes, NULL));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      if (!inodes) {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                     info.memory));
      } else {
        PetscCall(
          PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, info.memory));
      }
      PetscCall(MatGetInfo(aij->A, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(MatGetInfo(aij->B, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(aij->Mvctx, viewer));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount, inodelimit, *inodes;
      PetscCall(MatInodeGetInodeSizes(aij->A, &inodecount, &inodes, &inodelimit));
      if (inodes) {
        PetscCall(PetscViewerASCIIPrintf(viewer, "using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n", inodecount, inodelimit));
      } else {
        PetscCall(PetscViewerASCIIPrintf(viewer, "not using I-node (on process 0) routines\n"));
      }
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(PETSC_SUCCESS);
    }
  } else if (isbinary) {
    if (size == 1) {
      PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
      PetscCall(MatView(aij->A, viewer));
    } else {
      PetscCall(MatView_MPIAIJ_Binary(mat, viewer));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (iascii && size == 1) {
    /* NOTE(review): this branch looks unreachable — when iascii is true the first `if (iascii)` branch
       is taken, so control never reaches this `else if`; confirm intent upstream */
    PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
    PetscCall(MatView(aij->A, viewer));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
  }

  { /* assemble the entire matrix onto first processor */
    Mat A = NULL, Av;
    IS  isrow, iscol;

    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->rmap->N : 0, 0, 1, &isrow));
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->cmap->N : 0, 0, 1, &iscol));
    PetscCall(MatCreateSubMatrix(mat, isrow, iscol, MAT_INITIAL_MATRIX, &A));
    PetscCall(MatMPIAIJGetSeqAIJ(A, &Av, NULL, NULL));
    /* The commented code uses MatCreateSubMatrices instead */
    /*
    Mat *AA, A = NULL, Av;
    IS  isrow,iscol;

    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow));
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol));
    PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA));
    if (rank == 0) {
       PetscCall(PetscObjectReference((PetscObject)AA[0]));
       A    = AA[0];
       Av   = AA[0];
    }
    PetscCall(MatDestroySubMatrices(1,&AA));
    */
    PetscCall(ISDestroy(&iscol));
    PetscCall(ISDestroy(&isrow));
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    if (rank == 0) {
      if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)Av, ((PetscObject)mat)->name));
      PetscCall(MatView_SeqAIJ(Av, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Dispatch viewing to the ASCII/draw/socket/binary handler for supported viewer types */
PetscErrorCode MatView_MPIAIJ(Mat mat, PetscViewer viewer)
{
  PetscBool iascii, isdraw, issocket, isbinary;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
  if (iascii || isdraw || isbinary || issocket) PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat, viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Local (block Jacobi style) SOR: sweeps are applied to the diagonal block with the
   off-diagonal contribution folded into the right-hand side each outer iteration */
static PetscErrorCode MatSOR_MPIAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data;
  Vec         bb1 = NULL;
  PetscBool   hasop;

  PetscFunctionBegin;
  if (flag == SOR_APPLY_UPPER) {
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag,
fshift, lits, 1, xx));
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  /* bb1 (the modified rhs) is needed unless a single zero-initial-guess sweep suffices */
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) PetscCall(VecDuplicate(bb, &bb1));

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }

    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_EISENSTAT) {
    Vec xx1;

    PetscCall(VecDuplicate(bb, &xx1));
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx));

    PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
    if (!mat->diag) {
      /* cache the diagonal; it is destroyed at the next assembly (MatAssemblyEnd) */
      PetscCall(MatCreateVecs(matin, &mat->diag, NULL));
      PetscCall(MatGetDiagonal(matin, mat->diag));
    }
    PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop));
    if (hasop) {
      PetscCall(MatMultDiagonalBlock(matin, xx, bb1));
    } else {
      PetscCall(VecPointwiseMult(bb1, mat->diag, xx));
    }
    PetscCall(VecAYPX(bb1, (omega - 2.0) / omega, bb));

    PetscCall(MatMultAdd(mat->B, mat->lvec, bb1, bb1));

    /* local sweep */
    PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1));
    PetscCall(VecAXPY(xx, 1.0, xx1));
    PetscCall(VecDestroy(&xx1));
  } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel SOR not supported");

  PetscCall(VecDestroy(&bb1));

  matin->factorerrortype = mat->A->factorerrortype;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Produce *B = P_row * A * P_col^T using PetscSF to invert the permutations in parallel */
static PetscErrorCode MatPermute_MPIAIJ(Mat A, IS rowp, IS colp, Mat *B)
{
  Mat             aA, aB, Aperm;
  const PetscInt *rwant, *cwant, *gcols, *ai, *bi, *aj, *bj;
  PetscScalar    *aa, *ba;
  PetscInt        i, j, m, n, ng, anz, bnz, *dnnz, *onnz, *tdnnz, *tonnz, *rdest, *cdest, *work, *gcdest;
PetscSF         rowsf, sf;
  IS              parcolp = NULL; /* NOTE(review): never assigned in this function as shown — the final conditional ISDestroy(&colp) appears dead; confirm against upstream */
  PetscBool       done;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(A, &m, &n));
  PetscCall(ISGetIndices(rowp, &rwant));
  PetscCall(ISGetIndices(colp, &cwant));
  PetscCall(PetscMalloc3(PetscMax(m, n), &work, m, &rdest, n, &cdest));

  /* Invert row permutation to find out where my rows should go */
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &rowsf));
  PetscCall(PetscSFSetGraphLayout(rowsf, A->rmap, A->rmap->n, NULL, PETSC_OWN_POINTER, rwant));
  PetscCall(PetscSFSetFromOptions(rowsf));
  for (i = 0; i < m; i++) work[i] = A->rmap->rstart + i;
  PetscCall(PetscSFReduceBegin(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));

  /* Invert column permutation to find out where my columns should go */
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraphLayout(sf, A->cmap, A->cmap->n, NULL, PETSC_OWN_POINTER, cwant));
  PetscCall(PetscSFSetFromOptions(sf));
  for (i = 0; i < n; i++) work[i] = A->cmap->rstart + i;
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, work, cdest, MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, work, cdest, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));

  PetscCall(ISRestoreIndices(rowp, &rwant));
  PetscCall(ISRestoreIndices(colp, &cwant));
  PetscCall(MatMPIAIJGetSeqAIJ(A, &aA, &aB, &gcols));

  /* Find out where my gcols should go */
  PetscCall(MatGetSize(aB, NULL, &ng));
  PetscCall(PetscMalloc1(ng, &gcdest));
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraphLayout(sf, A->cmap, ng, NULL, PETSC_OWN_POINTER, gcols));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFBcastBegin(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));

  /* Count permuted diagonal/off-diagonal nonzeros per source row, then push the counts to the destination rows */
  PetscCall(PetscCalloc4(m, &dnnz, m, &onnz, m, &tdnnz, m, &tonnz));
  PetscCall(MatGetRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
  PetscCall(MatGetRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
  for (i = 0; i < m; i++) {
    PetscInt    row = rdest[i];
    PetscMPIInt rowner;
    PetscCall(PetscLayoutFindOwner(A->rmap, row, &rowner));
    for (j = ai[i]; j < ai[i + 1]; j++) {
      PetscInt    col = cdest[aj[j]];
      PetscMPIInt cowner;
      PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); /* Could build an index for the columns to eliminate this search */
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
    for (j = bi[i]; j < bi[i + 1]; j++) {
      PetscInt    col = gcdest[bj[j]];
      PetscMPIInt cowner;
      PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner));
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
  }
  PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&rowsf));

  PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A), A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N, 0, tdnnz, 0, tonnz, &Aperm));
  PetscCall(MatSeqAIJGetArray(aA, &aa));
  PetscCall(MatSeqAIJGetArray(aB, &ba));
  for (i = 0; i < m; i++) {
    PetscInt *acols = dnnz, *bcols = onnz; /* Repurpose now-unneeded arrays */
    PetscInt  j0, rowlen;
    rowlen = ai[i + 1] - ai[i];
    for (j0 = j = 0; j < rowlen; j0 = j) { /* rowlen could be larger than number of rows m, so sum in batches */
      for (; j < PetscMin(rowlen, j0 + m); j++) acols[j - j0] = cdest[aj[ai[i] + j]];
      PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, acols, aa + ai[i] + j0, INSERT_VALUES));
    }
    rowlen = bi[i + 1] - bi[i];
    for (j0 = j = 0; j < rowlen; j0 = j) {
      for (; j < PetscMin(rowlen, j0 + m); j++) bcols[j - j0] = gcdest[bj[bi[i] + j]];
      PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, bcols, ba + bi[i] + j0, INSERT_VALUES));
    }
  }
  PetscCall(MatAssemblyBegin(Aperm, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(Aperm, MAT_FINAL_ASSEMBLY));
  PetscCall(MatRestoreRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
  PetscCall(MatRestoreRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
  PetscCall(MatSeqAIJRestoreArray(aA, &aa));
  PetscCall(MatSeqAIJRestoreArray(aB, &ba));
  PetscCall(PetscFree4(dnnz, onnz, tdnnz, tonnz));
  PetscCall(PetscFree3(work, rdest, cdest));
  PetscCall(PetscFree(gcdest));
  if (parcolp) PetscCall(ISDestroy(&colp));
  *B = Aperm;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Report the number of ghost (off-process) columns and, optionally, their global indices */
static PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatGetSize(aij->B, NULL, nghosts));
  if (ghosts) *ghosts = aij->garray;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Accumulate MatInfo over the A and B blocks (continues past this chunk) */
static PetscErrorCode MatGetInfo_MPIAIJ(Mat matin, MatInfoType flag, MatInfo *info)
{
  Mat_MPIAIJ    *mat = (Mat_MPIAIJ *)matin->data;
  Mat            A = mat->A, B = mat->B;
  PetscLogDouble isend[5], irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;
  PetscCall(MatGetInfo(A, MAT_LOCAL, info));

  isend[0] = info->nz_used;
  isend[1] = info->nz_allocated;
  isend[2] = info->nz_unneeded;
  isend[3] = info->memory;
  isend[4] = info->mallocs;

  PetscCall(MatGetInfo(B, MAT_LOCAL, info));

  isend[0] += info->nz_used;
  isend[1] += info->nz_allocated;
  isend[2] += info->nz_unneeded;
  isend[3] += info->memory;
  isend[4] += info->mallocs;
1638 if (flag == MAT_LOCAL) { 1639 info->nz_used = isend[0]; 1640 info->nz_allocated = isend[1]; 1641 info->nz_unneeded = isend[2]; 1642 info->memory = isend[3]; 1643 info->mallocs = isend[4]; 1644 } else if (flag == MAT_GLOBAL_MAX) { 1645 PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin))); 1646 1647 info->nz_used = irecv[0]; 1648 info->nz_allocated = irecv[1]; 1649 info->nz_unneeded = irecv[2]; 1650 info->memory = irecv[3]; 1651 info->mallocs = irecv[4]; 1652 } else if (flag == MAT_GLOBAL_SUM) { 1653 PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin))); 1654 1655 info->nz_used = irecv[0]; 1656 info->nz_allocated = irecv[1]; 1657 info->nz_unneeded = irecv[2]; 1658 info->memory = irecv[3]; 1659 info->mallocs = irecv[4]; 1660 } 1661 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1662 info->fill_ratio_needed = 0; 1663 info->factor_mallocs = 0; 1664 PetscFunctionReturn(PETSC_SUCCESS); 1665 } 1666 1667 PetscErrorCode MatSetOption_MPIAIJ(Mat A, MatOption op, PetscBool flg) 1668 { 1669 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1670 1671 PetscFunctionBegin; 1672 switch (op) { 1673 case MAT_NEW_NONZERO_LOCATIONS: 1674 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1675 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1676 case MAT_KEEP_NONZERO_PATTERN: 1677 case MAT_NEW_NONZERO_LOCATION_ERR: 1678 case MAT_USE_INODES: 1679 case MAT_IGNORE_ZERO_ENTRIES: 1680 case MAT_FORM_EXPLICIT_TRANSPOSE: 1681 MatCheckPreallocated(A, 1); 1682 PetscCall(MatSetOption(a->A, op, flg)); 1683 PetscCall(MatSetOption(a->B, op, flg)); 1684 break; 1685 case MAT_ROW_ORIENTED: 1686 MatCheckPreallocated(A, 1); 1687 a->roworiented = flg; 1688 1689 PetscCall(MatSetOption(a->A, op, flg)); 1690 PetscCall(MatSetOption(a->B, op, flg)); 1691 break; 1692 case MAT_FORCE_DIAGONAL_ENTRIES: 1693 case MAT_SORTED_FULL: 1694 PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); 1695 break; 1696 
case MAT_IGNORE_OFF_PROC_ENTRIES: 1697 a->donotstash = flg; 1698 break; 1699 /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */ 1700 case MAT_SPD: 1701 case MAT_SYMMETRIC: 1702 case MAT_STRUCTURALLY_SYMMETRIC: 1703 case MAT_HERMITIAN: 1704 case MAT_SYMMETRY_ETERNAL: 1705 case MAT_STRUCTURAL_SYMMETRY_ETERNAL: 1706 case MAT_SPD_ETERNAL: 1707 /* if the diagonal matrix is square it inherits some of the properties above */ 1708 break; 1709 case MAT_SUBMAT_SINGLEIS: 1710 A->submat_singleis = flg; 1711 break; 1712 case MAT_STRUCTURE_ONLY: 1713 /* The option is handled directly by MatSetOption() */ 1714 break; 1715 default: 1716 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op); 1717 } 1718 PetscFunctionReturn(PETSC_SUCCESS); 1719 } 1720 1721 PetscErrorCode MatGetRow_MPIAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1722 { 1723 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data; 1724 PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p; 1725 PetscInt i, *cworkA, *cworkB, **pcA, **pcB, cstart = matin->cmap->rstart; 1726 PetscInt nztot, nzA, nzB, lrow, rstart = matin->rmap->rstart, rend = matin->rmap->rend; 1727 PetscInt *cmap, *idx_p; 1728 1729 PetscFunctionBegin; 1730 PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active"); 1731 mat->getrowactive = PETSC_TRUE; 1732 1733 if (!mat->rowvalues && (idx || v)) { 1734 /* 1735 allocate enough space to hold information from the longest row. 
1736 */ 1737 Mat_SeqAIJ *Aa = (Mat_SeqAIJ *)mat->A->data, *Ba = (Mat_SeqAIJ *)mat->B->data; 1738 PetscInt max = 1, tmp; 1739 for (i = 0; i < matin->rmap->n; i++) { 1740 tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i]; 1741 if (max < tmp) max = tmp; 1742 } 1743 PetscCall(PetscMalloc2(max, &mat->rowvalues, max, &mat->rowindices)); 1744 } 1745 1746 PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows"); 1747 lrow = row - rstart; 1748 1749 pvA = &vworkA; 1750 pcA = &cworkA; 1751 pvB = &vworkB; 1752 pcB = &cworkB; 1753 if (!v) { 1754 pvA = NULL; 1755 pvB = NULL; 1756 } 1757 if (!idx) { 1758 pcA = NULL; 1759 if (!v) pcB = NULL; 1760 } 1761 PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA)); 1762 PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB)); 1763 nztot = nzA + nzB; 1764 1765 cmap = mat->garray; 1766 if (v || idx) { 1767 if (nztot) { 1768 /* Sort by increasing column numbers, assuming A and B already sorted */ 1769 PetscInt imark = -1; 1770 if (v) { 1771 *v = v_p = mat->rowvalues; 1772 for (i = 0; i < nzB; i++) { 1773 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1774 else break; 1775 } 1776 imark = i; 1777 for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i]; 1778 for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i]; 1779 } 1780 if (idx) { 1781 *idx = idx_p = mat->rowindices; 1782 if (imark > -1) { 1783 for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i]]; 1784 } else { 1785 for (i = 0; i < nzB; i++) { 1786 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1787 else break; 1788 } 1789 imark = i; 1790 } 1791 for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart + cworkA[i]; 1792 for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i]]; 1793 } 1794 } else { 1795 if (idx) *idx = NULL; 1796 if (v) *v = NULL; 1797 } 1798 } 1799 *nz = nztot; 1800 PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA)); 1801 PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, 
&nzB, pcB, pvB)); 1802 PetscFunctionReturn(PETSC_SUCCESS); 1803 } 1804 1805 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1806 { 1807 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1808 1809 PetscFunctionBegin; 1810 PetscCheck(aij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first"); 1811 aij->getrowactive = PETSC_FALSE; 1812 PetscFunctionReturn(PETSC_SUCCESS); 1813 } 1814 1815 static PetscErrorCode MatNorm_MPIAIJ(Mat mat, NormType type, PetscReal *norm) 1816 { 1817 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1818 Mat_SeqAIJ *amat = (Mat_SeqAIJ *)aij->A->data, *bmat = (Mat_SeqAIJ *)aij->B->data; 1819 PetscInt i, j, cstart = mat->cmap->rstart; 1820 PetscReal sum = 0.0; 1821 const MatScalar *v, *amata, *bmata; 1822 PetscMPIInt iN; 1823 1824 PetscFunctionBegin; 1825 if (aij->size == 1) { 1826 PetscCall(MatNorm(aij->A, type, norm)); 1827 } else { 1828 PetscCall(MatSeqAIJGetArrayRead(aij->A, &amata)); 1829 PetscCall(MatSeqAIJGetArrayRead(aij->B, &bmata)); 1830 if (type == NORM_FROBENIUS) { 1831 v = amata; 1832 for (i = 0; i < amat->nz; i++) { 1833 sum += PetscRealPart(PetscConj(*v) * (*v)); 1834 v++; 1835 } 1836 v = bmata; 1837 for (i = 0; i < bmat->nz; i++) { 1838 sum += PetscRealPart(PetscConj(*v) * (*v)); 1839 v++; 1840 } 1841 PetscCallMPI(MPIU_Allreduce(&sum, norm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 1842 *norm = PetscSqrtReal(*norm); 1843 PetscCall(PetscLogFlops(2.0 * amat->nz + 2.0 * bmat->nz)); 1844 } else if (type == NORM_1) { /* max column norm */ 1845 PetscReal *tmp, *tmp2; 1846 PetscInt *jj, *garray = aij->garray; 1847 PetscCall(PetscCalloc1(mat->cmap->N + 1, &tmp)); 1848 PetscCall(PetscMalloc1(mat->cmap->N + 1, &tmp2)); 1849 *norm = 0.0; 1850 v = amata; 1851 jj = amat->j; 1852 for (j = 0; j < amat->nz; j++) { 1853 tmp[cstart + *jj++] += PetscAbsScalar(*v); 1854 v++; 1855 } 1856 v = bmata; 1857 jj = bmat->j; 1858 for (j = 0; j < bmat->nz; 
j++) { 1859 tmp[garray[*jj++]] += PetscAbsScalar(*v); 1860 v++; 1861 } 1862 PetscCall(PetscMPIIntCast(mat->cmap->N, &iN)); 1863 PetscCallMPI(MPIU_Allreduce(tmp, tmp2, iN, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 1864 for (j = 0; j < mat->cmap->N; j++) { 1865 if (tmp2[j] > *norm) *norm = tmp2[j]; 1866 } 1867 PetscCall(PetscFree(tmp)); 1868 PetscCall(PetscFree(tmp2)); 1869 PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0))); 1870 } else if (type == NORM_INFINITY) { /* max row norm */ 1871 PetscReal ntemp = 0.0; 1872 for (j = 0; j < aij->A->rmap->n; j++) { 1873 v = PetscSafePointerPlusOffset(amata, amat->i[j]); 1874 sum = 0.0; 1875 for (i = 0; i < amat->i[j + 1] - amat->i[j]; i++) { 1876 sum += PetscAbsScalar(*v); 1877 v++; 1878 } 1879 v = PetscSafePointerPlusOffset(bmata, bmat->i[j]); 1880 for (i = 0; i < bmat->i[j + 1] - bmat->i[j]; i++) { 1881 sum += PetscAbsScalar(*v); 1882 v++; 1883 } 1884 if (sum > ntemp) ntemp = sum; 1885 } 1886 PetscCallMPI(MPIU_Allreduce(&ntemp, norm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat))); 1887 PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0))); 1888 } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for two norm"); 1889 PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &amata)); 1890 PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &bmata)); 1891 } 1892 PetscFunctionReturn(PETSC_SUCCESS); 1893 } 1894 1895 static PetscErrorCode MatTranspose_MPIAIJ(Mat A, MatReuse reuse, Mat *matout) 1896 { 1897 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *b; 1898 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *)a->A->data, *Bloc = (Mat_SeqAIJ *)a->B->data, *sub_B_diag; 1899 PetscInt M = A->rmap->N, N = A->cmap->N, ma, na, mb, nb, row, *cols, *cols_tmp, *B_diag_ilen, i, ncol, A_diag_ncol; 1900 const PetscInt *ai, *aj, *bi, *bj, *B_diag_i; 1901 Mat B, A_diag, *B_diag; 1902 const MatScalar *pbv, *bv; 1903 1904 PetscFunctionBegin; 1905 if (reuse == MAT_REUSE_MATRIX) 
PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout)); 1906 ma = A->rmap->n; 1907 na = A->cmap->n; 1908 mb = a->B->rmap->n; 1909 nb = a->B->cmap->n; 1910 ai = Aloc->i; 1911 aj = Aloc->j; 1912 bi = Bloc->i; 1913 bj = Bloc->j; 1914 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1915 PetscInt *d_nnz, *g_nnz, *o_nnz; 1916 PetscSFNode *oloc; 1917 PETSC_UNUSED PetscSF sf; 1918 1919 PetscCall(PetscMalloc4(na, &d_nnz, na, &o_nnz, nb, &g_nnz, nb, &oloc)); 1920 /* compute d_nnz for preallocation */ 1921 PetscCall(PetscArrayzero(d_nnz, na)); 1922 for (i = 0; i < ai[ma]; i++) d_nnz[aj[i]]++; 1923 /* compute local off-diagonal contributions */ 1924 PetscCall(PetscArrayzero(g_nnz, nb)); 1925 for (i = 0; i < bi[ma]; i++) g_nnz[bj[i]]++; 1926 /* map those to global */ 1927 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1928 PetscCall(PetscSFSetGraphLayout(sf, A->cmap, nb, NULL, PETSC_USE_POINTER, a->garray)); 1929 PetscCall(PetscSFSetFromOptions(sf)); 1930 PetscCall(PetscArrayzero(o_nnz, na)); 1931 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM)); 1932 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM)); 1933 PetscCall(PetscSFDestroy(&sf)); 1934 1935 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 1936 PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M)); 1937 PetscCall(MatSetBlockSizes(B, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs))); 1938 PetscCall(MatSetType(B, ((PetscObject)A)->type_name)); 1939 PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz)); 1940 PetscCall(PetscFree4(d_nnz, o_nnz, g_nnz, oloc)); 1941 } else { 1942 B = *matout; 1943 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 1944 } 1945 1946 b = (Mat_MPIAIJ *)B->data; 1947 A_diag = a->A; 1948 B_diag = &b->A; 1949 sub_B_diag = (Mat_SeqAIJ *)(*B_diag)->data; 1950 A_diag_ncol = A_diag->cmap->N; 1951 B_diag_ilen = sub_B_diag->ilen; 1952 B_diag_i = sub_B_diag->i; 1953 1954 /* Set ilen for diagonal of B */ 1955 for (i 
= 0; i < A_diag_ncol; i++) B_diag_ilen[i] = B_diag_i[i + 1] - B_diag_i[i]; 1956 1957 /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done 1958 very quickly (=without using MatSetValues), because all writes are local. */ 1959 PetscCall(MatTransposeSetPrecursor(A_diag, *B_diag)); 1960 PetscCall(MatTranspose(A_diag, MAT_REUSE_MATRIX, B_diag)); 1961 1962 /* copy over the B part */ 1963 PetscCall(PetscMalloc1(bi[mb], &cols)); 1964 PetscCall(MatSeqAIJGetArrayRead(a->B, &bv)); 1965 pbv = bv; 1966 row = A->rmap->rstart; 1967 for (i = 0; i < bi[mb]; i++) cols[i] = a->garray[bj[i]]; 1968 cols_tmp = cols; 1969 for (i = 0; i < mb; i++) { 1970 ncol = bi[i + 1] - bi[i]; 1971 PetscCall(MatSetValues(B, ncol, cols_tmp, 1, &row, pbv, INSERT_VALUES)); 1972 row++; 1973 if (pbv) pbv += ncol; 1974 if (cols_tmp) cols_tmp += ncol; 1975 } 1976 PetscCall(PetscFree(cols)); 1977 PetscCall(MatSeqAIJRestoreArrayRead(a->B, &bv)); 1978 1979 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 1980 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 1981 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1982 *matout = B; 1983 } else { 1984 PetscCall(MatHeaderMerge(A, &B)); 1985 } 1986 PetscFunctionReturn(PETSC_SUCCESS); 1987 } 1988 1989 static PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat, Vec ll, Vec rr) 1990 { 1991 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1992 Mat a = aij->A, b = aij->B; 1993 PetscInt s1, s2, s3; 1994 1995 PetscFunctionBegin; 1996 PetscCall(MatGetLocalSize(mat, &s2, &s3)); 1997 if (rr) { 1998 PetscCall(VecGetLocalSize(rr, &s1)); 1999 PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size"); 2000 /* Overlap communication with computation. 
*/ 2001 PetscCall(VecScatterBegin(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2002 } 2003 if (ll) { 2004 PetscCall(VecGetLocalSize(ll, &s1)); 2005 PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size"); 2006 PetscUseTypeMethod(b, diagonalscale, ll, NULL); 2007 } 2008 /* scale the diagonal block */ 2009 PetscUseTypeMethod(a, diagonalscale, ll, rr); 2010 2011 if (rr) { 2012 /* Do a scatter end and then right scale the off-diagonal block */ 2013 PetscCall(VecScatterEnd(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2014 PetscUseTypeMethod(b, diagonalscale, NULL, aij->lvec); 2015 } 2016 PetscFunctionReturn(PETSC_SUCCESS); 2017 } 2018 2019 static PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2020 { 2021 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2022 2023 PetscFunctionBegin; 2024 PetscCall(MatSetUnfactored(a->A)); 2025 PetscFunctionReturn(PETSC_SUCCESS); 2026 } 2027 2028 static PetscErrorCode MatEqual_MPIAIJ(Mat A, Mat B, PetscBool *flag) 2029 { 2030 Mat_MPIAIJ *matB = (Mat_MPIAIJ *)B->data, *matA = (Mat_MPIAIJ *)A->data; 2031 Mat a, b, c, d; 2032 PetscBool flg; 2033 2034 PetscFunctionBegin; 2035 a = matA->A; 2036 b = matA->B; 2037 c = matB->A; 2038 d = matB->B; 2039 2040 PetscCall(MatEqual(a, c, &flg)); 2041 if (flg) PetscCall(MatEqual(b, d, &flg)); 2042 PetscCallMPI(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A))); 2043 PetscFunctionReturn(PETSC_SUCCESS); 2044 } 2045 2046 static PetscErrorCode MatCopy_MPIAIJ(Mat A, Mat B, MatStructure str) 2047 { 2048 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2049 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2050 2051 PetscFunctionBegin; 2052 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
*/ 2053 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2054 /* because of the column compression in the off-processor part of the matrix a->B, 2055 the number of columns in a->B and b->B may be different, hence we cannot call 2056 the MatCopy() directly on the two parts. If need be, we can provide a more 2057 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2058 then copying the submatrices */ 2059 PetscCall(MatCopy_Basic(A, B, str)); 2060 } else { 2061 PetscCall(MatCopy(a->A, b->A, str)); 2062 PetscCall(MatCopy(a->B, b->B, str)); 2063 } 2064 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 2065 PetscFunctionReturn(PETSC_SUCCESS); 2066 } 2067 2068 /* 2069 Computes the number of nonzeros per row needed for preallocation when X and Y 2070 have different nonzero structure. 2071 */ 2072 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m, const PetscInt *xi, const PetscInt *xj, const PetscInt *xltog, const PetscInt *yi, const PetscInt *yj, const PetscInt *yltog, PetscInt *nnz) 2073 { 2074 PetscInt i, j, k, nzx, nzy; 2075 2076 PetscFunctionBegin; 2077 /* Set the number of nonzeros in the new matrix */ 2078 for (i = 0; i < m; i++) { 2079 const PetscInt *xjj = PetscSafePointerPlusOffset(xj, xi[i]), *yjj = PetscSafePointerPlusOffset(yj, yi[i]); 2080 nzx = xi[i + 1] - xi[i]; 2081 nzy = yi[i + 1] - yi[i]; 2082 nnz[i] = 0; 2083 for (j = 0, k = 0; j < nzx; j++) { /* Point in X */ 2084 for (; k < nzy && yltog[yjj[k]] < xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2085 if (k < nzy && yltog[yjj[k]] == xltog[xjj[j]]) k++; /* Skip duplicate */ 2086 nnz[i]++; 2087 } 2088 for (; k < nzy; k++) nnz[i]++; 2089 } 2090 PetscFunctionReturn(PETSC_SUCCESS); 2091 } 2092 2093 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2094 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz) 2095 { 
2096 PetscInt m = Y->rmap->N; 2097 Mat_SeqAIJ *x = (Mat_SeqAIJ *)X->data; 2098 Mat_SeqAIJ *y = (Mat_SeqAIJ *)Y->data; 2099 2100 PetscFunctionBegin; 2101 PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz)); 2102 PetscFunctionReturn(PETSC_SUCCESS); 2103 } 2104 2105 static PetscErrorCode MatAXPY_MPIAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str) 2106 { 2107 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data, *yy = (Mat_MPIAIJ *)Y->data; 2108 2109 PetscFunctionBegin; 2110 if (str == SAME_NONZERO_PATTERN) { 2111 PetscCall(MatAXPY(yy->A, a, xx->A, str)); 2112 PetscCall(MatAXPY(yy->B, a, xx->B, str)); 2113 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2114 PetscCall(MatAXPY_Basic(Y, a, X, str)); 2115 } else { 2116 Mat B; 2117 PetscInt *nnz_d, *nnz_o; 2118 2119 PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d)); 2120 PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o)); 2121 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B)); 2122 PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name)); 2123 PetscCall(MatSetLayouts(B, Y->rmap, Y->cmap)); 2124 PetscCall(MatSetType(B, ((PetscObject)Y)->type_name)); 2125 PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A, xx->A, nnz_d)); 2126 PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o)); 2127 PetscCall(MatMPIAIJSetPreallocation(B, 0, nnz_d, 0, nnz_o)); 2128 PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str)); 2129 PetscCall(MatHeaderMerge(Y, &B)); 2130 PetscCall(PetscFree(nnz_d)); 2131 PetscCall(PetscFree(nnz_o)); 2132 } 2133 PetscFunctionReturn(PETSC_SUCCESS); 2134 } 2135 2136 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat); 2137 2138 static PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2139 { 2140 PetscFunctionBegin; 2141 if (PetscDefined(USE_COMPLEX)) { 2142 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2143 2144 PetscCall(MatConjugate_SeqAIJ(aij->A)); 2145 PetscCall(MatConjugate_SeqAIJ(aij->B)); 2146 } 2147 
PetscFunctionReturn(PETSC_SUCCESS); 2148 } 2149 2150 static PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2151 { 2152 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2153 2154 PetscFunctionBegin; 2155 PetscCall(MatRealPart(a->A)); 2156 PetscCall(MatRealPart(a->B)); 2157 PetscFunctionReturn(PETSC_SUCCESS); 2158 } 2159 2160 static PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2161 { 2162 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2163 2164 PetscFunctionBegin; 2165 PetscCall(MatImaginaryPart(a->A)); 2166 PetscCall(MatImaginaryPart(a->B)); 2167 PetscFunctionReturn(PETSC_SUCCESS); 2168 } 2169 2170 static PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2171 { 2172 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2173 PetscInt i, *idxb = NULL, m = A->rmap->n; 2174 PetscScalar *va, *vv; 2175 Vec vB, vA; 2176 const PetscScalar *vb; 2177 2178 PetscFunctionBegin; 2179 PetscCall(MatCreateVecs(a->A, NULL, &vA)); 2180 PetscCall(MatGetRowMaxAbs(a->A, vA, idx)); 2181 2182 PetscCall(VecGetArrayWrite(vA, &va)); 2183 if (idx) { 2184 for (i = 0; i < m; i++) { 2185 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2186 } 2187 } 2188 2189 PetscCall(MatCreateVecs(a->B, NULL, &vB)); 2190 PetscCall(PetscMalloc1(m, &idxb)); 2191 PetscCall(MatGetRowMaxAbs(a->B, vB, idxb)); 2192 2193 PetscCall(VecGetArrayWrite(v, &vv)); 2194 PetscCall(VecGetArrayRead(vB, &vb)); 2195 for (i = 0; i < m; i++) { 2196 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2197 vv[i] = vb[i]; 2198 if (idx) idx[i] = a->garray[idxb[i]]; 2199 } else { 2200 vv[i] = va[i]; 2201 if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) idx[i] = a->garray[idxb[i]]; 2202 } 2203 } 2204 PetscCall(VecRestoreArrayWrite(vA, &vv)); 2205 PetscCall(VecRestoreArrayWrite(vA, &va)); 2206 PetscCall(VecRestoreArrayRead(vB, &vb)); 2207 PetscCall(PetscFree(idxb)); 2208 PetscCall(VecDestroy(&vA)); 2209 PetscCall(VecDestroy(&vB)); 2210 PetscFunctionReturn(PETSC_SUCCESS); 2211 } 2212 2213 static 
PetscErrorCode MatGetRowSumAbs_MPIAIJ(Mat A, Vec v) 2214 { 2215 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2216 Vec vB, vA; 2217 2218 PetscFunctionBegin; 2219 PetscCall(MatCreateVecs(a->A, NULL, &vA)); 2220 PetscCall(MatGetRowSumAbs(a->A, vA)); 2221 PetscCall(MatCreateVecs(a->B, NULL, &vB)); 2222 PetscCall(MatGetRowSumAbs(a->B, vB)); 2223 PetscCall(VecAXPY(vA, 1.0, vB)); 2224 PetscCall(VecDestroy(&vB)); 2225 PetscCall(VecCopy(vA, v)); 2226 PetscCall(VecDestroy(&vA)); 2227 PetscFunctionReturn(PETSC_SUCCESS); 2228 } 2229 2230 static PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2231 { 2232 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 2233 PetscInt m = A->rmap->n, n = A->cmap->n; 2234 PetscInt cstart = A->cmap->rstart, cend = A->cmap->rend; 2235 PetscInt *cmap = mat->garray; 2236 PetscInt *diagIdx, *offdiagIdx; 2237 Vec diagV, offdiagV; 2238 PetscScalar *a, *diagA, *offdiagA; 2239 const PetscScalar *ba, *bav; 2240 PetscInt r, j, col, ncols, *bi, *bj; 2241 Mat B = mat->B; 2242 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 2243 2244 PetscFunctionBegin; 2245 /* When a process holds entire A and other processes have no entry */ 2246 if (A->cmap->N == n) { 2247 PetscCall(VecGetArrayWrite(v, &diagA)); 2248 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV)); 2249 PetscCall(MatGetRowMinAbs(mat->A, diagV, idx)); 2250 PetscCall(VecDestroy(&diagV)); 2251 PetscCall(VecRestoreArrayWrite(v, &diagA)); 2252 PetscFunctionReturn(PETSC_SUCCESS); 2253 } else if (n == 0) { 2254 if (m) { 2255 PetscCall(VecGetArrayWrite(v, &a)); 2256 for (r = 0; r < m; r++) { 2257 a[r] = 0.0; 2258 if (idx) idx[r] = -1; 2259 } 2260 PetscCall(VecRestoreArrayWrite(v, &a)); 2261 } 2262 PetscFunctionReturn(PETSC_SUCCESS); 2263 } 2264 2265 PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx)); 2266 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2267 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2268 PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx)); 2269 2270 /* Get 
offdiagIdx[] for implicit 0.0 */ 2271 PetscCall(MatSeqAIJGetArrayRead(B, &bav)); 2272 ba = bav; 2273 bi = b->i; 2274 bj = b->j; 2275 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2276 for (r = 0; r < m; r++) { 2277 ncols = bi[r + 1] - bi[r]; 2278 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2279 offdiagA[r] = *ba; 2280 offdiagIdx[r] = cmap[0]; 2281 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2282 offdiagA[r] = 0.0; 2283 2284 /* Find first hole in the cmap */ 2285 for (j = 0; j < ncols; j++) { 2286 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2287 if (col > j && j < cstart) { 2288 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2289 break; 2290 } else if (col > j + n && j >= cstart) { 2291 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2292 break; 2293 } 2294 } 2295 if (j == ncols && ncols < A->cmap->N - n) { 2296 /* a hole is outside compressed Bcols */ 2297 if (ncols == 0) { 2298 if (cstart) { 2299 offdiagIdx[r] = 0; 2300 } else offdiagIdx[r] = cend; 2301 } else { /* ncols > 0 */ 2302 offdiagIdx[r] = cmap[ncols - 1] + 1; 2303 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2304 } 2305 } 2306 } 2307 2308 for (j = 0; j < ncols; j++) { 2309 if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) { 2310 offdiagA[r] = *ba; 2311 offdiagIdx[r] = cmap[*bj]; 2312 } 2313 ba++; 2314 bj++; 2315 } 2316 } 2317 2318 PetscCall(VecGetArrayWrite(v, &a)); 2319 PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA)); 2320 for (r = 0; r < m; ++r) { 2321 if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) { 2322 a[r] = diagA[r]; 2323 if (idx) idx[r] = cstart + diagIdx[r]; 2324 } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) { 2325 a[r] = diagA[r]; 2326 if (idx) { 2327 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2328 idx[r] = cstart + diagIdx[r]; 2329 } else idx[r] = offdiagIdx[r]; 2330 } 2331 } else { 2332 a[r] = offdiagA[r]; 2333 if (idx) 
idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Computes v[r] = min over the r-th local row of A, where the sparse off-diagonal part B
   contributes implicit 0.0 entries; idx[] (optional) receives the global column of each minimum.
   Strategy: take the min of the diagonal block via MatGetRowMin(mat->A), compute the min of the
   off-diagonal part (including implicit zeros) by hand, then merge the two. */
static PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
  PetscInt           m = A->rmap->n, n = A->cmap->n;
  PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray; /* maps local B column -> global column */
  PetscInt          *diagIdx, *offdiagIdx;
  Vec                diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt           r, j, col, ncols, *bi, *bj;
  Mat                B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* All columns are in the diagonal block: wrap v's array and delegate to the SeqAIJ min */
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMin(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (n == 0) {
    if (m) {
      /* No locally owned columns: fill with sentinel values */
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = PETSC_MAX_REAL;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscCalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMin(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r]   = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse so already KNOW minimum is 0.0 or lower; start from an implicit zero */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; /* skip over the diagonal-block column range */
        }
      }
    }

    /* Scan the stored entries of this B row, keeping the smallest value seen */
    for (j = 0; j < ncols; j++) {
      if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {
        offdiagA[r]   = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  /* Merge diagonal-block and off-diagonal minima */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        /* Ties are broken in favor of the smaller global column index */
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Computes v[r] = max over the r-th local row of A; mirror image of MatGetRowMin_MPIAIJ above
   (implicit zeros of B make the off-diagonal maximum 0.0 or higher). */
static PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
  PetscInt           m = A->rmap->n, n = A->cmap->n;
  PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray;
  PetscInt          *diagIdx, *offdiagIdx;
  Vec                diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt           r, j, col, ncols, *bi, *bj;
  Mat                B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMax(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (n == 0) {
    if (m) {
      /* No locally owned columns: fill with sentinel values */
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = PETSC_MIN_REAL;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMax(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r]   = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; /* skip over the diagonal-block column range */
        }
      }
    }

    /* Scan the stored entries of this B row, keeping the largest value seen */
    for (j = 0; j < ncols; j++) {
      if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {
        offdiagA[r]   = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  /* Merge diagonal-block and off-diagonal maxima; ties go to the smaller global column */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Returns in *newmat a sequential matrix with the nonzero structure of the whole parallel mat
   (values are not copied; see MAT_DO_NOT_GET_VALUES below). */
PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat, Mat *newmat)
{
  Mat *dummy;

  PetscFunctionBegin;
  PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat, MAT_DO_NOT_GET_VALUES, MAT_INITIAL_MATRIX, &dummy));
  *newmat = *dummy;
  PetscCall(PetscFree(dummy)); /* free only the one-element Mat array, not the matrix it holds */
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Block-diagonal inverse lives entirely in the diagonal part a->A; forward and propagate
   any factorization error flag back to the parallel matrix. */
static PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A, const PetscScalar **values)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatInvertBlockDiagonal(a->A, values));
  A->factorerrortype = a->A->factorerrortype;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Fills both the diagonal (A) and off-diagonal (B) parts with random values and reassembles.
   For an unassembled matrix, B's columns in the diagonal-block range are skipped. */
static PetscErrorCode MatSetRandom_MPIAIJ(Mat x, PetscRandom rctx)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)x->data;

  PetscFunctionBegin;
  PetscCheck(x->assembled || x->preallocated, PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed");
  PetscCall(MatSetRandom(aij->A, rctx));
  if (x->assembled) {
    PetscCall(MatSetRandom(aij->B, rctx));
  } else {
    PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B, x->cmap->rstart, x->cmap->rend, rctx));
  }
  PetscCall(MatAssemblyBegin(x, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(x, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Swaps the increase-overlap implementation between the scalable and default algorithms */
static PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A, PetscBool sc)
{
  PetscFunctionBegin;
  if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
  else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMPIAIJGetNumberNonzeros - gets the number of nonzeros in the matrix on this MPI rank

  Not Collective

  Input Parameter:
. A - the matrix

  Output Parameter:
. nz - the number of nonzeros

  Level: advanced

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`
@*/
PetscErrorCode MatMPIAIJGetNumberNonzeros(Mat A, PetscCount *nz)
{
  Mat_MPIAIJ *maij = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ *aaij = (Mat_SeqAIJ *)maij->A->data, *baij = (Mat_SeqAIJ *)maij->B->data;
  PetscBool   isaij;

  PetscFunctionBegin;
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIAIJ, &isaij));
  PetscCheck(isaij, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)A)->type_name);
  /* i[] is the CSR row-offset array, so i[local rows] is the local nonzero count of each part */
  *nz = aaij->i[A->rmap->n] + baij->i[A->rmap->n];
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap

  Collective

  Input Parameters:
+ A - the matrix
- sc - `PETSC_TRUE` indicates use the scalable algorithm (default is not to use the scalable algorithm)

  Level: advanced

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`
@*/
PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A, PetscBool sc)
{
  PetscFunctionBegin;
  /* PetscTryMethod: silently does nothing for types that do not implement the method */
  PetscTryMethod(A, "MatMPIAIJSetUseScalableIncreaseOverlap_C", (Mat, PetscBool), (A, sc));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Processes MPIAIJ-specific runtime options (currently only -mat_increase_overlap_scalable) */
PetscErrorCode MatSetFromOptions_MPIAIJ(Mat A, PetscOptionItems *PetscOptionsObject)
{
  PetscBool sc = PETSC_FALSE, flg;

  PetscFunctionBegin;
  PetscOptionsHeadBegin(PetscOptionsObject, "MPIAIJ options");
  /* Default the option to the currently installed implementation */
  if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
  PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable", "Use a scalable algorithm to compute the overlap", "MatIncreaseOverlap", sc, &sc, &flg));
  if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, sc));
  PetscOptionsHeadEnd();
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Y = Y + a*I, ensuring the diagonal block has at least an estimated preallocation first */
static PetscErrorCode MatShift_MPIAIJ(Mat Y, PetscScalar a)
{
  Mat_MPIAIJ *maij = (Mat_MPIAIJ *)Y->data;
  Mat_SeqAIJ *aij  = (Mat_SeqAIJ *)maij->A->data;

  PetscFunctionBegin;
  if (!Y->preallocated) {
    PetscCall(MatMPIAIJSetPreallocation(Y, 1, NULL, 0, NULL));
  } else if (!aij->nz) { /* It does not matter if diagonals of Y only partially lie in maij->A. We just need an estimated preallocation. */
    PetscInt nonew = aij->nonew; /* preserve the new-nonzero policy across the re-preallocation */
    PetscCall(MatSeqAIJSetPreallocation(maij->A, 1, NULL));
    aij->nonew = nonew;
  }
  PetscCall(MatShift_Basic(Y, a));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Reports whether a diagonal entry is missing; *d (if requested) is converted to a global row */
static PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A, PetscBool *missing, PetscInt *d)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
  PetscCall(MatMissingDiagonal(a->A, missing, d));
  if (d) {
    PetscInt rstart;
    PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
    *d += rstart; /* local index within the diagonal block -> global row number */
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Variable-size block-diagonal inverse: all blocks live in the diagonal part a->A */
static PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A, PetscInt nblocks, const PetscInt *bsizes, PetscScalar *diag)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatInvertVariableBlockDiagonal(a->A, nblocks, bsizes, diag));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Removes stored zeros from both parts; `keep` applies only to the diagonal block, since
   diagonal coefficients can only appear there */
static PetscErrorCode MatEliminateZeros_MPIAIJ(Mat A, PetscBool keep)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatEliminateZeros_SeqAIJ(a->A, keep)); // possibly keep zero diagonal coefficients
  PetscCall(MatEliminateZeros_SeqAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Dispatch table for MATMPIAIJ; slot numbers in the comments index into struct _MatOps
   (see petsc/private/matimpl.h). NULL slots fall back to MatOps defaults or error. */
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
                                       MatGetRow_MPIAIJ,
                                       MatRestoreRow_MPIAIJ,
                                       MatMult_MPIAIJ,
                                       /* 4*/ MatMultAdd_MPIAIJ,
                                       MatMultTranspose_MPIAIJ,
                                       MatMultTransposeAdd_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*10*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_MPIAIJ,
                                       MatTranspose_MPIAIJ,
                                       /*15*/ MatGetInfo_MPIAIJ,
                                       MatEqual_MPIAIJ,
                                       MatGetDiagonal_MPIAIJ,
                                       MatDiagonalScale_MPIAIJ,
                                       MatNorm_MPIAIJ,
                                       /*20*/ MatAssemblyBegin_MPIAIJ,
                                       MatAssemblyEnd_MPIAIJ,
                                       MatSetOption_MPIAIJ,
                                       MatZeroEntries_MPIAIJ,
                                       /*24*/ MatZeroRows_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*29*/ MatSetUp_MPI_Hash,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIAIJ,
                                       NULL,
                                       /*34*/ MatDuplicate_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*39*/ MatAXPY_MPIAIJ,
                                       MatCreateSubMatrices_MPIAIJ,
                                       MatIncreaseOverlap_MPIAIJ,
                                       MatGetValues_MPIAIJ,
                                       MatCopy_MPIAIJ,
                                       /*44*/ MatGetRowMax_MPIAIJ,
                                       MatScale_MPIAIJ,
                                       MatShift_MPIAIJ,
                                       MatDiagonalSet_MPIAIJ,
                                       MatZeroRowsColumns_MPIAIJ,
                                       /*49*/ MatSetRandom_MPIAIJ,
                                       MatGetRowIJ_MPIAIJ,
                                       MatRestoreRowIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*54*/ MatFDColoringCreate_MPIXAIJ,
                                       NULL,
                                       MatSetUnfactored_MPIAIJ,
                                       MatPermute_MPIAIJ,
                                       NULL,
                                       /*59*/ MatCreateSubMatrix_MPIAIJ,
                                       MatDestroy_MPIAIJ,
                                       MatView_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*64*/ NULL,
                                       MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*69*/ MatGetRowMaxAbs_MPIAIJ,
                                       MatGetRowMinAbs_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*75*/ MatFDColoringApply_AIJ,
                                       MatSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFindZeroDiagonals_MPIAIJ,
                                       /*80*/ NULL,
                                       NULL,
                                       NULL,
                                       /*83*/ MatLoad_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*89*/ NULL,
                                       NULL,
                                       MatMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatBindToCPU_MPIAIJ,
                                       /*99*/ MatProductSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIAIJ,
                                       NULL,
                                       /*104*/ MatSetValuesRow_MPIAIJ,
                                       MatRealPart_MPIAIJ,
                                       MatImaginaryPart_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*109*/ NULL,
                                       NULL,
                                       MatGetRowMin_MPIAIJ,
                                       NULL,
                                       MatMissingDiagonal_MPIAIJ,
                                       /*114*/ MatGetSeqNonzeroStructure_MPIAIJ,
                                       NULL,
                                       MatGetGhosts_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*119*/ MatMultDiagonalBlock_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatGetMultiProcBlock_MPIAIJ,
                                       /*124*/ MatFindNonzeroRows_MPIAIJ,
                                       MatGetColumnReductions_MPIAIJ,
                                       MatInvertBlockDiagonal_MPIAIJ,
                                       MatInvertVariableBlockDiagonal_MPIAIJ,
                                       MatCreateSubMatricesMPI_MPIAIJ,
                                       /*129*/ NULL,
                                       NULL,
                                       NULL,
                                       MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*139*/ MatSetBlockSizes_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFDColoringSetUp_MPIXAIJ,
                                       MatFindOffBlockDiagonalEntries_MPIAIJ,
                                       MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
                                       /*145*/ NULL,
                                       NULL,
                                       NULL,
                                       MatCreateGraph_Simple_AIJ,
                                       NULL,
                                       /*150*/ NULL,
                                       MatEliminateZeros_MPIAIJ,
                                       MatGetRowSumAbs_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*155*/ NULL,
                                       MatCopyHashToXAIJ_MPI_Hash};

/* Saves the current numerical values of both sequential parts (see MatStoreValues()) */
static PetscErrorCode MatStoreValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatStoreValues(aij->A));
  PetscCall(MatStoreValues(aij->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Restores the numerical values previously saved by MatStoreValues_MPIAIJ() */
static PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatRetrieveValues(aij->A));
  PetscCall(MatRetrieveValues(aij->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Preallocates the diagonal part (b->A) and off-diagonal part (b->B), destroying and
   recreating both sequential matrices along with the column map / scatter machinery.
   d_nz/d_nnz and o_nz/o_nnz follow the MatMPIAIJSetPreallocation() conventions. */
PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
  PetscMPIInt size;

  PetscFunctionBegin;
  if (B->hash_active) {
    /* leave hash-based insertion mode: restore the cached ops table */
    B->ops[0]      = b->cops;
    B->hash_active = PETSC_FALSE;
  }
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));

#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&b->colmap));
#else
  PetscCall(PetscFree(b->colmap));
#endif
  PetscCall(PetscFree(b->garray));
  PetscCall(VecDestroy(&b->lvec));
  PetscCall(VecScatterDestroy(&b->Mvctx));

  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));

  /* Recreate the off-diagonal part; on a single rank it has no columns at all */
  MatSeqXAIJGetOptions_Private(b->B);
  PetscCall(MatDestroy(&b->B));
  PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
  PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
  PetscCall(MatSetBlockSizesFromMats(b->B, B, B));
  PetscCall(MatSetType(b->B, MATSEQAIJ));
  MatSeqXAIJRestoreOptions_Private(b->B);

  /* Recreate the diagonal part with the local row/column sizes */
  MatSeqXAIJGetOptions_Private(b->A);
  PetscCall(MatDestroy(&b->A));
  PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
  PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
  PetscCall(MatSetBlockSizesFromMats(b->A, B, B));
  PetscCall(MatSetType(b->A, MATSEQAIJ));
  MatSeqXAIJRestoreOptions_Private(b->A);

  PetscCall(MatSeqAIJSetPreallocation(b->A, d_nz, d_nnz));
  PetscCall(MatSeqAIJSetPreallocation(b->B, o_nz, o_nnz));
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Resets preallocation of both parts, keeping the existing nonzero pattern machinery
   where possible (disassembles first if the matrix was ever assembled) */
static PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B)
{
  Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  if (B->assembled || B->was_assembled) PetscCall(MatDisAssemble_MPIAIJ(B, PETSC_TRUE));
  else {
    /* never assembled: just drop the column map and ghost vector */
#if defined(PETSC_USE_CTABLE)
    PetscCall(PetscHMapIDestroy(&b->colmap));
#else
    PetscCall(PetscFree(b->colmap));
#endif
    PetscCall(PetscFree(b->garray));
    PetscCall(VecDestroy(&b->lvec));
  }
  PetscCall(VecScatterDestroy(&b->Mvctx));

  PetscCall(MatResetPreallocation(b->A));
  PetscCall(MatResetPreallocation(b->B));
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Duplicates an MPIAIJ matrix, including layouts, column map, ghost vector and scatter;
   cpvalues controls whether numerical values are copied (see MatDuplicate()) */
PetscErrorCode MatDuplicate_MPIAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
{
  Mat         mat;
  Mat_MPIAIJ *a, *oldmat = (Mat_MPIAIJ *)matin->data;

  PetscFunctionBegin;
  *newmat = NULL;
  PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
  PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
  PetscCall(MatSetBlockSizesFromMats(mat, matin, matin));
  PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
  a = (Mat_MPIAIJ *)mat->data;

  mat->factortype = matin->factortype;
  mat->assembled  = matin->assembled;
  mat->insertmode = NOT_SET_VALUES;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  /* per-matrix MatGetRow() scratch space is not duplicated */
  a->rowindices   = NULL;
  a->rowvalues    = NULL;
  a->getrowactive = PETSC_FALSE;

  PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
  PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));
  if (matin->hash_active) {
    /* source is still in hash insertion mode; set up the duplicate the same way */
    PetscCall(MatSetUp(mat));
  } else {
    mat->preallocated = matin->preallocated;
    if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
      PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
#else
      PetscCall(PetscMalloc1(mat->cmap->N, &a->colmap));
      PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, mat->cmap->N));
#endif
    } else a->colmap = NULL;
    if (oldmat->garray) {
      PetscInt len;
      len = oldmat->B->cmap->n;
      PetscCall(PetscMalloc1(len + 1, &a->garray));
      if (len) PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
    } else a->garray = NULL;

    /* It may happen MatDuplicate is called with a non-assembled matrix
      In fact, MatDuplicate only requires the matrix to be preallocated
      This may happen inside a DMCreateMatrix_Shell */
    if (oldmat->lvec) PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
    if (oldmat->Mvctx) {
      /* the scatter is shared by reference, not copied */
      a->Mvctx = oldmat->Mvctx;
      PetscCall(PetscObjectReference((PetscObject)oldmat->Mvctx));
    }
    PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
    PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
  }
  PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
  *newmat = mat;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Loads an MPIAIJ matrix from a binary or (if available) HDF5 viewer */
PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer)
{
  PetscBool isbinary, ishdf5;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
  PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
  /* force binary viewer to load .info file if it has not yet done so */
  PetscCall(PetscViewerSetUp(viewer));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
  if (isbinary) {
    PetscCall(MatLoad_MPIAIJ_Binary(newMat, viewer));
  } else if (ishdf5) {
#if defined(PETSC_HAVE_HDF5)
    PetscCall(MatLoad_AIJ_HDF5(newMat, viewer));
#else
    SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
#endif
  } else {
    SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Reads an MPIAIJ matrix from a PETSc binary viewer: header, per-row lengths,
   column indices, then values; finally assembles via MatMPIAIJSetPreallocationCSR() */
PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
{
  PetscInt     header[4], M, N, m, nz, rows, cols, sum, i;
  PetscInt    *rowidxs, *colidxs;
  PetscScalar *matvals;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  /* read in matrix header */
  PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
  PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
  M  = header[1];
  N  = header[2];
  nz = header[3];
  PetscCheck(M >= 0,
PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
  PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
  PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIAIJ");

  /* set block sizes from the viewer's .info file */
  PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
  /* set global sizes if not set already */
  if (mat->rmap->N < 0) mat->rmap->N = M;
  if (mat->cmap->N < 0) mat->cmap->N = N;
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));

  /* check if the matrix sizes are correct */
  PetscCall(MatGetSize(mat, &rows, &cols));
  PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);

  /* read in row lengths and build row indices */
  PetscCall(MatGetLocalSize(mat, &m, NULL));
  PetscCall(PetscMalloc1(m + 1, &rowidxs));
  PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT));
  rowidxs[0] = 0;
  for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i]; /* prefix sum: row lengths -> CSR row offsets */
  if (nz != PETSC_INT_MAX) {
    /* cross-check total nonzeros against the header (skipped when header stores the sentinel) */
    PetscCallMPI(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer)));
    PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);
  }

  /* read in column indices and matrix values */
  PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals));
  PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
  PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
  /* store matrix indices and values */
  PetscCall(MatMPIAIJSetPreallocationCSR(mat, rowidxs, colidxs, matvals));
  PetscCall(PetscFree(rowidxs));
  PetscCall(PetscFree2(colidxs, matvals));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Not scalable because of ISAllGather() unless getting all columns. */
static PetscErrorCode ISGetSeqIS_Private(Mat mat, IS iscol, IS *isseq)
{
  IS          iscol_local;
  PetscBool   isstride;
  PetscMPIInt lisstride = 0, gisstride;

  PetscFunctionBegin;
  /* check if we are grabbing all columns*/
  PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &isstride));

  if (isstride) {
    PetscInt start, len, mstart, mlen;
    PetscCall(ISStrideGetInfo(iscol, &start, NULL));
    PetscCall(ISGetLocalSize(iscol, &len));
    PetscCall(MatGetOwnershipRangeColumn(mat, &mstart, &mlen));
    /* this rank's stride matches its owned column range exactly */
    if (mstart == start && mlen - mstart == len) lisstride = 1;
  }

  /* MPI_MIN: all-columns optimization applies only if every rank agrees */
  PetscCallMPI(MPIU_Allreduce(&lisstride, &gisstride, 1, MPI_INT, MPI_MIN, PetscObjectComm((PetscObject)mat)));
  if (gisstride) {
    PetscInt N;
    PetscCall(MatGetSize(mat, NULL, &N));
    PetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &iscol_local));
    PetscCall(ISSetIdentity(iscol_local));
    PetscCall(PetscInfo(mat, "Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n"));
  } else {
    PetscInt cbs;
    PetscCall(ISGetBlockSize(iscol, &cbs));
    PetscCall(ISAllGather(iscol, &iscol_local));
    PetscCall(ISSetBlockSize(iscol_local, cbs));
  }

  *isseq = iscol_local;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
  (see MatCreateSubMatrix_MPIAIJ_nonscalable)
3170 3171 Input Parameters: 3172 + mat - matrix 3173 . isrow - parallel row index set; its local indices are a subset of local columns of `mat`, 3174 i.e., mat->rstart <= isrow[i] < mat->rend 3175 - iscol - parallel column index set; its local indices are a subset of local columns of `mat`, 3176 i.e., mat->cstart <= iscol[i] < mat->cend 3177 3178 Output Parameters: 3179 + isrow_d - sequential row index set for retrieving mat->A 3180 . iscol_d - sequential column index set for retrieving mat->A 3181 . iscol_o - sequential column index set for retrieving mat->B 3182 - garray - column map; garray[i] indicates global location of iscol_o[i] in `iscol` 3183 */ 3184 static PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat, IS isrow, IS iscol, IS *isrow_d, IS *iscol_d, IS *iscol_o, PetscInt *garray[]) 3185 { 3186 Vec x, cmap; 3187 const PetscInt *is_idx; 3188 PetscScalar *xarray, *cmaparray; 3189 PetscInt ncols, isstart, *idx, m, rstart, *cmap1, count; 3190 Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data; 3191 Mat B = a->B; 3192 Vec lvec = a->lvec, lcmap; 3193 PetscInt i, cstart, cend, Bn = B->cmap->N; 3194 MPI_Comm comm; 3195 VecScatter Mvctx = a->Mvctx; 3196 3197 PetscFunctionBegin; 3198 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3199 PetscCall(ISGetLocalSize(iscol, &ncols)); 3200 3201 /* (1) iscol is a sub-column vector of mat, pad it with '-1.' 
to form a full vector x */ 3202 PetscCall(MatCreateVecs(mat, &x, NULL)); 3203 PetscCall(VecSet(x, -1.0)); 3204 PetscCall(VecDuplicate(x, &cmap)); 3205 PetscCall(VecSet(cmap, -1.0)); 3206 3207 /* Get start indices */ 3208 PetscCallMPI(MPI_Scan(&ncols, &isstart, 1, MPIU_INT, MPI_SUM, comm)); 3209 isstart -= ncols; 3210 PetscCall(MatGetOwnershipRangeColumn(mat, &cstart, &cend)); 3211 3212 PetscCall(ISGetIndices(iscol, &is_idx)); 3213 PetscCall(VecGetArray(x, &xarray)); 3214 PetscCall(VecGetArray(cmap, &cmaparray)); 3215 PetscCall(PetscMalloc1(ncols, &idx)); 3216 for (i = 0; i < ncols; i++) { 3217 xarray[is_idx[i] - cstart] = (PetscScalar)is_idx[i]; 3218 cmaparray[is_idx[i] - cstart] = i + isstart; /* global index of iscol[i] */ 3219 idx[i] = is_idx[i] - cstart; /* local index of iscol[i] */ 3220 } 3221 PetscCall(VecRestoreArray(x, &xarray)); 3222 PetscCall(VecRestoreArray(cmap, &cmaparray)); 3223 PetscCall(ISRestoreIndices(iscol, &is_idx)); 3224 3225 /* Get iscol_d */ 3226 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, iscol_d)); 3227 PetscCall(ISGetBlockSize(iscol, &i)); 3228 PetscCall(ISSetBlockSize(*iscol_d, i)); 3229 3230 /* Get isrow_d */ 3231 PetscCall(ISGetLocalSize(isrow, &m)); 3232 rstart = mat->rmap->rstart; 3233 PetscCall(PetscMalloc1(m, &idx)); 3234 PetscCall(ISGetIndices(isrow, &is_idx)); 3235 for (i = 0; i < m; i++) idx[i] = is_idx[i] - rstart; 3236 PetscCall(ISRestoreIndices(isrow, &is_idx)); 3237 3238 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idx, PETSC_OWN_POINTER, isrow_d)); 3239 PetscCall(ISGetBlockSize(isrow, &i)); 3240 PetscCall(ISSetBlockSize(*isrow_d, i)); 3241 3242 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3243 PetscCall(VecScatterBegin(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD)); 3244 PetscCall(VecScatterEnd(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD)); 3245 3246 PetscCall(VecDuplicate(lvec, &lcmap)); 3247 3248 PetscCall(VecScatterBegin(Mvctx, 
cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD)); 3249 PetscCall(VecScatterEnd(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD)); 3250 3251 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3252 /* off-process column indices */ 3253 count = 0; 3254 PetscCall(PetscMalloc1(Bn, &idx)); 3255 PetscCall(PetscMalloc1(Bn, &cmap1)); 3256 3257 PetscCall(VecGetArray(lvec, &xarray)); 3258 PetscCall(VecGetArray(lcmap, &cmaparray)); 3259 for (i = 0; i < Bn; i++) { 3260 if (PetscRealPart(xarray[i]) > -1.0) { 3261 idx[count] = i; /* local column index in off-diagonal part B */ 3262 cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3263 count++; 3264 } 3265 } 3266 PetscCall(VecRestoreArray(lvec, &xarray)); 3267 PetscCall(VecRestoreArray(lcmap, &cmaparray)); 3268 3269 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_COPY_VALUES, iscol_o)); 3270 /* cannot ensure iscol_o has same blocksize as iscol! */ 3271 3272 PetscCall(PetscFree(idx)); 3273 *garray = cmap1; 3274 3275 PetscCall(VecDestroy(&x)); 3276 PetscCall(VecDestroy(&cmap)); 3277 PetscCall(VecDestroy(&lcmap)); 3278 PetscFunctionReturn(PETSC_SUCCESS); 3279 } 3280 3281 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3282 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *submat) 3283 { 3284 Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data, *asub; 3285 Mat M = NULL; 3286 MPI_Comm comm; 3287 IS iscol_d, isrow_d, iscol_o; 3288 Mat Asub = NULL, Bsub = NULL; 3289 PetscInt n; 3290 3291 PetscFunctionBegin; 3292 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3293 3294 if (call == MAT_REUSE_MATRIX) { 3295 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3296 PetscCall(PetscObjectQuery((PetscObject)*submat, "isrow_d", (PetscObject *)&isrow_d)); 3297 PetscCheck(isrow_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "isrow_d passed in was not used before, cannot 
reuse"); 3298 3299 PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_d", (PetscObject *)&iscol_d)); 3300 PetscCheck(iscol_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_d passed in was not used before, cannot reuse"); 3301 3302 PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_o", (PetscObject *)&iscol_o)); 3303 PetscCheck(iscol_o, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_o passed in was not used before, cannot reuse"); 3304 3305 /* Update diagonal and off-diagonal portions of submat */ 3306 asub = (Mat_MPIAIJ *)(*submat)->data; 3307 PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->A)); 3308 PetscCall(ISGetLocalSize(iscol_o, &n)); 3309 if (n) PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->B)); 3310 PetscCall(MatAssemblyBegin(*submat, MAT_FINAL_ASSEMBLY)); 3311 PetscCall(MatAssemblyEnd(*submat, MAT_FINAL_ASSEMBLY)); 3312 3313 } else { /* call == MAT_INITIAL_MATRIX) */ 3314 PetscInt *garray; 3315 PetscInt BsubN; 3316 3317 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) 
*/ 3318 PetscCall(ISGetSeqIS_SameColDist_Private(mat, isrow, iscol, &isrow_d, &iscol_d, &iscol_o, &garray)); 3319 3320 /* Create local submatrices Asub and Bsub */ 3321 PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Asub)); 3322 PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Bsub)); 3323 3324 /* Create submatrix M */ 3325 PetscCall(MatCreateMPIAIJWithSeqAIJ(comm, Asub, Bsub, garray, &M)); 3326 3327 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3328 asub = (Mat_MPIAIJ *)M->data; 3329 3330 PetscCall(ISGetLocalSize(iscol_o, &BsubN)); 3331 n = asub->B->cmap->N; 3332 if (BsubN > n) { 3333 /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */ 3334 const PetscInt *idx; 3335 PetscInt i, j, *idx_new, *subgarray = asub->garray; 3336 PetscCall(PetscInfo(M, "submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n", n, BsubN)); 3337 3338 PetscCall(PetscMalloc1(n, &idx_new)); 3339 j = 0; 3340 PetscCall(ISGetIndices(iscol_o, &idx)); 3341 for (i = 0; i < n; i++) { 3342 if (j >= BsubN) break; 3343 while (subgarray[i] > garray[j]) j++; 3344 3345 if (subgarray[i] == garray[j]) { 3346 idx_new[i] = idx[j++]; 3347 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT, i, subgarray[i], j, garray[j]); 3348 } 3349 PetscCall(ISRestoreIndices(iscol_o, &idx)); 3350 3351 PetscCall(ISDestroy(&iscol_o)); 3352 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx_new, PETSC_OWN_POINTER, &iscol_o)); 3353 3354 } else if (BsubN < n) { 3355 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")", BsubN, asub->B->cmap->N); 3356 } 3357 3358 PetscCall(PetscFree(garray)); 3359 *submat = M; 3360 3361 /* Save isrow_d, 
iscol_d and iscol_o used in processor for next request */
    PetscCall(PetscObjectCompose((PetscObject)M, "isrow_d", (PetscObject)isrow_d));
    PetscCall(ISDestroy(&isrow_d));

    PetscCall(PetscObjectCompose((PetscObject)M, "iscol_d", (PetscObject)iscol_d));
    PetscCall(ISDestroy(&iscol_d));

    PetscCall(PetscObjectCompose((PetscObject)M, "iscol_o", (PetscObject)iscol_o));
    PetscCall(ISDestroy(&iscol_o));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Top-level submatrix extraction for MATMPIAIJ: dispatches to a specialized fast path when the
   row (and possibly column) index sets follow the same processor distribution as mat, otherwise
   falls back to the nonscalable general implementation. Collective on the communicator of mat. */
PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
{
  IS        iscol_local = NULL, isrow_d;
  PetscInt  csize;
  PetscInt  n, i, j, start, end;
  PetscBool sameRowDist = PETSC_FALSE, sameDist[2], tsameDist[2];
  MPI_Comm  comm;

  PetscFunctionBegin;
  /* If isrow has same processor distribution as mat,
     call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    /* On reuse, the distribution was decided on the initial call; recover it from the
       IS objects composed onto the previously returned submatrix */
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "isrow_d", (PetscObject *)&isrow_d));
    if (isrow_d) {
      sameRowDist  = PETSC_TRUE;
      tsameDist[1] = PETSC_TRUE; /* sameColDist */
    } else {
      PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_local));
      if (iscol_local) {
        sameRowDist  = PETSC_TRUE;
        tsameDist[1] = PETSC_FALSE; /* !sameColDist */
      }
    }
  } else {
    /* Check if isrow has same processor distribution as mat */
    sameDist[0] = PETSC_FALSE;
    PetscCall(ISGetLocalSize(isrow, &n));
    if (!n) {
      sameDist[0] = PETSC_TRUE;
    } else {
      PetscCall(ISGetMinMax(isrow, &i, &j));
      PetscCall(MatGetOwnershipRange(mat, &start, &end));
      if (i >= start && j < end) sameDist[0] = PETSC_TRUE;
    }

    /* Check if iscol has same processor distribution as mat */
    sameDist[1] = PETSC_FALSE;
    PetscCall(ISGetLocalSize(iscol, &n));
    if (!n) {
      sameDist[1] = PETSC_TRUE;
    } else {
      PetscCall(ISGetMinMax(iscol, &i, &j));
      PetscCall(MatGetOwnershipRangeColumn(mat, &start, &end));
      if (i >= start && j < end) sameDist[1] = PETSC_TRUE;
    }

    /* All ranks must agree on the dispatch decision since the specialized routines are collective */
    PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
    PetscCallMPI(MPIU_Allreduce(&sameDist, &tsameDist, 2, MPIU_BOOL, MPI_LAND, comm));
    sameRowDist = tsameDist[0];
  }

  if (sameRowDist) {
    if (tsameDist[1]) { /* sameRowDist & sameColDist */
      /* isrow and iscol have same processor distribution as mat */
      PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat, isrow, iscol, call, newmat));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else { /* sameRowDist */
      /* isrow has same processor distribution as mat */
      if (call == MAT_INITIAL_MATRIX) {
        PetscBool sorted;
        PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
        PetscCall(ISGetLocalSize(iscol_local, &n)); /* local size of iscol_local = global columns of newmat */
        PetscCall(ISGetSize(iscol, &i));
        PetscCheck(n == i, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT, n, i);

        PetscCall(ISSorted(iscol_local, &sorted));
        if (sorted) {
          /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */
          PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, iscol_local, MAT_INITIAL_MATRIX, newmat));
          PetscFunctionReturn(PETSC_SUCCESS);
        }
        /* unsorted iscol_local: fall through to the general path below, reusing iscol_local */
      } else { /* call == MAT_REUSE_MATRIX */
        IS iscol_sub;
        PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
        if (iscol_sub) {
          PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, NULL, call, newmat));
          PetscFunctionReturn(PETSC_SUCCESS);
        }
      }
    }
  }

  /* General case: iscol -> iscol_local which has global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
    PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
  } else {
    if (!iscol_local) PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local));
  }

  PetscCall(ISGetLocalSize(iscol, &csize));
  PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat, isrow, iscol_local, csize, call, newmat));

  if (call == MAT_INITIAL_MATRIX) {
    /* stash iscol_local on the result so a later MAT_REUSE_MATRIX call can recover it */
    PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
    PetscCall(ISDestroy(&iscol_local));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatCreateMPIAIJWithSeqAIJ - creates a `MATMPIAIJ` matrix using `MATSEQAIJ` matrices that contain the "diagonal"
  and "off-diagonal" part of the matrix in CSR format.

  Collective

  Input Parameters:
+ comm   - MPI communicator
. A      - "diagonal" portion of matrix
. B      - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine
- garray - global index of `B` columns

  Output Parameter:
. mat - the matrix, with input `A` as its local diagonal matrix

  Level: advanced

  Notes:
  See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix.

  `A` becomes part of output mat, `B` is destroyed by this routine. The user cannot use `A` and `B` anymore.
  .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateMPIAIJWithSplitArrays()`
@*/
PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm, Mat A, Mat B, const PetscInt garray[], Mat *mat)
{
  Mat_MPIAIJ        *maij;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data, *bnew;
  PetscInt          *oi = b->i, *oj = b->j, i, nz, col;
  const PetscScalar *oa;
  Mat                Bnew;
  PetscInt           m, n, N;
  MatType            mpi_mat_type;

  PetscFunctionBegin;
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatGetSize(A, &m, &n));
  PetscCheck(m == B->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Am %" PetscInt_FMT " != Bm %" PetscInt_FMT, m, B->rmap->N);
  PetscCheck(PetscAbs(A->rmap->bs) == PetscAbs(B->rmap->bs), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT, A->rmap->bs, B->rmap->bs);
  /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
  /* PetscCheck(A->cmap->bs == B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */

  /* Get global columns of mat: sum of the local "diagonal" column counts over all ranks */
  PetscCallMPI(MPIU_Allreduce(&n, &N, 1, MPIU_INT, MPI_SUM, comm));

  PetscCall(MatSetSizes(*mat, m, n, PETSC_DECIDE, N));
  /* Determine the type of MPI matrix that should be created from the type of matrix A, which holds the "diagonal" portion. */
  PetscCall(MatGetMPIMatType_Private(A, &mpi_mat_type));
  PetscCall(MatSetType(*mat, mpi_mat_type));

  if (A->rmap->bs > 1 || A->cmap->bs > 1) PetscCall(MatSetBlockSizes(*mat, A->rmap->bs, A->cmap->bs));
  maij = (Mat_MPIAIJ *)(*mat)->data;

  (*mat)->preallocated = PETSC_TRUE;

  PetscCall(PetscLayoutSetUp((*mat)->rmap));
  PetscCall(PetscLayoutSetUp((*mat)->cmap));

  /* Set A as diagonal portion of *mat; ownership of A transfers to *mat */
  maij->A = A;

  /* Translate B's local column indices to global indices in place, using garray */
  nz = oi[m];
  for (i = 0; i < nz; i++) {
    col   = oj[i];
    oj[i] = garray[col];
  }

  /* Set Bnew as off-diagonal portion of *mat; Bnew aliases B's i/j/a arrays rather than copying them */
  PetscCall(MatSeqAIJGetArrayRead(B, &oa));
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, N, oi, oj, (PetscScalar *)oa, &Bnew));
  PetscCall(MatSeqAIJRestoreArrayRead(B, &oa));
  bnew        = (Mat_SeqAIJ *)Bnew->data;
  bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
  maij->B     = Bnew;

  PetscCheck(B->rmap->N == Bnew->rmap->N, PETSC_COMM_SELF, PETSC_ERR_PLIB, "BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT, B->rmap->N, Bnew->rmap->N);

  /* B's arrays now belong to Bnew: stop B from freeing them, then destroy the shell of B */
  b->free_a  = PETSC_FALSE;
  b->free_ij = PETSC_FALSE;
  PetscCall(MatDestroy(&B));

  bnew->free_a  = PETSC_TRUE;
  bnew->free_ij = PETSC_TRUE;

  /* condense columns of maij->B */
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
  PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat, PetscInt, const IS[], const IS[], MatReuse, PetscBool, Mat *);

PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat, IS isrow, IS iscol, IS iscol_local, MatReuse call, Mat *newmat)
{
  PetscInt        i, m, n, rstart, row, rend, nz, j, bs, cbs;
  PetscInt       *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ *)mat->data;
  Mat             M, Msub, B = a->B;
  MatScalar      *aa;
  Mat_SeqAIJ     *aij;
  PetscInt       *garray = a->garray, *colsub, Ncols;
  PetscInt        count, Bn = B->cmap->N, cstart = mat->cmap->rstart, cend = mat->cmap->rend;
  IS              iscol_sub, iscmap;
  const PetscInt *is_idx, *cmap;
  PetscBool       allcolumns = PETSC_FALSE;
  MPI_Comm        comm;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  if (call == MAT_REUSE_MATRIX) {
    /* Recover the work objects composed onto *newmat by the MAT_INITIAL_MATRIX call */
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
    PetscCheck(iscol_sub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "SubIScol passed in was not used before, cannot reuse");
    PetscCall(ISGetLocalSize(iscol_sub, &count));

    PetscCall(PetscObjectQuery((PetscObject)*newmat, "Subcmap", (PetscObject *)&iscmap));
    PetscCheck(iscmap, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Subcmap passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Msub));
    PetscCheck(Msub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");

    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_REUSE_MATRIX, PETSC_FALSE, &Msub));

  } else { /* call == MAT_INITIAL_MATRIX) */
    PetscBool flg;

    PetscCall(ISGetLocalSize(iscol, &n));
    PetscCall(ISGetSize(iscol, &Ncols));

    /* (1) iscol -> nonscalable iscol_local */
    /* Check for special case: each processor gets entire matrix columns */
    PetscCall(ISIdentity(iscol_local, &flg));
    if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
    PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (allcolumns) {
      iscol_sub = iscol_local;
      PetscCall(PetscObjectReference((PetscObject)iscol_local));
      PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &iscmap));

    } else {
      /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
      PetscInt *idx, *cmap1, k;
      PetscCall(PetscMalloc1(Ncols, &idx));
      PetscCall(PetscMalloc1(Ncols, &cmap1));
      PetscCall(ISGetIndices(iscol_local, &is_idx));
      count = 0;
      k     = 0;
      /* single sweep: k advances monotonically through garray because is_idx is sorted */
      for (i = 0; i < Ncols; i++) {
        j = is_idx[i];
        if (j >= cstart && j < cend) {
          /* diagonal part of mat */
          idx[count]     = j;
          cmap1[count++] = i; /* column index in submat */
        } else if (Bn) {
          /* off-diagonal part of mat */
          if (j == garray[k]) {
            idx[count]     = j;
            cmap1[count++] = i; /* column index in submat */
          } else if (j > garray[k]) {
            while (j > garray[k] && k < Bn - 1) k++;
            if (j == garray[k]) {
              idx[count]     = j;
              cmap1[count++] = i; /* column index in submat */
            }
          }
        }
      }
      PetscCall(ISRestoreIndices(iscol_local, &is_idx));

      PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_OWN_POINTER, &iscol_sub));
      PetscCall(ISGetBlockSize(iscol, &cbs));
      PetscCall(ISSetBlockSize(iscol_sub, cbs));

      PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local), count, cmap1, PETSC_OWN_POINTER, &iscmap));
    }

    /* (3) Create sequential Msub */
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_INITIAL_MATRIX, allcolumns, &Msub));
  }

  PetscCall(ISGetLocalSize(iscol_sub, &count));
  aij = (Mat_SeqAIJ *)Msub->data;
  ii  = aij->i;
  PetscCall(ISGetIndices(iscmap, &cmap));

  /*
      m - number of local rows
      Ncols - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetSize(Msub, &m, NULL));

  if (call == MAT_INITIAL_MATRIX) {
    /* (4) Create parallel newmat */
    PetscMPIInt rank, size;
    PetscInt    csize;

    PetscCallMPI(MPI_Comm_size(comm, &size));
    PetscCallMPI(MPI_Comm_rank(comm, &rank));

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    PetscCall(ISGetLocalSize(iscol, &csize));
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow, &mglobal));
      if (mglobal == Ncols) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = Ncols / size + ((Ncols % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
    rstart = rend - nlocal;
    PetscCheck(rank != size - 1 || rend == Ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, Ncols);

    /* next, compute all the lengths */
    jj = aij->j;
    PetscCall(PetscMalloc1(2 * m + 1, &dlens));
    olens = dlens + m; /* olens shares the single allocation with dlens */
    for (i = 0; i < m; i++) {
      jend = ii[i + 1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j = 0; j < jend; j++) {
        if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }

    PetscCall(ISGetBlockSize(isrow, &bs));
    PetscCall(ISGetBlockSize(iscol, &cbs));

    PetscCall(MatCreate(comm, &M));
    PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, Ncols));
    PetscCall(MatSetBlockSizes(M, bs, cbs));
    PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
    PetscCall(PetscFree(dlens));

  } else { /* call == MAT_REUSE_MATRIX */
    M = *newmat;
    PetscCall(MatGetLocalSize(M, &i, NULL));
    PetscCheck(i == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }

  /* (5) Set values of Msub to *newmat */
  PetscCall(PetscMalloc1(count, &colsub));
  PetscCall(MatGetOwnershipRange(M, &rstart, NULL));

  jj = aij->j;
  PetscCall(MatSeqAIJGetArrayRead(Msub, (const PetscScalar **)&aa));
  for (i = 0; i < m; i++) {
    row = rstart + i;
    nz  = ii[i + 1] - ii[i];
    /* map Msub's local column indices back to submatrix-global columns via cmap */
    for (j = 0; j < nz; j++) colsub[j] = cmap[jj[j]];
    PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, colsub, aa, INSERT_VALUES));
    jj += nz;
    aa += nz;
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Msub, (const PetscScalar **)&aa));
  PetscCall(ISRestoreIndices(iscmap, &cmap));

  PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));

  PetscCall(PetscFree(colsub));

  /* save Msub, iscol_sub and iscmap used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    *newmat = M;
    PetscCall(PetscObjectCompose((PetscObject)*newmat, "SubMatrix", (PetscObject)Msub));
    PetscCall(MatDestroy(&Msub));

    PetscCall(PetscObjectCompose((PetscObject)*newmat, "SubIScol", (PetscObject)iscol_sub));
    PetscCall(ISDestroy(&iscol_sub));

    PetscCall(PetscObjectCompose((PetscObject)*newmat, "Subcmap", (PetscObject)iscmap));
    PetscCall(ISDestroy(&iscmap));

    if (iscol_local) {
      PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
      PetscCall(ISDestroy(&iscol_local));
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Not great since it makes two copies of the submatrix, first an SeqAIJ
  in local and then by concatenating the local matrices the end result.
  Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()

  This requires a sequential iscol with all indices.
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat)
{
  PetscMPIInt rank, size;
  PetscInt    i, m, n, rstart, row, rend, nz, *cwork, j, bs, cbs;
  PetscInt   *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
  Mat         M, Mreuse;
  MatScalar  *aa, *vwork;
  MPI_Comm    comm;
  Mat_SeqAIJ *aij;
  PetscBool   colflag, allcolumns = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));

  /* Check for special case: each processor gets entire matrix columns */
  PetscCall(ISIdentity(iscol, &colflag));
  PetscCall(ISGetLocalSize(iscol, &n));
  if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
  /* collective agreement on the allcolumns shortcut */
  PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));

  if (call == MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse));
    PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_REUSE_MATRIX, allcolumns, &Mreuse));
  } else {
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_INITIAL_MATRIX, allcolumns, &Mreuse));
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetSize(Mreuse, &m, &n));
  PetscCall(MatGetBlockSizes(Mreuse, &bs, &cbs));
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ *)Mreuse->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow, &mglobal));
      if (mglobal == n) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = n / size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
    rstart = rend - nlocal;
    PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n);

    /* next, compute all the lengths */
    PetscCall(PetscMalloc1(2 * m + 1, &dlens));
    olens = dlens + m; /* olens shares the single allocation with dlens */
    for (i = 0; i < m; i++) {
      jend = ii[i + 1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j = 0; j < jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    PetscCall(MatCreate(comm, &M));
    PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, n));
    PetscCall(MatSetBlockSizes(M, bs, cbs));
    PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
    PetscCall(PetscFree(dlens));
  } else {
    PetscInt ml, nl;

    M = *newmat;
    PetscCall(MatGetLocalSize(M, &ml, &nl));
    PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  PetscCall(MatGetOwnershipRange(M, &rstart, &rend));
  aij = (Mat_SeqAIJ *)Mreuse->data;
  ii  = aij->i;
  jj  = aij->j;

  /* trigger copy to CPU if needed */
  PetscCall(MatSeqAIJGetArrayRead(Mreuse, (const PetscScalar **)&aa));
  for (i = 0; i < m; i++) {
    row   = rstart + i;
    nz    = ii[i + 1] - ii[i];
    cwork = jj;
    jj    = PetscSafePointerPlusOffset(jj, nz);
    vwork = aa;
    aa    = PetscSafePointerPlusOffset(aa, nz);
    PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES));
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Mreuse, (const PetscScalar **)&aa));

  PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse));
    PetscCall(MatDestroy(&Mreuse));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Preallocates B from local CSR data (Ii, J, v), inserts the values, assembles, and records
   in Aij->ld the per-row count of off-diagonal entries that lie left of the diagonal block. */
static PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[])
{
  PetscInt        m, cstart, cend, j, nnz, i, d, *ld;
  PetscInt       *d_nnz, *o_nnz, nnz_max = 0, rstart, ii, irstart;
  const PetscInt *JJ;
  PetscBool       nooffprocentries;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)B->data;

  PetscFunctionBegin;
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  m       = B->rmap->n;
  cstart  = B->cmap->rstart;
  cend    = B->cmap->rend;
  rstart  = B->rmap->rstart;
  irstart = Ii[0]; /* Ii may be globally offset; all accesses below subtract this base */

  PetscCall(PetscCalloc2(m, &d_nnz, m, &o_nnz));

  if (PetscDefined(USE_DEBUG)) {
    for (i = 0; i < m; i++) {
      nnz = Ii[i + 1] - Ii[i];
      JJ  = PetscSafePointerPlusOffset(J, Ii[i] - irstart);
      PetscCheck(nnz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns", i, nnz);
      PetscCheck(!nnz || !(JJ[0] < 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT, i, JJ[0]);
      PetscCheck(!nnz || !(JJ[nnz - 1] >= B->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")", i, JJ[nnz - 1], B->cmap->N);
    }
  }

  /* count diagonal-block (d) and off-diagonal (nnz - d) entries per row for preallocation */
  for (i = 0; i < m; i++) {
    nnz     = Ii[i + 1] - Ii[i];
    JJ      = PetscSafePointerPlusOffset(J, Ii[i] - irstart);
    nnz_max = PetscMax(nnz_max, nnz);
    d       = 0;
    for (j = 0; j < nnz; j++) {
      if (cstart <= JJ[j] && JJ[j] < cend) d++;
    }
    d_nnz[i] = d;
    o_nnz[i] = nnz - d;
  }
  PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
  PetscCall(PetscFree2(d_nnz, o_nnz));

  for (i = 0; i < m; i++) {
    ii = i + rstart;
    PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, Ii[i + 1] - Ii[i], PetscSafePointerPlusOffset(J, Ii[i] - irstart), PetscSafePointerPlusOffset(v, Ii[i] - irstart), INSERT_VALUES));
  }
  /* all entries are local by construction, so suppress the off-process stash exchange */
  nooffprocentries    = B->nooffprocentries;
  B->nooffprocentries = PETSC_TRUE;
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  B->nooffprocentries = nooffprocentries;

  /* count number of entries below block diagonal */
  PetscCall(PetscFree(Aij->ld));
  PetscCall(PetscCalloc1(m, &ld));
  Aij->ld = ld;
  for (i = 0; i < m; i++) {
    nnz = Ii[i + 1] - Ii[i];
    j   = 0;
    /* NOTE(review): J[j] is read before the `if (J)` guard below; this assumes J is non-NULL
       whenever some row has nnz > 0 (i.e. J may only be NULL for an all-empty matrix) — confirm */
    while (j < nnz && J[j] < cstart) j++;
    ld[i] = j;
    if (J) J += nnz;
  }

  PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse
parallel matrix in `MATAIJ` format 3984 (the default parallel PETSc format). 3985 3986 Collective 3987 3988 Input Parameters: 3989 + B - the matrix 3990 . i - the indices into `j` for the start of each local row (indices start with zero) 3991 . j - the column indices for each local row (indices start with zero) 3992 - v - optional values in the matrix 3993 3994 Level: developer 3995 3996 Notes: 3997 The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc; 3998 thus you CANNOT change the matrix entries by changing the values of `v` after you have 3999 called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays. 4000 4001 The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array. 4002 4003 A convenience routine for this functionality is `MatCreateMPIAIJWithArrays()`. 4004 4005 You can update the matrix with new numerical values using `MatUpdateMPIAIJWithArrays()` after this call if the column indices in `j` are sorted. 4006 4007 If you do **not** use `MatUpdateMPIAIJWithArrays()`, the column indices in `j` do not need to be sorted. If you will use 4008 `MatUpdateMPIAIJWithArrays()`, the column indices **must** be sorted. 4009 4010 The format which is used for the sparse matrix input, is equivalent to a 4011 row-major ordering.. 
i.e for the following matrix, the input data expected is 4012 as shown 4013 .vb 4014 1 0 0 4015 2 0 3 P0 4016 ------- 4017 4 5 6 P1 4018 4019 Process0 [P0] rows_owned=[0,1] 4020 i = {0,1,3} [size = nrow+1 = 2+1] 4021 j = {0,0,2} [size = 3] 4022 v = {1,2,3} [size = 3] 4023 4024 Process1 [P1] rows_owned=[2] 4025 i = {0,3} [size = nrow+1 = 1+1] 4026 j = {0,1,2} [size = 3] 4027 v = {4,5,6} [size = 3] 4028 .ve 4029 4030 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`, 4031 `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()`, `MatCreateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4032 @*/ 4033 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B, const PetscInt i[], const PetscInt j[], const PetscScalar v[]) 4034 { 4035 PetscFunctionBegin; 4036 PetscTryMethod(B, "MatMPIAIJSetPreallocationCSR_C", (Mat, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, i, j, v)); 4037 PetscFunctionReturn(PETSC_SUCCESS); 4038 } 4039 4040 /*@ 4041 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in `MATMPIAIJ` format 4042 (the default parallel PETSc format). For good matrix assembly performance 4043 the user should preallocate the matrix storage by setting the parameters 4044 `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). 4045 4046 Collective 4047 4048 Input Parameters: 4049 + B - the matrix 4050 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4051 (same value is used for all local rows) 4052 . d_nnz - array containing the number of nonzeros in the various rows of the 4053 DIAGONAL portion of the local submatrix (possibly different for each row) 4054 or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `d_nz` is used to specify the nonzero structure. 4055 The size of this array is equal to the number of local rows, i.e 'm'. 
          For matrices that will be factored, you must leave room for (and set)
          the diagonal entry even if it is zero.
. o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local
         submatrix (same value is used for all local rows).
- o_nnz - array containing the number of nonzeros in the various rows of the
          OFF-DIAGONAL portion of the local submatrix (possibly different for
          each row) or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `o_nz` is used to specify the nonzero
          structure. The size of this array is equal to the number
          of local rows, i.e 'm'.

  Example Usage:
  Consider the following 8x8 matrix with 34 non-zero values, that is
  assembled across 3 processors. Lets assume that proc0 owns 3 rows,
  proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
  as follows

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

  This can be represented as a collection of submatrices as
.vb
      A B C
      D E F
      G H I
.ve

  Where the submatrices A,B,C are owned by proc0, D,E,F are
  owned by proc1, G,H,I are owned by proc2.

  The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
  The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
  The 'M','N' parameters are 8,8, and have the same values on all procs.

  The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
  submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
  corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
  Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
  part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ`
  matrix, and [DF] as another `MATSEQAIJ` matrix.

  When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
  allocated for every row of the local DIAGONAL submatrix, and `o_nz`
  storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
  One way to choose `d_nz` and `o_nz` is to use the maximum number of nonzeros over
  the local rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
  In this case, the values of `d_nz`, `o_nz` are
.vb
   proc0  dnz = 2, o_nz = 2
   proc1  dnz = 3, o_nz = 2
   proc2  dnz = 1, o_nz = 4
.ve
  We are allocating `m`*(`d_nz`+`o_nz`) storage locations for every proc. This
  translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
  for proc2. i.e we are using 12+15+10=37 storage locations to store
  34 values.

  When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
  for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
  In the above case the values for `d_nnz`, `o_nnz` are
.vb
   proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2]
   proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1]
   proc2 d_nnz = [1,1]   and o_nnz = [4,4]
.ve
  Here the space allocated is sum of all the above values i.e 34, and
  hence pre-allocation is perfect.

  Level: intermediate

  Notes:
  If the *_nnz parameter is given then the *_nz parameter is ignored

  The `MATAIJ` format, also called compressed row storage (CSR), is compatible with standard Fortran
  storage.  The stored row and column indices begin with zero.
  See [Sparse Matrices](sec_matsparse) for details.

  The parallel matrix is partitioned such that the first m0 rows belong to
  process 0, the next m1 rows belong to process 1, the next m2 rows belong
  to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.

  The DIAGONAL portion of the local submatrix of a processor can be defined
  as the submatrix which is obtained by extraction the part corresponding to
  the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
  first row that belongs to the processor, r2 is the last row belonging to
  the this processor, and c1-c2 is range of indices of the local part of a
  vector suitable for applying the matrix to. This is an mxn matrix. In the
  common case of a square matrix, the row and column ranges are the same and
  the DIAGONAL part is also square. The remaining portion of the local
  submatrix (mxN) constitute the OFF-DIAGONAL portion.

  If `o_nnz` and `d_nnz` are specified, then `o_nz` and `d_nz` are ignored.

  You can call `MatGetInfo()` to get information on how effective the preallocation was;
  for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
  You can also run with the option `-info` and look for messages with the string
  malloc in them to see if additional memory allocation was needed.

.seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MATMPIAIJ`, `MATAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`,
          `MatGetInfo()`, `PetscSplitOwnership()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
@*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidType(B, 1);
  /* dispatch to the type-specific implementation; silently does nothing for other Mat types */
  PetscTryMethod(B, "MatMPIAIJSetPreallocation_C", (Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, d_nz, d_nnz, o_nz, o_nnz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatCreateMPIAIJWithArrays - creates a `MATMPIAIJ` matrix using arrays that contain in standard
  CSR format for the local rows.
4178 4179 Collective 4180 4181 Input Parameters: 4182 + comm - MPI communicator 4183 . m - number of local rows (Cannot be `PETSC_DECIDE`) 4184 . n - This value should be the same as the local size used in creating the 4185 x vector for the matrix-vector product $ y = Ax$. (or `PETSC_DECIDE` to have 4186 calculated if `N` is given) For square matrices n is almost always `m`. 4187 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given) 4188 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given) 4189 . i - row indices (of length m+1); that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 4190 . j - global column indices 4191 - a - optional matrix values 4192 4193 Output Parameter: 4194 . mat - the matrix 4195 4196 Level: intermediate 4197 4198 Notes: 4199 The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc; 4200 thus you CANNOT change the matrix entries by changing the values of `a[]` after you have 4201 called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays. 4202 4203 The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array. 4204 4205 Once you have created the matrix you can update it with new numerical values using `MatUpdateMPIAIJWithArray()` 4206 4207 If you do **not** use `MatUpdateMPIAIJWithArray()`, the column indices in `j` do not need to be sorted. If you will use 4208 `MatUpdateMPIAIJWithArrays()`, the column indices **must** be sorted. 
4209 4210 The format which is used for the sparse matrix input, is equivalent to a 4211 row-major ordering, i.e., for the following matrix, the input data expected is 4212 as shown 4213 .vb 4214 1 0 0 4215 2 0 3 P0 4216 ------- 4217 4 5 6 P1 4218 4219 Process0 [P0] rows_owned=[0,1] 4220 i = {0,1,3} [size = nrow+1 = 2+1] 4221 j = {0,0,2} [size = 3] 4222 v = {1,2,3} [size = 3] 4223 4224 Process1 [P1] rows_owned=[2] 4225 i = {0,3} [size = nrow+1 = 1+1] 4226 j = {0,1,2} [size = 3] 4227 v = {4,5,6} [size = 3] 4228 .ve 4229 4230 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4231 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4232 @*/ 4233 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat) 4234 { 4235 PetscFunctionBegin; 4236 PetscCheck(!i || !i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 4237 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4238 PetscCall(MatCreate(comm, mat)); 4239 PetscCall(MatSetSizes(*mat, m, n, M, N)); 4240 /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */ 4241 PetscCall(MatSetType(*mat, MATMPIAIJ)); 4242 PetscCall(MatMPIAIJSetPreallocationCSR(*mat, i, j, a)); 4243 PetscFunctionReturn(PETSC_SUCCESS); 4244 } 4245 4246 /*@ 4247 MatUpdateMPIAIJWithArrays - updates a `MATMPIAIJ` matrix using arrays that contain in standard 4248 CSR format for the local rows. 
Only the numerical values are updated the other arrays must be identical to what was passed 4249 from `MatCreateMPIAIJWithArrays()` 4250 4251 Deprecated: Use `MatUpdateMPIAIJWithArray()` 4252 4253 Collective 4254 4255 Input Parameters: 4256 + mat - the matrix 4257 . m - number of local rows (Cannot be `PETSC_DECIDE`) 4258 . n - This value should be the same as the local size used in creating the 4259 x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have 4260 calculated if N is given) For square matrices n is almost always m. 4261 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 4262 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 4263 . Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix 4264 . J - column indices 4265 - v - matrix values 4266 4267 Level: deprecated 4268 4269 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4270 `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4271 @*/ 4272 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[]) 4273 { 4274 PetscInt nnz, i; 4275 PetscBool nooffprocentries; 4276 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)mat->data; 4277 Mat_SeqAIJ *Ad = (Mat_SeqAIJ *)Aij->A->data; 4278 PetscScalar *ad, *ao; 4279 PetscInt ldi, Iii, md; 4280 const PetscInt *Adi = Ad->i; 4281 PetscInt *ld = Aij->ld; 4282 4283 PetscFunctionBegin; 4284 PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 4285 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4286 PetscCheck(m == mat->rmap->n, 
PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()"); 4287 PetscCheck(n == mat->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()"); 4288 4289 PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad)); 4290 PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao)); 4291 4292 for (i = 0; i < m; i++) { 4293 if (PetscDefined(USE_DEBUG)) { 4294 for (PetscInt j = Ii[i] + 1; j < Ii[i + 1]; ++j) { 4295 PetscCheck(J[j] >= J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is not sorted", j - Ii[i], J[j], i); 4296 PetscCheck(J[j] != J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is identical to previous entry", j - Ii[i], J[j], i); 4297 } 4298 } 4299 nnz = Ii[i + 1] - Ii[i]; 4300 Iii = Ii[i]; 4301 ldi = ld[i]; 4302 md = Adi[i + 1] - Adi[i]; 4303 PetscCall(PetscArraycpy(ao, v + Iii, ldi)); 4304 PetscCall(PetscArraycpy(ad, v + Iii + ldi, md)); 4305 PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md)); 4306 ad += md; 4307 ao += nnz - md; 4308 } 4309 nooffprocentries = mat->nooffprocentries; 4310 mat->nooffprocentries = PETSC_TRUE; 4311 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad)); 4312 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao)); 4313 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A)); 4314 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B)); 4315 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 4316 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 4317 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 4318 mat->nooffprocentries = nooffprocentries; 4319 PetscFunctionReturn(PETSC_SUCCESS); 4320 } 4321 4322 /*@ 4323 MatUpdateMPIAIJWithArray - updates an `MATMPIAIJ` matrix using an array that contains the 
nonzero values 4324 4325 Collective 4326 4327 Input Parameters: 4328 + mat - the matrix 4329 - v - matrix values, stored by row 4330 4331 Level: intermediate 4332 4333 Notes: 4334 The matrix must have been obtained with `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` 4335 4336 The column indices in the call to `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` must have been sorted for this call to work correctly 4337 4338 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4339 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4340 @*/ 4341 PetscErrorCode MatUpdateMPIAIJWithArray(Mat mat, const PetscScalar v[]) 4342 { 4343 PetscInt nnz, i, m; 4344 PetscBool nooffprocentries; 4345 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)mat->data; 4346 Mat_SeqAIJ *Ad = (Mat_SeqAIJ *)Aij->A->data; 4347 Mat_SeqAIJ *Ao = (Mat_SeqAIJ *)Aij->B->data; 4348 PetscScalar *ad, *ao; 4349 const PetscInt *Adi = Ad->i, *Adj = Ao->i; 4350 PetscInt ldi, Iii, md; 4351 PetscInt *ld = Aij->ld; 4352 4353 PetscFunctionBegin; 4354 m = mat->rmap->n; 4355 4356 PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad)); 4357 PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao)); 4358 Iii = 0; 4359 for (i = 0; i < m; i++) { 4360 nnz = Adi[i + 1] - Adi[i] + Adj[i + 1] - Adj[i]; 4361 ldi = ld[i]; 4362 md = Adi[i + 1] - Adi[i]; 4363 PetscCall(PetscArraycpy(ad, v + Iii + ldi, md)); 4364 ad += md; 4365 if (ao) { 4366 PetscCall(PetscArraycpy(ao, v + Iii, ldi)); 4367 PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md)); 4368 ao += nnz - md; 4369 } 4370 Iii += nnz; 4371 } 4372 nooffprocentries = mat->nooffprocentries; 4373 mat->nooffprocentries = PETSC_TRUE; 4374 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad)); 4375 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao)); 4376 
PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A)); 4377 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B)); 4378 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 4379 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 4380 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 4381 mat->nooffprocentries = nooffprocentries; 4382 PetscFunctionReturn(PETSC_SUCCESS); 4383 } 4384 4385 /*@ 4386 MatCreateAIJ - Creates a sparse parallel matrix in `MATAIJ` format 4387 (the default parallel PETSc format). For good matrix assembly performance 4388 the user should preallocate the matrix storage by setting the parameters 4389 `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). 4390 4391 Collective 4392 4393 Input Parameters: 4394 + comm - MPI communicator 4395 . m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given) 4396 This value should be the same as the local size used in creating the 4397 y vector for the matrix-vector product y = Ax. 4398 . n - This value should be the same as the local size used in creating the 4399 x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have 4400 calculated if N is given) For square matrices n is almost always m. 4401 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 4402 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 4403 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4404 (same value is used for all local rows) 4405 . d_nnz - array containing the number of nonzeros in the various rows of the 4406 DIAGONAL portion of the local submatrix (possibly different for each row) 4407 or `NULL`, if `d_nz` is used to specify the nonzero structure. 4408 The size of this array is equal to the number of local rows, i.e 'm'. 4409 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4410 submatrix (same value is used for all local rows). 
4411 - o_nnz - array containing the number of nonzeros in the various rows of the 4412 OFF-DIAGONAL portion of the local submatrix (possibly different for 4413 each row) or `NULL`, if `o_nz` is used to specify the nonzero 4414 structure. The size of this array is equal to the number 4415 of local rows, i.e 'm'. 4416 4417 Output Parameter: 4418 . A - the matrix 4419 4420 Options Database Keys: 4421 + -mat_no_inode - Do not use inodes 4422 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4423 - -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in `MatMult()` of sparse parallel matrices. 4424 See viewer types in manual of `MatView()`. Of them, ascii_matlab, draw or binary cause the `VecScatter` 4425 to be viewed as a matrix. Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one `MatMult()` call. 4426 4427 Level: intermediate 4428 4429 Notes: 4430 It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, 4431 MatXXXXSetPreallocation() paradigm instead of this routine directly. 4432 [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`] 4433 4434 If the *_nnz parameter is given then the *_nz parameter is ignored 4435 4436 The `m`,`n`,`M`,`N` parameters specify the size of the matrix, and its partitioning across 4437 processors, while `d_nz`,`d_nnz`,`o_nz`,`o_nnz` parameters specify the approximate 4438 storage requirements for this matrix. 4439 4440 If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one 4441 processor than it must be used on all processors that share the object for 4442 that argument. 4443 4444 If `m` and `n` are not `PETSC_DECIDE`, then the values determine the `PetscLayout` of the matrix and the ranges returned by 4445 `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, and `MatGetOwnershipRangesColumn()`. 
4446 4447 The user MUST specify either the local or global matrix dimensions 4448 (possibly both). 4449 4450 The parallel matrix is partitioned across processors such that the 4451 first `m0` rows belong to process 0, the next `m1` rows belong to 4452 process 1, the next `m2` rows belong to process 2, etc., where 4453 `m0`, `m1`, `m2`... are the input parameter `m` on each MPI process. I.e., each MPI process stores 4454 values corresponding to [m x N] submatrix. 4455 4456 The columns are logically partitioned with the n0 columns belonging 4457 to 0th partition, the next n1 columns belonging to the next 4458 partition etc.. where n0,n1,n2... are the input parameter 'n'. 4459 4460 The DIAGONAL portion of the local submatrix on any given processor 4461 is the submatrix corresponding to the rows and columns m,n 4462 corresponding to the given processor. i.e diagonal matrix on 4463 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4464 etc. The remaining portion of the local submatrix [m x (N-n)] 4465 constitute the OFF-DIAGONAL portion. The example below better 4466 illustrates this concept. The two matrices, the DIAGONAL portion and 4467 the OFF-DIAGONAL portion are each stored as `MATSEQAIJ` matrices. 4468 4469 For a square global matrix we define each processor's diagonal portion 4470 to be its local rows and the corresponding columns (a square submatrix); 4471 each processor's off-diagonal portion encompasses the remainder of the 4472 local matrix (a rectangular submatrix). 4473 4474 If `o_nnz`, `d_nnz` are specified, then `o_nz`, and `d_nz` are ignored. 4475 4476 When calling this routine with a single process communicator, a matrix of 4477 type `MATSEQAIJ` is returned. 
If a matrix of type `MATMPIAIJ` is desired for this 4478 type of communicator, use the construction mechanism 4479 .vb 4480 MatCreate(..., &A); 4481 MatSetType(A, MATMPIAIJ); 4482 MatSetSizes(A, m, n, M, N); 4483 MatMPIAIJSetPreallocation(A, ...); 4484 .ve 4485 4486 By default, this format uses inodes (identical nodes) when possible. 4487 We search for consecutive rows with the same nonzero structure, thereby 4488 reusing matrix information to achieve increased efficiency. 4489 4490 Example Usage: 4491 Consider the following 8x8 matrix with 34 non-zero values, that is 4492 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4493 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4494 as follows 4495 4496 .vb 4497 1 2 0 | 0 3 0 | 0 4 4498 Proc0 0 5 6 | 7 0 0 | 8 0 4499 9 0 10 | 11 0 0 | 12 0 4500 ------------------------------------- 4501 13 0 14 | 15 16 17 | 0 0 4502 Proc1 0 18 0 | 19 20 21 | 0 0 4503 0 0 0 | 22 23 0 | 24 0 4504 ------------------------------------- 4505 Proc2 25 26 27 | 0 0 28 | 29 0 4506 30 0 0 | 31 32 33 | 0 34 4507 .ve 4508 4509 This can be represented as a collection of submatrices as 4510 4511 .vb 4512 A B C 4513 D E F 4514 G H I 4515 .ve 4516 4517 Where the submatrices A,B,C are owned by proc0, D,E,F are 4518 owned by proc1, G,H,I are owned by proc2. 4519 4520 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4521 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4522 The 'M','N' parameters are 8,8, and have the same values on all procs. 4523 4524 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4525 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4526 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4527 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4528 part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ` 4529 matrix, and [DF] as another SeqAIJ matrix. 

   When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
   allocated for every row of the local DIAGONAL submatrix, and `o_nz`
   storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
   One way to choose `d_nz` and `o_nz` is to use the maximum number of nonzeros over
   the local rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of `d_nz`,`o_nz` are
.vb
     proc0  dnz = 2, o_nz = 2
     proc1  dnz = 3, o_nz = 2
     proc2  dnz = 1, o_nz = 4
.ve
   We are allocating m*(`d_nz`+`o_nz`) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2. i.e we are using 12+15+10=37 storage locations to store
   34 values.

   When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are
.vb
     proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2 d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is sum of all the above values i.e 34, and
   hence pre-allocation is perfect.
4557 4558 .seealso: [](ch_matrices), `Mat`, [Sparse Matrix Creation](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4559 `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()`, `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, 4560 `MatGetOwnershipRangesColumn()`, `PetscLayout` 4561 @*/ 4562 PetscErrorCode MatCreateAIJ(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A) 4563 { 4564 PetscMPIInt size; 4565 4566 PetscFunctionBegin; 4567 PetscCall(MatCreate(comm, A)); 4568 PetscCall(MatSetSizes(*A, m, n, M, N)); 4569 PetscCallMPI(MPI_Comm_size(comm, &size)); 4570 if (size > 1) { 4571 PetscCall(MatSetType(*A, MATMPIAIJ)); 4572 PetscCall(MatMPIAIJSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz)); 4573 } else { 4574 PetscCall(MatSetType(*A, MATSEQAIJ)); 4575 PetscCall(MatSeqAIJSetPreallocation(*A, d_nz, d_nnz)); 4576 } 4577 PetscFunctionReturn(PETSC_SUCCESS); 4578 } 4579 4580 /*MC 4581 MatMPIAIJGetSeqAIJF90 - Returns the local pieces of this distributed matrix 4582 4583 Synopsis: 4584 MatMPIAIJGetSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr) 4585 4586 Not Collective 4587 4588 Input Parameter: 4589 . A - the `MATMPIAIJ` matrix 4590 4591 Output Parameters: 4592 + Ad - the diagonal portion of the matrix 4593 . Ao - the off-diagonal portion of the matrix 4594 . 
colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix 4595 - ierr - error code 4596 4597 Level: advanced 4598 4599 Note: 4600 Use `MatMPIAIJRestoreSeqAIJF90()` when you no longer need access to the matrices and `colmap` 4601 4602 .seealso: [](ch_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJRestoreSeqAIJF90()` 4603 M*/ 4604 4605 /*MC 4606 MatMPIAIJRestoreSeqAIJF90 - call after `MatMPIAIJGetSeqAIJF90()` when you no longer need access to the matrices and `colmap` 4607 4608 Synopsis: 4609 MatMPIAIJRestoreSeqAIJF90(Mat A, Mat Ad, Mat Ao, {PetscInt, pointer :: colmap(:)},integer ierr) 4610 4611 Not Collective 4612 4613 Input Parameters: 4614 + A - the `MATMPIAIJ` matrix 4615 . Ad - the diagonal portion of the matrix 4616 . Ao - the off-diagonal portion of the matrix 4617 . colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix 4618 - ierr - error code 4619 4620 Level: advanced 4621 4622 .seealso: [](ch_matrices), `Mat`, [](sec_fortranarrays), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJ()`, `MatMPIAIJGetSeqAIJF90()` 4623 M*/ 4624 4625 /*@C 4626 MatMPIAIJGetSeqAIJ - Returns the local pieces of this distributed matrix 4627 4628 Not Collective 4629 4630 Input Parameter: 4631 . A - The `MATMPIAIJ` matrix 4632 4633 Output Parameters: 4634 + Ad - The local diagonal block as a `MATSEQAIJ` matrix 4635 . Ao - The local off-diagonal block as a `MATSEQAIJ` matrix 4636 - colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix 4637 4638 Level: intermediate 4639 4640 Note: 4641 The rows in `Ad` and `Ao` are in [0, Nr), where Nr is the number of local rows on this process. The columns 4642 in `Ad` are in [0, Nc) where Nc is the number of local columns. The columns are `Ao` are in [0, Nco), where Nco is 4643 the number of nonzero columns in the local off-diagonal piece of the matrix `A`. 
The array colmap maps these 4644 local column numbers to global column numbers in the original matrix. 4645 4646 Fortran Notes: 4647 `MatMPIAIJGetSeqAIJ()` Fortran binding is deprecated (since PETSc 3.19), use `MatMPIAIJGetSeqAIJF90()` 4648 4649 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetSeqAIJF90()`, `MatMPIAIJRestoreSeqAIJF90()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATSEQAIJ` 4650 @*/ 4651 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[]) 4652 { 4653 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 4654 PetscBool flg; 4655 4656 PetscFunctionBegin; 4657 PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &flg)); 4658 PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIAIJ matrix as input"); 4659 if (Ad) *Ad = a->A; 4660 if (Ao) *Ao = a->B; 4661 if (colmap) *colmap = a->garray; 4662 PetscFunctionReturn(PETSC_SUCCESS); 4663 } 4664 4665 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 4666 { 4667 PetscInt m, N, i, rstart, nnz, Ii; 4668 PetscInt *indx; 4669 PetscScalar *values; 4670 MatType rootType; 4671 4672 PetscFunctionBegin; 4673 PetscCall(MatGetSize(inmat, &m, &N)); 4674 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4675 PetscInt *dnz, *onz, sum, bs, cbs; 4676 4677 if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N)); 4678 /* Check sum(n) = N */ 4679 PetscCallMPI(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm)); 4680 PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N); 4681 4682 PetscCallMPI(MPI_Scan(&m, &rstart, 1, MPIU_INT, MPI_SUM, comm)); 4683 rstart -= m; 4684 4685 MatPreallocateBegin(comm, m, n, dnz, onz); 4686 for (i = 0; i < m; i++) { 4687 PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, NULL)); 4688 
PetscCall(MatPreallocateSet(i + rstart, nnz, indx, dnz, onz)); 4689 PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, NULL)); 4690 } 4691 4692 PetscCall(MatCreate(comm, outmat)); 4693 PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 4694 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 4695 PetscCall(MatSetBlockSizes(*outmat, bs, cbs)); 4696 PetscCall(MatGetRootType_Private(inmat, &rootType)); 4697 PetscCall(MatSetType(*outmat, rootType)); 4698 PetscCall(MatSeqAIJSetPreallocation(*outmat, 0, dnz)); 4699 PetscCall(MatMPIAIJSetPreallocation(*outmat, 0, dnz, 0, onz)); 4700 MatPreallocateEnd(dnz, onz); 4701 PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 4702 } 4703 4704 /* numeric phase */ 4705 PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL)); 4706 for (i = 0; i < m; i++) { 4707 PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, &values)); 4708 Ii = i + rstart; 4709 PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES)); 4710 PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, &values)); 4711 } 4712 PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY)); 4713 PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY)); 4714 PetscFunctionReturn(PETSC_SUCCESS); 4715 } 4716 4717 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void **data) 4718 { 4719 Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)*data; 4720 4721 PetscFunctionBegin; 4722 if (!merge) PetscFunctionReturn(PETSC_SUCCESS); 4723 PetscCall(PetscFree(merge->id_r)); 4724 PetscCall(PetscFree(merge->len_s)); 4725 PetscCall(PetscFree(merge->len_r)); 4726 PetscCall(PetscFree(merge->bi)); 4727 PetscCall(PetscFree(merge->bj)); 4728 PetscCall(PetscFree(merge->buf_ri[0])); 4729 PetscCall(PetscFree(merge->buf_ri)); 4730 PetscCall(PetscFree(merge->buf_rj[0])); 4731 PetscCall(PetscFree(merge->buf_rj)); 4732 PetscCall(PetscFree(merge->coi)); 4733 PetscCall(PetscFree(merge->coj)); 4734 PetscCall(PetscFree(merge->owners_co)); 4735 
PetscCall(PetscLayoutDestroy(&merge->rowmap)); 4736 PetscCall(PetscFree(merge)); 4737 PetscFunctionReturn(PETSC_SUCCESS); 4738 } 4739 4740 #include <../src/mat/utils/freespace.h> 4741 #include <petscbt.h> 4742 4743 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat, Mat mpimat) 4744 { 4745 MPI_Comm comm; 4746 Mat_SeqAIJ *a = (Mat_SeqAIJ *)seqmat->data; 4747 PetscMPIInt size, rank, taga, *len_s; 4748 PetscInt N = mpimat->cmap->N, i, j, *owners, *ai = a->i, *aj, m; 4749 PetscMPIInt proc, k; 4750 PetscInt **buf_ri, **buf_rj; 4751 PetscInt anzi, *bj_i, *bi, *bj, arow, bnzi, nextaj; 4752 PetscInt nrows, **buf_ri_k, **nextrow, **nextai; 4753 MPI_Request *s_waits, *r_waits; 4754 MPI_Status *status; 4755 const MatScalar *aa, *a_a; 4756 MatScalar **abuf_r, *ba_i; 4757 Mat_Merge_SeqsToMPI *merge; 4758 PetscContainer container; 4759 4760 PetscFunctionBegin; 4761 PetscCall(PetscObjectGetComm((PetscObject)mpimat, &comm)); 4762 PetscCall(PetscLogEventBegin(MAT_Seqstompinum, seqmat, 0, 0, 0)); 4763 4764 PetscCallMPI(MPI_Comm_size(comm, &size)); 4765 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 4766 4767 PetscCall(PetscObjectQuery((PetscObject)mpimat, "MatMergeSeqsToMPI", (PetscObject *)&container)); 4768 PetscCheck(container, PetscObjectComm((PetscObject)mpimat), PETSC_ERR_PLIB, "Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic"); 4769 PetscCall(PetscContainerGetPointer(container, (void **)&merge)); 4770 PetscCall(MatSeqAIJGetArrayRead(seqmat, &a_a)); 4771 aa = a_a; 4772 4773 bi = merge->bi; 4774 bj = merge->bj; 4775 buf_ri = merge->buf_ri; 4776 buf_rj = merge->buf_rj; 4777 4778 PetscCall(PetscMalloc1(size, &status)); 4779 owners = merge->rowmap->range; 4780 len_s = merge->len_s; 4781 4782 /* send and recv matrix values */ 4783 PetscCall(PetscObjectGetNewTag((PetscObject)mpimat, &taga)); 4784 PetscCall(PetscPostIrecvScalar(comm, taga, merge->nrecv, merge->id_r, merge->len_r, &abuf_r, &r_waits)); 4785 4786 PetscCall(PetscMalloc1(merge->nsend + 1, &s_waits)); 4787 for 
(proc = 0, k = 0; proc < size; proc++) { 4788 if (!len_s[proc]) continue; 4789 i = owners[proc]; 4790 PetscCallMPI(MPIU_Isend(aa + ai[i], len_s[proc], MPIU_MATSCALAR, proc, taga, comm, s_waits + k)); 4791 k++; 4792 } 4793 4794 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, r_waits, status)); 4795 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, s_waits, status)); 4796 PetscCall(PetscFree(status)); 4797 4798 PetscCall(PetscFree(s_waits)); 4799 PetscCall(PetscFree(r_waits)); 4800 4801 /* insert mat values of mpimat */ 4802 PetscCall(PetscMalloc1(N, &ba_i)); 4803 PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai)); 4804 4805 for (k = 0; k < merge->nrecv; k++) { 4806 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4807 nrows = *buf_ri_k[k]; 4808 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4809 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4810 } 4811 4812 /* set values of ba */ 4813 m = merge->rowmap->n; 4814 for (i = 0; i < m; i++) { 4815 arow = owners[rank] + i; 4816 bj_i = bj + bi[i]; /* col indices of the i-th row of mpimat */ 4817 bnzi = bi[i + 1] - bi[i]; 4818 PetscCall(PetscArrayzero(ba_i, bnzi)); 4819 4820 /* add local non-zero vals of this proc's seqmat into ba */ 4821 anzi = ai[arow + 1] - ai[arow]; 4822 aj = a->j + ai[arow]; 4823 aa = a_a + ai[arow]; 4824 nextaj = 0; 4825 for (j = 0; nextaj < anzi; j++) { 4826 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4827 ba_i[j] += aa[nextaj++]; 4828 } 4829 } 4830 4831 /* add received vals into ba */ 4832 for (k = 0; k < merge->nrecv; k++) { /* k-th received message */ 4833 /* i-th row */ 4834 if (i == *nextrow[k]) { 4835 anzi = *(nextai[k] + 1) - *nextai[k]; 4836 aj = buf_rj[k] + *nextai[k]; 4837 aa = abuf_r[k] + *nextai[k]; 4838 nextaj = 0; 4839 for (j = 0; nextaj < anzi; j++) { 4840 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4841 
ba_i[j] += aa[nextaj++]; 4842 } 4843 } 4844 nextrow[k]++; 4845 nextai[k]++; 4846 } 4847 } 4848 PetscCall(MatSetValues(mpimat, 1, &arow, bnzi, bj_i, ba_i, INSERT_VALUES)); 4849 } 4850 PetscCall(MatSeqAIJRestoreArrayRead(seqmat, &a_a)); 4851 PetscCall(MatAssemblyBegin(mpimat, MAT_FINAL_ASSEMBLY)); 4852 PetscCall(MatAssemblyEnd(mpimat, MAT_FINAL_ASSEMBLY)); 4853 4854 PetscCall(PetscFree(abuf_r[0])); 4855 PetscCall(PetscFree(abuf_r)); 4856 PetscCall(PetscFree(ba_i)); 4857 PetscCall(PetscFree3(buf_ri_k, nextrow, nextai)); 4858 PetscCall(PetscLogEventEnd(MAT_Seqstompinum, seqmat, 0, 0, 0)); 4859 PetscFunctionReturn(PETSC_SUCCESS); 4860 } 4861 4862 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, Mat *mpimat) 4863 { 4864 Mat B_mpi; 4865 Mat_SeqAIJ *a = (Mat_SeqAIJ *)seqmat->data; 4866 PetscMPIInt size, rank, tagi, tagj, *len_s, *len_si, *len_ri; 4867 PetscInt **buf_rj, **buf_ri, **buf_ri_k; 4868 PetscInt M = seqmat->rmap->n, N = seqmat->cmap->n, i, *owners, *ai = a->i, *aj = a->j; 4869 PetscInt len, *dnz, *onz, bs, cbs; 4870 PetscInt k, anzi, *bi, *bj, *lnk, nlnk, arow, bnzi; 4871 PetscInt nrows, *buf_s, *buf_si, *buf_si_i, **nextrow, **nextai; 4872 MPI_Request *si_waits, *sj_waits, *ri_waits, *rj_waits; 4873 MPI_Status *status; 4874 PetscFreeSpaceList free_space = NULL, current_space = NULL; 4875 PetscBT lnkbt; 4876 Mat_Merge_SeqsToMPI *merge; 4877 PetscContainer container; 4878 4879 PetscFunctionBegin; 4880 PetscCall(PetscLogEventBegin(MAT_Seqstompisym, seqmat, 0, 0, 0)); 4881 4882 /* make sure it is a PETSc comm */ 4883 PetscCall(PetscCommDuplicate(comm, &comm, NULL)); 4884 PetscCallMPI(MPI_Comm_size(comm, &size)); 4885 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 4886 4887 PetscCall(PetscNew(&merge)); 4888 PetscCall(PetscMalloc1(size, &status)); 4889 4890 /* determine row ownership */ 4891 PetscCall(PetscLayoutCreate(comm, &merge->rowmap)); 4892 PetscCall(PetscLayoutSetLocalSize(merge->rowmap, m)); 4893 
PetscCall(PetscLayoutSetSize(merge->rowmap, M)); 4894 PetscCall(PetscLayoutSetBlockSize(merge->rowmap, 1)); 4895 PetscCall(PetscLayoutSetUp(merge->rowmap)); 4896 PetscCall(PetscMalloc1(size, &len_si)); 4897 PetscCall(PetscMalloc1(size, &merge->len_s)); 4898 4899 m = merge->rowmap->n; 4900 owners = merge->rowmap->range; 4901 4902 /* determine the number of messages to send, their lengths */ 4903 len_s = merge->len_s; 4904 4905 len = 0; /* length of buf_si[] */ 4906 merge->nsend = 0; 4907 for (PetscMPIInt proc = 0; proc < size; proc++) { 4908 len_si[proc] = 0; 4909 if (proc == rank) { 4910 len_s[proc] = 0; 4911 } else { 4912 PetscCall(PetscMPIIntCast(owners[proc + 1] - owners[proc] + 1, &len_si[proc])); 4913 PetscCall(PetscMPIIntCast(ai[owners[proc + 1]] - ai[owners[proc]], &len_s[proc])); /* num of rows to be sent to [proc] */ 4914 } 4915 if (len_s[proc]) { 4916 merge->nsend++; 4917 nrows = 0; 4918 for (i = owners[proc]; i < owners[proc + 1]; i++) { 4919 if (ai[i + 1] > ai[i]) nrows++; 4920 } 4921 PetscCall(PetscMPIIntCast(2 * (nrows + 1), &len_si[proc])); 4922 len += len_si[proc]; 4923 } 4924 } 4925 4926 /* determine the number and length of messages to receive for ij-structure */ 4927 PetscCall(PetscGatherNumberOfMessages(comm, NULL, len_s, &merge->nrecv)); 4928 PetscCall(PetscGatherMessageLengths2(comm, merge->nsend, merge->nrecv, len_s, len_si, &merge->id_r, &merge->len_r, &len_ri)); 4929 4930 /* post the Irecv of j-structure */ 4931 PetscCall(PetscCommGetNewTag(comm, &tagj)); 4932 PetscCall(PetscPostIrecvInt(comm, tagj, merge->nrecv, merge->id_r, merge->len_r, &buf_rj, &rj_waits)); 4933 4934 /* post the Isend of j-structure */ 4935 PetscCall(PetscMalloc2(merge->nsend, &si_waits, merge->nsend, &sj_waits)); 4936 4937 for (PetscMPIInt proc = 0, k = 0; proc < size; proc++) { 4938 if (!len_s[proc]) continue; 4939 i = owners[proc]; 4940 PetscCallMPI(MPIU_Isend(aj + ai[i], len_s[proc], MPIU_INT, proc, tagj, comm, sj_waits + k)); 4941 k++; 4942 } 4943 4944 /* receives 
and sends of j-structure are complete */ 4945 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, rj_waits, status)); 4946 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, sj_waits, status)); 4947 4948 /* send and recv i-structure */ 4949 PetscCall(PetscCommGetNewTag(comm, &tagi)); 4950 PetscCall(PetscPostIrecvInt(comm, tagi, merge->nrecv, merge->id_r, len_ri, &buf_ri, &ri_waits)); 4951 4952 PetscCall(PetscMalloc1(len + 1, &buf_s)); 4953 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4954 for (PetscMPIInt proc = 0, k = 0; proc < size; proc++) { 4955 if (!len_s[proc]) continue; 4956 /* form outgoing message for i-structure: 4957 buf_si[0]: nrows to be sent 4958 [1:nrows]: row index (global) 4959 [nrows+1:2*nrows+1]: i-structure index 4960 */ 4961 nrows = len_si[proc] / 2 - 1; 4962 buf_si_i = buf_si + nrows + 1; 4963 buf_si[0] = nrows; 4964 buf_si_i[0] = 0; 4965 nrows = 0; 4966 for (i = owners[proc]; i < owners[proc + 1]; i++) { 4967 anzi = ai[i + 1] - ai[i]; 4968 if (anzi) { 4969 buf_si_i[nrows + 1] = buf_si_i[nrows] + anzi; /* i-structure */ 4970 buf_si[nrows + 1] = i - owners[proc]; /* local row index */ 4971 nrows++; 4972 } 4973 } 4974 PetscCallMPI(MPIU_Isend(buf_si, len_si[proc], MPIU_INT, proc, tagi, comm, si_waits + k)); 4975 k++; 4976 buf_si += len_si[proc]; 4977 } 4978 4979 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, ri_waits, status)); 4980 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, si_waits, status)); 4981 4982 PetscCall(PetscInfo(seqmat, "nsend: %d, nrecv: %d\n", merge->nsend, merge->nrecv)); 4983 for (i = 0; i < merge->nrecv; i++) PetscCall(PetscInfo(seqmat, "recv len_ri=%d, len_rj=%d from [%d]\n", len_ri[i], merge->len_r[i], merge->id_r[i])); 4984 4985 PetscCall(PetscFree(len_si)); 4986 PetscCall(PetscFree(len_ri)); 4987 PetscCall(PetscFree(rj_waits)); 4988 PetscCall(PetscFree2(si_waits, sj_waits)); 4989 PetscCall(PetscFree(ri_waits)); 4990 PetscCall(PetscFree(buf_s)); 4991 
PetscCall(PetscFree(status)); 4992 4993 /* compute a local seq matrix in each processor */ 4994 /* allocate bi array and free space for accumulating nonzero column info */ 4995 PetscCall(PetscMalloc1(m + 1, &bi)); 4996 bi[0] = 0; 4997 4998 /* create and initialize a linked list */ 4999 nlnk = N + 1; 5000 PetscCall(PetscLLCreate(N, N, nlnk, lnk, lnkbt)); 5001 5002 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 5003 len = ai[owners[rank + 1]] - ai[owners[rank]]; 5004 PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2, len) + 1, &free_space)); 5005 5006 current_space = free_space; 5007 5008 /* determine symbolic info for each local row */ 5009 PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai)); 5010 5011 for (k = 0; k < merge->nrecv; k++) { 5012 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 5013 nrows = *buf_ri_k[k]; 5014 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 5015 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 5016 } 5017 5018 MatPreallocateBegin(comm, m, n, dnz, onz); 5019 len = 0; 5020 for (i = 0; i < m; i++) { 5021 bnzi = 0; 5022 /* add local non-zero cols of this proc's seqmat into lnk */ 5023 arow = owners[rank] + i; 5024 anzi = ai[arow + 1] - ai[arow]; 5025 aj = a->j + ai[arow]; 5026 PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt)); 5027 bnzi += nlnk; 5028 /* add received col data into lnk */ 5029 for (k = 0; k < merge->nrecv; k++) { /* k-th received message */ 5030 if (i == *nextrow[k]) { /* i-th row */ 5031 anzi = *(nextai[k] + 1) - *nextai[k]; 5032 aj = buf_rj[k] + *nextai[k]; 5033 PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt)); 5034 bnzi += nlnk; 5035 nextrow[k]++; 5036 nextai[k]++; 5037 } 5038 } 5039 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 5040 5041 /* if free space is not available, make more free space */ 5042 if (current_space->local_remaining < bnzi) 
PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi, current_space->total_array_size), ¤t_space)); 5043 /* copy data into free space, then initialize lnk */ 5044 PetscCall(PetscLLClean(N, N, bnzi, lnk, current_space->array, lnkbt)); 5045 PetscCall(MatPreallocateSet(i + owners[rank], bnzi, current_space->array, dnz, onz)); 5046 5047 current_space->array += bnzi; 5048 current_space->local_used += bnzi; 5049 current_space->local_remaining -= bnzi; 5050 5051 bi[i + 1] = bi[i] + bnzi; 5052 } 5053 5054 PetscCall(PetscFree3(buf_ri_k, nextrow, nextai)); 5055 5056 PetscCall(PetscMalloc1(bi[m] + 1, &bj)); 5057 PetscCall(PetscFreeSpaceContiguous(&free_space, bj)); 5058 PetscCall(PetscLLDestroy(lnk, lnkbt)); 5059 5060 /* create symbolic parallel matrix B_mpi */ 5061 PetscCall(MatGetBlockSizes(seqmat, &bs, &cbs)); 5062 PetscCall(MatCreate(comm, &B_mpi)); 5063 if (n == PETSC_DECIDE) { 5064 PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, N)); 5065 } else { 5066 PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 5067 } 5068 PetscCall(MatSetBlockSizes(B_mpi, bs, cbs)); 5069 PetscCall(MatSetType(B_mpi, MATMPIAIJ)); 5070 PetscCall(MatMPIAIJSetPreallocation(B_mpi, 0, dnz, 0, onz)); 5071 MatPreallocateEnd(dnz, onz); 5072 PetscCall(MatSetOption(B_mpi, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE)); 5073 5074 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 5075 B_mpi->assembled = PETSC_FALSE; 5076 merge->bi = bi; 5077 merge->bj = bj; 5078 merge->buf_ri = buf_ri; 5079 merge->buf_rj = buf_rj; 5080 merge->coi = NULL; 5081 merge->coj = NULL; 5082 merge->owners_co = NULL; 5083 5084 PetscCall(PetscCommDestroy(&comm)); 5085 5086 /* attach the supporting struct to B_mpi for reuse */ 5087 PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container)); 5088 PetscCall(PetscContainerSetPointer(container, merge)); 5089 PetscCall(PetscContainerSetCtxDestroy(container, MatDestroy_MPIAIJ_SeqsToMPI)); 5090 
PetscCall(PetscObjectCompose((PetscObject)B_mpi, "MatMergeSeqsToMPI", (PetscObject)container)); 5091 PetscCall(PetscContainerDestroy(&container)); 5092 *mpimat = B_mpi; 5093 5094 PetscCall(PetscLogEventEnd(MAT_Seqstompisym, seqmat, 0, 0, 0)); 5095 PetscFunctionReturn(PETSC_SUCCESS); 5096 } 5097 5098 /*@ 5099 MatCreateMPIAIJSumSeqAIJ - Creates a `MATMPIAIJ` matrix by adding sequential 5100 matrices from each processor 5101 5102 Collective 5103 5104 Input Parameters: 5105 + comm - the communicators the parallel matrix will live on 5106 . seqmat - the input sequential matrices 5107 . m - number of local rows (or `PETSC_DECIDE`) 5108 . n - number of local columns (or `PETSC_DECIDE`) 5109 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5110 5111 Output Parameter: 5112 . mpimat - the parallel matrix generated 5113 5114 Level: advanced 5115 5116 Note: 5117 The dimensions of the sequential matrix in each processor MUST be the same. 5118 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 5119 destroyed when `mpimat` is destroyed. Call `PetscObjectQuery()` to access `seqmat`. 
5120 5121 .seealso: [](ch_matrices), `Mat`, `MatCreateAIJ()` 5122 @*/ 5123 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, MatReuse scall, Mat *mpimat) 5124 { 5125 PetscMPIInt size; 5126 5127 PetscFunctionBegin; 5128 PetscCallMPI(MPI_Comm_size(comm, &size)); 5129 if (size == 1) { 5130 PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0)); 5131 if (scall == MAT_INITIAL_MATRIX) { 5132 PetscCall(MatDuplicate(seqmat, MAT_COPY_VALUES, mpimat)); 5133 } else { 5134 PetscCall(MatCopy(seqmat, *mpimat, SAME_NONZERO_PATTERN)); 5135 } 5136 PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0)); 5137 PetscFunctionReturn(PETSC_SUCCESS); 5138 } 5139 PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0)); 5140 if (scall == MAT_INITIAL_MATRIX) PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm, seqmat, m, n, mpimat)); 5141 PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat, *mpimat)); 5142 PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0)); 5143 PetscFunctionReturn(PETSC_SUCCESS); 5144 } 5145 5146 /*@ 5147 MatAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATAIJ` matrix. 5148 5149 Not Collective 5150 5151 Input Parameter: 5152 . A - the matrix 5153 5154 Output Parameter: 5155 . A_loc - the local sequential matrix generated 5156 5157 Level: developer 5158 5159 Notes: 5160 The matrix is created by taking `A`'s local rows and putting them into a sequential matrix 5161 with `mlocal` rows and `n` columns. Where `mlocal` is obtained with `MatGetLocalSize()` and 5162 `n` is the global column count obtained with `MatGetSize()` 5163 5164 In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix. 5165 5166 For parallel matrices this creates an entirely new matrix. If the matrix is sequential it merely increases the reference count. 
5167 5168 Destroy the matrix with `MatDestroy()` 5169 5170 .seealso: [](ch_matrices), `Mat`, `MatMPIAIJGetLocalMat()` 5171 @*/ 5172 PetscErrorCode MatAIJGetLocalMat(Mat A, Mat *A_loc) 5173 { 5174 PetscBool mpi; 5175 5176 PetscFunctionBegin; 5177 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &mpi)); 5178 if (mpi) { 5179 PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, A_loc)); 5180 } else { 5181 *A_loc = A; 5182 PetscCall(PetscObjectReference((PetscObject)*A_loc)); 5183 } 5184 PetscFunctionReturn(PETSC_SUCCESS); 5185 } 5186 5187 /*@ 5188 MatMPIAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix. 5189 5190 Not Collective 5191 5192 Input Parameters: 5193 + A - the matrix 5194 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5195 5196 Output Parameter: 5197 . A_loc - the local sequential matrix generated 5198 5199 Level: developer 5200 5201 Notes: 5202 The matrix is created by taking all `A`'s local rows and putting them into a sequential 5203 matrix with `mlocal` rows and `n` columns.`mlocal` is the row count obtained with 5204 `MatGetLocalSize()` and `n` is the global column count obtained with `MatGetSize()`. 5205 5206 In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix. 5207 5208 When `A` is sequential and `MAT_INITIAL_MATRIX` is requested, the matrix returned is the diagonal part of `A` (which contains the entire matrix), 5209 with its reference count increased by one. Hence changing values of `A_loc` changes `A`. If `MAT_REUSE_MATRIX` is requested on a sequential matrix 5210 then `MatCopy`(Adiag,*`A_loc`,`SAME_NONZERO_PATTERN`) is called to fill `A_loc`. Thus one can preallocate the appropriate sequential matrix `A_loc` 5211 and then call this routine with `MAT_REUSE_MATRIX`. In this case, one can modify the values of `A_loc` without affecting the original sequential matrix. 

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()`
@*/
PetscErrorCode MatMPIAIJGetLocalMat(Mat A, MatReuse scall, Mat *A_loc)
{
  Mat_MPIAIJ        *mpimat = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ        *mat, *a, *b;
  PetscInt          *ai, *aj, *bi, *bj, *cmap = mpimat->garray;
  const PetscScalar *aa, *ba, *aav, *bav;
  PetscScalar       *ca, *cam;
  PetscMPIInt        size;
  PetscInt           am = A->rmap->n, i, j, k, cstart = A->cmap->rstart;
  PetscInt          *ci, *cj, col, ncols_d, ncols_o, jo;
  PetscBool          match;

  PetscFunctionBegin;
  PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &match));
  PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    /* on one rank the diagonal block already is the whole matrix */
    if (scall == MAT_INITIAL_MATRIX) {
      PetscCall(PetscObjectReference((PetscObject)mpimat->A));
      *A_loc = mpimat->A;
    } else if (scall == MAT_REUSE_MATRIX) {
      PetscCall(MatCopy(mpimat->A, *A_loc, SAME_NONZERO_PATTERN));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
  a  = (Mat_SeqAIJ *)mpimat->A->data;
  b  = (Mat_SeqAIJ *)mpimat->B->data;
  ai = a->i;
  aj = a->j;
  bi = b->i;
  bj = b->j;
  PetscCall(MatSeqAIJGetArrayRead(mpimat->A, &aav));
  PetscCall(MatSeqAIJGetArrayRead(mpimat->B, &bav));
  /* aa/ba (and aj/bj) are walked cumulatively across all rows below; aav/bav keep the
     original base pointers so the arrays can be restored at the end */
  aa = aav;
  ba = bav;
  if (scall == MAT_INITIAL_MATRIX) {
    /* row i of the result holds all of row i's diagonal and off-diagonal entries, in
       global column order: off-diag cols < cstart, then diag cols, then the rest */
    PetscCall(PetscMalloc1(1 + am, &ci));
    ci[0] = 0;
    for (i = 0; i < am; i++) ci[i + 1] = ci[i] + (ai[i + 1] - ai[i]) + (bi[i + 1] - bi[i]);
    PetscCall(PetscMalloc1(1 + ci[am], &cj));
    PetscCall(PetscMalloc1(1 + ci[am], &ca));
    k = 0;
    for (i = 0; i < am; i++) {
      ncols_o = bi[i + 1] - bi[i];
      ncols_d = ai[i + 1] - ai[i];
      /* off-diagonal entries whose global column is left of the diagonal block (< cstart) */
      for (jo = 0; jo < ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        cj[k] = col;
        bj++;
        ca[k++] = *ba++;
      }
      /* diagonal portion of A */
      for (j = 0; j < ncols_d; j++) {
        cj[k]   = cstart + *aj++;
        ca[k++] = *aa++;
      }
      /* remaining off-diagonal entries (global column >= cstart) */
      for (j = jo; j < ncols_o; j++) {
        cj[k]   = cmap[*bj++];
        ca[k++] = *ba++;
      }
    }
    /* put together the new matrix */
    PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, A->cmap->N, ci, cj, ca, A_loc));
    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    mat          = (Mat_SeqAIJ *)(*A_loc)->data;
    mat->free_a  = PETSC_TRUE;
    mat->free_ij = PETSC_TRUE;
    mat->nonew   = 0;
  } else if (scall == MAT_REUSE_MATRIX) {
    /* structure already exists; only refill the values, in the same interleaved order */
    mat = (Mat_SeqAIJ *)(*A_loc)->data;
    ci  = mat->i;
    cj  = mat->j;
    PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &cam));
    for (i = 0; i < am; i++) {
      /* off-diagonal entries with global column < cstart */
      ncols_o = bi[i + 1] - bi[i];
      for (jo = 0; jo < ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        *cam++ = *ba++;
        bj++;
      }
      /* diagonal portion of A */
      ncols_d = ai[i + 1] - ai[i];
      for (j = 0; j < ncols_d; j++) *cam++ = *aa++;
      /* remaining off-diagonal entries */
      for (j = jo; j < ncols_o; j++) {
        *cam++ = *ba++;
        bj++;
      }
    }
    PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &cam));
  } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
  PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A, &aav));
  PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B, &bav));
  PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMPIAIJGetLocalMatMerge - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by
taking all its local rows and putting them into a sequential matrix with 5322 mlocal rows and n columns. Where n is the sum of the number of columns of the diagonal and off-diagonal part 5323 5324 Not Collective 5325 5326 Input Parameters: 5327 + A - the matrix 5328 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5329 5330 Output Parameters: 5331 + glob - sequential `IS` with global indices associated with the columns of the local sequential matrix generated (can be `NULL`) 5332 - A_loc - the local sequential matrix generated 5333 5334 Level: developer 5335 5336 Note: 5337 This is different from `MatMPIAIJGetLocalMat()` since the first columns in the returning matrix are those associated with the diagonal 5338 part, then those associated with the off-diagonal part (in its local ordering) 5339 5340 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()` 5341 @*/ 5342 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A, MatReuse scall, IS *glob, Mat *A_loc) 5343 { 5344 Mat Ao, Ad; 5345 const PetscInt *cmap; 5346 PetscMPIInt size; 5347 PetscErrorCode (*f)(Mat, MatReuse, IS *, Mat *); 5348 5349 PetscFunctionBegin; 5350 PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &cmap)); 5351 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 5352 if (size == 1) { 5353 if (scall == MAT_INITIAL_MATRIX) { 5354 PetscCall(PetscObjectReference((PetscObject)Ad)); 5355 *A_loc = Ad; 5356 } else if (scall == MAT_REUSE_MATRIX) { 5357 PetscCall(MatCopy(Ad, *A_loc, SAME_NONZERO_PATTERN)); 5358 } 5359 if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad), Ad->cmap->n, Ad->cmap->rstart, 1, glob)); 5360 PetscFunctionReturn(PETSC_SUCCESS); 5361 } 5362 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJGetLocalMatMerge_C", &f)); 5363 PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0)); 5364 if (f) { 5365 PetscCall((*f)(A, scall, glob, A_loc)); 5366 } else { 5367 Mat_SeqAIJ 
*a = (Mat_SeqAIJ *)Ad->data; 5368 Mat_SeqAIJ *b = (Mat_SeqAIJ *)Ao->data; 5369 Mat_SeqAIJ *c; 5370 PetscInt *ai = a->i, *aj = a->j; 5371 PetscInt *bi = b->i, *bj = b->j; 5372 PetscInt *ci, *cj; 5373 const PetscScalar *aa, *ba; 5374 PetscScalar *ca; 5375 PetscInt i, j, am, dn, on; 5376 5377 PetscCall(MatGetLocalSize(Ad, &am, &dn)); 5378 PetscCall(MatGetLocalSize(Ao, NULL, &on)); 5379 PetscCall(MatSeqAIJGetArrayRead(Ad, &aa)); 5380 PetscCall(MatSeqAIJGetArrayRead(Ao, &ba)); 5381 if (scall == MAT_INITIAL_MATRIX) { 5382 PetscInt k; 5383 PetscCall(PetscMalloc1(1 + am, &ci)); 5384 PetscCall(PetscMalloc1(ai[am] + bi[am], &cj)); 5385 PetscCall(PetscMalloc1(ai[am] + bi[am], &ca)); 5386 ci[0] = 0; 5387 for (i = 0, k = 0; i < am; i++) { 5388 const PetscInt ncols_o = bi[i + 1] - bi[i]; 5389 const PetscInt ncols_d = ai[i + 1] - ai[i]; 5390 ci[i + 1] = ci[i] + ncols_o + ncols_d; 5391 /* diagonal portion of A */ 5392 for (j = 0; j < ncols_d; j++, k++) { 5393 cj[k] = *aj++; 5394 ca[k] = *aa++; 5395 } 5396 /* off-diagonal portion of A */ 5397 for (j = 0; j < ncols_o; j++, k++) { 5398 cj[k] = dn + *bj++; 5399 ca[k] = *ba++; 5400 } 5401 } 5402 /* put together the new matrix */ 5403 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, dn + on, ci, cj, ca, A_loc)); 5404 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5405 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5406 c = (Mat_SeqAIJ *)(*A_loc)->data; 5407 c->free_a = PETSC_TRUE; 5408 c->free_ij = PETSC_TRUE; 5409 c->nonew = 0; 5410 PetscCall(MatSetType(*A_loc, ((PetscObject)Ad)->type_name)); 5411 } else if (scall == MAT_REUSE_MATRIX) { 5412 PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &ca)); 5413 for (i = 0; i < am; i++) { 5414 const PetscInt ncols_d = ai[i + 1] - ai[i]; 5415 const PetscInt ncols_o = bi[i + 1] - bi[i]; 5416 /* diagonal portion of A */ 5417 for (j = 0; j < ncols_d; j++) *ca++ = *aa++; 5418 /* off-diagonal portion of A */ 5419 for (j = 0; j < ncols_o; j++) *ca++ = *ba++; 5420 } 5421 PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &ca)); 5422 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall); 5423 PetscCall(MatSeqAIJRestoreArrayRead(Ad, &aa)); 5424 PetscCall(MatSeqAIJRestoreArrayRead(Ao, &aa)); 5425 if (glob) { 5426 PetscInt cst, *gidx; 5427 5428 PetscCall(MatGetOwnershipRangeColumn(A, &cst, NULL)); 5429 PetscCall(PetscMalloc1(dn + on, &gidx)); 5430 for (i = 0; i < dn; i++) gidx[i] = cst + i; 5431 for (i = 0; i < on; i++) gidx[i + dn] = cmap[i]; 5432 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad), dn + on, gidx, PETSC_OWN_POINTER, glob)); 5433 } 5434 } 5435 PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0)); 5436 PetscFunctionReturn(PETSC_SUCCESS); 5437 } 5438 5439 /*@C 5440 MatMPIAIJGetLocalMatCondensed - Creates a `MATSEQAIJ` matrix from an `MATMPIAIJ` matrix by taking all its local rows and NON-ZERO columns 5441 5442 Not Collective 5443 5444 Input Parameters: 5445 + A - the matrix 5446 . scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5447 . row - index set of rows to extract (or `NULL`) 5448 - col - index set of columns to extract (or `NULL`) 5449 5450 Output Parameter: 5451 . 
A_loc - the local sequential matrix generated

  Level: developer

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`
@*/
PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A, MatReuse scall, IS *row, IS *col, Mat *A_loc)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  PetscInt    i, start, end, ncols, nzA, nzB, *cmap, imark, *idx;
  IS          isrowa, iscola;
  Mat        *aloc;
  PetscBool   match;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &match));
  PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
  PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed, A, 0, 0, 0));
  /* default row set: all locally owned rows */
  if (!row) {
    start = A->rmap->rstart;
    end   = A->rmap->rend;
    PetscCall(ISCreateStride(PETSC_COMM_SELF, end - start, start, 1, &isrowa));
  } else {
    isrowa = *row;
  }
  /* default column set: the columns with local nonzeros, in global order —
     off-diagonal columns below cstart, then the owned columns, then the rest
     (relies on a->garray being sorted) */
  if (!col) {
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    PetscCall(PetscMalloc1(nzA + nzB, &idx));
    ncols = 0;
    for (i = 0; i < nzB; i++) {
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i = 0; i < nzA; i++) idx[ncols++] = start + i;
    for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i];
    PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &iscola));
  } else {
    iscola = *col;
  }
  /* MAT_REUSE_MATRIX expects the submatrix array to hold the previous result */
  if (scall != MAT_INITIAL_MATRIX) {
    PetscCall(PetscMalloc1(1, &aloc));
    aloc[0] = *A_loc;
  }
  PetscCall(MatCreateSubMatrices(A, 1, &isrowa, &iscola, scall, &aloc));
  if (!col) { /* attach global id of condensed columns */
    PetscCall(PetscObjectCompose((PetscObject)aloc[0], "_petsc_GetLocalMatCondensed_iscol", (PetscObject)iscola));
  }
  *A_loc = aloc[0];
  PetscCall(PetscFree(aloc));
  if (!row) PetscCall(ISDestroy(&isrowa));
  if (!col) PetscCall(ISDestroy(&iscola));
  PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed, A, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
 * Create a sequential AIJ matrix based on row indices; a whole row is extracted once it is matched.
 * Rows could be local or remote. The routine is designed to be scalable in memory so that nothing is based
 * on a global size.
 * */
static PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P, IS rows, Mat *P_oth)
{
  Mat_MPIAIJ            *p = (Mat_MPIAIJ *)P->data;
  Mat_SeqAIJ            *pd = (Mat_SeqAIJ *)p->A->data, *po = (Mat_SeqAIJ *)p->B->data, *p_oth;
  PetscInt               plocalsize, nrows, *ilocal, *oilocal, i, lidx, *nrcols, *nlcols, ncol;
  PetscMPIInt            owner;
  PetscSFNode           *iremote, *oiremote;
  const PetscInt        *lrowindices;
  PetscSF                sf, osf;
  PetscInt               pcstart, *roffsets, *loffsets, *pnnz, j;
  PetscInt               ontotalcols, dntotalcols, ntotalcols, nout;
  MPI_Comm               comm;
  ISLocalToGlobalMapping mapping;
  const PetscScalar     *pd_a, *po_a;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)P, &comm));
  /* plocalsize is the number of roots
   * nrows is the number of leaves
   * */
  PetscCall(MatGetLocalSize(P, &plocalsize, NULL));
  PetscCall(ISGetLocalSize(rows, &nrows));
  PetscCall(PetscCalloc1(nrows, &iremote));
  PetscCall(ISGetIndices(rows, &lrowindices));
  for (i = 0; i < nrows; i++) {
    /* Find a remote index and an owner for a row
     * The row could be local or remote
     * */
    owner = 0;
    lidx  = 0;
    PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, &lidx));
    iremote[i].index = lidx;
    iremote[i].rank  = owner;
  }
  /* Create SF to communicate how many nonzero columns for each row */
  PetscCall(PetscSFCreate(comm, &sf));
  /* SF will figure out the number of nonzero columns for each row, and their
   * offsets
   * */
  PetscCall(PetscSFSetGraph(sf, plocalsize, nrows, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFSetUp(sf));

  /* per-row (diag,off-diag) nonzero counts and running offsets on the root side */
  PetscCall(PetscCalloc1(2 * (plocalsize + 1), &roffsets));
  PetscCall(PetscCalloc1(2 * plocalsize, &nrcols));
  PetscCall(PetscCalloc1(nrows, &pnnz));
  roffsets[0] = 0;
  roffsets[1] = 0;
  for (i = 0; i < plocalsize; i++) {
    /* diagonal */
    nrcols[i * 2 + 0] = pd->i[i + 1] - pd->i[i];
    /* off-diagonal */
    nrcols[i * 2 + 1] = po->i[i + 1] - po->i[i];
    /* compute offsets so that we know the relative location for each row */
    roffsets[(i + 1) * 2 + 0] = roffsets[i * 2 + 0] + nrcols[i * 2 + 0];
    roffsets[(i + 1) * 2 + 1] = roffsets[i * 2 + 1] + nrcols[i * 2 + 1];
  }
  PetscCall(PetscCalloc1(2 * nrows, &nlcols));
  PetscCall(PetscCalloc1(2 * nrows, &loffsets));
  /* 'r' means root, and 'l' means leaf */
  PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));
  PetscCall(PetscFree(roffsets));
  PetscCall(PetscFree(nrcols));
  dntotalcols = 0;
  ontotalcols = 0;
  ncol        = 0;
  for (i = 0; i < nrows; i++) {
    pnnz[i] = nlcols[i * 2 + 0] + nlcols[i * 2 + 1];
    ncol    = PetscMax(pnnz[i], ncol);
    /* diagonal */
    dntotalcols += nlcols[i * 2 + 0];
    /* off-diagonal */
    ontotalcols += nlcols[i * 2 + 1];
  }
  /* We do not need to figure the right number of columns
   * since all the calculations will be done by going through the raw data
   * */
  PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, nrows, ncol, 0, pnnz, P_oth));
  PetscCall(MatSetUp(*P_oth));
  PetscCall(PetscFree(pnnz));
  p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
  /* diagonal */
  PetscCall(PetscCalloc1(dntotalcols, &iremote));
  /* off-diagonal */
  PetscCall(PetscCalloc1(ontotalcols, &oiremote));
  /* diagonal */
  PetscCall(PetscCalloc1(dntotalcols, &ilocal));
  /* off-diagonal */
  PetscCall(PetscCalloc1(ontotalcols, &oilocal));
  dntotalcols = 0;
  ontotalcols = 0;
  ntotalcols  = 0;
  /* build per-entry SF graphs: one leaf per received matrix entry, interleaving
     diagonal and off-diagonal entries row by row in the target matrix */
  for (i = 0; i < nrows; i++) {
    owner = 0;
    PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, NULL));
    /* Set iremote for diag matrix */
    for (j = 0; j < nlcols[i * 2 + 0]; j++) {
      iremote[dntotalcols].index = loffsets[i * 2 + 0] + j;
      iremote[dntotalcols].rank  = owner;
      /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */
      ilocal[dntotalcols++] = ntotalcols++;
    }
    /* off-diagonal */
    for (j = 0; j < nlcols[i * 2 + 1]; j++) {
      oiremote[ontotalcols].index = loffsets[i * 2 + 1] + j;
      oiremote[ontotalcols].rank  = owner;
      oilocal[ontotalcols++]      = ntotalcols++;
    }
  }
  PetscCall(ISRestoreIndices(rows, &lrowindices));
  PetscCall(PetscFree(loffsets));
  PetscCall(PetscFree(nlcols));
  PetscCall(PetscSFCreate(comm, &sf));
  /* P serves as roots and P_oth is leaves
   * Diag matrix
   * */
  PetscCall(PetscSFSetGraph(sf, pd->i[plocalsize], dntotalcols, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFSetUp(sf));

  PetscCall(PetscSFCreate(comm, &osf));
  /* off-diagonal */
  PetscCall(PetscSFSetGraph(osf, po->i[plocalsize], ontotalcols, oilocal, PETSC_OWN_POINTER, oiremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(osf));
  PetscCall(PetscSFSetUp(osf));
  PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
  PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
  /* operate on the matrix internal data to save memory */
  PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
  PetscCall(MatGetOwnershipRangeColumn(P, &pcstart, NULL));
  /* Convert to global indices for diag matrix (pd->j is shifted in place for the
     communication and shifted back below) */
  for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] += pcstart;
  PetscCall(PetscSFBcastBegin(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
  /* We want P_oth store global indices */
  PetscCall(ISLocalToGlobalMappingCreate(comm, 1, p->B->cmap->n, p->garray, PETSC_COPY_VALUES, &mapping));
  /* Use memory scalable approach */
  PetscCall(ISLocalToGlobalMappingSetType(mapping, ISLOCALTOGLOBALMAPPINGHASH));
  /* po->j is also converted to global indices in place, and mapped back after the bcast */
  PetscCall(ISLocalToGlobalMappingApply(mapping, po->i[plocalsize], po->j, po->j));
  PetscCall(PetscSFBcastBegin(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
  /* Convert back to local indices */
  for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] -= pcstart;
  PetscCall(PetscSFBcastEnd(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
  nout = 0;
  PetscCall(ISGlobalToLocalMappingApply(mapping, IS_GTOLM_DROP, po->i[plocalsize], po->j, &nout, po->j));
  PetscCheck(nout == po->i[plocalsize], comm, PETSC_ERR_ARG_INCOMP, "n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ", po->i[plocalsize], nout);
  PetscCall(ISLocalToGlobalMappingDestroy(&mapping));
  /* Exchange values */
  PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
  PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
  PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
  /* Stop PETSc from shrinking memory */
  for (i = 0; i < nrows; i++) p_oth->ilen[i] = p_oth->imax[i];
  PetscCall(MatAssemblyBegin(*P_oth, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*P_oth, MAT_FINAL_ASSEMBLY));
  /* Attach PetscSF objects to P_oth so that we can reuse it later */
  PetscCall(PetscObjectCompose((PetscObject)*P_oth, "diagsf", (PetscObject)sf));
  PetscCall(PetscObjectCompose((PetscObject)*P_oth, "offdiagsf", (PetscObject)osf));
  PetscCall(PetscSFDestroy(&sf));
  PetscCall(PetscSFDestroy(&osf));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
 * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
 * This supports MPIAIJ and MAIJ
 * */
PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A, Mat P, PetscInt dof, MatReuse reuse, Mat *P_oth)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *p = (Mat_MPIAIJ *)P->data;
  Mat_SeqAIJ *p_oth;
  IS          rows, map;
  PetscHMapI  hamp;
  PetscInt    i, htsize, *rowindices, off, *mapping, key, count;
  MPI_Comm    comm;
  PetscSF     sf, osf;
  PetscBool   has;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, P, 0, 0));
  /* If it is the first time, create an index set of off-diag nonzero columns of A,
   * and then create a submatrix (that often is an overlapping matrix)
   * */
  if (reuse == MAT_INITIAL_MATRIX) {
    /* Use a hash table to figure out unique keys */
    PetscCall(PetscHMapICreateWithSize(a->B->cmap->n, &hamp));
    PetscCall(PetscCalloc1(a->B->cmap->n, &mapping));
    count = 0;
    /* Assume that a->garray is sorted, otherwise the following does not make sense */
    for (i = 0; i < a->B->cmap->n; i++) {
      key = a->garray[i] / dof;
      PetscCall(PetscHMapIHas(hamp, key, &has));
      if (!has) {
        mapping[i] = count;
        PetscCall(PetscHMapISet(hamp, key, count++));
      } else {
        /* Current 'i' has the same value as the previous step */
        mapping[i] = count - 1;
      }
    }
    PetscCall(ISCreateGeneral(comm, a->B->cmap->n, mapping, PETSC_OWN_POINTER, &map));
PetscCall(PetscHMapIGetSize(hamp, &htsize)); 5724 PetscCheck(htsize == count, comm, PETSC_ERR_ARG_INCOMP, " Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT, htsize, count); 5725 PetscCall(PetscCalloc1(htsize, &rowindices)); 5726 off = 0; 5727 PetscCall(PetscHMapIGetKeys(hamp, &off, rowindices)); 5728 PetscCall(PetscHMapIDestroy(&hamp)); 5729 PetscCall(PetscSortInt(htsize, rowindices)); 5730 PetscCall(ISCreateGeneral(comm, htsize, rowindices, PETSC_OWN_POINTER, &rows)); 5731 /* In case, the matrix was already created but users want to recreate the matrix */ 5732 PetscCall(MatDestroy(P_oth)); 5733 PetscCall(MatCreateSeqSubMatrixWithRows_Private(P, rows, P_oth)); 5734 PetscCall(PetscObjectCompose((PetscObject)*P_oth, "aoffdiagtopothmapping", (PetscObject)map)); 5735 PetscCall(ISDestroy(&map)); 5736 PetscCall(ISDestroy(&rows)); 5737 } else if (reuse == MAT_REUSE_MATRIX) { 5738 /* If matrix was already created, we simply update values using SF objects 5739 * that as attached to the matrix earlier. 
5740 */ 5741 const PetscScalar *pd_a, *po_a; 5742 5743 PetscCall(PetscObjectQuery((PetscObject)*P_oth, "diagsf", (PetscObject *)&sf)); 5744 PetscCall(PetscObjectQuery((PetscObject)*P_oth, "offdiagsf", (PetscObject *)&osf)); 5745 PetscCheck(sf && osf, comm, PETSC_ERR_ARG_NULL, "Matrix is not initialized yet"); 5746 p_oth = (Mat_SeqAIJ *)(*P_oth)->data; 5747 /* Update values in place */ 5748 PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a)); 5749 PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a)); 5750 PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5751 PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5752 PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5753 PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5754 PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a)); 5755 PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a)); 5756 } else SETERRQ(comm, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown reuse type"); 5757 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, P, 0, 0)); 5758 PetscFunctionReturn(PETSC_SUCCESS); 5759 } 5760 5761 /*@C 5762 MatGetBrowsOfAcols - Returns `IS` that contain rows of `B` that equal to nonzero columns of local `A` 5763 5764 Collective 5765 5766 Input Parameters: 5767 + A - the first matrix in `MATMPIAIJ` format 5768 . B - the second matrix in `MATMPIAIJ` format 5769 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5770 5771 Output Parameters: 5772 + rowb - On input index sets of rows of B to extract (or `NULL`), modified on output 5773 . 
colb  - On input index sets of columns of B to extract (or `NULL`), modified on output
- B_seq - the sequential matrix generated

  Level: developer

.seealso: `Mat`, `MATMPIAIJ`, `IS`, `MatReuse`
@*/
PetscErrorCode MatGetBrowsOfAcols(Mat A, Mat B, MatReuse scall, IS *rowb, IS *colb, Mat *B_seq)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  PetscInt   *idx, i, start, ncols, nzA, nzB, *cmap, imark;
  IS          isrowb, iscolb;
  Mat        *bseq = NULL;

  PetscFunctionBegin;
  PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
             A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
  PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, B, 0, 0));

  if (scall == MAT_INITIAL_MATRIX) {
    /* Build idx[] = sorted union of A's off-diagonal global columns (a->garray, assumed sorted)
       and A's local column range [start, start+nzA) */
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    PetscCall(PetscMalloc1(nzA + nzB, &idx));
    ncols = 0;
    for (i = 0; i < nzB; i++) { /* row < local row index */
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i = 0; i < nzA; i++) idx[ncols++] = start + i;   /* local rows */
    for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
    PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &isrowb));
    PetscCall(ISCreateStride(PETSC_COMM_SELF, B->cmap->N, 0, 1, &iscolb));
  } else {
    PetscCheck(rowb && colb, PETSC_COMM_SELF, PETSC_ERR_SUP, "IS rowb and colb must be provided for MAT_REUSE_MATRIX");
    isrowb = *rowb;
    iscolb = *colb;
    PetscCall(PetscMalloc1(1, &bseq));
    bseq[0] = *B_seq;
  }
  PetscCall(MatCreateSubMatrices(B, 1, &isrowb, &iscolb, scall, &bseq));
  *B_seq = bseq[0];
  PetscCall(PetscFree(bseq));
  if (!rowb) {
    PetscCall(ISDestroy(&isrowb));
  } else {
    *rowb = isrowb;
  }
  if (!colb) {
    PetscCall(ISDestroy(&iscolb));
  } else {
    *colb = iscolb;
  }
  PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, B, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   MatGetBrowsOfAoCols_MPIAIJ - Creates a `MATSEQAIJ` matrix by taking rows of B that equal to nonzero columns
   of the OFF-DIAGONAL portion of local A

   Collective

   Input Parameters:
+  A,B - the matrices in `MATMPIAIJ` format
-  scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`

   Output Parameter:
+  startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
.  startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
.  bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
-  B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N

   Developer Note:
   This directly accesses information inside the VecScatter associated with the matrix-vector product
   for this matrix. This is not desirable..
   Level: developer

*/

PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A, Mat B, MatReuse scall, PetscInt **startsj_s, PetscInt **startsj_r, MatScalar **bufa_ptr, Mat *B_oth)
{
  Mat_MPIAIJ        *a = (Mat_MPIAIJ *)A->data;
  VecScatter         ctx;
  MPI_Comm           comm;
  const PetscMPIInt *rprocs, *sprocs;
  PetscMPIInt        nrecvs, nsends;
  const PetscInt    *srow, *rstarts, *sstarts;
  PetscInt          *rowlen, *bufj, *bufJ, ncols = 0, aBn = a->B->cmap->n, row, *b_othi, *b_othj, *rvalues = NULL, *svalues = NULL, *cols, sbs, rbs;
  PetscInt           i, j, k = 0, l, ll, nrows, *rstartsj = NULL, *sstartsj, len;
  PetscScalar       *b_otha, *bufa, *bufA, *vals = NULL;
  MPI_Request       *reqs = NULL, *rwaits = NULL, *swaits = NULL;
  PetscMPIInt        size, tag, rank, nreqs;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));

  PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
             A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
  PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, B, 0, 0));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));

  if (size == 1) {
    /* NOTE(review): these assign only the local parameter copies (the caller's
       pointers are untouched); they merely mark the outputs unused on one rank */
    startsj_s = NULL;
    bufa_ptr  = NULL;
    *B_oth    = NULL;
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  ctx = a->Mvctx;
  tag = ((PetscObject)ctx)->tag;

  PetscCall(VecScatterGetRemote_Private(ctx, PETSC_TRUE /*send*/, &nsends, &sstarts, &srow, &sprocs, &sbs));
  /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */
  PetscCall(VecScatterGetRemoteOrdered_Private(ctx, PETSC_FALSE /*recv*/, &nrecvs, &rstarts, NULL /*indices not needed*/, &rprocs, &rbs));
PetscCall(PetscMPIIntCast(nsends + nrecvs, &nreqs)); 5893 PetscCall(PetscMalloc1(nreqs, &reqs)); 5894 rwaits = reqs; 5895 swaits = PetscSafePointerPlusOffset(reqs, nrecvs); 5896 5897 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5898 if (scall == MAT_INITIAL_MATRIX) { 5899 /* i-array */ 5900 /* post receives */ 5901 if (nrecvs) PetscCall(PetscMalloc1(rbs * (rstarts[nrecvs] - rstarts[0]), &rvalues)); /* rstarts can be NULL when nrecvs=0 */ 5902 for (i = 0; i < nrecvs; i++) { 5903 rowlen = rvalues + rstarts[i] * rbs; 5904 nrows = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of indices to be received */ 5905 PetscCallMPI(MPIU_Irecv(rowlen, nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i)); 5906 } 5907 5908 /* pack the outgoing message */ 5909 PetscCall(PetscMalloc2(nsends + 1, &sstartsj, nrecvs + 1, &rstartsj)); 5910 5911 sstartsj[0] = 0; 5912 rstartsj[0] = 0; 5913 len = 0; /* total length of j or a array to be sent */ 5914 if (nsends) { 5915 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5916 PetscCall(PetscMalloc1(sbs * (sstarts[nsends] - sstarts[0]), &svalues)); 5917 } 5918 for (i = 0; i < nsends; i++) { 5919 rowlen = svalues + (sstarts[i] - sstarts[0]) * sbs; 5920 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5921 for (j = 0; j < nrows; j++) { 5922 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5923 for (l = 0; l < sbs; l++) { 5924 PetscCall(MatGetRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); /* rowlength */ 5925 5926 rowlen[j * sbs + l] = ncols; 5927 5928 len += ncols; 5929 PetscCall(MatRestoreRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); 5930 } 5931 k++; 5932 } 5933 PetscCallMPI(MPIU_Isend(rowlen, nrows * sbs, MPIU_INT, sprocs[i], tag, comm, swaits + i)); 5934 5935 sstartsj[i + 1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5936 } 5937 /* recvs and sends of i-array are completed */ 5938 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5939 
PetscCall(PetscFree(svalues)); 5940 5941 /* allocate buffers for sending j and a arrays */ 5942 PetscCall(PetscMalloc1(len + 1, &bufj)); 5943 PetscCall(PetscMalloc1(len + 1, &bufa)); 5944 5945 /* create i-array of B_oth */ 5946 PetscCall(PetscMalloc1(aBn + 2, &b_othi)); 5947 5948 b_othi[0] = 0; 5949 len = 0; /* total length of j or a array to be received */ 5950 k = 0; 5951 for (i = 0; i < nrecvs; i++) { 5952 rowlen = rvalues + (rstarts[i] - rstarts[0]) * rbs; 5953 nrows = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of rows to be received */ 5954 for (j = 0; j < nrows; j++) { 5955 b_othi[k + 1] = b_othi[k] + rowlen[j]; 5956 PetscCall(PetscIntSumError(rowlen[j], len, &len)); 5957 k++; 5958 } 5959 rstartsj[i + 1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5960 } 5961 PetscCall(PetscFree(rvalues)); 5962 5963 /* allocate space for j and a arrays of B_oth */ 5964 PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_othj)); 5965 PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_otha)); 5966 5967 /* j-array */ 5968 /* post receives of j-array */ 5969 for (i = 0; i < nrecvs; i++) { 5970 nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */ 5971 PetscCallMPI(MPIU_Irecv(b_othj + rstartsj[i], nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i)); 5972 } 5973 5974 /* pack the outgoing message j-array */ 5975 if (nsends) k = sstarts[0]; 5976 for (i = 0; i < nsends; i++) { 5977 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5978 bufJ = bufj + sstartsj[i]; 5979 for (j = 0; j < nrows; j++) { 5980 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5981 for (ll = 0; ll < sbs; ll++) { 5982 PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL)); 5983 for (l = 0; l < ncols; l++) *bufJ++ = cols[l]; 5984 PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL)); 5985 } 5986 } 5987 PetscCallMPI(MPIU_Isend(bufj + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_INT, sprocs[i], tag, comm, swaits + i)); 5988 } 5989 5990 /* 
recvs and sends of j-array are completed */ 5991 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5992 } else if (scall == MAT_REUSE_MATRIX) { 5993 sstartsj = *startsj_s; 5994 rstartsj = *startsj_r; 5995 bufa = *bufa_ptr; 5996 PetscCall(MatSeqAIJGetArrayWrite(*B_oth, &b_otha)); 5997 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container"); 5998 5999 /* a-array */ 6000 /* post receives of a-array */ 6001 for (i = 0; i < nrecvs; i++) { 6002 nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */ 6003 PetscCallMPI(MPIU_Irecv(b_otha + rstartsj[i], nrows, MPIU_SCALAR, rprocs[i], tag, comm, rwaits + i)); 6004 } 6005 6006 /* pack the outgoing message a-array */ 6007 if (nsends) k = sstarts[0]; 6008 for (i = 0; i < nsends; i++) { 6009 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 6010 bufA = bufa + sstartsj[i]; 6011 for (j = 0; j < nrows; j++) { 6012 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 6013 for (ll = 0; ll < sbs; ll++) { 6014 PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals)); 6015 for (l = 0; l < ncols; l++) *bufA++ = vals[l]; 6016 PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals)); 6017 } 6018 } 6019 PetscCallMPI(MPIU_Isend(bufa + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_SCALAR, sprocs[i], tag, comm, swaits + i)); 6020 } 6021 /* recvs and sends of a-array are completed */ 6022 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 6023 PetscCall(PetscFree(reqs)); 6024 6025 if (scall == MAT_INITIAL_MATRIX) { 6026 Mat_SeqAIJ *b_oth; 6027 6028 /* put together the new matrix */ 6029 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, aBn, B->cmap->N, b_othi, b_othj, b_otha, B_oth)); 6030 6031 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 6032 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 6033 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 6034 b_oth->free_a = PETSC_TRUE; 6035 b_oth->free_ij = PETSC_TRUE; 6036 b_oth->nonew = 0; 6037 6038 PetscCall(PetscFree(bufj)); 6039 if (!startsj_s || !bufa_ptr) { 6040 PetscCall(PetscFree2(sstartsj, rstartsj)); 6041 PetscCall(PetscFree(bufa_ptr)); 6042 } else { 6043 *startsj_s = sstartsj; 6044 *startsj_r = rstartsj; 6045 *bufa_ptr = bufa; 6046 } 6047 } else if (scall == MAT_REUSE_MATRIX) { 6048 PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth, &b_otha)); 6049 } 6050 6051 PetscCall(VecScatterRestoreRemote_Private(ctx, PETSC_TRUE, &nsends, &sstarts, &srow, &sprocs, &sbs)); 6052 PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx, PETSC_FALSE, &nrecvs, &rstarts, NULL, &rprocs, &rbs)); 6053 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, B, 0, 0)); 6054 PetscFunctionReturn(PETSC_SUCCESS); 6055 } 6056 6057 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat, MatType, MatReuse, Mat *); 6058 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat, MatType, MatReuse, Mat *); 6059 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat, MatType, MatReuse, Mat *); 6060 #if defined(PETSC_HAVE_MKL_SPARSE) 6061 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat, MatType, MatReuse, Mat *); 6062 #endif 6063 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat, MatType, MatReuse, Mat *); 6064 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *); 6065 #if defined(PETSC_HAVE_ELEMENTAL) 6066 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat, MatType, MatReuse, Mat *); 6067 #endif 6068 #if defined(PETSC_HAVE_SCALAPACK) 6069 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *); 6070 #endif 6071 #if defined(PETSC_HAVE_HYPRE) 6072 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *); 6073 #endif 6074 #if defined(PETSC_HAVE_CUDA) 6075 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat, MatType, 
MatReuse, Mat *);
#endif
#if defined(PETSC_HAVE_HIP)
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJHIPSPARSE(Mat, MatType, MatReuse, Mat *);
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat, MatType, MatReuse, Mat *);
#endif
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat, MatType, MatReuse, Mat *);
PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);

/*
    Computes (B'*A')' since computing B*A directly is untenable

       n                       p                          p
  [       ]       [       ]         [                  ]
m [   A   ]  *  n [   B   ]   =   m [         C        ]
  [       ]       [       ]         [                  ]

*/
static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A, Mat B, Mat C)
{
  Mat At, Bt, Ct;

  PetscFunctionBegin;
  PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &At));
  PetscCall(MatTranspose(B, MAT_INITIAL_MATRIX, &Bt));
  PetscCall(MatMatMult(Bt, At, MAT_INITIAL_MATRIX, PETSC_CURRENT, &Ct));
  PetscCall(MatDestroy(&At));
  PetscCall(MatDestroy(&Bt));
  /* C = Ct'; reuse C's existing structure for the in-place transpose */
  PetscCall(MatTransposeSetPrecursor(Ct, C));
  PetscCall(MatTranspose(Ct, MAT_REUSE_MATRIX, &C));
  PetscCall(MatDestroy(&Ct));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A, Mat B, PetscReal fill, Mat C)
{
  PetscBool cisdense;

  PetscFunctionBegin;
  PetscCheck(A->cmap->n == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT, A->cmap->n, B->rmap->n);
  PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N));
  PetscCall(MatSetBlockSizesFromMats(C, A, B));
  /* C defaults to A's (dense) type unless the caller already made it some dense flavor */
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, MATMPIDENSEHIP, ""));
  if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
  PetscCall(MatSetUp(C));

  C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
{
  Mat_Product *product = C->product;
  Mat          A = product->A, B = product->B;

  PetscFunctionBegin;
  PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
             A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
  C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
  C->ops->productsymbolic = MatProductSymbolic_AB;
  PetscFunctionReturn(PETSC_SUCCESS);
}

PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
{
  Mat_Product *product = C->product;

  PetscFunctionBegin;
  if (product->type == MATPRODUCT_AB) PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix

  Input Parameters:

    j1,rowBegin1,rowEnd1,jmap1: describe the first set of nonzeros (Set1)
    j2,rowBegin2,rowEnd2,jmap2: describe the second set of nonzeros (Set2)

    mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat

    For Set1, j1[] contains column indices of the nonzeros.
    For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k
    respectively (note rowEnd1[k] is not necessarily equal to rowBegin1[k+1]). Indices in this range of j1[] are sorted,
    but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1.

    Similar for Set2.

  This routine merges the two sets of nonzeros row by row and removes repeats.

  Output Parameters: (memory is allocated by the caller)

    i[],j[]: the CSR of the merged matrix, which has m rows.
    imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix.
    imap2[]: similar to imap1[], but for Set2.
    Note we order nonzeros row-by-row and from left to right.
*/
static PetscErrorCode MatMergeEntries_Internal(Mat mat, const PetscInt j1[], const PetscInt j2[], const PetscCount rowBegin1[], const PetscCount rowEnd1[], const PetscCount rowBegin2[], const PetscCount rowEnd2[], const PetscCount jmap1[], const PetscCount jmap2[], PetscCount imap1[], PetscCount imap2[], PetscInt i[], PetscInt j[])
{
  PetscInt   r, m; /* Row index of mat */
  PetscCount t, t1, t2, b1, e1, b2, e2;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(mat, &m, NULL));
  t1 = t2 = t = 0; /* Count unique nonzeros of Set1, Set2 and the merged respectively */
  i[0] = 0;
  for (r = 0; r < m; r++) { /* Do row by row merging */
    b1 = rowBegin1[r];
    e1 = rowEnd1[r];
    b2 = rowBegin2[r];
    e2 = rowEnd2[r];
    /* Two-pointer merge; b1/b2 advance by the repeat count of each unique nonzero */
    while (b1 < e1 && b2 < e2) {
      if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */
        j[t]      = j1[b1];
        imap1[t1] = t;
        imap2[t2] = t;
        b1 += jmap1[t1 + 1] - jmap1[t1]; /* Jump to next unique local nonzero */
        b2 += jmap2[t2 + 1] - jmap2[t2]; /* Jump to next unique remote nonzero */
        t1++;
        t2++;
        t++;
      } else if (j1[b1] < j2[b2]) {
        j[t]      = j1[b1];
        imap1[t1] = t;
        b1 += jmap1[t1 + 1] - jmap1[t1];
        t1++;
        t++;
      } else {
        j[t]      = j2[b2];
        imap2[t2] = t;
        b2 += jmap2[t2 + 1] - jmap2[t2];
        t2++;
        t++;
      }
    }
    /* Merge the remaining in either j1[] or j2[] */
    while (b1 < e1) {
      j[t]      = j1[b1];
      imap1[t1] = t;
      b1 += jmap1[t1 + 1] - jmap1[t1];
      t1++;
      t++;
    }
    while (b2 < e2) {
      j[t]      = j2[b2];
      imap2[t2] = t;
      b2 += jmap2[t2 + 1] - jmap2[t2];
      t2++;
      t++;
    }
    PetscCall(PetscIntCast(t, i + r + 1));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block

  Input Parameters:
    mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m.
    n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[]
    respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n.

    i[] is already sorted, but within a row, j[] is not sorted and might have repeats.
    i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting.

  Output Parameters:
    j[],perm[]: the routine needs to sort j[] within each row along with perm[].
    rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller.
      They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block,
      and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block.

    Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine.
      Atot: number of entries belonging to the diagonal block.
      Annz: number of unique nonzeros belonging to the diagonal block.
      Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count
        repeats (i.e., same 'i,j' pair).
      Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. More precisely, Ajmap[t+1] - Ajmap[t]
        is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0.

    Atot: number of entries belonging to the diagonal block
    Annz: number of unique nonzeros belonging to the diagonal block.

    Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block.

    Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1().
*/
static PetscErrorCode MatSplitEntries_Internal(Mat mat, PetscCount n, const PetscInt i[], PetscInt j[], PetscCount perm[], PetscCount rowBegin[], PetscCount rowMid[], PetscCount rowEnd[], PetscCount *Atot_, PetscCount **Aperm_, PetscCount *Annz_, PetscCount **Ajmap_, PetscCount *Btot_, PetscCount **Bperm_, PetscCount *Bnnz_, PetscCount **Bjmap_)
{
  PetscInt    cstart, cend, rstart, rend, row, col;
  PetscCount  Atot = 0, Btot = 0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */
  PetscCount  Annz = 0, Bnnz = 0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */
  PetscCount  k, m, p, q, r, s, mid;
  PetscCount *Aperm, *Bperm, *Ajmap, *Bjmap;

  PetscFunctionBegin;
  PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
  PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
  m = rend - rstart;

  /* Skip negative rows */
  for (k = 0; k < n; k++)
    if (i[k] >= 0) break;

  /* Process [k,n): sort and partition each local row into diag and offdiag portions,
     fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz.
  */
  while (k < n) {
    row = i[k];
    /* Entries in [k,s) are in one row. Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */
    for (s = k; s < n; s++)
      if (i[s] != row) break;

    /* Shift diag columns to range of [-PETSC_INT_MAX, -1] */
    for (p = k; p < s; p++) {
      if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_INT_MAX;
      /* NOTE(review): the bound below admits j[p] == mat->cmap->N; verify whether it should be strict (<) */
      else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index %" PetscInt_FMT " is out of range", j[p]);
    }
    PetscCall(PetscSortIntWithCountArray(s - k, j + k, perm + k));
    PetscCall(PetscSortedIntUpperBound(j, k, s, -1, &mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */
    rowBegin[row - rstart] = k;
    rowMid[row - rstart]   = mid;
    rowEnd[row - rstart]   = s;

    /* Count nonzeros of this diag/offdiag row, which might have repeats */
    Atot += mid - k;
    Btot += s - mid;

    /* Count unique nonzeros of this diag row */
    for (p = k; p < mid;) {
      col = j[p];
      do {
        j[p] += PETSC_INT_MAX; /* Revert the modified diagonal indices */
        p++;
      } while (p < mid && j[p] == col);
      Annz++;
    }

    /* Count unique nonzeros of this offdiag row */
    for (p = mid; p < s;) {
      col = j[p];
      do {
        p++;
      } while (p < s && j[p] == col);
      Bnnz++;
    }
    k = s;
  }

  /* Allocation according to Atot, Btot, Annz, Bnnz */
  PetscCall(PetscMalloc1(Atot, &Aperm));
  PetscCall(PetscMalloc1(Btot, &Bperm));
  PetscCall(PetscMalloc1(Annz + 1, &Ajmap));
  PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap));

  /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */
  Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0;
  for (r = 0; r < m; r++) {
    k   = rowBegin[r];
    mid = rowMid[r];
    s   = rowEnd[r];
    PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Aperm, Atot), PetscSafePointerPlusOffset(perm, k), mid - k));
    PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Bperm, Btot), PetscSafePointerPlusOffset(perm, mid), s - mid));
    Atot += mid - k;
    Btot += s - mid;

    /* Scan column indices in this row and find out how many repeats each unique nonzero has */
    for (p = k; p < mid;) {
      col = j[p];
      q   = p;
      do {
        p++;
      } while (p < mid && j[p] == col);
      Ajmap[Annz + 1] = Ajmap[Annz] + (p - q);
      Annz++;
    }

    for (p = mid; p < s;) {
      col = j[p];
      q   = p;
      do {
        p++;
      } while (p < s && j[p] == col);
      Bjmap[Bnnz + 1] = Bjmap[Bnnz] + (p - q);
      Bnnz++;
    }
  }
  /* Output */
  *Aperm_ = Aperm;
  *Annz_  = Annz;
  *Atot_  = Atot;
  *Ajmap_ = Ajmap;
  *Bperm_ = Bperm;
  *Bnnz_  = Bnnz;
  *Btot_  = Btot;
  *Bjmap_ = Bjmap;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix

  Input Parameters:
    nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[]
    nnz:  number of unique nonzeros in the merged matrix
    imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix
    jmap[nnz1+1]: i-th nonzero in the set has jmap[i+1] - jmap[i] repeats in the set

  Output Parameter: (memory is allocated by the caller)
    jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set

  Example:
    nnz1 = 4
    nnz  = 6
    imap = [1,3,4,5]
    jmap = [0,3,5,6,7]
  then,
    jmap_new = [0,0,3,3,5,6,7]
*/
static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1, PetscCount nnz, const PetscCount imap[], const PetscCount jmap[], PetscCount jmap_new[])
{
  PetscCount k, p;

  PetscFunctionBegin;
  jmap_new[0] = 0;
  p = nnz;                          /* p loops over jmap_new[] backwards */
  for (k = nnz1 - 1; k >= 0; k--) { /* k loops over imap[] */
    for (; p >
imap[k]; p--) jmap_new[p] = jmap[k + 1];
  }
  /* Positions at or before the first mapped nonzero all get the base offset */
  for (; p >= 0; p--) jmap_new[p] = jmap[0];
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Destroy the COO assembly metadata (SF, permutation and jmap arrays, buffers) attached to an MPIAIJ matrix */
static PetscErrorCode MatCOOStructDestroy_MPIAIJ(void **data)
{
  MatCOOStruct_MPIAIJ *coo = (MatCOOStruct_MPIAIJ *)*data;

  PetscFunctionBegin;
  PetscCall(PetscSFDestroy(&coo->sf));
  PetscCall(PetscFree(coo->Aperm1));
  PetscCall(PetscFree(coo->Bperm1));
  PetscCall(PetscFree(coo->Ajmap1));
  PetscCall(PetscFree(coo->Bjmap1));
  PetscCall(PetscFree(coo->Aimap2));
  PetscCall(PetscFree(coo->Bimap2));
  PetscCall(PetscFree(coo->Aperm2));
  PetscCall(PetscFree(coo->Bperm2));
  PetscCall(PetscFree(coo->Ajmap2));
  PetscCall(PetscFree(coo->Bjmap2));
  PetscCall(PetscFree(coo->Cperm1));
  PetscCall(PetscFree2(coo->sendbuf, coo->recvbuf));
  PetscCall(PetscFree(coo));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, PetscInt coo_i[], PetscInt coo_j[])
{
  MPI_Comm             comm;
  PetscMPIInt          rank, size;
  PetscInt             m, n, M, N, rstart, rend, cstart, cend; /* Sizes, indices of row/col, therefore with type PetscInt */
  PetscCount           k, p, q, rem;                           /* Loop variables over coo arrays */
  Mat_MPIAIJ          *mpiaij = (Mat_MPIAIJ *)mat->data;
  PetscContainer       container;
  MatCOOStruct_MPIAIJ *coo;

  PetscFunctionBegin;
  /* Discard any previously built off-process structures; the COO path rebuilds them */
  PetscCall(PetscFree(mpiaij->garray));
  PetscCall(VecDestroy(&mpiaij->lvec));
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&mpiaij->colmap));
#else
  PetscCall(PetscFree(mpiaij->colmap));
#endif
  PetscCall(VecScatterDestroy(&mpiaij->Mvctx));
  mat->assembled     = PETSC_FALSE;
  mat->was_assembled = PETSC_FALSE;

  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));
  PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
  PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
  PetscCall(MatGetLocalSize(mat, &m, &n));
  PetscCall(MatGetSize(mat, &M, &N));

  /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */
  /* entries come first, then local rows, then remote rows.                      */
  PetscCount n1 = coo_n, *perm1;
  PetscInt  *i1 = coo_i, *j1 = coo_j;

  PetscCall(PetscMalloc1(n1, &perm1));
  for (k = 0; k < n1; k++) perm1[k] = k;

  /* Manipulate indices so that entries with negative row or col indices will have smallest
     row indices, local entries will have greater but negative row indices, and remote entries
     will have positive row indices.
  */
  for (k = 0; k < n1; k++) {
    if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_INT_MIN;                /* e.g., -2^31, minimal to move them ahead */
    else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_INT_MAX; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_INT_MAX, -1] */
    else {
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_USER_INPUT, "MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows");
      if (mpiaij->donotstash) i1[k] = PETSC_INT_MIN; /* Ignore offproc entries as if they had negative indices */
    }
  }

  /* Sort by row; after that, [0,k) have ignored entries, [k,rem) have local rows and [rem,n1) have remote rows */
  PetscCall(PetscSortIntWithIntCountArrayPair(n1, i1, j1, perm1));

  /* Advance k to the first entry we need to take care of */
  for (k = 0; k < n1; k++)
    if (i1[k] > PETSC_INT_MIN) break;
  PetscCount i1start = k;

  PetscCall(PetscSortedIntUpperBound(i1, k, n1, rend - 1 - PETSC_INT_MAX, &rem)); /* rem is upper bound of the last local row */
  for (; k < rem; k++) i1[k] += PETSC_INT_MAX;                                    /* Revert row indices of local rows */

  /* Send remote rows to their owner */
  /* Find which rows should be sent to which remote ranks */
  PetscInt        nsend = 0; /* Number of MPI ranks to send data to */
  PetscMPIInt    *sendto;    /* [nsend], storing remote ranks */
  PetscInt       *nentries;  /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */
  const PetscInt *ranges;
  PetscInt        maxNsend = size >= 128 ? 128 : size; /* Assume max 128 neighbors; realloc when needed */

  PetscCall(PetscLayoutGetRanges(mat->rmap, &ranges));
  PetscCall(PetscMalloc2(maxNsend, &sendto, maxNsend, &nentries));
  for (k = rem; k < n1;) {
    PetscMPIInt owner;
    PetscInt    firstRow, lastRow;

    /* Locate a row range */
    firstRow = i1[k]; /* first row of this owner */
    PetscCall(PetscLayoutFindOwner(mat->rmap, firstRow, &owner));
    lastRow = ranges[owner + 1] - 1; /* last row of this owner */

    /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */
    PetscCall(PetscSortedIntUpperBound(i1, k, n1, lastRow, &p));

    /* All entries in [k,p) belong to this remote owner */
    if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */
      PetscMPIInt *sendto2;
      PetscInt    *nentries2;
      PetscInt     maxNsend2 = (maxNsend <= size / 2) ?
maxNsend * 2 : size; 6525 6526 PetscCall(PetscMalloc2(maxNsend2, &sendto2, maxNsend2, &nentries2)); 6527 PetscCall(PetscArraycpy(sendto2, sendto, maxNsend)); 6528 PetscCall(PetscArraycpy(nentries2, nentries2, maxNsend + 1)); 6529 PetscCall(PetscFree2(sendto, nentries2)); 6530 sendto = sendto2; 6531 nentries = nentries2; 6532 maxNsend = maxNsend2; 6533 } 6534 sendto[nsend] = owner; 6535 PetscCall(PetscIntCast(p - k, &nentries[nsend])); 6536 nsend++; 6537 k = p; 6538 } 6539 6540 /* Build 1st SF to know offsets on remote to send data */ 6541 PetscSF sf1; 6542 PetscInt nroots = 1, nroots2 = 0; 6543 PetscInt nleaves = nsend, nleaves2 = 0; 6544 PetscInt *offsets; 6545 PetscSFNode *iremote; 6546 6547 PetscCall(PetscSFCreate(comm, &sf1)); 6548 PetscCall(PetscMalloc1(nsend, &iremote)); 6549 PetscCall(PetscMalloc1(nsend, &offsets)); 6550 for (k = 0; k < nsend; k++) { 6551 iremote[k].rank = sendto[k]; 6552 iremote[k].index = 0; 6553 nleaves2 += nentries[k]; 6554 PetscCheck(nleaves2 >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF leaves is too large for PetscInt"); 6555 } 6556 PetscCall(PetscSFSetGraph(sf1, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 6557 PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1, MPIU_INT, PETSC_MEMTYPE_HOST, &nroots2 /*rootdata*/, PETSC_MEMTYPE_HOST, nentries /*leafdata*/, PETSC_MEMTYPE_HOST, offsets /*leafupdate*/, MPI_SUM)); 6558 PetscCall(PetscSFFetchAndOpEnd(sf1, MPIU_INT, &nroots2, nentries, offsets, MPI_SUM)); /* Would nroots2 overflow, we check offsets[] below */ 6559 PetscCall(PetscSFDestroy(&sf1)); 6560 PetscAssert(nleaves2 == n1 - rem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT, nleaves2, n1 - rem); 6561 6562 /* Build 2nd SF to send remote COOs to their owner */ 6563 PetscSF sf2; 6564 nroots = nroots2; 6565 nleaves = nleaves2; 6566 PetscCall(PetscSFCreate(comm, &sf2)); 6567 PetscCall(PetscSFSetFromOptions(sf2)); 6568 
PetscCall(PetscMalloc1(nleaves, &iremote)); 6569 p = 0; 6570 for (k = 0; k < nsend; k++) { 6571 PetscCheck(offsets[k] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF roots is too large for PetscInt"); 6572 for (q = 0; q < nentries[k]; q++, p++) { 6573 iremote[p].rank = sendto[k]; 6574 PetscCall(PetscIntCast(offsets[k] + q, &iremote[p].index)); 6575 } 6576 } 6577 PetscCall(PetscSFSetGraph(sf2, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 6578 6579 /* Send the remote COOs to their owner */ 6580 PetscInt n2 = nroots, *i2, *j2; /* Buffers for received COOs from other ranks, along with a permutation array */ 6581 PetscCount *perm2; /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */ 6582 PetscCall(PetscMalloc3(n2, &i2, n2, &j2, n2, &perm2)); 6583 PetscAssert(rem == 0 || i1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null"); 6584 PetscAssert(rem == 0 || j1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null"); 6585 PetscInt *i1prem = PetscSafePointerPlusOffset(i1, rem); 6586 PetscInt *j1prem = PetscSafePointerPlusOffset(j1, rem); 6587 PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, i1prem, PETSC_MEMTYPE_HOST, i2, MPI_REPLACE)); 6588 PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, i1prem, i2, MPI_REPLACE)); 6589 PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, j1prem, PETSC_MEMTYPE_HOST, j2, MPI_REPLACE)); 6590 PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, j1prem, j2, MPI_REPLACE)); 6591 6592 PetscCall(PetscFree(offsets)); 6593 PetscCall(PetscFree2(sendto, nentries)); 6594 6595 /* Sort received COOs by row along with the permutation array */ 6596 for (k = 0; k < n2; k++) perm2[k] = k; 6597 PetscCall(PetscSortIntWithIntCountArrayPair(n2, i2, j2, perm2)); 6598 6599 /* sf2 only sends contiguous leafdata to contiguous rootdata. 
We record the permutation which will be used to fill leafdata */ 6600 PetscCount *Cperm1; 6601 PetscAssert(rem == 0 || perm1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null"); 6602 PetscCount *perm1prem = PetscSafePointerPlusOffset(perm1, rem); 6603 PetscCall(PetscMalloc1(nleaves, &Cperm1)); 6604 PetscCall(PetscArraycpy(Cperm1, perm1prem, nleaves)); 6605 6606 /* Support for HYPRE matrices, kind of a hack. 6607 Swap min column with diagonal so that diagonal values will go first */ 6608 PetscBool hypre; 6609 PetscCall(PetscStrcmp("_internal_COO_mat_for_hypre", ((PetscObject)mat)->name, &hypre)); 6610 if (hypre) { 6611 PetscInt *minj; 6612 PetscBT hasdiag; 6613 6614 PetscCall(PetscBTCreate(m, &hasdiag)); 6615 PetscCall(PetscMalloc1(m, &minj)); 6616 for (k = 0; k < m; k++) minj[k] = PETSC_INT_MAX; 6617 for (k = i1start; k < rem; k++) { 6618 if (j1[k] < cstart || j1[k] >= cend) continue; 6619 const PetscInt rindex = i1[k] - rstart; 6620 if ((j1[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex)); 6621 minj[rindex] = PetscMin(minj[rindex], j1[k]); 6622 } 6623 for (k = 0; k < n2; k++) { 6624 if (j2[k] < cstart || j2[k] >= cend) continue; 6625 const PetscInt rindex = i2[k] - rstart; 6626 if ((j2[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex)); 6627 minj[rindex] = PetscMin(minj[rindex], j2[k]); 6628 } 6629 for (k = i1start; k < rem; k++) { 6630 const PetscInt rindex = i1[k] - rstart; 6631 if (j1[k] < cstart || j1[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue; 6632 if (j1[k] == minj[rindex]) j1[k] = i1[k] + (cstart - rstart); 6633 else if ((j1[k] - cstart) == rindex) j1[k] = minj[rindex]; 6634 } 6635 for (k = 0; k < n2; k++) { 6636 const PetscInt rindex = i2[k] - rstart; 6637 if (j2[k] < cstart || j2[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue; 6638 if (j2[k] == minj[rindex]) j2[k] = i2[k] + (cstart - rstart); 6639 else if ((j2[k] - cstart) == rindex) j2[k] = minj[rindex]; 6640 } 6641 
PetscCall(PetscBTDestroy(&hasdiag)); 6642 PetscCall(PetscFree(minj)); 6643 } 6644 6645 /* Split local COOs and received COOs into diag/offdiag portions */ 6646 PetscCount *rowBegin1, *rowMid1, *rowEnd1; 6647 PetscCount *Ajmap1, *Aperm1, *Bjmap1, *Bperm1; 6648 PetscCount Annz1, Bnnz1, Atot1, Btot1; 6649 PetscCount *rowBegin2, *rowMid2, *rowEnd2; 6650 PetscCount *Ajmap2, *Aperm2, *Bjmap2, *Bperm2; 6651 PetscCount Annz2, Bnnz2, Atot2, Btot2; 6652 6653 PetscCall(PetscCalloc3(m, &rowBegin1, m, &rowMid1, m, &rowEnd1)); 6654 PetscCall(PetscCalloc3(m, &rowBegin2, m, &rowMid2, m, &rowEnd2)); 6655 PetscCall(MatSplitEntries_Internal(mat, rem, i1, j1, perm1, rowBegin1, rowMid1, rowEnd1, &Atot1, &Aperm1, &Annz1, &Ajmap1, &Btot1, &Bperm1, &Bnnz1, &Bjmap1)); 6656 PetscCall(MatSplitEntries_Internal(mat, n2, i2, j2, perm2, rowBegin2, rowMid2, rowEnd2, &Atot2, &Aperm2, &Annz2, &Ajmap2, &Btot2, &Bperm2, &Bnnz2, &Bjmap2)); 6657 6658 /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */ 6659 PetscInt *Ai, *Bi; 6660 PetscInt *Aj, *Bj; 6661 6662 PetscCall(PetscMalloc1(m + 1, &Ai)); 6663 PetscCall(PetscMalloc1(m + 1, &Bi)); 6664 PetscCall(PetscMalloc1(Annz1 + Annz2, &Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */ 6665 PetscCall(PetscMalloc1(Bnnz1 + Bnnz2, &Bj)); 6666 6667 PetscCount *Aimap1, *Bimap1, *Aimap2, *Bimap2; 6668 PetscCall(PetscMalloc1(Annz1, &Aimap1)); 6669 PetscCall(PetscMalloc1(Bnnz1, &Bimap1)); 6670 PetscCall(PetscMalloc1(Annz2, &Aimap2)); 6671 PetscCall(PetscMalloc1(Bnnz2, &Bimap2)); 6672 6673 PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowBegin1, rowMid1, rowBegin2, rowMid2, Ajmap1, Ajmap2, Aimap1, Aimap2, Ai, Aj)); 6674 PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowMid1, rowEnd1, rowMid2, rowEnd2, Bjmap1, Bjmap2, Bimap1, Bimap2, Bi, Bj)); 6675 6676 /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we */ 6677 /* expect nonzeros in A/B most likely have local contributing 
entries */ 6678 PetscInt Annz = Ai[m]; 6679 PetscInt Bnnz = Bi[m]; 6680 PetscCount *Ajmap1_new, *Bjmap1_new; 6681 6682 PetscCall(PetscMalloc1(Annz + 1, &Ajmap1_new)); 6683 PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap1_new)); 6684 6685 PetscCall(ExpandJmap_Internal(Annz1, Annz, Aimap1, Ajmap1, Ajmap1_new)); 6686 PetscCall(ExpandJmap_Internal(Bnnz1, Bnnz, Bimap1, Bjmap1, Bjmap1_new)); 6687 6688 PetscCall(PetscFree(Aimap1)); 6689 PetscCall(PetscFree(Ajmap1)); 6690 PetscCall(PetscFree(Bimap1)); 6691 PetscCall(PetscFree(Bjmap1)); 6692 PetscCall(PetscFree3(rowBegin1, rowMid1, rowEnd1)); 6693 PetscCall(PetscFree3(rowBegin2, rowMid2, rowEnd2)); 6694 PetscCall(PetscFree(perm1)); 6695 PetscCall(PetscFree3(i2, j2, perm2)); 6696 6697 Ajmap1 = Ajmap1_new; 6698 Bjmap1 = Bjmap1_new; 6699 6700 /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */ 6701 if (Annz < Annz1 + Annz2) { 6702 PetscInt *Aj_new; 6703 PetscCall(PetscMalloc1(Annz, &Aj_new)); 6704 PetscCall(PetscArraycpy(Aj_new, Aj, Annz)); 6705 PetscCall(PetscFree(Aj)); 6706 Aj = Aj_new; 6707 } 6708 6709 if (Bnnz < Bnnz1 + Bnnz2) { 6710 PetscInt *Bj_new; 6711 PetscCall(PetscMalloc1(Bnnz, &Bj_new)); 6712 PetscCall(PetscArraycpy(Bj_new, Bj, Bnnz)); 6713 PetscCall(PetscFree(Bj)); 6714 Bj = Bj_new; 6715 } 6716 6717 /* Create new submatrices for on-process and off-process coupling */ 6718 PetscScalar *Aa, *Ba; 6719 MatType rtype; 6720 Mat_SeqAIJ *a, *b; 6721 PetscObjectState state; 6722 PetscCall(PetscCalloc1(Annz, &Aa)); /* Zero matrix on device */ 6723 PetscCall(PetscCalloc1(Bnnz, &Ba)); 6724 /* make Aj[] local, i.e, based off the start column of the diagonal portion */ 6725 if (cstart) { 6726 for (k = 0; k < Annz; k++) Aj[k] -= cstart; 6727 } 6728 6729 PetscCall(MatGetRootType_Private(mat, &rtype)); 6730 6731 MatSeqXAIJGetOptions_Private(mpiaij->A); 6732 PetscCall(MatDestroy(&mpiaij->A)); 6733 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, Ai, Aj, Aa, &mpiaij->A)); 6734 
PetscCall(MatSetBlockSizesFromMats(mpiaij->A, mat, mat)); 6735 MatSeqXAIJRestoreOptions_Private(mpiaij->A); 6736 6737 MatSeqXAIJGetOptions_Private(mpiaij->B); 6738 PetscCall(MatDestroy(&mpiaij->B)); 6739 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, mat->cmap->N, Bi, Bj, Ba, &mpiaij->B)); 6740 PetscCall(MatSetBlockSizesFromMats(mpiaij->B, mat, mat)); 6741 MatSeqXAIJRestoreOptions_Private(mpiaij->B); 6742 6743 PetscCall(MatSetUpMultiply_MPIAIJ(mat)); 6744 mat->was_assembled = PETSC_TRUE; // was_assembled in effect means the Mvctx is built; doing so avoids redundant MatSetUpMultiply_MPIAIJ 6745 state = mpiaij->A->nonzerostate + mpiaij->B->nonzerostate; 6746 PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat))); 6747 6748 a = (Mat_SeqAIJ *)mpiaij->A->data; 6749 b = (Mat_SeqAIJ *)mpiaij->B->data; 6750 a->free_a = PETSC_TRUE; 6751 a->free_ij = PETSC_TRUE; 6752 b->free_a = PETSC_TRUE; 6753 b->free_ij = PETSC_TRUE; 6754 a->maxnz = a->nz; 6755 b->maxnz = b->nz; 6756 6757 /* conversion must happen AFTER multiply setup */ 6758 PetscCall(MatConvert(mpiaij->A, rtype, MAT_INPLACE_MATRIX, &mpiaij->A)); 6759 PetscCall(MatConvert(mpiaij->B, rtype, MAT_INPLACE_MATRIX, &mpiaij->B)); 6760 PetscCall(VecDestroy(&mpiaij->lvec)); 6761 PetscCall(MatCreateVecs(mpiaij->B, &mpiaij->lvec, NULL)); 6762 6763 // Put the COO struct in a container and then attach that to the matrix 6764 PetscCall(PetscMalloc1(1, &coo)); 6765 coo->n = coo_n; 6766 coo->sf = sf2; 6767 coo->sendlen = nleaves; 6768 coo->recvlen = nroots; 6769 coo->Annz = Annz; 6770 coo->Bnnz = Bnnz; 6771 coo->Annz2 = Annz2; 6772 coo->Bnnz2 = Bnnz2; 6773 coo->Atot1 = Atot1; 6774 coo->Atot2 = Atot2; 6775 coo->Btot1 = Btot1; 6776 coo->Btot2 = Btot2; 6777 coo->Ajmap1 = Ajmap1; 6778 coo->Aperm1 = Aperm1; 6779 coo->Bjmap1 = Bjmap1; 6780 coo->Bperm1 = Bperm1; 6781 coo->Aimap2 = Aimap2; 6782 coo->Ajmap2 = Ajmap2; 6783 coo->Aperm2 = Aperm2; 6784 coo->Bimap2 = Bimap2; 6785 
  coo->Bjmap2  = Bjmap2;
  coo->Bperm2  = Bperm2;
  coo->Cperm1  = Cperm1;
  // Allocate in preallocation. If not used, it has zero cost on host
  PetscCall(PetscMalloc2(coo->sendlen, &coo->sendbuf, coo->recvlen, &coo->recvbuf));
  PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container));
  PetscCall(PetscContainerSetPointer(container, coo));
  PetscCall(PetscContainerSetCtxDestroy(container, MatCOOStructDestroy_MPIAIJ));
  PetscCall(PetscObjectCompose((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject)container));
  PetscCall(PetscContainerDestroy(&container));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Insert the COO values v[] (in the i/j layout given at MatSetPreallocationCOO() time) into the
   diagonal (A) and off-diagonal (B) blocks, using the maps stored in the attached MatCOOStruct.
   Off-process values are shipped to their owner with the COO struct's PetscSF. */
static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat, const PetscScalar v[], InsertMode imode)
{
  Mat_MPIAIJ          *mpiaij = (Mat_MPIAIJ *)mat->data;
  Mat                  A = mpiaij->A, B = mpiaij->B;
  PetscScalar         *Aa, *Ba;
  PetscScalar         *sendbuf, *recvbuf;
  const PetscCount    *Ajmap1, *Ajmap2, *Aimap2;
  const PetscCount    *Bjmap1, *Bjmap2, *Bimap2;
  const PetscCount    *Aperm1, *Aperm2, *Bperm1, *Bperm2;
  const PetscCount    *Cperm1;
  PetscContainer       container;
  MatCOOStruct_MPIAIJ *coo;

  PetscFunctionBegin;
  /* The COO struct was composed on the matrix by MatSetPreallocationCOO_MPIAIJ() */
  PetscCall(PetscObjectQuery((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject *)&container));
  PetscCheck(container, PetscObjectComm((PetscObject)mat), PETSC_ERR_PLIB, "Not found MatCOOStruct on this matrix");
  PetscCall(PetscContainerGetPointer(container, (void **)&coo));
  sendbuf = coo->sendbuf;
  recvbuf = coo->recvbuf;
  Ajmap1  = coo->Ajmap1;
  Ajmap2  = coo->Ajmap2;
  Aimap2  = coo->Aimap2;
  Bjmap1  = coo->Bjmap1;
  Bjmap2  = coo->Bjmap2;
  Bimap2  = coo->Bimap2;
  Aperm1  = coo->Aperm1;
  Aperm2  = coo->Aperm2;
  Bperm1  = coo->Bperm1;
  Bperm2  = coo->Bperm2;
  Cperm1  = coo->Cperm1;

  PetscCall(MatSeqAIJGetArray(A, &Aa)); /* Might read and write matrix values */
  PetscCall(MatSeqAIJGetArray(B, &Ba));

  /* Pack entries to be sent to remote */
  for (PetscCount i = 0; i < coo->sendlen; i++) sendbuf[i] = v[Cperm1[i]];

  /* Send remote entries to their owner and overlap the communication with local computation */
  PetscCall(PetscSFReduceWithMemTypeBegin(coo->sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, sendbuf, PETSC_MEMTYPE_HOST, recvbuf, MPI_REPLACE));
  /* Add local entries to A and B */
  for (PetscCount i = 0; i < coo->Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */
    PetscScalar sum = 0.0;                     /* Do partial summation first to improve numerical stability */
    for (PetscCount k = Ajmap1[i]; k < Ajmap1[i + 1]; k++) sum += v[Aperm1[k]];
    Aa[i] = (imode == INSERT_VALUES ? 0.0 : Aa[i]) + sum;
  }
  for (PetscCount i = 0; i < coo->Bnnz; i++) {
    PetscScalar sum = 0.0;
    for (PetscCount k = Bjmap1[i]; k < Bjmap1[i + 1]; k++) sum += v[Bperm1[k]];
    Ba[i] = (imode == INSERT_VALUES ? 0.0 : Ba[i]) + sum;
  }
  PetscCall(PetscSFReduceEnd(coo->sf, MPIU_SCALAR, sendbuf, recvbuf, MPI_REPLACE));

  /* Add received remote entries to A and B */
  for (PetscCount i = 0; i < coo->Annz2; i++) {
    for (PetscCount k = Ajmap2[i]; k < Ajmap2[i + 1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]];
  }
  for (PetscCount i = 0; i < coo->Bnnz2; i++) {
    for (PetscCount k = Bjmap2[i]; k < Bjmap2[i + 1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]];
  }
  PetscCall(MatSeqAIJRestoreArray(A, &Aa));
  PetscCall(MatSeqAIJRestoreArray(B, &Ba));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
   MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.

   Options Database Keys:
.
-mat_type mpiaij - sets the matrix type to `MATMPIAIJ` during a call to `MatSetFromOptions()`

   Level: beginner

   Notes:
   `MatSetValues()` may be called for this matrix type with a `NULL` argument for the numerical values,
   in this case the values associated with the rows and columns one passes in are set to zero
   in the matrix

   `MatSetOptions`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this no
   space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored

.seealso: [](ch_matrices), `Mat`, `MATSEQAIJ`, `MATAIJ`, `MatCreateAIJ()`
M*/
PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ *b;
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));

  PetscCall(PetscNew(&b));
  B->data       = (void *)b;
  B->ops[0]     = MatOps_Values; /* install the MPIAIJ function table (struct copy) */
  B->assembled  = PETSC_FALSE;
  B->insertmode = NOT_SET_VALUES;
  b->size       = size;

  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));

  /* build cache for off array entries formed */
  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL;
  b->garray      = NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = NULL;
  b->rowvalues    = NULL;
  b->getrowactive = PETSC_FALSE;

  /* flexible pointer used in CUSPARSE classes */
  b->spptr = NULL;

  /* Register type-specific operations and conversions queried by name elsewhere in PETSc */
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetUseScalableIncreaseOverlap_C", MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocation_C", MatMPIAIJSetPreallocation_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetPreallocation_C", MatResetPreallocation_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetHash_C", MatResetHash_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocationCSR_C", MatMPIAIJSetPreallocationCSR_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijperm_C", MatConvert_MPIAIJ_MPIAIJPERM));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijsell_C", MatConvert_MPIAIJ_MPIAIJSELL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcusparse_C", MatConvert_MPIAIJ_MPIAIJCUSPARSE));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijhipsparse_C", MatConvert_MPIAIJ_MPIAIJHIPSPARSE));
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijkokkos_C", MatConvert_MPIAIJ_MPIAIJKokkos));
#endif
#if defined(PETSC_HAVE_MKL_SPARSE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijmkl_C", MatConvert_MPIAIJ_MPIAIJMKL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcrl_C", MatConvert_MPIAIJ_MPIAIJCRL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpibaij_C", MatConvert_MPIAIJ_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisbaij_C", MatConvert_MPIAIJ_MPISBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_elemental_C", MatConvert_MPIAIJ_Elemental));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_scalapack_C", MatConvert_AIJ_ScaLAPACK));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_is_C", MatConvert_XAIJ_IS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisell_C", MatConvert_MPIAIJ_MPISELL));
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_hypre_C", MatConvert_AIJ_HYPRE));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", MatProductSetFromOptions_Transpose_AIJ_AIJ));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_is_mpiaij_C", MatProductSetFromOptions_IS_XAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpiaij_C", MatProductSetFromOptions_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetPreallocationCOO_C", MatSetPreallocationCOO_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetValuesCOO_C", MatSetValuesCOO_MPIAIJ));
  PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIAIJ));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatCreateMPIAIJWithSplitArrays - creates a `MATMPIAIJ` matrix using arrays that contain the "diagonal"
  and "off-diagonal" part of the matrix in CSR format.
  Collective

  Input Parameters:
+ comm - MPI communicator
. m    - number of local rows (Cannot be `PETSC_DECIDE`)
. n    - This value should be the same as the local size used in creating the
         x vector for the matrix-vector product $y = Ax$. (or `PETSC_DECIDE` to have
         calculated if `N` is given) For square matrices `n` is almost always `m`.
. M    - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
. N    - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
. i    - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
. j    - column indices, which must be local, i.e., based off the start column of the diagonal portion
. a    - matrix values
. oi   - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
. oj   - column indices, which must be global, representing global columns in the `MATMPIAIJ` matrix
- oa   - matrix values

  Output Parameter:
. mat - the matrix

  Level: advanced

  Notes:
  The `i`, `j`, and `a` arrays ARE NOT copied by this routine into the internal format used by PETSc (even in Fortran). The user
  must free the arrays once the matrix has been destroyed and not before.

  The `i` and `j` indices are 0 based

  See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix

  This sets local rows and cannot be used to set off-processor values.

  Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
  legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
  not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
  the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
  keep track of the underlying array. Use `MatSetOption`(A,`MAT_NO_OFF_PROC_ENTRIES`,`PETSC_TRUE`) to disable all
  communication if it is known that only local entries will be set.

.seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
          `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()`
@*/
PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt i[], PetscInt j[], PetscScalar a[], PetscInt oi[], PetscInt oj[], PetscScalar oa[], Mat *mat)
{
  Mat_MPIAIJ *maij;

  PetscFunctionBegin;
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
  PetscCheck(oi[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "oi (row indices) must start with 0");
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatSetSizes(*mat, m, n, M, N));
  PetscCall(MatSetType(*mat, MATMPIAIJ));
  maij = (Mat_MPIAIJ *)(*mat)->data;

  (*mat)->preallocated = PETSC_TRUE;

  PetscCall(PetscLayoutSetUp((*mat)->rmap));
  PetscCall(PetscLayoutSetUp((*mat)->cmap));

  /* Wrap the user's arrays as the diagonal (A) and off-diagonal (B) sequential blocks; not copied */
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, i, j, a, &maij->A));
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, (*mat)->cmap->N, oi, oj, oa, &maij->B));

  /* All entries are local by construction, so assembly needs no off-process communication */
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
  PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Context for the backend (GPU-friendly) MatMat product implementation */
typedef struct {
  Mat       *mp;    /* intermediate products */
  PetscBool *mptmp; /* is the intermediate product temporary ? */
  PetscInt   cp;    /* number of intermediate products */

  /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */
  PetscInt    *startsj_s, *startsj_r;
  PetscScalar *bufa;
  Mat          P_oth;

  /* may take advantage of merging product->B */
  Mat Bloc; /* B-local by merging diag and off-diag */

  /* cusparse does not have support to split between symbolic and numeric phases.
     When api_user is true, we don't need to update the numerical values
     of the temporary storage */
  PetscBool reusesym;

  /* support for COO values insertion */
  PetscScalar *coo_v, *coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */
  PetscInt   **own;           /* own[i] points to address of on-process COO indices for Mat mp[i] */
  PetscInt   **off;           /* off[i] points to address of off-process COO indices for Mat mp[i] */
  PetscBool    hasoffproc;    /* if true, have off-process values insertion (i.e. AtB or PtAP) */
  PetscSF      sf;            /* used for non-local values insertion and memory malloc */
  PetscMemType mtype;

  /* customization */
  PetscBool abmerge;
  PetscBool P_oth_bind;
} MatMatMPIAIJBACKEND;

/* Release every resource owned by a MatMatMPIAIJBACKEND context (product data destructor) */
static PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data)
{
  MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND *)data;
  PetscInt             i;

  PetscFunctionBegin;
  PetscCall(PetscFree2(mmdata->startsj_s, mmdata->startsj_r));
  PetscCall(PetscFree(mmdata->bufa));
  /* coo_v/coo_w were allocated through the SF with a specific memtype, so free them the same way */
  PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_v));
  PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_w));
  PetscCall(MatDestroy(&mmdata->P_oth));
  PetscCall(MatDestroy(&mmdata->Bloc));
  PetscCall(PetscSFDestroy(&mmdata->sf));
  for (i = 0; i < mmdata->cp; i++) PetscCall(MatDestroy(&mmdata->mp[i]));
  PetscCall(PetscFree2(mmdata->mp, mmdata->mptmp));
  PetscCall(PetscFree(mmdata->own[0]));
  PetscCall(PetscFree(mmdata->own));
  PetscCall(PetscFree(mmdata->off[0]));
  PetscCall(PetscFree(mmdata->off));
  PetscCall(PetscFree(mmdata));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Copy selected n entries with indices in idx[] of A to v[].
   If idx is NULL, copy the whole data array of A to v[]
 */
static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[])
{
  PetscErrorCode (*f)(Mat, PetscInt, const PetscInt[], PetscScalar[]);

  PetscFunctionBegin;
  /* Prefer a type-specific (possibly device-side) implementation when one is composed on A */
  PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJCopySubArray_C", &f));
  if (f) {
    PetscCall((*f)(A, n, idx, v));
  } else {
    const PetscScalar *vv;

    PetscCall(MatSeqAIJGetArrayRead(A, &vv));
    if (n && idx) {
      PetscScalar    *w  = v;
      const PetscInt *oi = idx;
      PetscInt        j;

      for (j = 0; j < n; j++) *w++ = vv[*oi++];
    } else {
      PetscCall(PetscArraycpy(v, vv, n));
    }
    PetscCall(MatSeqAIJRestoreArrayRead(A, &vv));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Numeric phase of the backend MatMat product: recompute the intermediate products and
   scatter their values into C via MatSetValuesCOO() */
static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C)
{
  MatMatMPIAIJBACKEND *mmdata;
  PetscInt             i, n_d, n_o;

  PetscFunctionBegin;
  MatCheckProduct(C, 1);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  mmdata = (MatMatMPIAIJBACKEND *)C->product->data;
  if (!mmdata->reusesym) { /* update temporary matrices */
    if (mmdata->P_oth) PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A, C->product->B, MAT_REUSE_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
    if (mmdata->Bloc) PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B, MAT_REUSE_MATRIX, NULL, &mmdata->Bloc));
  }
  mmdata->reusesym = PETSC_FALSE;

  for (i = 0; i < mmdata->cp; i++) {
    PetscCheck(mmdata->mp[i]->ops->productnumeric, PetscObjectComm((PetscObject)mmdata->mp[i]), PETSC_ERR_PLIB, "Missing numeric op for %s", MatProductTypes[mmdata->mp[i]->product->type]);
    PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]));
  }
  /* Gather values of each (non-temporary) intermediate product into the on-process (coo_v)
     and off-process (coo_w) COO buffers */
  for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) {
    PetscInt noff;

    PetscCall(PetscIntCast(mmdata->off[i + 1] - mmdata->off[i], &noff));
    if (mmdata->mptmp[i]) continue;
    if (noff) {
      PetscInt nown;

      PetscCall(PetscIntCast(mmdata->own[i + 1] - mmdata->own[i], &nown));
      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], noff, mmdata->off[i], mmdata->coo_w + n_o));
      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], nown, mmdata->own[i], mmdata->coo_v + n_d));
      n_o += noff;
      n_d += nown;
    } else {
      Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mmdata->mp[i]->data;

      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], mm->nz, NULL, mmdata->coo_v + n_d));
      n_d += mm->nz;
    }
  }
  if (mmdata->hasoffproc) { /* offprocess insertion */
    PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
    PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
  }
  PetscCall(MatSetValuesCOO(C, mmdata->coo_v, INSERT_VALUES));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Support for Pt * A, A * P, or Pt * A * P */
#define MAX_NUMBER_INTERMEDIATE 4
PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C)
{
  Mat_Product            *product = C->product;
  Mat                     A, P, mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */
  Mat_MPIAIJ             *a, *p;
  MatMatMPIAIJBACKEND    *mmdata;
  ISLocalToGlobalMapping  P_oth_l2g = NULL;
  IS                      glob      = NULL;
  const char             *prefix;
  char                    pprefix[256];
  const PetscInt         *globidx, *P_oth_idx;
  PetscInt                i, j, cp, m, n, M, N, *coo_i, *coo_j;
  PetscCount              ncoo, ncoo_d, ncoo_o, ncoo_oown;
  PetscInt                cmapt[MAX_NUMBER_INTERMEDIATE], rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[].
                                                                                             */
  /* type-0: consecutive, start from 0; type-1: consecutive with */
  /* a base offset; type-2: sparse with a local to global map table */
  const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE], *rmapa[MAX_NUMBER_INTERMEDIATE]; /* col/row local to global map array (table) for type-2 map type */

  MatProductType ptype;
  PetscBool      mptmp[MAX_NUMBER_INTERMEDIATE], hasoffproc = PETSC_FALSE, iscuda, iship, iskokk;
  PetscMPIInt    size;

  PetscFunctionBegin;
  MatCheckProduct(C, 1);
  PetscCheck(!product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty");
  ptype = product->type;
  /* A^T*B with symmetric A is computed as A*B; remember that symmetry was exploited */
  if (product->A->symmetric == PETSC_BOOL3_TRUE && ptype == MATPRODUCT_AtB) {
    ptype                                          = MATPRODUCT_AB;
    product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE;
  }
  /* determine output sizes and whether computed entries may belong to other ranks */
  switch (ptype) {
  case MATPRODUCT_AB:
    A          = product->A;
    P          = product->B;
    m          = A->rmap->n;
    n          = P->cmap->n;
    M          = A->rmap->N;
    N          = P->cmap->N;
    hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */
    break;
  case MATPRODUCT_AtB:
    P          = product->A;
    A          = product->B;
    m          = P->cmap->n;
    n          = A->cmap->n;
    M          = P->cmap->N;
    N          = A->cmap->N;
    hasoffproc = PETSC_TRUE;
    break;
  case MATPRODUCT_PtAP:
    A          = product->A;
    P          = product->B;
    m          = P->cmap->n;
    n          = P->cmap->n;
    M          = P->cmap->N;
    N          = P->cmap->N;
    hasoffproc = PETSC_TRUE;
    break;
  default:
    SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]);
  }
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C), &size));
  if (size == 1) hasoffproc = PETSC_FALSE; /* uniprocessor: every entry is local */

  /* defaults */
  for (i = 0; i < MAX_NUMBER_INTERMEDIATE; i++) {
    mp[i]    = NULL;
    mptmp[i] = PETSC_FALSE;
    rmapt[i] = -1;
    cmapt[i] = -1;
    rmapa[i] = NULL;
    cmapa[i] = NULL;
  }

  /* customization: option names depend on whether the user called the old API
     (MatMatMult/MatPtAP) or the MatProduct API */
  PetscCall(PetscNew(&mmdata));
  mmdata->reusesym = product->api_user;
  if (ptype == MATPRODUCT_AB) {
    if (product->api_user) {
      PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatMult", "Mat");
      PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL));
      PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
      PetscOptionsEnd();
    } else {
      PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat");
      PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL));
      PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
      PetscOptionsEnd();
    }
  } else if (ptype == MATPRODUCT_PtAP) {
    if (product->api_user) {
      PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatPtAP", "Mat");
      PetscCall(PetscOptionsBool("-matptap_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
      PetscOptionsEnd();
    } else {
      PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat");
      PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL));
      PetscOptionsEnd();
    }
  }
  a = (Mat_MPIAIJ *)A->data;
  p = (Mat_MPIAIJ *)P->data;
  PetscCall(MatSetSizes(C, m, n, M, N));
  PetscCall(PetscLayoutSetUp(C->rmap));
  PetscCall(PetscLayoutSetUp(C->cmap));
  PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
  PetscCall(MatGetOptionsPrefix(C, &prefix));

  /* build the chain of sequential products; each gets the "backend_p<cp>_" options
     prefix appended so individual stages can be controlled from the command line */
  cp = 0;
  switch (ptype) {
  case MATPRODUCT_AB: /* A * P */
    PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));

    /* A_diag * P_local (merged or not) */
    if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */
      /* P is product->B */
      PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
      PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      PetscCall(ISGetIndices(glob, &globidx));
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    } else { /* A_diag * P_diag and A_diag * P_off */
      PetscCall(MatProductCreate(a->A, p->A, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 1;
      cmapt[cp] = 1;
      mptmp[cp] = PETSC_FALSE;
      cp++;
      PetscCall(MatProductCreate(a->A, p->B, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = p->garray; /* off-diag columns map through P's garray */
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }

    /* A_off * P_other */
    if (mmdata->P_oth) {
      PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); /* make P_oth use local col ids */
      PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx));
      PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)a->B)->type_name));
      PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind));
      PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = P_oth_idx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }
    break;

  case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */
    /* A is product->B */
    PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
    if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */
      PetscCall(MatProductCreate(mmdata->Bloc, mmdata->Bloc, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      PetscCall(ISGetIndices(glob, &globidx));
      rmapt[cp] = 2;
      rmapa[cp] = globidx;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    } else {
      PetscCall(MatProductCreate(p->A, mmdata->Bloc, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      PetscCall(ISGetIndices(glob, &globidx));
      rmapt[cp] = 1;
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
      PetscCall(MatProductCreate(p->B, mmdata->Bloc, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 2;
      rmapa[cp] = p->garray; /* rows of P_off map through P's garray */
      cmapt[cp] = 2;
      cmapa[cp] = globidx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }
    break;
  case MATPRODUCT_PtAP:
    PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
    /* P is product->B */
    PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc));
    PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp]));
    PetscCall(MatProductSetType(mp[cp], MATPRODUCT_PtAP));
    PetscCall(MatProductSetFill(mp[cp], product->fill));
    PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
    PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
    PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
    mp[cp]->product->api_user = product->api_user;
    PetscCall(MatProductSetFromOptions(mp[cp]));
    PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
    PetscCall(ISGetIndices(glob, &globidx));
    rmapt[cp] = 2;
    rmapa[cp] = globidx;
    cmapt[cp] = 2;
    cmapa[cp] = globidx;
    mptmp[cp] = PETSC_FALSE;
    cp++;
    if (mmdata->P_oth) {
      PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g));
      PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx));
      PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)a->B)->type_name));
      PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind));
      PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      mptmp[cp] = PETSC_TRUE; /* A_off * P_oth only feeds the next product; not inserted into C itself */
      cp++;
      PetscCall(MatProductCreate(mmdata->Bloc, mp[1], NULL, &mp[cp]));
      PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB));
      PetscCall(MatProductSetFill(mp[cp], product->fill));
      PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp));
      PetscCall(MatSetOptionsPrefix(mp[cp], prefix));
      PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix));
      mp[cp]->product->api_user = product->api_user;
      PetscCall(MatProductSetFromOptions(mp[cp]));
      PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp]));
      rmapt[cp] = 2;
      rmapa[cp] = globidx;
      cmapt[cp] = 2;
      cmapa[cp] = P_oth_idx;
      mptmp[cp] = PETSC_FALSE;
      cp++;
    }
    break;
  default:
    SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]);
  }
  /* sanity check */
  if (size > 1)
    for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected offproc map type for product %" PetscInt_FMT, i);

  PetscCall(PetscMalloc2(cp, &mmdata->mp, cp, &mmdata->mptmp));
  for (i = 0; i < cp; i++) {
    mmdata->mp[i]    = mp[i];
    mmdata->mptmp[i] = mptmp[i];
  }
  mmdata->cp             = cp;
  C->product->data       = mmdata;
  C->product->destroy    = MatDestroy_MatMatMPIAIJBACKEND;
  C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND;

  /* memory type: match the COO value buffers to C's backend (CUDA/HIP/Kokkos/host) */
  mmdata->mtype = PETSC_MEMTYPE_HOST;
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iscuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, ""));
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iship, MATSEQAIJHIPSPARSE, MATMPIAIJHIPSPARSE, ""));
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iskokk, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, ""));
  if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA;
  else if (iship) mmdata->mtype = PETSC_MEMTYPE_HIP;
  else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS;

  /* prepare coo
     coordinates for values insertion */

  /* count total nonzeros of those intermediate seqaij Mats
    ncoo_d: # of nonzeros of matrices that do not have offproc entries
    ncoo_o: # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs
    ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally
  */
  for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) {
    Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data;
    if (mptmp[cp]) continue; /* temporary products contribute no entries of their own */
    if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scatter to all processes (might include self) */
      const PetscInt *rmap = rmapa[cp];
      const PetscInt  mr   = mp[cp]->rmap->n;
      const PetscInt  rs   = C->rmap->rstart;
      const PetscInt  re   = C->rmap->rend;
      const PetscInt *ii   = mm->i;
      for (i = 0; i < mr; i++) {
        const PetscInt gr = rmap[i];
        const PetscInt nz = ii[i + 1] - ii[i];
        if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */
        else ncoo_oown += nz;                  /* this row is local */
      }
    } else ncoo_d += mm->nz;
  }

  /*
    ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc

    ncoo = ncoo_d + ncoo_oown + ncoo2, which ncoo2 is number of nonzeros inserted to me by other procs.

    off[0] points to a big index array, which is shared by off[1,2,...]. Similarly, for own[0].

    off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others
    own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally
    so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others.

    coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc.
    Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaining part stores i of nonzeros I will receive.
  */
  PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->off)); /* +1 to make a csr-like data structure */
  PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->own));

  /* gather (i,j) of nonzeros inserted by remote procs */
  if (hasoffproc) {
    PetscSF  msf;
    PetscInt ncoo2, *coo_i2, *coo_j2;

    PetscCall(PetscMalloc1(ncoo_o, &mmdata->off[0]));
    PetscCall(PetscMalloc1(ncoo_oown, &mmdata->own[0]));
    PetscCall(PetscMalloc2(ncoo_o, &coo_i, ncoo_o, &coo_j)); /* to collect (i,j) of entries to be sent to others */

    for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) {
      Mat_SeqAIJ *mm     = (Mat_SeqAIJ *)mp[cp]->data;
      PetscInt   *idxoff = mmdata->off[cp];
      PetscInt   *idxown = mmdata->own[cp];
      if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */
        const PetscInt *rmap = rmapa[cp];
        const PetscInt *cmap = cmapa[cp];
        const PetscInt *ii   = mm->i;
        PetscInt       *coi  = coo_i + ncoo_o;
        PetscInt       *coj  = coo_j + ncoo_o;
        const PetscInt  mr   = mp[cp]->rmap->n;
        const PetscInt  rs   = C->rmap->rstart;
        const PetscInt  re   = C->rmap->rend;
        const PetscInt  cs   = C->cmap->rstart;
        for (i = 0; i < mr; i++) {
          const PetscInt *jj = mm->j + ii[i];
          const PetscInt  gr = rmap[i];
          const PetscInt  nz = ii[i + 1] - ii[i];
          if (gr < rs || gr >= re) { /* this is an offproc row */
            for (j = ii[i]; j < ii[i + 1]; j++) {
              *coi++    = gr;
              *idxoff++ = j; /* remember where the value lives inside mp[cp]'s array */
            }
            if (!cmapt[cp]) { /* already global */
              for (j = 0; j < nz; j++) *coj++ = jj[j];
            } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
              for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
            } else { /* offdiag */
              for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
            }
            ncoo_o += nz;
          } else { /* this is a local row */
            for (j = ii[i]; j < ii[i + 1]; j++) *idxown++ = j;
          }
        }
      }
      /* close the CSR-like segments for mp[cp]; empty for skipped matrices */
      mmdata->off[cp + 1] = idxoff;
      mmdata->own[cp + 1] = idxown;
    }

    /* SF with C's row layout routes each offproc entry to its owner */
    PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf));
    PetscInt incoo_o;
    PetscCall(PetscIntCast(ncoo_o, &incoo_o));
    PetscCall(PetscSFSetGraphLayout(mmdata->sf, C->rmap, incoo_o /*nleaves*/, NULL /*ilocal*/, PETSC_OWN_POINTER, coo_i));
    PetscCall(PetscSFGetMultiSF(mmdata->sf, &msf));
    PetscCall(PetscSFGetGraph(msf, &ncoo2 /*nroots*/, NULL, NULL, NULL));
    ncoo = ncoo_d + ncoo_oown + ncoo2;
    PetscCall(PetscMalloc2(ncoo, &coo_i2, ncoo, &coo_j2));
    PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */
    PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown));
    PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown));
    PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown));
    PetscCall(PetscFree2(coo_i, coo_j));
    /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */
    PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo_o * sizeof(PetscScalar), (void **)&mmdata->coo_w));
    coo_i = coo_i2;
    coo_j = coo_j2;
  } else { /* no offproc values insertion */
    ncoo = ncoo_d;
    PetscCall(PetscMalloc2(ncoo, &coo_i, ncoo, &coo_j));

    /* empty SF so the numeric phase can treat both cases uniformly */
    PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf));
    PetscCall(PetscSFSetGraph(mmdata->sf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER));
    PetscCall(PetscSFSetUp(mmdata->sf));
  }
  mmdata->hasoffproc = hasoffproc;

  /* gather (i,j) of nonzeros inserted locally */
  for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) {
    Mat_SeqAIJ     *mm   = (Mat_SeqAIJ *)mp[cp]->data;
    PetscInt       *coi  = coo_i + ncoo_d;
    PetscInt       *coj  = coo_j + ncoo_d;
    const PetscInt *jj   = mm->j;
    const PetscInt *ii   = mm->i;
    const PetscInt *cmap = cmapa[cp];
    const PetscInt *rmap = rmapa[cp];
    const PetscInt  mr   = mp[cp]->rmap->n;
    const PetscInt  rs   = C->rmap->rstart;
    const PetscInt  re   = C->rmap->rend;
    const PetscInt  cs   = C->cmap->rstart;

    if (mptmp[cp]) continue;
    if (rmapt[cp] == 1) { /* consecutive rows */
      /* fill coo_i */
      for (i = 0; i < mr; i++) {
        const PetscInt gr = i + rs;
        for (j = ii[i]; j < ii[i + 1]; j++) coi[j] = gr;
      }
      /* fill coo_j */
      if (!cmapt[cp]) { /* type-0, already global */
        PetscCall(PetscArraycpy(coj, jj, mm->nz));
      } else if (cmapt[cp] == 1) { /* type-1, local to global for consecutive columns of C */
        for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */
      } else { /* type-2, local to global for sparse columns */
        for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]];
      }
      ncoo_d += mm->nz;
    } else if (rmapt[cp] == 2) { /* sparse rows */
      for (i = 0; i < mr; i++) {
        const PetscInt *jj = mm->j + ii[i];
        const PetscInt  gr = rmap[i];
        const PetscInt  nz = ii[i + 1] - ii[i];
        if (gr >= rs && gr < re) { /* local rows */
          for (j = ii[i]; j < ii[i + 1]; j++) *coi++ = gr;
          if (!cmapt[cp]) { /* type-0, already global */
            for (j = 0; j < nz; j++) *coj++ = jj[j];
          } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */
            for (j = 0; j < nz; j++) *coj++ = jj[j] + cs;
          } else { /* type-2, local to global for sparse columns */
            for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]];
          }
          ncoo_d += nz;
        }
      }
    }
  }
  if (glob) PetscCall(ISRestoreIndices(glob, &globidx));
  PetscCall(ISDestroy(&glob));
  if (P_oth_l2g) PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g, &P_oth_idx));
  PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g));
  /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to
     this proc */
  PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo * sizeof(PetscScalar), (void **)&mmdata->coo_v));

  /* preallocate with COO data */
  PetscCall(MatSetPreallocationCOO(C, ncoo, coo_i, coo_j));
  PetscCall(PetscFree2(coo_i, coo_j));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Select the backend (COO-based) symbolic routine for AB/AtB/PtAP products when A and B
   have matching types and are not bound to the CPU; options such as
   -matmatmult_backend_cpu allow forcing the plain MPIAIJ code path instead. */
PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat)
{
  Mat_Product *product = mat->product;
#if defined(PETSC_HAVE_DEVICE)
  PetscBool match  = PETSC_FALSE;
  PetscBool usecpu = PETSC_FALSE;
#else
  /* without device support the backend path is always acceptable */
  PetscBool match = PETSC_TRUE;
#endif

  PetscFunctionBegin;
  MatCheckProduct(mat, 1);
#if defined(PETSC_HAVE_DEVICE)
  if (!product->A->boundtocpu && !product->B->boundtocpu) PetscCall(PetscObjectTypeCompare((PetscObject)product->B, ((PetscObject)product->A)->type_name, &match));
  if (match) { /* we can always fallback to the CPU if requested */
    switch (product->type) {
    case MATPRODUCT_AB:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatMatMult", "Mat");
        PetscCall(PetscOptionsBool("-matmatmult_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AB", "Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      }
      break;
    case MATPRODUCT_AtB:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatTransposeMatMult", "Mat");
        PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AtB", "Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      }
      break;
    case MATPRODUCT_PtAP:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatPtAP", "Mat");
        PetscCall(PetscOptionsBool("-matptap_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_PtAP", "Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      }
      break;
    default:
      break;
    }
    match = (PetscBool)!usecpu;
  }
#endif
  if (match) {
    switch (product->type) {
    case MATPRODUCT_AB:
    case MATPRODUCT_AtB:
    case MATPRODUCT_PtAP:
      mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND;
      break;
    default:
      break;
    }
  }
  /* fallback to MPIAIJ ops */
  if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   Produces a set of block column indices of the matrix row, one for each block represented in the original row

   n - the number of block indices in cc[]
   cc - the block indices (must be large enough to contain the indices)

   Note: relies on the row's column indices being sorted ascending, so consecutive
   entries falling in the same size-bs block collapse to a single index.
*/
static inline PetscErrorCode MatCollapseRow(Mat Amat, PetscInt row, PetscInt bs, PetscInt *n, PetscInt *cc)
{
  PetscInt        cnt = -1, nidx, j;
  const PetscInt *idx;

  PetscFunctionBegin;
  PetscCall(MatGetRow(Amat, row, &nidx, &idx, NULL));
  if (nidx) {
    cnt     = 0;
    cc[cnt] = idx[0] / bs;
    for (j = 1; j < nidx; j++) {
      if (cc[cnt] < idx[j] / bs) cc[++cnt] = idx[j] / bs;
    }
  }
  PetscCall(MatRestoreRow(Amat, row, &nidx, &idx, NULL));
  *n = cnt + 1; /* cnt stays -1 for an empty row, giving *n == 0 */
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows

   ncollapsed - the number of block indices
   collapsed - the block indices (must be large enough to contain the indices)

   w0/w1/w2 are caller-provided work arrays; on return *collapsed aliases one of them.
*/
static inline PetscErrorCode MatCollapseRows(Mat Amat, PetscInt start, PetscInt bs, PetscInt *w0, PetscInt *w1, PetscInt *w2, PetscInt *ncollapsed, PetscInt **collapsed)
{
  PetscInt i, nprev, *cprev = w0, ncur = 0, *ccur = w1, *merged = w2, *cprevtmp;

  PetscFunctionBegin;
  PetscCall(MatCollapseRow(Amat, start, bs, &nprev, cprev));
  for (i = start + 1; i < start + bs; i++) {
    PetscCall(MatCollapseRow(Amat, i, bs, &ncur, ccur));
    /* merge this row's block indices into the running set, then swap buffers so
       cprev always points at the current merged list */
    PetscCall(PetscMergeIntArray(nprev, cprev, ncur, ccur, &nprev, &merged));
    cprevtmp = cprev;
    cprev    = merged;
    merged   = cprevtmp;
  }
  *ncollapsed = nprev;
  if (collapsed) *collapsed = cprev;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   MatCreateGraph_Simple_AIJ - create simple scalar matrix (graph) from potentially blocked matrix

   Input Parameter:
 . Amat - matrix
 - symmetrize - make the result symmetric
 + scale - scale with diagonal

   Output Parameter:
 . 
a_Gmat - output scalar graph >= 0 7798 7799 */ 7800 PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, PetscReal filter, PetscInt index_size, PetscInt index[], Mat *a_Gmat) 7801 { 7802 PetscInt Istart, Iend, Ii, jj, kk, ncols, nloc, NN, MM, bs; 7803 MPI_Comm comm; 7804 Mat Gmat; 7805 PetscBool ismpiaij, isseqaij; 7806 Mat a, b, c; 7807 MatType jtype; 7808 7809 PetscFunctionBegin; 7810 PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm)); 7811 PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend)); 7812 PetscCall(MatGetSize(Amat, &MM, &NN)); 7813 PetscCall(MatGetBlockSize(Amat, &bs)); 7814 nloc = (Iend - Istart) / bs; 7815 7816 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATSEQAIJ, &isseqaij)); 7817 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATMPIAIJ, &ismpiaij)); 7818 PetscCheck(isseqaij || ismpiaij, comm, PETSC_ERR_USER, "Require (MPI)AIJ matrix type"); 7819 7820 /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */ 7821 /* A solution consists in providing a new API, MatAIJGetCollapsedAIJ, and each class can provide a fast 7822 implementation */ 7823 if (bs > 1) { 7824 PetscCall(MatGetType(Amat, &jtype)); 7825 PetscCall(MatCreate(comm, &Gmat)); 7826 PetscCall(MatSetType(Gmat, jtype)); 7827 PetscCall(MatSetSizes(Gmat, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE)); 7828 PetscCall(MatSetBlockSizes(Gmat, 1, 1)); 7829 if (isseqaij || ((Mat_MPIAIJ *)Amat->data)->garray) { 7830 PetscInt *d_nnz, *o_nnz; 7831 MatScalar *aa, val, *AA; 7832 PetscInt *aj, *ai, *AJ, nc, nmax = 0; 7833 7834 if (isseqaij) { 7835 a = Amat; 7836 b = NULL; 7837 } else { 7838 Mat_MPIAIJ *d = (Mat_MPIAIJ *)Amat->data; 7839 a = d->A; 7840 b = d->B; 7841 } 7842 PetscCall(PetscInfo(Amat, "New bs>1 Graph. nloc=%" PetscInt_FMT "\n", nloc)); 7843 PetscCall(PetscMalloc2(nloc, &d_nnz, (isseqaij ? 
0 : nloc), &o_nnz)); 7844 for (c = a, kk = 0; c && kk < 2; c = b, kk++) { 7845 PetscInt *nnz = (c == a) ? d_nnz : o_nnz; 7846 const PetscInt *cols1, *cols2; 7847 7848 for (PetscInt brow = 0, nc1, nc2, ok = 1; brow < nloc * bs; brow += bs) { // block rows 7849 PetscCall(MatGetRow(c, brow, &nc2, &cols2, NULL)); 7850 nnz[brow / bs] = nc2 / bs; 7851 if (nc2 % bs) ok = 0; 7852 if (nnz[brow / bs] > nmax) nmax = nnz[brow / bs]; 7853 for (PetscInt ii = 1; ii < bs; ii++) { // check for non-dense blocks 7854 PetscCall(MatGetRow(c, brow + ii, &nc1, &cols1, NULL)); 7855 if (nc1 != nc2) ok = 0; 7856 else { 7857 for (PetscInt jj = 0; jj < nc1 && ok == 1; jj++) { 7858 if (cols1[jj] != cols2[jj]) ok = 0; 7859 if (cols1[jj] % bs != jj % bs) ok = 0; 7860 } 7861 } 7862 PetscCall(MatRestoreRow(c, brow + ii, &nc1, &cols1, NULL)); 7863 } 7864 PetscCall(MatRestoreRow(c, brow, &nc2, &cols2, NULL)); 7865 if (!ok) { 7866 PetscCall(PetscFree2(d_nnz, o_nnz)); 7867 PetscCall(PetscInfo(Amat, "Found sparse blocks - revert to slow method\n")); 7868 goto old_bs; 7869 } 7870 } 7871 } 7872 PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz)); 7873 PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz)); 7874 PetscCall(PetscFree2(d_nnz, o_nnz)); 7875 PetscCall(PetscMalloc2(nmax, &AA, nmax, &AJ)); 7876 // diag 7877 for (PetscInt brow = 0, n, grow; brow < nloc * bs; brow += bs) { // block rows 7878 Mat_SeqAIJ *aseq = (Mat_SeqAIJ *)a->data; 7879 7880 ai = aseq->i; 7881 n = ai[brow + 1] - ai[brow]; 7882 aj = aseq->j + ai[brow]; 7883 for (PetscInt k = 0; k < n; k += bs) { // block columns 7884 AJ[k / bs] = aj[k] / bs + Istart / bs; // diag starts at (Istart,Istart) 7885 val = 0; 7886 if (index_size == 0) { 7887 for (PetscInt ii = 0; ii < bs; ii++) { // rows in block 7888 aa = aseq->a + ai[brow + ii] + k; 7889 for (PetscInt jj = 0; jj < bs; jj++) { // columns in block 7890 val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm 7891 } 7892 } 7893 } else { // use (index,index) value if provided 
7894 for (PetscInt iii = 0; iii < index_size; iii++) { // rows in block 7895 PetscInt ii = index[iii]; 7896 aa = aseq->a + ai[brow + ii] + k; 7897 for (PetscInt jjj = 0; jjj < index_size; jjj++) { // columns in block 7898 PetscInt jj = index[jjj]; 7899 val += PetscAbs(PetscRealPart(aa[jj])); 7900 } 7901 } 7902 } 7903 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%" PetscInt_FMT ") >= nmax (%" PetscInt_FMT ")", k / bs, nmax); 7904 AA[k / bs] = val; 7905 } 7906 grow = Istart / bs + brow / bs; 7907 PetscCall(MatSetValues(Gmat, 1, &grow, n / bs, AJ, AA, ADD_VALUES)); 7908 } 7909 // off-diag 7910 if (ismpiaij) { 7911 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Amat->data; 7912 const PetscScalar *vals; 7913 const PetscInt *cols, *garray = aij->garray; 7914 7915 PetscCheck(garray, PETSC_COMM_SELF, PETSC_ERR_USER, "No garray ?"); 7916 for (PetscInt brow = 0, grow; brow < nloc * bs; brow += bs) { // block rows 7917 PetscCall(MatGetRow(b, brow, &ncols, &cols, NULL)); 7918 for (PetscInt k = 0, cidx = 0; k < ncols; k += bs, cidx++) { 7919 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs >= nmax"); 7920 AA[k / bs] = 0; 7921 AJ[cidx] = garray[cols[k]] / bs; 7922 } 7923 nc = ncols / bs; 7924 PetscCall(MatRestoreRow(b, brow, &ncols, &cols, NULL)); 7925 if (index_size == 0) { 7926 for (PetscInt ii = 0; ii < bs; ii++) { // rows in block 7927 PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals)); 7928 for (PetscInt k = 0; k < ncols; k += bs) { 7929 for (PetscInt jj = 0; jj < bs; jj++) { // cols in block 7930 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%" PetscInt_FMT ") >= nmax (%" PetscInt_FMT ")", k / bs, nmax); 7931 AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj])); 7932 } 7933 } 7934 PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals)); 7935 } 7936 } else { // use (index,index) value if provided 7937 for (PetscInt iii = 0; iii < index_size; iii++) { // rows in block 7938 PetscInt ii = index[iii]; 7939 PetscCall(MatGetRow(b, brow + ii, &ncols, 
&cols, &vals)); 7940 for (PetscInt k = 0; k < ncols; k += bs) { 7941 for (PetscInt jjj = 0; jjj < index_size; jjj++) { // cols in block 7942 PetscInt jj = index[jjj]; 7943 AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj])); 7944 } 7945 } 7946 PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals)); 7947 } 7948 } 7949 grow = Istart / bs + brow / bs; 7950 PetscCall(MatSetValues(Gmat, 1, &grow, nc, AJ, AA, ADD_VALUES)); 7951 } 7952 } 7953 PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY)); 7954 PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY)); 7955 PetscCall(PetscFree2(AA, AJ)); 7956 } else { 7957 const PetscScalar *vals; 7958 const PetscInt *idx; 7959 PetscInt *d_nnz, *o_nnz, *w0, *w1, *w2; 7960 old_bs: 7961 /* 7962 Determine the preallocation needed for the scalar matrix derived from the vector matrix. 7963 */ 7964 PetscCall(PetscInfo(Amat, "OLD bs>1 CreateGraph\n")); 7965 PetscCall(PetscMalloc2(nloc, &d_nnz, (isseqaij ? 0 : nloc), &o_nnz)); 7966 if (isseqaij) { 7967 PetscInt max_d_nnz; 7968 7969 /* 7970 Determine exact preallocation count for (sequential) scalar matrix 7971 */ 7972 PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat, &max_d_nnz)); 7973 max_d_nnz = PetscMin(nloc, bs * max_d_nnz); 7974 PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2)); 7975 for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) PetscCall(MatCollapseRows(Amat, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL)); 7976 PetscCall(PetscFree3(w0, w1, w2)); 7977 } else if (ismpiaij) { 7978 Mat Daij, Oaij; 7979 const PetscInt *garray; 7980 PetscInt max_d_nnz; 7981 7982 PetscCall(MatMPIAIJGetSeqAIJ(Amat, &Daij, &Oaij, &garray)); 7983 /* 7984 Determine exact preallocation count for diagonal block portion of scalar matrix 7985 */ 7986 PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij, &max_d_nnz)); 7987 max_d_nnz = PetscMin(nloc, bs * max_d_nnz); 7988 PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2)); 7989 for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) 
PetscCall(MatCollapseRows(Daij, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL)); 7990 PetscCall(PetscFree3(w0, w1, w2)); 7991 /* 7992 Over estimate (usually grossly over), preallocation count for off-diagonal portion of scalar matrix 7993 */ 7994 for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) { 7995 o_nnz[jj] = 0; 7996 for (kk = 0; kk < bs; kk++) { /* rows that get collapsed to a single row */ 7997 PetscCall(MatGetRow(Oaij, Ii + kk, &ncols, NULL, NULL)); 7998 o_nnz[jj] += ncols; 7999 PetscCall(MatRestoreRow(Oaij, Ii + kk, &ncols, NULL, NULL)); 8000 } 8001 if (o_nnz[jj] > (NN / bs - nloc)) o_nnz[jj] = NN / bs - nloc; 8002 } 8003 } else SETERRQ(comm, PETSC_ERR_USER, "Require AIJ matrix type"); 8004 /* get scalar copy (norms) of matrix */ 8005 PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz)); 8006 PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz)); 8007 PetscCall(PetscFree2(d_nnz, o_nnz)); 8008 for (Ii = Istart; Ii < Iend; Ii++) { 8009 PetscInt dest_row = Ii / bs; 8010 8011 PetscCall(MatGetRow(Amat, Ii, &ncols, &idx, &vals)); 8012 for (jj = 0; jj < ncols; jj++) { 8013 PetscInt dest_col = idx[jj] / bs; 8014 PetscScalar sv = PetscAbs(PetscRealPart(vals[jj])); 8015 8016 PetscCall(MatSetValues(Gmat, 1, &dest_row, 1, &dest_col, &sv, ADD_VALUES)); 8017 } 8018 PetscCall(MatRestoreRow(Amat, Ii, &ncols, &idx, &vals)); 8019 } 8020 PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY)); 8021 PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY)); 8022 } 8023 } else { 8024 if (symmetrize || filter >= 0 || scale) PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat)); 8025 else { 8026 Gmat = Amat; 8027 PetscCall(PetscObjectReference((PetscObject)Gmat)); 8028 } 8029 if (isseqaij) { 8030 a = Gmat; 8031 b = NULL; 8032 } else { 8033 Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data; 8034 a = d->A; 8035 b = d->B; 8036 } 8037 if (filter >= 0 || scale) { 8038 /* take absolute value of each entry */ 8039 for (c = a, kk = 0; c && kk < 2; c = b, kk++) { 8040 MatInfo info; 8041 
PetscScalar *avals; 8042 8043 PetscCall(MatGetInfo(c, MAT_LOCAL, &info)); 8044 PetscCall(MatSeqAIJGetArray(c, &avals)); 8045 for (int jj = 0; jj < info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]); 8046 PetscCall(MatSeqAIJRestoreArray(c, &avals)); 8047 } 8048 } 8049 } 8050 if (symmetrize) { 8051 PetscBool isset, issym; 8052 8053 PetscCall(MatIsSymmetricKnown(Amat, &isset, &issym)); 8054 if (!isset || !issym) { 8055 Mat matTrans; 8056 8057 PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans)); 8058 PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric == PETSC_BOOL3_TRUE ? SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN)); 8059 PetscCall(MatDestroy(&matTrans)); 8060 } 8061 PetscCall(MatSetOption(Gmat, MAT_SYMMETRIC, PETSC_TRUE)); 8062 } else if (Amat != Gmat) PetscCall(MatPropagateSymmetryOptions(Amat, Gmat)); 8063 if (scale) { 8064 /* scale c for all diagonal values = 1 or -1 */ 8065 Vec diag; 8066 8067 PetscCall(MatCreateVecs(Gmat, &diag, NULL)); 8068 PetscCall(MatGetDiagonal(Gmat, diag)); 8069 PetscCall(VecReciprocal(diag)); 8070 PetscCall(VecSqrtAbs(diag)); 8071 PetscCall(MatDiagonalScale(Gmat, diag, diag)); 8072 PetscCall(VecDestroy(&diag)); 8073 } 8074 PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view")); 8075 if (filter >= 0) { 8076 PetscCall(MatFilter(Gmat, filter, PETSC_TRUE, PETSC_TRUE)); 8077 PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_filter_graph_view")); 8078 } 8079 *a_Gmat = Gmat; 8080 PetscFunctionReturn(PETSC_SUCCESS); 8081 } 8082 8083 /* 8084 Special version for direct calls from Fortran 8085 */ 8086 8087 /* Change these macros so can be used in void function */ 8088 /* Identical to PetscCallVoid, except it assigns to *_ierr */ 8089 #undef PetscCall 8090 #define PetscCall(...) 
do { \
    PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \
    if (PetscUnlikely(ierr_msv_mpiaij)) { \
      /* report the error through the Fortran output argument and leave the void wrapper */ \
      *_ierr = PetscError(PETSC_COMM_SELF, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr_msv_mpiaij, PETSC_ERROR_REPEAT, " "); \
      return; \
    } \
  } while (0)

#undef SETERRQ
/* Like the library SETERRQ, except the error code goes to *_ierr and we return from a void function */
#define SETERRQ(comm, ierr, ...) \
  do { \
    *_ierr = PetscError(comm, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr, PETSC_ERROR_INITIAL, __VA_ARGS__); \
    return; \
  } while (0)

/* Select the symbol name expected by the Fortran compiler's name-mangling convention */
#if defined(PETSC_HAVE_FORTRAN_CAPS)
  #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
  #define matsetvaluesmpiaij_ matsetvaluesmpiaij
#else
#endif

/*
  matsetvaluesmpiaij_ - Fortran-callable direct entry point for setting values in an MPIAIJ matrix.

  All arguments arrive by reference (Fortran convention); the error code is delivered
  through *_ierr rather than a return value, which is why PetscCall() and SETERRQ are
  redefined above to assign *_ierr and return.

  Input parameters:
    mmat  - the MPIAIJ matrix
    mm/im - number of rows and (global) row indices
    mn/in - number of columns and (global) column indices
    v     - the values, laid out row- or column-oriented per aij->roworiented
    maddv - ADD_VALUES or INSERT_VALUES (may not be mixed with the opposite mode)

  Output parameter:
    _ierr - error code (0 on success)

  NOTE(review): the insertion logic appears to duplicate MatSetValues_MPIAIJ() -- keep the
  two in sync; confirm against that routine before modifying.
*/
PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat, PetscInt *mm, const PetscInt im[], PetscInt *mn, const PetscInt in[], const PetscScalar v[], InsertMode *maddv, PetscErrorCode *_ierr)
{
  Mat         mat  = *mmat;
  PetscInt    m = *mm, n = *mn;
  InsertMode  addv = *maddv;
  Mat_MPIAIJ *aij  = (Mat_MPIAIJ *)mat->data;
  PetscScalar value;

  MatCheckPreallocated(mat, 1);
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
  else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values");
  {
    /* Ownership ranges: rows [rstart,rend) are local; columns [cstart,cend) land in the diagonal block */
    PetscInt  i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
    PetscInt  cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
    PetscBool roworiented = aij->roworiented;

    /* Some Variables required in the macro */
    Mat         A     = aij->A; /* diagonal block (local columns) */
    Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
    PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
    MatScalar  *aa;
    PetscBool   ignorezeroentries = ((a->ignorezeroentries && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
    Mat         B     = aij->B; /* off-diagonal block (non-local columns) */
    Mat_SeqAIJ *b     = (Mat_SeqAIJ *)B->data;
    PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
    MatScalar  *ba;
    /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
     * cannot use "#if defined" inside a macro. */
    PETSC_UNUSED PetscBool inserted = PETSC_FALSE;

    /* scratch state consumed by the MatSetValues_SeqAIJ_{A,B}_Private() insertion macros */
    PetscInt  *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
    PetscInt   nonew = a->nonew;
    MatScalar *ap1, *ap2;

    PetscFunctionBegin;
    PetscCall(MatSeqAIJGetArray(A, &aa));
    PetscCall(MatSeqAIJGetArray(B, &ba));
    for (i = 0; i < m; i++) {
      if (im[i] < 0) continue; /* negative row indices are silently ignored */
      PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
      if (im[i] >= rstart && im[i] < rend) {
        /* locally owned row: set up binary-search windows into the A (1) and B (2) row storage */
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j = 0; j < n; j++) {
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          /* zeros may be dropped when adding, but never on the diagonal entry */
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
          if (in[j] >= cstart && in[j] < cend) {
            /* local column -> diagonal block A */
            col = in[j] - cstart;
            MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
          } else if (in[j] < 0) continue;
          else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
            SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          } else {
            /* non-local column -> off-diagonal block B */
            if (mat->was_assembled) {
              /* translate the global column through the colmap built at assembly time */
              if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
              PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col));
              col--;
#else
              col = aij->colmap[in[j]] - 1;
#endif
              if (col < 0 && !((Mat_SeqAIJ *)aij->A->data)->nonew) {
                /* column not present in the assembled pattern: fall back to unassembled form */
                PetscCall(MatDisAssemble_MPIAIJ(mat, PETSC_FALSE));
                col = in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
                B        = aij->B;
                b        = (Mat_SeqAIJ *)B->data;
                bimax    = b->imax;
                bi       = b->i;
                bilen    = b->ilen;
                bj       = b->j;
                rp2      = bj + bi[row];
                ap2      = ba + bi[row];
                rmax2    = bimax[row];
                nrow2    = bilen[row];
                low2     = 0;
                high2    = nrow2;
                bm       = aij->B->rmap->n;
                ba       = b->a;
                inserted = PETSC_FALSE;
              }
            } else col = in[j];
            MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
          }
        }
      } else if (!aij->donotstash) {
        /* off-process row: stash for communication during assembly */
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
    PetscCall(MatSeqAIJRestoreArray(A, &aa));
    PetscCall(MatSeqAIJRestoreArray(B, &ba));
  }
  PetscFunctionReturnVoid();
}

/* Undefining these here since they were redefined from their original definition above! No
 * other PETSc functions should be defined past this point, as it is impossible to recover the
 * original definitions */
#undef PetscCall
#undef SETERRQ