1 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 2 #include <petsc/private/vecimpl.h> 3 #include <petsc/private/sfimpl.h> 4 #include <petsc/private/isimpl.h> 5 #include <petscblaslapack.h> 6 #include <petscsf.h> 7 #include <petsc/private/hashmapi.h> 8 9 /* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and MatAssemblyEnd_MPI_Hash() */ 10 #define TYPE AIJ 11 #define TYPE_AIJ 12 #include "../src/mat/impls/aij/mpi/mpihashmat.h" 13 #undef TYPE 14 #undef TYPE_AIJ 15 16 static PetscErrorCode MatReset_MPIAIJ(Mat mat) 17 { 18 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 19 20 PetscFunctionBegin; 21 PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N)); 22 PetscCall(MatStashDestroy_Private(&mat->stash)); 23 PetscCall(VecDestroy(&aij->diag)); 24 PetscCall(MatDestroy(&aij->A)); 25 PetscCall(MatDestroy(&aij->B)); 26 #if defined(PETSC_USE_CTABLE) 27 PetscCall(PetscHMapIDestroy(&aij->colmap)); 28 #else 29 PetscCall(PetscFree(aij->colmap)); 30 #endif 31 PetscCall(PetscFree(aij->garray)); 32 PetscCall(VecDestroy(&aij->lvec)); 33 PetscCall(VecScatterDestroy(&aij->Mvctx)); 34 PetscCall(PetscFree2(aij->rowvalues, aij->rowindices)); 35 PetscCall(PetscFree(aij->ld)); 36 PetscFunctionReturn(PETSC_SUCCESS); 37 } 38 39 static PetscErrorCode MatResetHash_MPIAIJ(Mat mat) 40 { 41 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 42 /* Save the nonzero states of the component matrices because those are what are used to determine 43 the nonzero state of mat */ 44 PetscObjectState Astate = aij->A->nonzerostate, Bstate = aij->B->nonzerostate; 45 46 PetscFunctionBegin; 47 PetscCall(MatReset_MPIAIJ(mat)); 48 PetscCall(MatSetUp_MPI_Hash(mat)); 49 aij->A->nonzerostate = ++Astate, aij->B->nonzerostate = ++Bstate; 50 PetscFunctionReturn(PETSC_SUCCESS); 51 } 52 53 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 54 { 55 PetscFunctionBegin; 56 PetscCall(MatReset_MPIAIJ(mat)); 57 58 PetscCall(PetscFree(mat->data)); 59 60 /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */ 61 PetscCall(PetscObjectCompose((PetscObject)mat, "MatMergeSeqsToMPI", NULL)); 62 63 PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL)); 64 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL)); 65 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL)); 66 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatIsTranspose_C", NULL)); 67 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocation_C", NULL)); 68 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetPreallocation_C", NULL)); 69 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetHash_C", NULL)); 70 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocationCSR_C", NULL)); 71 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL)); 72 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpibaij_C", NULL)); 73 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisbaij_C", NULL)); 74 #if defined(PETSC_HAVE_CUDA) 75 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcusparse_C", NULL)); 76 #endif 77 #if defined(PETSC_HAVE_HIP) 78 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijhipsparse_C", NULL)); 79 #endif 80 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 81 PetscCall(PetscObjectComposeFunction((PetscObject)mat, 
"MatConvert_mpiaij_mpiaijkokkos_C", NULL)); 82 #endif 83 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL)); 84 #if defined(PETSC_HAVE_ELEMENTAL) 85 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_elemental_C", NULL)); 86 #endif 87 #if defined(PETSC_HAVE_SCALAPACK) && (defined(PETSC_USE_REAL_SINGLE) || defined(PETSC_USE_REAL_DOUBLE)) 88 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_scalapack_C", NULL)); 89 #endif 90 #if defined(PETSC_HAVE_HYPRE) 91 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_hypre_C", NULL)); 92 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", NULL)); 93 #endif 94 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL)); 95 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_is_mpiaij_C", NULL)); 96 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpiaij_C", NULL)); 97 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetUseScalableIncreaseOverlap_C", NULL)); 98 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijperm_C", NULL)); 99 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijsell_C", NULL)); 100 #if defined(PETSC_HAVE_MKL_SPARSE) 101 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijmkl_C", NULL)); 102 #endif 103 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcrl_C", NULL)); 104 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL)); 105 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisell_C", NULL)); 106 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetPreallocationCOO_C", NULL)); 107 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetValuesCOO_C", NULL)); 108 PetscFunctionReturn(PETSC_SUCCESS); 109 } 110 111 static PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) 112 { 113 Mat B; 114 115 PetscFunctionBegin; 116 PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &B)); 117 PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject)B)); 118 PetscCall(MatGetRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done)); 119 PetscCall(MatDestroy(&B)); 120 PetscFunctionReturn(PETSC_SUCCESS); 121 } 122 123 static PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) 124 { 125 Mat B; 126 127 PetscFunctionBegin; 128 PetscCall(PetscObjectQuery((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject *)&B)); 129 PetscCall(MatRestoreRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done)); 130 PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", NULL)); 131 PetscFunctionReturn(PETSC_SUCCESS); 132 } 133 134 /*MC 135 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 136 137 This matrix type is identical to` MATSEQAIJ` when constructed with a single process communicator, 138 and `MATMPIAIJ` otherwise. 
As a result, for single process communicators, 139 `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported 140 for communicators controlling multiple processes. It is recommended that you call both of 141 the above preallocation routines for simplicity. 142 143 Options Database Key: 144 . -mat_type aij - sets the matrix type to `MATAIJ` during a call to `MatSetFromOptions()` 145 146 Developer Note: 147 Level: beginner 148 149 Subclasses include `MATAIJCUSPARSE`, `MATAIJPERM`, `MATAIJSELL`, `MATAIJMKL`, `MATAIJCRL`, `MATAIJKOKKOS`,and also automatically switches over to use inodes when 150 enough exist. 151 152 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateAIJ()`, `MatCreateSeqAIJ()`, `MATSEQAIJ`, `MATMPIAIJ` 153 M*/ 154 155 /*MC 156 MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices. 157 158 This matrix type is identical to `MATSEQAIJCRL` when constructed with a single process communicator, 159 and `MATMPIAIJCRL` otherwise. As a result, for single process communicators, 160 `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported 161 for communicators controlling multiple processes. It is recommended that you call both of 162 the above preallocation routines for simplicity. 163 164 Options Database Key: 165 . -mat_type aijcrl - sets the matrix type to `MATMPIAIJCRL` during a call to `MatSetFromOptions()` 166 167 Level: beginner 168 169 .seealso: [](ch_matrices), `Mat`, `MatCreateMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL` 170 M*/ 171 172 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A, PetscBool flg) 173 { 174 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 175 176 PetscFunctionBegin; 177 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) || defined(PETSC_HAVE_VIENNACL) 178 A->boundtocpu = flg; 179 #endif 180 if (a->A) PetscCall(MatBindToCPU(a->A, flg)); 181 if (a->B) PetscCall(MatBindToCPU(a->B, flg)); 182 183 /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products. 184 * This maybe seems a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors 185 * to differ from the parent matrix. 
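   *
   * For illustration only (an editor's sketch, not taken from the PETSc manual pages): a user-level call such as
   *
   *     PetscCall(MatBindToCPU(A, PETSC_TRUE));   (here A is a MATMPIAIJ matrix)
   *
   * dispatches to this routine through the matrix operations table and is forwarded to a->A, a->B, a->lvec and a->diag.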
*/ 186 if (a->lvec) PetscCall(VecBindToCPU(a->lvec, flg)); 187 if (a->diag) PetscCall(VecBindToCPU(a->diag, flg)); 188 PetscFunctionReturn(PETSC_SUCCESS); 189 } 190 191 static PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs) 192 { 193 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data; 194 195 PetscFunctionBegin; 196 if (mat->A) { 197 PetscCall(MatSetBlockSizes(mat->A, rbs, cbs)); 198 PetscCall(MatSetBlockSizes(mat->B, rbs, 1)); 199 } 200 PetscFunctionReturn(PETSC_SUCCESS); 201 } 202 203 static PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M, IS *keptrows) 204 { 205 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data; 206 Mat_SeqAIJ *a = (Mat_SeqAIJ *)mat->A->data; 207 Mat_SeqAIJ *b = (Mat_SeqAIJ *)mat->B->data; 208 const PetscInt *ia, *ib; 209 const MatScalar *aa, *bb, *aav, *bav; 210 PetscInt na, nb, i, j, *rows, cnt = 0, n0rows; 211 PetscInt m = M->rmap->n, rstart = M->rmap->rstart; 212 213 PetscFunctionBegin; 214 *keptrows = NULL; 215 216 ia = a->i; 217 ib = b->i; 218 PetscCall(MatSeqAIJGetArrayRead(mat->A, &aav)); 219 PetscCall(MatSeqAIJGetArrayRead(mat->B, &bav)); 220 for (i = 0; i < m; i++) { 221 na = ia[i + 1] - ia[i]; 222 nb = ib[i + 1] - ib[i]; 223 if (!na && !nb) { 224 cnt++; 225 goto ok1; 226 } 227 aa = aav + ia[i]; 228 for (j = 0; j < na; j++) { 229 if (aa[j] != 0.0) goto ok1; 230 } 231 bb = PetscSafePointerPlusOffset(bav, ib[i]); 232 for (j = 0; j < nb; j++) { 233 if (bb[j] != 0.0) goto ok1; 234 } 235 cnt++; 236 ok1:; 237 } 238 PetscCallMPI(MPIU_Allreduce(&cnt, &n0rows, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)M))); 239 if (!n0rows) { 240 PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav)); 241 PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav)); 242 PetscFunctionReturn(PETSC_SUCCESS); 243 } 244 PetscCall(PetscMalloc1(M->rmap->n - cnt, &rows)); 245 cnt = 0; 246 for (i = 0; i < m; i++) { 247 na = ia[i + 1] - ia[i]; 248 nb = ib[i + 1] - ib[i]; 249 if (!na && !nb) continue; 250 aa = aav + ia[i]; 251 for (j = 0; j < na; j++) { 252 if (aa[j] != 0.0) { 253 rows[cnt++] = rstart + i; 254 goto ok2; 255 } 256 } 257 bb = PetscSafePointerPlusOffset(bav, ib[i]); 258 for (j = 0; j < nb; j++) { 259 if (bb[j] != 0.0) { 260 rows[cnt++] = rstart + i; 261 goto ok2; 262 } 263 } 264 ok2:; 265 } 266 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), cnt, rows, PETSC_OWN_POINTER, keptrows)); 267 PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav)); 268 PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav)); 269 PetscFunctionReturn(PETSC_SUCCESS); 270 } 271 272 static PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y, Vec D, InsertMode is) 273 { 274 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Y->data; 275 PetscBool cong; 276 277 PetscFunctionBegin; 278 PetscCall(MatHasCongruentLayouts(Y, &cong)); 279 if (Y->assembled && cong) { 280 PetscCall(MatDiagonalSet(aij->A, D, is)); 281 } else { 282 PetscCall(MatDiagonalSet_Default(Y, D, is)); 283 } 284 PetscFunctionReturn(PETSC_SUCCESS); 285 } 286 287 static PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M, IS *zrows) 288 { 289 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)M->data; 290 PetscInt i, rstart, nrows, *rows; 291 292 PetscFunctionBegin; 293 *zrows = NULL; 294 PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A, &nrows, &rows)); 295 PetscCall(MatGetOwnershipRange(M, &rstart, NULL)); 296 for (i = 0; i < nrows; i++) rows[i] += rstart; 297 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), nrows, rows, PETSC_OWN_POINTER, zrows)); 298 PetscFunctionReturn(PETSC_SUCCESS); 299 } 300 301 static PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A, PetscInt type, PetscReal 
*reductions) 302 { 303 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)A->data; 304 PetscInt i, m, n, *garray = aij->garray; 305 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ *)aij->A->data; 306 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ *)aij->B->data; 307 PetscReal *work; 308 const PetscScalar *dummy; 309 310 PetscFunctionBegin; 311 PetscCall(MatGetSize(A, &m, &n)); 312 PetscCall(PetscCalloc1(n, &work)); 313 PetscCall(MatSeqAIJGetArrayRead(aij->A, &dummy)); 314 PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &dummy)); 315 PetscCall(MatSeqAIJGetArrayRead(aij->B, &dummy)); 316 PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &dummy)); 317 if (type == NORM_2) { 318 for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i] * a_aij->a[i]); 319 for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i] * b_aij->a[i]); 320 } else if (type == NORM_1) { 321 for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 322 for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 323 } else if (type == NORM_INFINITY) { 324 for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 325 for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]), work[garray[b_aij->j[i]]]); 326 } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) { 327 for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]); 328 for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]); 329 } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) { 330 for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]); 331 for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]); 332 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type"); 333 if (type == NORM_INFINITY) { 334 PetscCallMPI(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A))); 335 } else { 336 PetscCallMPI(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A))); 337 } 338 PetscCall(PetscFree(work)); 339 if (type == NORM_2) { 340 for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]); 341 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 342 for (i = 0; i < n; i++) reductions[i] /= m; 343 } 344 PetscFunctionReturn(PETSC_SUCCESS); 345 } 346 347 static PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A, IS *is) 348 { 349 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 350 IS sis, gis; 351 const PetscInt *isis, *igis; 352 PetscInt n, *iis, nsis, ngis, rstart, i; 353 354 PetscFunctionBegin; 355 PetscCall(MatFindOffBlockDiagonalEntries(a->A, &sis)); 356 PetscCall(MatFindNonzeroRows(a->B, &gis)); 357 PetscCall(ISGetSize(gis, &ngis)); 358 PetscCall(ISGetSize(sis, &nsis)); 359 PetscCall(ISGetIndices(sis, &isis)); 360 PetscCall(ISGetIndices(gis, &igis)); 361 362 PetscCall(PetscMalloc1(ngis + nsis, &iis)); 363 PetscCall(PetscArraycpy(iis, igis, ngis)); 364 PetscCall(PetscArraycpy(iis + ngis, isis, nsis)); 365 n = ngis + nsis; 366 
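  /* iis now holds local row indices collected from both blocks: rows of the diagonal block with entries outside the
     block diagonal and rows of the off-diagonal block with any nonzero; sort them, drop duplicates, and shift them to
     global row numbering before creating the index set */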
PetscCall(PetscSortRemoveDupsInt(&n, iis)); 367 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 368 for (i = 0; i < n; i++) iis[i] += rstart; 369 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A), n, iis, PETSC_OWN_POINTER, is)); 370 371 PetscCall(ISRestoreIndices(sis, &isis)); 372 PetscCall(ISRestoreIndices(gis, &igis)); 373 PetscCall(ISDestroy(&sis)); 374 PetscCall(ISDestroy(&gis)); 375 PetscFunctionReturn(PETSC_SUCCESS); 376 } 377 378 /* 379 Local utility routine that creates a mapping from the global column 380 number to the local number in the off-diagonal part of the local 381 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 382 a slightly higher hash table cost; without it it is not scalable (each processor 383 has an order N integer array but is fast to access. 384 */ 385 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 386 { 387 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 388 PetscInt n = aij->B->cmap->n, i; 389 390 PetscFunctionBegin; 391 PetscCheck(!n || aij->garray, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MPIAIJ Matrix was assembled but is missing garray"); 392 #if defined(PETSC_USE_CTABLE) 393 PetscCall(PetscHMapICreateWithSize(n, &aij->colmap)); 394 for (i = 0; i < n; i++) PetscCall(PetscHMapISet(aij->colmap, aij->garray[i] + 1, i + 1)); 395 #else 396 PetscCall(PetscCalloc1(mat->cmap->N + 1, &aij->colmap)); 397 for (i = 0; i < n; i++) aij->colmap[aij->garray[i]] = i + 1; 398 #endif 399 PetscFunctionReturn(PETSC_SUCCESS); 400 } 401 402 #define MatSetValues_SeqAIJ_A_Private(row, col, value, addv, orow, ocol) \ 403 do { \ 404 if (col <= lastcol1) low1 = 0; \ 405 else high1 = nrow1; \ 406 lastcol1 = col; \ 407 while (high1 - low1 > 5) { \ 408 t = (low1 + high1) / 2; \ 409 if (rp1[t] > col) high1 = t; \ 410 else low1 = t; \ 411 } \ 412 for (_i = low1; _i < high1; _i++) { \ 413 if (rp1[_i] > col) break; \ 414 if (rp1[_i] == col) { \ 415 if (addv == ADD_VALUES) { \ 416 ap1[_i] += value; \ 417 /* Not sure LogFlops will slow down the code or not */ \ 418 (void)PetscLogFlops(1.0); \ 419 } else ap1[_i] = value; \ 420 goto a_noinsert; \ 421 } \ 422 } \ 423 if (value == 0.0 && ignorezeroentries && row != col) { \ 424 low1 = 0; \ 425 high1 = nrow1; \ 426 goto a_noinsert; \ 427 } \ 428 if (nonew == 1) { \ 429 low1 = 0; \ 430 high1 = nrow1; \ 431 goto a_noinsert; \ 432 } \ 433 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 434 MatSeqXAIJReallocateAIJ(A, am, 1, nrow1, row, col, rmax1, aa, ai, aj, rp1, ap1, aimax, nonew, MatScalar); \ 435 N = nrow1++ - 1; \ 436 a->nz++; \ 437 high1++; \ 438 /* shift up all the later entries in this row */ \ 439 PetscCall(PetscArraymove(rp1 + _i + 1, rp1 + _i, N - _i + 1)); \ 440 PetscCall(PetscArraymove(ap1 + _i + 1, ap1 + _i, N - _i + 1)); \ 441 rp1[_i] = col; \ 442 ap1[_i] = value; \ 443 a_noinsert:; \ 444 ailen[row] = nrow1; \ 445 } while (0) 446 447 #define MatSetValues_SeqAIJ_B_Private(row, col, value, addv, orow, ocol) \ 448 do { \ 449 if (col <= lastcol2) low2 = 0; \ 450 else high2 = nrow2; \ 451 lastcol2 = col; \ 452 while (high2 - low2 > 5) { \ 453 t = (low2 + high2) / 2; \ 454 if (rp2[t] > col) high2 = t; \ 455 else low2 = t; \ 456 } \ 457 for (_i = low2; _i < high2; _i++) { \ 458 if (rp2[_i] > col) break; \ 459 if (rp2[_i] == col) { \ 460 if (addv == ADD_VALUES) { \ 461 ap2[_i] += value; \ 462 (void)PetscLogFlops(1.0); \ 463 } else ap2[_i] = value; \ 464 goto b_noinsert; \ 465 } \ 466 
} \ 467 if (value == 0.0 && ignorezeroentries) { \ 468 low2 = 0; \ 469 high2 = nrow2; \ 470 goto b_noinsert; \ 471 } \ 472 if (nonew == 1) { \ 473 low2 = 0; \ 474 high2 = nrow2; \ 475 goto b_noinsert; \ 476 } \ 477 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 478 MatSeqXAIJReallocateAIJ(B, bm, 1, nrow2, row, col, rmax2, ba, bi, bj, rp2, ap2, bimax, nonew, MatScalar); \ 479 N = nrow2++ - 1; \ 480 b->nz++; \ 481 high2++; \ 482 /* shift up all the later entries in this row */ \ 483 PetscCall(PetscArraymove(rp2 + _i + 1, rp2 + _i, N - _i + 1)); \ 484 PetscCall(PetscArraymove(ap2 + _i + 1, ap2 + _i, N - _i + 1)); \ 485 rp2[_i] = col; \ 486 ap2[_i] = value; \ 487 b_noinsert:; \ 488 bilen[row] = nrow2; \ 489 } while (0) 490 491 static PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A, PetscInt row, const PetscScalar v[]) 492 { 493 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 494 Mat_SeqAIJ *a = (Mat_SeqAIJ *)mat->A->data, *b = (Mat_SeqAIJ *)mat->B->data; 495 PetscInt l, *garray = mat->garray, diag; 496 PetscScalar *aa, *ba; 497 498 PetscFunctionBegin; 499 /* code only works for square matrices A */ 500 501 /* find size of row to the left of the diagonal part */ 502 PetscCall(MatGetOwnershipRange(A, &diag, NULL)); 503 row = row - diag; 504 for (l = 0; l < b->i[row + 1] - b->i[row]; l++) { 505 if (garray[b->j[b->i[row] + l]] > diag) break; 506 } 507 if (l) { 508 PetscCall(MatSeqAIJGetArray(mat->B, &ba)); 509 PetscCall(PetscArraycpy(ba + b->i[row], v, l)); 510 PetscCall(MatSeqAIJRestoreArray(mat->B, &ba)); 511 } 512 513 /* diagonal part */ 514 if (a->i[row + 1] - a->i[row]) { 515 PetscCall(MatSeqAIJGetArray(mat->A, &aa)); 516 PetscCall(PetscArraycpy(aa + a->i[row], v + l, a->i[row + 1] - a->i[row])); 517 PetscCall(MatSeqAIJRestoreArray(mat->A, &aa)); 518 } 519 520 /* right of diagonal part */ 521 if (b->i[row + 1] - b->i[row] - l) { 522 PetscCall(MatSeqAIJGetArray(mat->B, &ba)); 523 PetscCall(PetscArraycpy(ba + b->i[row] + l, v + l + a->i[row + 1] - a->i[row], b->i[row + 1] - b->i[row] - l)); 524 PetscCall(MatSeqAIJRestoreArray(mat->B, &ba)); 525 } 526 PetscFunctionReturn(PETSC_SUCCESS); 527 } 528 529 PetscErrorCode MatSetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) 530 { 531 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 532 PetscScalar value = 0.0; 533 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend; 534 PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col; 535 PetscBool roworiented = aij->roworiented; 536 537 /* Some Variables required in the macro */ 538 Mat A = aij->A; 539 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 540 PetscInt *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j; 541 PetscBool ignorezeroentries = a->ignorezeroentries; 542 Mat B = aij->B; 543 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 544 PetscInt *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n; 545 MatScalar *aa, *ba; 546 PetscInt *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2; 547 PetscInt nonew; 548 MatScalar *ap1, *ap2; 549 550 PetscFunctionBegin; 551 PetscCall(MatSeqAIJGetArray(A, &aa)); 552 PetscCall(MatSeqAIJGetArray(B, &ba)); 553 for (i = 0; i < m; i++) { 554 if (im[i] < 0) continue; 555 PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: 
row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1); 556 if (im[i] >= rstart && im[i] < rend) { 557 row = im[i] - rstart; 558 lastcol1 = -1; 559 rp1 = PetscSafePointerPlusOffset(aj, ai[row]); 560 ap1 = PetscSafePointerPlusOffset(aa, ai[row]); 561 rmax1 = aimax[row]; 562 nrow1 = ailen[row]; 563 low1 = 0; 564 high1 = nrow1; 565 lastcol2 = -1; 566 rp2 = PetscSafePointerPlusOffset(bj, bi[row]); 567 ap2 = PetscSafePointerPlusOffset(ba, bi[row]); 568 rmax2 = bimax[row]; 569 nrow2 = bilen[row]; 570 low2 = 0; 571 high2 = nrow2; 572 573 for (j = 0; j < n; j++) { 574 if (v) value = roworiented ? v[i * n + j] : v[i + j * m]; 575 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 576 if (in[j] >= cstart && in[j] < cend) { 577 col = in[j] - cstart; 578 nonew = a->nonew; 579 MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]); 580 } else if (in[j] < 0) { 581 continue; 582 } else { 583 PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1); 584 if (mat->was_assembled) { 585 if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 586 #if defined(PETSC_USE_CTABLE) 587 PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); /* map global col ids to local ones */ 588 col--; 589 #else 590 col = aij->colmap[in[j]] - 1; 591 #endif 592 if (col < 0 && !((Mat_SeqAIJ *)aij->B->data)->nonew) { /* col < 0 means in[j] is a new col for B */ 593 PetscCall(MatDisAssemble_MPIAIJ(mat, PETSC_FALSE)); /* Change aij->B from reduced/local format to expanded/global format */ 594 col = in[j]; 595 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 596 B = aij->B; 597 b = (Mat_SeqAIJ *)B->data; 598 bimax = b->imax; 599 bi = b->i; 600 bilen = b->ilen; 601 bj = b->j; 602 ba = b->a; 603 rp2 = PetscSafePointerPlusOffset(bj, bi[row]); 604 ap2 = PetscSafePointerPlusOffset(ba, bi[row]); 605 rmax2 = bimax[row]; 606 nrow2 = bilen[row]; 607 low2 = 0; 608 high2 = nrow2; 609 bm = aij->B->rmap->n; 610 ba = b->a; 611 } else if (col < 0 && !(ignorezeroentries && value == 0.0)) { 612 PetscCheck(1 == ((Mat_SeqAIJ *)aij->B->data)->nonew, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]); 613 PetscCall(PetscInfo(mat, "Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n", (double)PetscRealPart(value), im[i], in[j])); 614 } 615 } else col = in[j]; 616 nonew = b->nonew; 617 MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]); 618 } 619 } 620 } else { 621 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]); 622 if (!aij->donotstash) { 623 mat->assembled = PETSC_FALSE; 624 if (roworiented) { 625 PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i * n), (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 626 } else { 627 PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i), m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 628 } 629 } 630 } 631 } 632 PetscCall(MatSeqAIJRestoreArray(A, &aa)); /* aa, bb might have been free'd due to reallocation above. 
But we don't access them here */
  PetscCall(MatSeqAIJRestoreArray(B, &ba));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ matrix.
  The row offsets in mat_i have to be nondecreasing and the column indices in mat_j have to be sorted within each row (CSR-like).
  No off-process parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  Mat         A   = aij->A; /* diagonal part of the matrix */
  Mat         B   = aij->B; /* off-diagonal part of the matrix */
  Mat_SeqAIJ *a   = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJ *b   = (Mat_SeqAIJ *)B->data;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, col;
  PetscInt   *ailen = a->ilen, *aj = a->j;
  PetscInt   *bilen = b->ilen, *bj = b->j;
  PetscInt    am = aij->A->rmap->n, j;
  PetscInt    diag_so_far = 0, dnz;
  PetscInt    offd_so_far = 0, onz;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j = 0; j < am; j++) {
    dnz = onz = 0;
    /* Iterate over all non-zero columns of the current row */
    for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[diag_so_far++] = mat_j[col] - cstart;
        dnz++;
      } else { /* off-diagonal entries */
        bj[offd_so_far++] = mat_j[col];
        onz++;
      }
    }
    ailen[j] = dnz;
    bilen[j] = onz;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ matrix.
  The row offsets in mat_i have to be nondecreasing and the column indices in mat_j have to be sorted within each row (CSR-like).
  No off-process parts of the matrix are allowed here; they are set at a later point by MatSetValues_MPIAIJ().
  Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
  would not be valid and the more complex MatSetValues_MPIAIJ() has to be used.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[], const PetscScalar mat_a[])
{
  Mat_MPIAIJ *aij  = (Mat_MPIAIJ *)mat->data;
  Mat         A    = aij->A; /* diagonal part of the matrix */
  Mat         B    = aij->B; /* off-diagonal part of the matrix */
  Mat_SeqAIJ *aijd = (Mat_SeqAIJ *)aij->A->data, *aijo = (Mat_SeqAIJ *)aij->B->data;
  Mat_SeqAIJ *a    = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJ *b    = (Mat_SeqAIJ *)B->data;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend;
  PetscInt   *ailen = a->ilen, *aj = a->j;
  PetscInt   *bilen = b->ilen, *bj = b->j;
  PetscInt    am = aij->A->rmap->n, j;
  PetscInt   *full_diag_i = aijd->i, *full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point.
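     The arrays full_diag_i[] and full_offd_i[] are used below as the per-row starting offsets into the final aj/aa and
     bj/ba arrays, so every local row is written at its preallocated position even when some entries of the row are only set later.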
*/ 697 PetscInt col, dnz_row, onz_row, rowstart_diag, rowstart_offd; 698 PetscScalar *aa = a->a, *ba = b->a; 699 700 PetscFunctionBegin; 701 /* Iterate over all rows of the matrix */ 702 for (j = 0; j < am; j++) { 703 dnz_row = onz_row = 0; 704 rowstart_offd = full_offd_i[j]; 705 rowstart_diag = full_diag_i[j]; 706 /* Iterate over all non-zero columns of the current row */ 707 for (col = mat_i[j]; col < mat_i[j + 1]; col++) { 708 /* If column is in the diagonal */ 709 if (mat_j[col] >= cstart && mat_j[col] < cend) { 710 aj[rowstart_diag + dnz_row] = mat_j[col] - cstart; 711 aa[rowstart_diag + dnz_row] = mat_a[col]; 712 dnz_row++; 713 } else { /* off-diagonal entries */ 714 bj[rowstart_offd + onz_row] = mat_j[col]; 715 ba[rowstart_offd + onz_row] = mat_a[col]; 716 onz_row++; 717 } 718 } 719 ailen[j] = dnz_row; 720 bilen[j] = onz_row; 721 } 722 PetscFunctionReturn(PETSC_SUCCESS); 723 } 724 725 static PetscErrorCode MatGetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[]) 726 { 727 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 728 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend; 729 PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col; 730 731 PetscFunctionBegin; 732 for (i = 0; i < m; i++) { 733 if (idxm[i] < 0) continue; /* negative row */ 734 PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1); 735 PetscCheck(idxm[i] >= rstart && idxm[i] < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported, row requested %" PetscInt_FMT " range [%" PetscInt_FMT " %" PetscInt_FMT ")", idxm[i], rstart, rend); 736 row = idxm[i] - rstart; 737 for (j = 0; j < n; j++) { 738 if (idxn[j] < 0) continue; /* negative column */ 739 PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1); 740 if (idxn[j] >= cstart && idxn[j] < cend) { 741 col = idxn[j] - cstart; 742 PetscCall(MatGetValues(aij->A, 1, &row, 1, &col, v + i * n + j)); 743 } else { 744 if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 745 #if defined(PETSC_USE_CTABLE) 746 PetscCall(PetscHMapIGetWithDefault(aij->colmap, idxn[j] + 1, 0, &col)); 747 col--; 748 #else 749 col = aij->colmap[idxn[j]] - 1; 750 #endif 751 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v + i * n + j) = 0.0; 752 else PetscCall(MatGetValues(aij->B, 1, &row, 1, &col, v + i * n + j)); 753 } 754 } 755 } 756 PetscFunctionReturn(PETSC_SUCCESS); 757 } 758 759 static PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat, MatAssemblyType mode) 760 { 761 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 762 PetscInt nstash, reallocs; 763 764 PetscFunctionBegin; 765 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS); 766 767 PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range)); 768 PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs)); 769 PetscCall(PetscInfo(mat, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs)); 770 PetscFunctionReturn(PETSC_SUCCESS); 771 } 772 773 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat, MatAssemblyType mode) 774 { 775 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 776 PetscMPIInt n; 777 PetscInt i, j, rstart, ncols, flg; 778 PetscInt *row, *col; 779 PetscBool all_assembled; 780 PetscScalar *val; 781 782 /* do not use 'b = 
(Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 783 784 PetscFunctionBegin; 785 if (!aij->donotstash && !mat->nooffprocentries) { 786 while (1) { 787 PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg)); 788 if (!flg) break; 789 790 for (i = 0; i < n;) { 791 /* Now identify the consecutive vals belonging to the same row */ 792 for (j = i, rstart = row[j]; j < n; j++) { 793 if (row[j] != rstart) break; 794 } 795 if (j < n) ncols = j - i; 796 else ncols = n - i; 797 /* Now assemble all these values with a single function call */ 798 PetscCall(MatSetValues_MPIAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode)); 799 i = j; 800 } 801 } 802 PetscCall(MatStashScatterEnd_Private(&mat->stash)); 803 } 804 #if defined(PETSC_HAVE_DEVICE) 805 if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU; 806 /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */ 807 if (mat->boundtocpu) { 808 PetscCall(MatBindToCPU(aij->A, PETSC_TRUE)); 809 PetscCall(MatBindToCPU(aij->B, PETSC_TRUE)); 810 } 811 #endif 812 PetscCall(MatAssemblyBegin(aij->A, mode)); 813 PetscCall(MatAssemblyEnd(aij->A, mode)); 814 815 /* determine if any process has disassembled, if so we must 816 also disassemble ourself, in order that we may reassemble. */ 817 /* 818 if nonzero structure of submatrix B cannot change then we know that 819 no process disassembled thus we can skip this stuff 820 */ 821 if (!((Mat_SeqAIJ *)aij->B->data)->nonew) { 822 PetscCallMPI(MPIU_Allreduce(&mat->was_assembled, &all_assembled, 1, MPI_C_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 823 if (mat->was_assembled && !all_assembled) { /* mat on this rank has reduced off-diag B with local col ids, but globally it does not */ 824 PetscCall(MatDisAssemble_MPIAIJ(mat, PETSC_FALSE)); 825 } 826 } 827 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIAIJ(mat)); 828 PetscCall(MatSetOption(aij->B, MAT_USE_INODES, PETSC_FALSE)); 829 #if defined(PETSC_HAVE_DEVICE) 830 if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU; 831 #endif 832 PetscCall(MatAssemblyBegin(aij->B, mode)); 833 PetscCall(MatAssemblyEnd(aij->B, mode)); 834 835 PetscCall(PetscFree2(aij->rowvalues, aij->rowindices)); 836 837 aij->rowvalues = NULL; 838 839 PetscCall(VecDestroy(&aij->diag)); 840 841 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 842 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ *)aij->A->data)->nonew) { 843 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 844 PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat))); 845 } 846 #if defined(PETSC_HAVE_DEVICE) 847 mat->offloadmask = PETSC_OFFLOAD_BOTH; 848 #endif 849 PetscFunctionReturn(PETSC_SUCCESS); 850 } 851 852 static PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 853 { 854 Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data; 855 856 PetscFunctionBegin; 857 PetscCall(MatZeroEntries(l->A)); 858 PetscCall(MatZeroEntries(l->B)); 859 PetscFunctionReturn(PETSC_SUCCESS); 860 } 861 862 static PetscErrorCode MatZeroRows_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 863 { 864 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 865 PetscInt *lrows; 866 PetscInt r, len; 867 PetscBool cong; 868 
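  /* The zeroing proceeds in three steps: (1) map the requested global rows to locally owned rows, (2) if x and b are
     provided (this requires congruent row/column layouts), set b[row] = diag*x[row] for every zeroed local row, and
     (3) zero the matching rows of the diagonal block A and off-diagonal block B, inserting the value diag on the
     diagonal where requested, before re-assembling and updating the matrix nonzero state. */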
869 PetscFunctionBegin; 870 /* get locally owned rows */ 871 PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows)); 872 PetscCall(MatHasCongruentLayouts(A, &cong)); 873 /* fix right-hand side if needed */ 874 if (x && b) { 875 const PetscScalar *xx; 876 PetscScalar *bb; 877 878 PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout"); 879 PetscCall(VecGetArrayRead(x, &xx)); 880 PetscCall(VecGetArray(b, &bb)); 881 for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]]; 882 PetscCall(VecRestoreArrayRead(x, &xx)); 883 PetscCall(VecRestoreArray(b, &bb)); 884 } 885 886 if (diag != 0.0 && cong) { 887 PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL)); 888 PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 889 } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */ 890 Mat_SeqAIJ *aijA = (Mat_SeqAIJ *)mat->A->data; 891 Mat_SeqAIJ *aijB = (Mat_SeqAIJ *)mat->B->data; 892 PetscInt nnwA, nnwB; 893 PetscBool nnzA, nnzB; 894 895 nnwA = aijA->nonew; 896 nnwB = aijB->nonew; 897 nnzA = aijA->keepnonzeropattern; 898 nnzB = aijB->keepnonzeropattern; 899 if (!nnzA) { 900 PetscCall(PetscInfo(mat->A, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n")); 901 aijA->nonew = 0; 902 } 903 if (!nnzB) { 904 PetscCall(PetscInfo(mat->B, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n")); 905 aijB->nonew = 0; 906 } 907 /* Must zero here before the next loop */ 908 PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL)); 909 PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 910 for (r = 0; r < len; ++r) { 911 const PetscInt row = lrows[r] + A->rmap->rstart; 912 if (row >= A->cmap->N) continue; 913 PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES)); 914 } 915 aijA->nonew = nnwA; 916 aijB->nonew = nnwB; 917 } else { 918 PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL)); 919 PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 920 } 921 PetscCall(PetscFree(lrows)); 922 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 923 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 924 925 /* only change matrix nonzero state if pattern was allowed to be changed */ 926 if (!((Mat_SeqAIJ *)mat->A->data)->keepnonzeropattern || !((Mat_SeqAIJ *)mat->A->data)->nonew) { 927 PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate; 928 PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A))); 929 } 930 PetscFunctionReturn(PETSC_SUCCESS); 931 } 932 933 static PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 934 { 935 Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data; 936 PetscInt n = A->rmap->n; 937 PetscInt i, j, r, m, len = 0; 938 PetscInt *lrows, *owners = A->rmap->range; 939 PetscMPIInt p = 0; 940 PetscSFNode *rrows; 941 PetscSF sf; 942 const PetscScalar *xx; 943 PetscScalar *bb, *mask, *aij_a; 944 Vec xmask, lmask; 945 Mat_SeqAIJ *aij = (Mat_SeqAIJ *)l->B->data; 946 const PetscInt *aj, *ii, *ridx; 947 PetscScalar *aa; 948 949 PetscFunctionBegin; 950 /* Create SF where leaves are input rows and roots are owned rows */ 951 PetscCall(PetscMalloc1(n, &lrows)); 952 for (r = 0; r < n; ++r) lrows[r] = -1; 953 PetscCall(PetscMalloc1(N, &rrows)); 954 for (r = 0; r < N; ++r) { 955 const PetscInt idx = 
rows[r]; 956 PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N); 957 if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */ 958 PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p)); 959 } 960 rrows[r].rank = p; 961 rrows[r].index = rows[r] - owners[p]; 962 } 963 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 964 PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER)); 965 /* Collect flags for rows to be zeroed */ 966 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR)); 967 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR)); 968 PetscCall(PetscSFDestroy(&sf)); 969 /* Compress and put in row numbers */ 970 for (r = 0; r < n; ++r) 971 if (lrows[r] >= 0) lrows[len++] = r; 972 /* zero diagonal part of matrix */ 973 PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b)); 974 /* handle off-diagonal part of matrix */ 975 PetscCall(MatCreateVecs(A, &xmask, NULL)); 976 PetscCall(VecDuplicate(l->lvec, &lmask)); 977 PetscCall(VecGetArray(xmask, &bb)); 978 for (i = 0; i < len; i++) bb[lrows[i]] = 1; 979 PetscCall(VecRestoreArray(xmask, &bb)); 980 PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD)); 981 PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD)); 982 PetscCall(VecDestroy(&xmask)); 983 if (x && b) { /* this code is buggy when the row and column layout don't match */ 984 PetscBool cong; 985 986 PetscCall(MatHasCongruentLayouts(A, &cong)); 987 PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout"); 988 PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD)); 989 PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD)); 990 PetscCall(VecGetArrayRead(l->lvec, &xx)); 991 PetscCall(VecGetArray(b, &bb)); 992 } 993 PetscCall(VecGetArray(lmask, &mask)); 994 /* remove zeroed rows of off-diagonal matrix */ 995 PetscCall(MatSeqAIJGetArray(l->B, &aij_a)); 996 ii = aij->i; 997 for (i = 0; i < len; i++) PetscCall(PetscArrayzero(PetscSafePointerPlusOffset(aij_a, ii[lrows[i]]), ii[lrows[i] + 1] - ii[lrows[i]])); 998 /* loop over all elements of off process part of matrix zeroing removed columns*/ 999 if (aij->compressedrow.use) { 1000 m = aij->compressedrow.nrows; 1001 ii = aij->compressedrow.i; 1002 ridx = aij->compressedrow.rindex; 1003 for (i = 0; i < m; i++) { 1004 n = ii[i + 1] - ii[i]; 1005 aj = aij->j + ii[i]; 1006 aa = aij_a + ii[i]; 1007 1008 for (j = 0; j < n; j++) { 1009 if (PetscAbsScalar(mask[*aj])) { 1010 if (b) bb[*ridx] -= *aa * xx[*aj]; 1011 *aa = 0.0; 1012 } 1013 aa++; 1014 aj++; 1015 } 1016 ridx++; 1017 } 1018 } else { /* do not use compressed row format */ 1019 m = l->B->rmap->n; 1020 for (i = 0; i < m; i++) { 1021 n = ii[i + 1] - ii[i]; 1022 aj = aij->j + ii[i]; 1023 aa = aij_a + ii[i]; 1024 for (j = 0; j < n; j++) { 1025 if (PetscAbsScalar(mask[*aj])) { 1026 if (b) bb[i] -= *aa * xx[*aj]; 1027 *aa = 0.0; 1028 } 1029 aa++; 1030 aj++; 1031 } 1032 } 1033 } 1034 if (x && b) { 1035 PetscCall(VecRestoreArray(b, &bb)); 1036 PetscCall(VecRestoreArrayRead(l->lvec, &xx)); 1037 } 1038 PetscCall(MatSeqAIJRestoreArray(l->B, &aij_a)); 1039 PetscCall(VecRestoreArray(lmask, &mask)); 1040 PetscCall(VecDestroy(&lmask)); 1041 PetscCall(PetscFree(lrows)); 1042 1043 /* only change matrix nonzero state if 
pattern was allowed to be changed */ 1044 if (!((Mat_SeqAIJ *)l->A->data)->nonew) { 1045 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1046 PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A))); 1047 } 1048 PetscFunctionReturn(PETSC_SUCCESS); 1049 } 1050 1051 static PetscErrorCode MatMult_MPIAIJ(Mat A, Vec xx, Vec yy) 1052 { 1053 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1054 PetscInt nt; 1055 VecScatter Mvctx = a->Mvctx; 1056 1057 PetscFunctionBegin; 1058 PetscCall(VecGetLocalSize(xx, &nt)); 1059 PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")", A->cmap->n, nt); 1060 PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1061 PetscUseTypeMethod(a->A, mult, xx, yy); 1062 PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1063 PetscUseTypeMethod(a->B, multadd, a->lvec, yy, yy); 1064 PetscFunctionReturn(PETSC_SUCCESS); 1065 } 1066 1067 static PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A, Vec bb, Vec xx) 1068 { 1069 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1070 1071 PetscFunctionBegin; 1072 PetscCall(MatMultDiagonalBlock(a->A, bb, xx)); 1073 PetscFunctionReturn(PETSC_SUCCESS); 1074 } 1075 1076 static PetscErrorCode MatMultAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz) 1077 { 1078 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1079 VecScatter Mvctx = a->Mvctx; 1080 1081 PetscFunctionBegin; 1082 PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1083 PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz)); 1084 PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1085 PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz)); 1086 PetscFunctionReturn(PETSC_SUCCESS); 1087 } 1088 1089 static PetscErrorCode MatMultTranspose_MPIAIJ(Mat A, Vec xx, Vec yy) 1090 { 1091 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1092 1093 PetscFunctionBegin; 1094 /* do nondiagonal part */ 1095 PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec)); 1096 /* do local part */ 1097 PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy)); 1098 /* add partial results together */ 1099 PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE)); 1100 PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE)); 1101 PetscFunctionReturn(PETSC_SUCCESS); 1102 } 1103 1104 static PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat, Mat Bmat, PetscReal tol, PetscBool *f) 1105 { 1106 MPI_Comm comm; 1107 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)Amat->data, *Bij = (Mat_MPIAIJ *)Bmat->data; 1108 Mat Adia = Aij->A, Bdia = Bij->A, Aoff, Boff, *Aoffs, *Boffs; 1109 IS Me, Notme; 1110 PetscInt M, N, first, last, *notme, i; 1111 PetscBool lf; 1112 PetscMPIInt size; 1113 1114 PetscFunctionBegin; 1115 /* Easy test: symmetric diagonal block */ 1116 PetscCall(MatIsTranspose(Adia, Bdia, tol, &lf)); 1117 PetscCallMPI(MPIU_Allreduce(&lf, f, 1, MPI_C_BOOL, MPI_LAND, PetscObjectComm((PetscObject)Amat))); 1118 if (!*f) PetscFunctionReturn(PETSC_SUCCESS); 1119 PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm)); 1120 PetscCallMPI(MPI_Comm_size(comm, &size)); 1121 if (size == 1) PetscFunctionReturn(PETSC_SUCCESS); 1122 1123 /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. 
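     Each process extracts Amat(owned rows, unowned columns) and Bmat(unowned rows, owned columns) with MatCreateSubMatrices()
     and then checks that these two off-diagonal pieces are transposes of one another.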
*/
  PetscCall(MatGetSize(Amat, &M, &N));
  PetscCall(MatGetOwnershipRange(Amat, &first, &last));
  PetscCall(PetscMalloc1(N - last + first, &notme));
  for (i = 0; i < first; i++) notme[i] = i;
  for (i = last; i < M; i++) notme[i - last + first] = i;
  PetscCall(ISCreateGeneral(MPI_COMM_SELF, N - last + first, notme, PETSC_COPY_VALUES, &Notme));
  PetscCall(ISCreateStride(MPI_COMM_SELF, last - first, first, 1, &Me));
  PetscCall(MatCreateSubMatrices(Amat, 1, &Me, &Notme, MAT_INITIAL_MATRIX, &Aoffs));
  Aoff = Aoffs[0];
  PetscCall(MatCreateSubMatrices(Bmat, 1, &Notme, &Me, MAT_INITIAL_MATRIX, &Boffs));
  Boff = Boffs[0];
  PetscCall(MatIsTranspose(Aoff, Boff, tol, f));
  PetscCall(MatDestroyMatrices(1, &Aoffs));
  PetscCall(MatDestroyMatrices(1, &Boffs));
  PetscCall(ISDestroy(&Me));
  PetscCall(ISDestroy(&Notme));
  PetscCall(PetscFree(notme));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  /* do nondiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
  /* do local part */
  PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
  /* add partial results together */
  PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
  PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
static PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A, Vec v)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCheck(A->rmap->N == A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
  PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "row partition must equal col partition");
  PetscCall(MatGetDiagonal(a->A, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatScale_MPIAIJ(Mat A, PetscScalar aa)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatScale(a->A, aa));
  PetscCall(MatScale(a->B, aa));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatView_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)mat->data;
  Mat_SeqAIJ        *A   = (Mat_SeqAIJ *)aij->A->data;
  Mat_SeqAIJ        *B   = (Mat_SeqAIJ *)aij->B->data;
  const PetscInt    *garray = aij->garray;
  const PetscScalar *aa, *ba;
  PetscInt           header[4], M, N, m, rs, cs, cnt, i, ja, jb;
  PetscInt64         nz, hnz;
  PetscInt          *rowlens;
  PetscInt          *colidxs;
  PetscScalar       *matvals;
  PetscMPIInt        rank;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  M  = mat->rmap->N;
  N  = mat->cmap->N;
  m  = mat->rmap->n;
  rs = mat->rmap->rstart;
  cs = mat->cmap->rstart;
  nz = A->nz + B->nz;

  /* write matrix header */
  header[0] = MAT_FILE_CLASSID;
  header[1] = M;
  header[2] = N;
  PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_INT64, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
  if (rank == 0)
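    /* the reduction above leaves the global nonzero count hnz only on rank 0, so only rank 0 records it in header[3] */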
PetscCall(PetscIntCast(hnz, &header[3])); 1215 PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT)); 1216 1217 /* fill in and store row lengths */ 1218 PetscCall(PetscMalloc1(m, &rowlens)); 1219 for (i = 0; i < m; i++) rowlens[i] = A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i]; 1220 PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT)); 1221 PetscCall(PetscFree(rowlens)); 1222 1223 /* fill in and store column indices */ 1224 PetscCall(PetscMalloc1(nz, &colidxs)); 1225 for (cnt = 0, i = 0; i < m; i++) { 1226 for (jb = B->i[i]; jb < B->i[i + 1]; jb++) { 1227 if (garray[B->j[jb]] > cs) break; 1228 colidxs[cnt++] = garray[B->j[jb]]; 1229 } 1230 for (ja = A->i[i]; ja < A->i[i + 1]; ja++) colidxs[cnt++] = A->j[ja] + cs; 1231 for (; jb < B->i[i + 1]; jb++) colidxs[cnt++] = garray[B->j[jb]]; 1232 } 1233 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz); 1234 PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 1235 PetscCall(PetscFree(colidxs)); 1236 1237 /* fill in and store nonzero values */ 1238 PetscCall(MatSeqAIJGetArrayRead(aij->A, &aa)); 1239 PetscCall(MatSeqAIJGetArrayRead(aij->B, &ba)); 1240 PetscCall(PetscMalloc1(nz, &matvals)); 1241 for (cnt = 0, i = 0; i < m; i++) { 1242 for (jb = B->i[i]; jb < B->i[i + 1]; jb++) { 1243 if (garray[B->j[jb]] > cs) break; 1244 matvals[cnt++] = ba[jb]; 1245 } 1246 for (ja = A->i[i]; ja < A->i[i + 1]; ja++) matvals[cnt++] = aa[ja]; 1247 for (; jb < B->i[i + 1]; jb++) matvals[cnt++] = ba[jb]; 1248 } 1249 PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &aa)); 1250 PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &ba)); 1251 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz); 1252 PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 1253 PetscCall(PetscFree(matvals)); 1254 1255 /* write block size option to the viewer's .info file */ 1256 PetscCall(MatView_Binary_BlockSizes(mat, viewer)); 1257 PetscFunctionReturn(PETSC_SUCCESS); 1258 } 1259 1260 #include <petscdraw.h> 1261 static PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer) 1262 { 1263 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1264 PetscMPIInt rank = aij->rank, size = aij->size; 1265 PetscBool isdraw, isascii, isbinary; 1266 PetscViewer sviewer; 1267 PetscViewerFormat format; 1268 1269 PetscFunctionBegin; 1270 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1271 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii)); 1272 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1273 if (isascii) { 1274 PetscCall(PetscViewerGetFormat(viewer, &format)); 1275 if (format == PETSC_VIEWER_LOAD_BALANCE) { 1276 PetscInt i, nmax = 0, nmin = PETSC_INT_MAX, navg = 0, *nz, nzlocal = ((Mat_SeqAIJ *)aij->A->data)->nz + ((Mat_SeqAIJ *)aij->B->data)->nz; 1277 PetscCall(PetscMalloc1(size, &nz)); 1278 PetscCallMPI(MPI_Allgather(&nzlocal, 1, MPIU_INT, nz, 1, MPIU_INT, PetscObjectComm((PetscObject)mat))); 1279 for (i = 0; i < size; i++) { 1280 nmax = PetscMax(nmax, nz[i]); 1281 nmin = PetscMin(nmin, nz[i]); 1282 navg += nz[i]; 1283 } 1284 PetscCall(PetscFree(nz)); 1285 navg = navg / size; 1286 PetscCall(PetscViewerASCIIPrintf(viewer, "Load Balance - Nonzeros: Min %" PetscInt_FMT " avg %" PetscInt_FMT " max %" 
PetscInt_FMT "\n", nmin, navg, nmax)); 1287 PetscFunctionReturn(PETSC_SUCCESS); 1288 } 1289 PetscCall(PetscViewerGetFormat(viewer, &format)); 1290 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1291 MatInfo info; 1292 PetscInt *inodes = NULL; 1293 1294 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 1295 PetscCall(MatGetInfo(mat, MAT_LOCAL, &info)); 1296 PetscCall(MatInodeGetInodeSizes(aij->A, NULL, &inodes, NULL)); 1297 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 1298 if (!inodes) { 1299 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, 1300 info.memory)); 1301 } else { 1302 PetscCall( 1303 PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, info.memory)); 1304 } 1305 PetscCall(MatGetInfo(aij->A, MAT_LOCAL, &info)); 1306 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used)); 1307 PetscCall(MatGetInfo(aij->B, MAT_LOCAL, &info)); 1308 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used)); 1309 PetscCall(PetscViewerFlush(viewer)); 1310 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 1311 PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n")); 1312 PetscCall(VecScatterView(aij->Mvctx, viewer)); 1313 PetscFunctionReturn(PETSC_SUCCESS); 1314 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1315 PetscInt inodecount, inodelimit, *inodes; 1316 PetscCall(MatInodeGetInodeSizes(aij->A, &inodecount, &inodes, &inodelimit)); 1317 if (inodes) { 1318 PetscCall(PetscViewerASCIIPrintf(viewer, "using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n", inodecount, inodelimit)); 1319 } else { 1320 PetscCall(PetscViewerASCIIPrintf(viewer, "not using I-node (on process 0) routines\n")); 1321 } 1322 PetscFunctionReturn(PETSC_SUCCESS); 1323 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1324 PetscFunctionReturn(PETSC_SUCCESS); 1325 } 1326 } else if (isbinary) { 1327 if (size == 1) { 1328 PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name)); 1329 PetscCall(MatView(aij->A, viewer)); 1330 } else { 1331 PetscCall(MatView_MPIAIJ_Binary(mat, viewer)); 1332 } 1333 PetscFunctionReturn(PETSC_SUCCESS); 1334 } else if (isascii && size == 1) { 1335 PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name)); 1336 PetscCall(MatView(aij->A, viewer)); 1337 PetscFunctionReturn(PETSC_SUCCESS); 1338 } else if (isdraw) { 1339 PetscDraw draw; 1340 PetscBool isnull; 1341 PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); 1342 PetscCall(PetscDrawIsNull(draw, &isnull)); 1343 if (isnull) PetscFunctionReturn(PETSC_SUCCESS); 1344 } 1345 1346 { /* assemble the entire matrix onto first processor */ 1347 Mat A = NULL, Av; 1348 IS isrow, iscol; 1349 1350 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->rmap->N : 0, 0, 1, &isrow)); 1351 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? 
mat->cmap->N : 0, 0, 1, &iscol)); 1352 PetscCall(MatCreateSubMatrix(mat, isrow, iscol, MAT_INITIAL_MATRIX, &A)); 1353 PetscCall(MatMPIAIJGetSeqAIJ(A, &Av, NULL, NULL)); 1354 /* The commented code uses MatCreateSubMatrices instead */ 1355 /* 1356 Mat *AA, A = NULL, Av; 1357 IS isrow,iscol; 1358 1359 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow)); 1360 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol)); 1361 PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA)); 1362 if (rank == 0) { 1363 PetscCall(PetscObjectReference((PetscObject)AA[0])); 1364 A = AA[0]; 1365 Av = AA[0]; 1366 } 1367 PetscCall(MatDestroySubMatrices(1,&AA)); 1368 */ 1369 PetscCall(ISDestroy(&iscol)); 1370 PetscCall(ISDestroy(&isrow)); 1371 /* 1372 Everyone has to call to draw the matrix since the graphics waits are 1373 synchronized across all processors that share the PetscDraw object 1374 */ 1375 PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 1376 if (rank == 0) { 1377 if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)Av, ((PetscObject)mat)->name)); 1378 PetscCall(MatView_SeqAIJ(Av, sviewer)); 1379 } 1380 PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 1381 PetscCall(MatDestroy(&A)); 1382 } 1383 PetscFunctionReturn(PETSC_SUCCESS); 1384 } 1385 1386 PetscErrorCode MatView_MPIAIJ(Mat mat, PetscViewer viewer) 1387 { 1388 PetscBool isascii, isdraw, issocket, isbinary; 1389 1390 PetscFunctionBegin; 1391 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii)); 1392 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1393 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1394 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket)); 1395 if (isascii || isdraw || isbinary || issocket) PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat, viewer)); 1396 PetscFunctionReturn(PETSC_SUCCESS); 1397 } 1398 1399 static PetscErrorCode MatSOR_MPIAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) 1400 { 1401 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data; 1402 Vec bb1 = NULL; 1403 PetscBool hasop; 1404 1405 PetscFunctionBegin; 1406 if (flag == SOR_APPLY_UPPER) { 1407 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1408 PetscFunctionReturn(PETSC_SUCCESS); 1409 } 1410 1411 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) PetscCall(VecDuplicate(bb, &bb1)); 1412 1413 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1414 if (flag & SOR_ZERO_INITIAL_GUESS) { 1415 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1416 its--; 1417 } 1418 1419 while (its--) { 1420 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1421 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1422 1423 /* update rhs: bb1 = bb - B*x */ 1424 PetscCall(VecScale(mat->lvec, -1.0)); 1425 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 1426 1427 /* local sweep */ 1428 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx)); 1429 } 1430 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1431 if (flag & SOR_ZERO_INITIAL_GUESS) { 1432 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 
1, xx)); 1433 its--; 1434 } 1435 while (its--) { 1436 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1437 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1438 1439 /* update rhs: bb1 = bb - B*x */ 1440 PetscCall(VecScale(mat->lvec, -1.0)); 1441 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 1442 1443 /* local sweep */ 1444 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx)); 1445 } 1446 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1447 if (flag & SOR_ZERO_INITIAL_GUESS) { 1448 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1449 its--; 1450 } 1451 while (its--) { 1452 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1453 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1454 1455 /* update rhs: bb1 = bb - B*x */ 1456 PetscCall(VecScale(mat->lvec, -1.0)); 1457 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 1458 1459 /* local sweep */ 1460 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx)); 1461 } 1462 } else if (flag & SOR_EISENSTAT) { 1463 Vec xx1; 1464 1465 PetscCall(VecDuplicate(bb, &xx1)); 1466 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx)); 1467 1468 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1469 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1470 if (!mat->diag) { 1471 PetscCall(MatCreateVecs(matin, &mat->diag, NULL)); 1472 PetscCall(MatGetDiagonal(matin, mat->diag)); 1473 } 1474 PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop)); 1475 if (hasop) { 1476 PetscCall(MatMultDiagonalBlock(matin, xx, bb1)); 1477 } else { 1478 PetscCall(VecPointwiseMult(bb1, mat->diag, xx)); 1479 } 1480 PetscCall(VecAYPX(bb1, (omega - 2.0) / omega, bb)); 1481 1482 PetscCall(MatMultAdd(mat->B, mat->lvec, bb1, bb1)); 1483 1484 /* local sweep */ 1485 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1)); 1486 PetscCall(VecAXPY(xx, 1.0, xx1)); 1487 PetscCall(VecDestroy(&xx1)); 1488 } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel SOR not supported"); 1489 1490 PetscCall(VecDestroy(&bb1)); 1491 1492 matin->factorerrortype = mat->A->factorerrortype; 1493 PetscFunctionReturn(PETSC_SUCCESS); 1494 } 1495 1496 static PetscErrorCode MatPermute_MPIAIJ(Mat A, IS rowp, IS colp, Mat *B) 1497 { 1498 Mat aA, aB, Aperm; 1499 const PetscInt *rwant, *cwant, *gcols, *ai, *bi, *aj, *bj; 1500 PetscScalar *aa, *ba; 1501 PetscInt i, j, m, n, ng, anz, bnz, *dnnz, *onnz, *tdnnz, *tonnz, *rdest, *cdest, *work, *gcdest; 1502 PetscSF rowsf, sf; 1503 IS parcolp = NULL; 1504 PetscBool done; 1505 1506 PetscFunctionBegin; 1507 PetscCall(MatGetLocalSize(A, &m, &n)); 1508 PetscCall(ISGetIndices(rowp, &rwant)); 1509 PetscCall(ISGetIndices(colp, &cwant)); 1510 PetscCall(PetscMalloc3(PetscMax(m, n), &work, m, &rdest, n, &cdest)); 1511 1512 /* Invert row permutation to find out where my rows should go */ 1513 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &rowsf)); 1514 PetscCall(PetscSFSetGraphLayout(rowsf, A->rmap, A->rmap->n, NULL, PETSC_OWN_POINTER, rwant)); 1515 PetscCall(PetscSFSetFromOptions(rowsf)); 1516 for (i = 0; i < m; i++) work[i] = 
A->rmap->rstart + i; 1517 PetscCall(PetscSFReduceBegin(rowsf, MPIU_INT, work, rdest, MPI_REPLACE)); 1518 PetscCall(PetscSFReduceEnd(rowsf, MPIU_INT, work, rdest, MPI_REPLACE)); 1519 1520 /* Invert column permutation to find out where my columns should go */ 1521 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1522 PetscCall(PetscSFSetGraphLayout(sf, A->cmap, A->cmap->n, NULL, PETSC_OWN_POINTER, cwant)); 1523 PetscCall(PetscSFSetFromOptions(sf)); 1524 for (i = 0; i < n; i++) work[i] = A->cmap->rstart + i; 1525 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, work, cdest, MPI_REPLACE)); 1526 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, work, cdest, MPI_REPLACE)); 1527 PetscCall(PetscSFDestroy(&sf)); 1528 1529 PetscCall(ISRestoreIndices(rowp, &rwant)); 1530 PetscCall(ISRestoreIndices(colp, &cwant)); 1531 PetscCall(MatMPIAIJGetSeqAIJ(A, &aA, &aB, &gcols)); 1532 1533 /* Find out where my gcols should go */ 1534 PetscCall(MatGetSize(aB, NULL, &ng)); 1535 PetscCall(PetscMalloc1(ng, &gcdest)); 1536 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1537 PetscCall(PetscSFSetGraphLayout(sf, A->cmap, ng, NULL, PETSC_OWN_POINTER, gcols)); 1538 PetscCall(PetscSFSetFromOptions(sf)); 1539 PetscCall(PetscSFBcastBegin(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE)); 1540 PetscCall(PetscSFBcastEnd(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE)); 1541 PetscCall(PetscSFDestroy(&sf)); 1542 1543 PetscCall(PetscCalloc4(m, &dnnz, m, &onnz, m, &tdnnz, m, &tonnz)); 1544 PetscCall(MatGetRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done)); 1545 PetscCall(MatGetRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done)); 1546 for (i = 0; i < m; i++) { 1547 PetscInt row = rdest[i]; 1548 PetscMPIInt rowner; 1549 PetscCall(PetscLayoutFindOwner(A->rmap, row, &rowner)); 1550 for (j = ai[i]; j < ai[i + 1]; j++) { 1551 PetscInt col = cdest[aj[j]]; 1552 PetscMPIInt cowner; 1553 PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); /* Could build an index for the columns to eliminate this search */ 1554 if (rowner == cowner) dnnz[i]++; 1555 else onnz[i]++; 1556 } 1557 for (j = bi[i]; j < bi[i + 1]; j++) { 1558 PetscInt col = gcdest[bj[j]]; 1559 PetscMPIInt cowner; 1560 PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); 1561 if (rowner == cowner) dnnz[i]++; 1562 else onnz[i]++; 1563 } 1564 } 1565 PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE)); 1566 PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE)); 1567 PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE)); 1568 PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE)); 1569 PetscCall(PetscSFDestroy(&rowsf)); 1570 1571 PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A), A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N, 0, tdnnz, 0, tonnz, &Aperm)); 1572 PetscCall(MatSeqAIJGetArray(aA, &aa)); 1573 PetscCall(MatSeqAIJGetArray(aB, &ba)); 1574 for (i = 0; i < m; i++) { 1575 PetscInt *acols = dnnz, *bcols = onnz; /* Repurpose now-unneeded arrays */ 1576 PetscInt j0, rowlen; 1577 rowlen = ai[i + 1] - ai[i]; 1578 for (j0 = j = 0; j < rowlen; j0 = j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1579 for (; j < PetscMin(rowlen, j0 + m); j++) acols[j - j0] = cdest[aj[ai[i] + j]]; 1580 PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, acols, aa + ai[i] + j0, INSERT_VALUES)); 1581 } 1582 rowlen = bi[i + 1] - bi[i]; 1583 for (j0 = j = 0; j < rowlen; j0 = j) { 1584 for (; j < PetscMin(rowlen, j0 + m); j++) bcols[j - j0] = gcdest[bj[bi[i] + j]]; 1585 
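      /* flush this batch: insert the remapped off-diagonal column indices (bcols) and the corresponding values into the permuted matrix at the row's new global index */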
PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, bcols, ba + bi[i] + j0, INSERT_VALUES)); 1586 } 1587 } 1588 PetscCall(MatAssemblyBegin(Aperm, MAT_FINAL_ASSEMBLY)); 1589 PetscCall(MatAssemblyEnd(Aperm, MAT_FINAL_ASSEMBLY)); 1590 PetscCall(MatRestoreRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done)); 1591 PetscCall(MatRestoreRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done)); 1592 PetscCall(MatSeqAIJRestoreArray(aA, &aa)); 1593 PetscCall(MatSeqAIJRestoreArray(aB, &ba)); 1594 PetscCall(PetscFree4(dnnz, onnz, tdnnz, tonnz)); 1595 PetscCall(PetscFree3(work, rdest, cdest)); 1596 PetscCall(PetscFree(gcdest)); 1597 if (parcolp) PetscCall(ISDestroy(&colp)); 1598 *B = Aperm; 1599 PetscFunctionReturn(PETSC_SUCCESS); 1600 } 1601 1602 static PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[]) 1603 { 1604 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1605 1606 PetscFunctionBegin; 1607 PetscCall(MatGetSize(aij->B, NULL, nghosts)); 1608 if (ghosts) *ghosts = aij->garray; 1609 PetscFunctionReturn(PETSC_SUCCESS); 1610 } 1611 1612 static PetscErrorCode MatGetInfo_MPIAIJ(Mat matin, MatInfoType flag, MatInfo *info) 1613 { 1614 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data; 1615 Mat A = mat->A, B = mat->B; 1616 PetscLogDouble isend[5], irecv[5]; 1617 1618 PetscFunctionBegin; 1619 info->block_size = 1.0; 1620 PetscCall(MatGetInfo(A, MAT_LOCAL, info)); 1621 1622 isend[0] = info->nz_used; 1623 isend[1] = info->nz_allocated; 1624 isend[2] = info->nz_unneeded; 1625 isend[3] = info->memory; 1626 isend[4] = info->mallocs; 1627 1628 PetscCall(MatGetInfo(B, MAT_LOCAL, info)); 1629 1630 isend[0] += info->nz_used; 1631 isend[1] += info->nz_allocated; 1632 isend[2] += info->nz_unneeded; 1633 isend[3] += info->memory; 1634 isend[4] += info->mallocs; 1635 if (flag == MAT_LOCAL) { 1636 info->nz_used = isend[0]; 1637 info->nz_allocated = isend[1]; 1638 info->nz_unneeded = isend[2]; 1639 info->memory = isend[3]; 1640 info->mallocs = isend[4]; 1641 } else if (flag == MAT_GLOBAL_MAX) { 1642 PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin))); 1643 1644 info->nz_used = irecv[0]; 1645 info->nz_allocated = irecv[1]; 1646 info->nz_unneeded = irecv[2]; 1647 info->memory = irecv[3]; 1648 info->mallocs = irecv[4]; 1649 } else if (flag == MAT_GLOBAL_SUM) { 1650 PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin))); 1651 1652 info->nz_used = irecv[0]; 1653 info->nz_allocated = irecv[1]; 1654 info->nz_unneeded = irecv[2]; 1655 info->memory = irecv[3]; 1656 info->mallocs = irecv[4]; 1657 } 1658 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1659 info->fill_ratio_needed = 0; 1660 info->factor_mallocs = 0; 1661 PetscFunctionReturn(PETSC_SUCCESS); 1662 } 1663 1664 PetscErrorCode MatSetOption_MPIAIJ(Mat A, MatOption op, PetscBool flg) 1665 { 1666 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1667 1668 PetscFunctionBegin; 1669 switch (op) { 1670 case MAT_NEW_NONZERO_LOCATIONS: 1671 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1672 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1673 case MAT_KEEP_NONZERO_PATTERN: 1674 case MAT_NEW_NONZERO_LOCATION_ERR: 1675 case MAT_USE_INODES: 1676 case MAT_IGNORE_ZERO_ENTRIES: 1677 case MAT_FORM_EXPLICIT_TRANSPOSE: 1678 MatCheckPreallocated(A, 1); 1679 PetscCall(MatSetOption(a->A, op, flg)); 1680 PetscCall(MatSetOption(a->B, op, flg)); 1681 break; 1682 case MAT_ROW_ORIENTED: 1683 MatCheckPreallocated(A, 1); 1684 a->roworiented = flg; 1685 1686 
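    /* keep the orientation of the sequential diagonal and off-diagonal blocks consistent with the parallel matrix */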
PetscCall(MatSetOption(a->A, op, flg)); 1687 PetscCall(MatSetOption(a->B, op, flg)); 1688 break; 1689 case MAT_IGNORE_OFF_PROC_ENTRIES: 1690 a->donotstash = flg; 1691 break; 1692 /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */ 1693 case MAT_SPD: 1694 case MAT_SYMMETRIC: 1695 case MAT_STRUCTURALLY_SYMMETRIC: 1696 case MAT_HERMITIAN: 1697 case MAT_SYMMETRY_ETERNAL: 1698 case MAT_STRUCTURAL_SYMMETRY_ETERNAL: 1699 case MAT_SPD_ETERNAL: 1700 /* if the diagonal matrix is square it inherits some of the properties above */ 1701 if (a->A && A->rmap->n == A->cmap->n) PetscCall(MatSetOption(a->A, op, flg)); 1702 break; 1703 case MAT_SUBMAT_SINGLEIS: 1704 A->submat_singleis = flg; 1705 break; 1706 default: 1707 break; 1708 } 1709 PetscFunctionReturn(PETSC_SUCCESS); 1710 } 1711 1712 PetscErrorCode MatGetRow_MPIAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1713 { 1714 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data; 1715 PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p; 1716 PetscInt i, *cworkA, *cworkB, **pcA, **pcB, cstart = matin->cmap->rstart; 1717 PetscInt nztot, nzA, nzB, lrow, rstart = matin->rmap->rstart, rend = matin->rmap->rend; 1718 PetscInt *cmap, *idx_p; 1719 1720 PetscFunctionBegin; 1721 PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active"); 1722 mat->getrowactive = PETSC_TRUE; 1723 1724 if (!mat->rowvalues && (idx || v)) { 1725 /* 1726 allocate enough space to hold information from the longest row. 1727 */ 1728 Mat_SeqAIJ *Aa = (Mat_SeqAIJ *)mat->A->data, *Ba = (Mat_SeqAIJ *)mat->B->data; 1729 PetscInt max = 1, tmp; 1730 for (i = 0; i < matin->rmap->n; i++) { 1731 tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i]; 1732 if (max < tmp) max = tmp; 1733 } 1734 PetscCall(PetscMalloc2(max, &mat->rowvalues, max, &mat->rowindices)); 1735 } 1736 1737 PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows"); 1738 lrow = row - rstart; 1739 1740 pvA = &vworkA; 1741 pcA = &cworkA; 1742 pvB = &vworkB; 1743 pcB = &cworkB; 1744 if (!v) { 1745 pvA = NULL; 1746 pvB = NULL; 1747 } 1748 if (!idx) { 1749 pcA = NULL; 1750 if (!v) pcB = NULL; 1751 } 1752 PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA)); 1753 PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB)); 1754 nztot = nzA + nzB; 1755 1756 cmap = mat->garray; 1757 if (v || idx) { 1758 if (nztot) { 1759 /* Sort by increasing column numbers, assuming A and B already sorted */ 1760 PetscInt imark = -1; 1761 if (v) { 1762 *v = v_p = mat->rowvalues; 1763 for (i = 0; i < nzB; i++) { 1764 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1765 else break; 1766 } 1767 imark = i; 1768 for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i]; 1769 for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i]; 1770 } 1771 if (idx) { 1772 *idx = idx_p = mat->rowindices; 1773 if (imark > -1) { 1774 for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i]]; 1775 } else { 1776 for (i = 0; i < nzB; i++) { 1777 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1778 else break; 1779 } 1780 imark = i; 1781 } 1782 for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart + cworkA[i]; 1783 for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i]]; 1784 } 1785 } else { 1786 if (idx) *idx = NULL; 1787 if (v) *v = NULL; 1788 } 1789 } 1790 *nz = nztot; 1791 PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA)); 1792 PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB)); 1793 
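  /* any requested copy of the merged row now lives in mat->rowvalues/mat->rowindices, so the rows of the sequential blocks can be released here; MatRestoreRow_MPIAIJ() below only clears the getrowactive flag */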
PetscFunctionReturn(PETSC_SUCCESS); 1794 } 1795 1796 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1797 { 1798 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1799 1800 PetscFunctionBegin; 1801 PetscCheck(aij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first"); 1802 aij->getrowactive = PETSC_FALSE; 1803 PetscFunctionReturn(PETSC_SUCCESS); 1804 } 1805 1806 static PetscErrorCode MatNorm_MPIAIJ(Mat mat, NormType type, PetscReal *norm) 1807 { 1808 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1809 Mat_SeqAIJ *amat = (Mat_SeqAIJ *)aij->A->data, *bmat = (Mat_SeqAIJ *)aij->B->data; 1810 PetscInt i, j; 1811 PetscReal sum = 0.0; 1812 const MatScalar *v, *amata, *bmata; 1813 1814 PetscFunctionBegin; 1815 if (aij->size == 1) { 1816 PetscCall(MatNorm(aij->A, type, norm)); 1817 } else { 1818 PetscCall(MatSeqAIJGetArrayRead(aij->A, &amata)); 1819 PetscCall(MatSeqAIJGetArrayRead(aij->B, &bmata)); 1820 if (type == NORM_FROBENIUS) { 1821 v = amata; 1822 for (i = 0; i < amat->nz; i++) { 1823 sum += PetscRealPart(PetscConj(*v) * (*v)); 1824 v++; 1825 } 1826 v = bmata; 1827 for (i = 0; i < bmat->nz; i++) { 1828 sum += PetscRealPart(PetscConj(*v) * (*v)); 1829 v++; 1830 } 1831 PetscCallMPI(MPIU_Allreduce(&sum, norm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 1832 *norm = PetscSqrtReal(*norm); 1833 PetscCall(PetscLogFlops(2.0 * amat->nz + 2.0 * bmat->nz)); 1834 } else if (type == NORM_1) { /* max column norm */ 1835 Vec col, bcol; 1836 PetscScalar *array; 1837 PetscInt *jj, *garray = aij->garray; 1838 1839 PetscCall(MatCreateVecs(mat, &col, NULL)); 1840 PetscCall(VecSet(col, 0.0)); 1841 PetscCall(VecGetArrayWrite(col, &array)); 1842 v = amata; 1843 jj = amat->j; 1844 for (j = 0; j < amat->nz; j++) array[*jj++] += PetscAbsScalar(*v++); 1845 PetscCall(VecRestoreArrayWrite(col, &array)); 1846 PetscCall(MatCreateVecs(aij->B, &bcol, NULL)); 1847 PetscCall(VecSet(bcol, 0.0)); 1848 PetscCall(VecGetArrayWrite(bcol, &array)); 1849 v = bmata; 1850 jj = bmat->j; 1851 for (j = 0; j < bmat->nz; j++) array[*jj++] += PetscAbsScalar(*v++); 1852 PetscCall(VecSetValues(col, aij->B->cmap->n, garray, array, ADD_VALUES)); 1853 PetscCall(VecRestoreArrayWrite(bcol, &array)); 1854 PetscCall(VecDestroy(&bcol)); 1855 PetscCall(VecAssemblyBegin(col)); 1856 PetscCall(VecAssemblyEnd(col)); 1857 PetscCall(VecNorm(col, NORM_INFINITY, norm)); 1858 PetscCall(VecDestroy(&col)); 1859 } else if (type == NORM_INFINITY) { /* max row norm */ 1860 PetscReal ntemp = 0.0; 1861 for (j = 0; j < aij->A->rmap->n; j++) { 1862 v = PetscSafePointerPlusOffset(amata, amat->i[j]); 1863 sum = 0.0; 1864 for (i = 0; i < amat->i[j + 1] - amat->i[j]; i++) { 1865 sum += PetscAbsScalar(*v); 1866 v++; 1867 } 1868 v = PetscSafePointerPlusOffset(bmata, bmat->i[j]); 1869 for (i = 0; i < bmat->i[j + 1] - bmat->i[j]; i++) { 1870 sum += PetscAbsScalar(*v); 1871 v++; 1872 } 1873 if (sum > ntemp) ntemp = sum; 1874 } 1875 PetscCallMPI(MPIU_Allreduce(&ntemp, norm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat))); 1876 PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0))); 1877 } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for two norm"); 1878 PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &amata)); 1879 PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &bmata)); 1880 } 1881 PetscFunctionReturn(PETSC_SUCCESS); 1882 } 1883 1884 static PetscErrorCode MatTranspose_MPIAIJ(Mat A, MatReuse reuse, Mat *matout) 1885 { 1886 Mat_MPIAIJ *a 
= (Mat_MPIAIJ *)A->data, *b; 1887 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *)a->A->data, *Bloc = (Mat_SeqAIJ *)a->B->data, *sub_B_diag; 1888 PetscInt M = A->rmap->N, N = A->cmap->N, ma, na, mb, nb, row, *cols, *cols_tmp, *B_diag_ilen, i, ncol, A_diag_ncol; 1889 const PetscInt *ai, *aj, *bi, *bj, *B_diag_i; 1890 Mat B, A_diag, *B_diag; 1891 const MatScalar *pbv, *bv; 1892 1893 PetscFunctionBegin; 1894 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout)); 1895 ma = A->rmap->n; 1896 na = A->cmap->n; 1897 mb = a->B->rmap->n; 1898 nb = a->B->cmap->n; 1899 ai = Aloc->i; 1900 aj = Aloc->j; 1901 bi = Bloc->i; 1902 bj = Bloc->j; 1903 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1904 PetscInt *d_nnz, *g_nnz, *o_nnz; 1905 PetscSFNode *oloc; 1906 PETSC_UNUSED PetscSF sf; 1907 1908 PetscCall(PetscMalloc4(na, &d_nnz, na, &o_nnz, nb, &g_nnz, nb, &oloc)); 1909 /* compute d_nnz for preallocation */ 1910 PetscCall(PetscArrayzero(d_nnz, na)); 1911 for (i = 0; i < ai[ma]; i++) d_nnz[aj[i]]++; 1912 /* compute local off-diagonal contributions */ 1913 PetscCall(PetscArrayzero(g_nnz, nb)); 1914 for (i = 0; i < bi[ma]; i++) g_nnz[bj[i]]++; 1915 /* map those to global */ 1916 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1917 PetscCall(PetscSFSetGraphLayout(sf, A->cmap, nb, NULL, PETSC_USE_POINTER, a->garray)); 1918 PetscCall(PetscSFSetFromOptions(sf)); 1919 PetscCall(PetscArrayzero(o_nnz, na)); 1920 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM)); 1921 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM)); 1922 PetscCall(PetscSFDestroy(&sf)); 1923 1924 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 1925 PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M)); 1926 PetscCall(MatSetBlockSizes(B, A->cmap->bs, A->rmap->bs)); 1927 PetscCall(MatSetType(B, ((PetscObject)A)->type_name)); 1928 PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz)); 1929 PetscCall(PetscFree4(d_nnz, o_nnz, g_nnz, oloc)); 1930 } else { 1931 B = *matout; 1932 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 1933 } 1934 1935 b = (Mat_MPIAIJ *)B->data; 1936 A_diag = a->A; 1937 B_diag = &b->A; 1938 sub_B_diag = (Mat_SeqAIJ *)(*B_diag)->data; 1939 A_diag_ncol = A_diag->cmap->N; 1940 B_diag_ilen = sub_B_diag->ilen; 1941 B_diag_i = sub_B_diag->i; 1942 1943 /* Set ilen for diagonal of B */ 1944 for (i = 0; i < A_diag_ncol; i++) B_diag_ilen[i] = B_diag_i[i + 1] - B_diag_i[i]; 1945 1946 /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done 1947 very quickly (=without using MatSetValues), because all writes are local. 
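     The off-diagonal block below still has to go through MatSetValues(): its compressed column indices are translated to global indices via a->garray, and each local row is inserted as a single column of B, because those transposed entries belong to rows owned by other processes.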
*/ 1948 PetscCall(MatTransposeSetPrecursor(A_diag, *B_diag)); 1949 PetscCall(MatTranspose(A_diag, MAT_REUSE_MATRIX, B_diag)); 1950 1951 /* copy over the B part */ 1952 PetscCall(PetscMalloc1(bi[mb], &cols)); 1953 PetscCall(MatSeqAIJGetArrayRead(a->B, &bv)); 1954 pbv = bv; 1955 row = A->rmap->rstart; 1956 for (i = 0; i < bi[mb]; i++) cols[i] = a->garray[bj[i]]; 1957 cols_tmp = cols; 1958 for (i = 0; i < mb; i++) { 1959 ncol = bi[i + 1] - bi[i]; 1960 PetscCall(MatSetValues(B, ncol, cols_tmp, 1, &row, pbv, INSERT_VALUES)); 1961 row++; 1962 if (pbv) pbv += ncol; 1963 if (cols_tmp) cols_tmp += ncol; 1964 } 1965 PetscCall(PetscFree(cols)); 1966 PetscCall(MatSeqAIJRestoreArrayRead(a->B, &bv)); 1967 1968 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 1969 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 1970 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1971 *matout = B; 1972 } else { 1973 PetscCall(MatHeaderMerge(A, &B)); 1974 } 1975 PetscFunctionReturn(PETSC_SUCCESS); 1976 } 1977 1978 static PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat, Vec ll, Vec rr) 1979 { 1980 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1981 Mat a = aij->A, b = aij->B; 1982 PetscInt s1, s2, s3; 1983 1984 PetscFunctionBegin; 1985 PetscCall(MatGetLocalSize(mat, &s2, &s3)); 1986 if (rr) { 1987 PetscCall(VecGetLocalSize(rr, &s1)); 1988 PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size"); 1989 /* Overlap communication with computation. */ 1990 PetscCall(VecScatterBegin(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1991 } 1992 if (ll) { 1993 PetscCall(VecGetLocalSize(ll, &s1)); 1994 PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size"); 1995 PetscUseTypeMethod(b, diagonalscale, ll, NULL); 1996 } 1997 /* scale the diagonal block */ 1998 PetscUseTypeMethod(a, diagonalscale, ll, rr); 1999 2000 if (rr) { 2001 /* Do a scatter end and then right scale the off-diagonal block */ 2002 PetscCall(VecScatterEnd(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2003 PetscUseTypeMethod(b, diagonalscale, NULL, aij->lvec); 2004 } 2005 PetscFunctionReturn(PETSC_SUCCESS); 2006 } 2007 2008 static PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2009 { 2010 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2011 2012 PetscFunctionBegin; 2013 PetscCall(MatSetUnfactored(a->A)); 2014 PetscFunctionReturn(PETSC_SUCCESS); 2015 } 2016 2017 static PetscErrorCode MatEqual_MPIAIJ(Mat A, Mat B, PetscBool *flag) 2018 { 2019 Mat_MPIAIJ *matB = (Mat_MPIAIJ *)B->data, *matA = (Mat_MPIAIJ *)A->data; 2020 Mat a, b, c, d; 2021 PetscBool flg; 2022 2023 PetscFunctionBegin; 2024 a = matA->A; 2025 b = matA->B; 2026 c = matB->A; 2027 d = matB->B; 2028 2029 PetscCall(MatEqual(a, c, &flg)); 2030 if (flg) PetscCall(MatEqual(b, d, &flg)); 2031 PetscCallMPI(MPIU_Allreduce(&flg, flag, 1, MPI_C_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A))); 2032 PetscFunctionReturn(PETSC_SUCCESS); 2033 } 2034 2035 static PetscErrorCode MatCopy_MPIAIJ(Mat A, Mat B, MatStructure str) 2036 { 2037 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2038 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2039 2040 PetscFunctionBegin; 2041 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
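     In that case, or when the nonzero patterns differ, fall back to the generic MatCopy_Basic() below.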
*/ 2042 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2043 /* because of the column compression in the off-processor part of the matrix a->B, 2044 the number of columns in a->B and b->B may be different, hence we cannot call 2045 the MatCopy() directly on the two parts. If need be, we can provide a more 2046 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2047 then copying the submatrices */ 2048 PetscCall(MatCopy_Basic(A, B, str)); 2049 } else { 2050 PetscCall(MatCopy(a->A, b->A, str)); 2051 PetscCall(MatCopy(a->B, b->B, str)); 2052 } 2053 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 2054 PetscFunctionReturn(PETSC_SUCCESS); 2055 } 2056 2057 /* 2058 Computes the number of nonzeros per row needed for preallocation when X and Y 2059 have different nonzero structure. 2060 */ 2061 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m, const PetscInt *xi, const PetscInt *xj, const PetscInt *xltog, const PetscInt *yi, const PetscInt *yj, const PetscInt *yltog, PetscInt *nnz) 2062 { 2063 PetscInt i, j, k, nzx, nzy; 2064 2065 PetscFunctionBegin; 2066 /* Set the number of nonzeros in the new matrix */ 2067 for (i = 0; i < m; i++) { 2068 const PetscInt *xjj = PetscSafePointerPlusOffset(xj, xi[i]), *yjj = PetscSafePointerPlusOffset(yj, yi[i]); 2069 nzx = xi[i + 1] - xi[i]; 2070 nzy = yi[i + 1] - yi[i]; 2071 nnz[i] = 0; 2072 for (j = 0, k = 0; j < nzx; j++) { /* Point in X */ 2073 for (; k < nzy && yltog[yjj[k]] < xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2074 if (k < nzy && yltog[yjj[k]] == xltog[xjj[j]]) k++; /* Skip duplicate */ 2075 nnz[i]++; 2076 } 2077 for (; k < nzy; k++) nnz[i]++; 2078 } 2079 PetscFunctionReturn(PETSC_SUCCESS); 2080 } 2081 2082 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2083 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz) 2084 { 2085 PetscInt m = Y->rmap->N; 2086 Mat_SeqAIJ *x = (Mat_SeqAIJ *)X->data; 2087 Mat_SeqAIJ *y = (Mat_SeqAIJ *)Y->data; 2088 2089 PetscFunctionBegin; 2090 PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz)); 2091 PetscFunctionReturn(PETSC_SUCCESS); 2092 } 2093 2094 static PetscErrorCode MatAXPY_MPIAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str) 2095 { 2096 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data, *yy = (Mat_MPIAIJ *)Y->data; 2097 2098 PetscFunctionBegin; 2099 if (str == SAME_NONZERO_PATTERN) { 2100 PetscCall(MatAXPY(yy->A, a, xx->A, str)); 2101 PetscCall(MatAXPY(yy->B, a, xx->B, str)); 2102 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2103 PetscCall(MatAXPY_Basic(Y, a, X, str)); 2104 } else { 2105 Mat B; 2106 PetscInt *nnz_d, *nnz_o; 2107 2108 PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d)); 2109 PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o)); 2110 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B)); 2111 PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name)); 2112 PetscCall(MatSetLayouts(B, Y->rmap, Y->cmap)); 2113 PetscCall(MatSetType(B, ((PetscObject)Y)->type_name)); 2114 PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A, xx->A, nnz_d)); 2115 PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o)); 2116 PetscCall(MatMPIAIJSetPreallocation(B, 0, nnz_d, 0, nnz_o)); 2117 PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str)); 2118 PetscCall(MatHeaderMerge(Y, &B)); 2119 PetscCall(PetscFree(nnz_d)); 
2120 PetscCall(PetscFree(nnz_o)); 2121 } 2122 PetscFunctionReturn(PETSC_SUCCESS); 2123 } 2124 2125 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat); 2126 2127 static PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2128 { 2129 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2130 2131 PetscFunctionBegin; 2132 PetscCall(MatConjugate_SeqAIJ(aij->A)); 2133 PetscCall(MatConjugate_SeqAIJ(aij->B)); 2134 PetscFunctionReturn(PETSC_SUCCESS); 2135 } 2136 2137 static PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2138 { 2139 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2140 2141 PetscFunctionBegin; 2142 PetscCall(MatRealPart(a->A)); 2143 PetscCall(MatRealPart(a->B)); 2144 PetscFunctionReturn(PETSC_SUCCESS); 2145 } 2146 2147 static PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2148 { 2149 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2150 2151 PetscFunctionBegin; 2152 PetscCall(MatImaginaryPart(a->A)); 2153 PetscCall(MatImaginaryPart(a->B)); 2154 PetscFunctionReturn(PETSC_SUCCESS); 2155 } 2156 2157 static PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2158 { 2159 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2160 PetscInt i, *idxb = NULL, m = A->rmap->n; 2161 PetscScalar *vv; 2162 Vec vB, vA; 2163 const PetscScalar *va, *vb; 2164 2165 PetscFunctionBegin; 2166 PetscCall(MatCreateVecs(a->A, NULL, &vA)); 2167 PetscCall(MatGetRowMaxAbs(a->A, vA, idx)); 2168 2169 PetscCall(VecGetArrayRead(vA, &va)); 2170 if (idx) { 2171 for (i = 0; i < m; i++) { 2172 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2173 } 2174 } 2175 2176 PetscCall(MatCreateVecs(a->B, NULL, &vB)); 2177 PetscCall(PetscMalloc1(m, &idxb)); 2178 PetscCall(MatGetRowMaxAbs(a->B, vB, idxb)); 2179 2180 PetscCall(VecGetArrayWrite(v, &vv)); 2181 PetscCall(VecGetArrayRead(vB, &vb)); 2182 for (i = 0; i < m; i++) { 2183 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2184 vv[i] = vb[i]; 2185 if (idx) idx[i] = a->garray[idxb[i]]; 2186 } else { 2187 vv[i] = va[i]; 2188 if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) idx[i] = a->garray[idxb[i]]; 2189 } 2190 } 2191 PetscCall(VecRestoreArrayWrite(v, &vv)); 2192 PetscCall(VecRestoreArrayRead(vA, &va)); 2193 PetscCall(VecRestoreArrayRead(vB, &vb)); 2194 PetscCall(PetscFree(idxb)); 2195 PetscCall(VecDestroy(&vA)); 2196 PetscCall(VecDestroy(&vB)); 2197 PetscFunctionReturn(PETSC_SUCCESS); 2198 } 2199 2200 static PetscErrorCode MatGetRowSumAbs_MPIAIJ(Mat A, Vec v) 2201 { 2202 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2203 Vec vB, vA; 2204 2205 PetscFunctionBegin; 2206 PetscCall(MatCreateVecs(a->A, NULL, &vA)); 2207 PetscCall(MatGetRowSumAbs(a->A, vA)); 2208 PetscCall(MatCreateVecs(a->B, NULL, &vB)); 2209 PetscCall(MatGetRowSumAbs(a->B, vB)); 2210 PetscCall(VecAXPY(vA, 1.0, vB)); 2211 PetscCall(VecDestroy(&vB)); 2212 PetscCall(VecCopy(vA, v)); 2213 PetscCall(VecDestroy(&vA)); 2214 PetscFunctionReturn(PETSC_SUCCESS); 2215 } 2216 2217 static PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2218 { 2219 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 2220 PetscInt m = A->rmap->n, n = A->cmap->n; 2221 PetscInt cstart = A->cmap->rstart, cend = A->cmap->rend; 2222 PetscInt *cmap = mat->garray; 2223 PetscInt *diagIdx, *offdiagIdx; 2224 Vec diagV, offdiagV; 2225 PetscScalar *a, *diagA, *offdiagA; 2226 const PetscScalar *ba, *bav; 2227 PetscInt r, j, col, ncols, *bi, *bj; 2228 Mat B = mat->B; 2229 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 2230 2231 PetscFunctionBegin; 2232 /* When a process holds entire A and other processes have no entry */ 2233 if (A->cmap->N 
== n) { 2234 PetscCall(VecGetArrayWrite(v, &diagA)); 2235 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV)); 2236 PetscCall(MatGetRowMinAbs(mat->A, diagV, idx)); 2237 PetscCall(VecDestroy(&diagV)); 2238 PetscCall(VecRestoreArrayWrite(v, &diagA)); 2239 PetscFunctionReturn(PETSC_SUCCESS); 2240 } else if (n == 0) { 2241 if (m) { 2242 PetscCall(VecGetArrayWrite(v, &a)); 2243 for (r = 0; r < m; r++) { 2244 a[r] = 0.0; 2245 if (idx) idx[r] = -1; 2246 } 2247 PetscCall(VecRestoreArrayWrite(v, &a)); 2248 } 2249 PetscFunctionReturn(PETSC_SUCCESS); 2250 } 2251 2252 PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx)); 2253 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2254 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2255 PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx)); 2256 2257 /* Get offdiagIdx[] for implicit 0.0 */ 2258 PetscCall(MatSeqAIJGetArrayRead(B, &bav)); 2259 ba = bav; 2260 bi = b->i; 2261 bj = b->j; 2262 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2263 for (r = 0; r < m; r++) { 2264 ncols = bi[r + 1] - bi[r]; 2265 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2266 offdiagA[r] = *ba; 2267 offdiagIdx[r] = cmap[0]; 2268 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2269 offdiagA[r] = 0.0; 2270 2271 /* Find first hole in the cmap */ 2272 for (j = 0; j < ncols; j++) { 2273 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2274 if (col > j && j < cstart) { 2275 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2276 break; 2277 } else if (col > j + n && j >= cstart) { 2278 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2279 break; 2280 } 2281 } 2282 if (j == ncols && ncols < A->cmap->N - n) { 2283 /* a hole is outside compressed Bcols */ 2284 if (ncols == 0) { 2285 if (cstart) { 2286 offdiagIdx[r] = 0; 2287 } else offdiagIdx[r] = cend; 2288 } else { /* ncols > 0 */ 2289 offdiagIdx[r] = cmap[ncols - 1] + 1; 2290 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2291 } 2292 } 2293 } 2294 2295 for (j = 0; j < ncols; j++) { 2296 if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) { 2297 offdiagA[r] = *ba; 2298 offdiagIdx[r] = cmap[*bj]; 2299 } 2300 ba++; 2301 bj++; 2302 } 2303 } 2304 2305 PetscCall(VecGetArrayWrite(v, &a)); 2306 PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA)); 2307 for (r = 0; r < m; ++r) { 2308 if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) { 2309 a[r] = diagA[r]; 2310 if (idx) idx[r] = cstart + diagIdx[r]; 2311 } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) { 2312 a[r] = diagA[r]; 2313 if (idx) { 2314 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2315 idx[r] = cstart + diagIdx[r]; 2316 } else idx[r] = offdiagIdx[r]; 2317 } 2318 } else { 2319 a[r] = offdiagA[r]; 2320 if (idx) idx[r] = offdiagIdx[r]; 2321 } 2322 } 2323 PetscCall(MatSeqAIJRestoreArrayRead(B, &bav)); 2324 PetscCall(VecRestoreArrayWrite(v, &a)); 2325 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA)); 2326 PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 2327 PetscCall(VecDestroy(&diagV)); 2328 PetscCall(VecDestroy(&offdiagV)); 2329 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2330 PetscFunctionReturn(PETSC_SUCCESS); 2331 } 2332 2333 static PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2334 { 2335 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 2336 PetscInt m = A->rmap->n, n = A->cmap->n; 2337 PetscInt cstart = A->cmap->rstart, cend = A->cmap->rend; 2338 PetscInt *cmap = mat->garray; 2339 PetscInt 
*diagIdx, *offdiagIdx; 2340 Vec diagV, offdiagV; 2341 PetscScalar *a, *diagA, *offdiagA; 2342 const PetscScalar *ba, *bav; 2343 PetscInt r, j, col, ncols, *bi, *bj; 2344 Mat B = mat->B; 2345 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 2346 2347 PetscFunctionBegin; 2348 /* When a process holds entire A and other processes have no entry */ 2349 if (A->cmap->N == n) { 2350 PetscCall(VecGetArrayWrite(v, &diagA)); 2351 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV)); 2352 PetscCall(MatGetRowMin(mat->A, diagV, idx)); 2353 PetscCall(VecDestroy(&diagV)); 2354 PetscCall(VecRestoreArrayWrite(v, &diagA)); 2355 PetscFunctionReturn(PETSC_SUCCESS); 2356 } else if (n == 0) { 2357 if (m) { 2358 PetscCall(VecGetArrayWrite(v, &a)); 2359 for (r = 0; r < m; r++) { 2360 a[r] = PETSC_MAX_REAL; 2361 if (idx) idx[r] = -1; 2362 } 2363 PetscCall(VecRestoreArrayWrite(v, &a)); 2364 } 2365 PetscFunctionReturn(PETSC_SUCCESS); 2366 } 2367 2368 PetscCall(PetscCalloc2(m, &diagIdx, m, &offdiagIdx)); 2369 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2370 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2371 PetscCall(MatGetRowMin(mat->A, diagV, diagIdx)); 2372 2373 /* Get offdiagIdx[] for implicit 0.0 */ 2374 PetscCall(MatSeqAIJGetArrayRead(B, &bav)); 2375 ba = bav; 2376 bi = b->i; 2377 bj = b->j; 2378 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2379 for (r = 0; r < m; r++) { 2380 ncols = bi[r + 1] - bi[r]; 2381 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2382 offdiagA[r] = *ba; 2383 offdiagIdx[r] = cmap[0]; 2384 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2385 offdiagA[r] = 0.0; 2386 2387 /* Find first hole in the cmap */ 2388 for (j = 0; j < ncols; j++) { 2389 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2390 if (col > j && j < cstart) { 2391 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2392 break; 2393 } else if (col > j + n && j >= cstart) { 2394 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2395 break; 2396 } 2397 } 2398 if (j == ncols && ncols < A->cmap->N - n) { 2399 /* a hole is outside compressed Bcols */ 2400 if (ncols == 0) { 2401 if (cstart) { 2402 offdiagIdx[r] = 0; 2403 } else offdiagIdx[r] = cend; 2404 } else { /* ncols > 0 */ 2405 offdiagIdx[r] = cmap[ncols - 1] + 1; 2406 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2407 } 2408 } 2409 } 2410 2411 for (j = 0; j < ncols; j++) { 2412 if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) { 2413 offdiagA[r] = *ba; 2414 offdiagIdx[r] = cmap[*bj]; 2415 } 2416 ba++; 2417 bj++; 2418 } 2419 } 2420 2421 PetscCall(VecGetArrayWrite(v, &a)); 2422 PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA)); 2423 for (r = 0; r < m; ++r) { 2424 if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) { 2425 a[r] = diagA[r]; 2426 if (idx) idx[r] = cstart + diagIdx[r]; 2427 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2428 a[r] = diagA[r]; 2429 if (idx) { 2430 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2431 idx[r] = cstart + diagIdx[r]; 2432 } else idx[r] = offdiagIdx[r]; 2433 } 2434 } else { 2435 a[r] = offdiagA[r]; 2436 if (idx) idx[r] = offdiagIdx[r]; 2437 } 2438 } 2439 PetscCall(MatSeqAIJRestoreArrayRead(B, &bav)); 2440 PetscCall(VecRestoreArrayWrite(v, &a)); 2441 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA)); 2442 PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 2443 PetscCall(VecDestroy(&diagV)); 2444 PetscCall(VecDestroy(&offdiagV)); 2445 PetscCall(PetscFree2(diagIdx, 
offdiagIdx)); 2446 PetscFunctionReturn(PETSC_SUCCESS); 2447 } 2448 2449 static PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2450 { 2451 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 2452 PetscInt m = A->rmap->n, n = A->cmap->n; 2453 PetscInt cstart = A->cmap->rstart, cend = A->cmap->rend; 2454 PetscInt *cmap = mat->garray; 2455 PetscInt *diagIdx, *offdiagIdx; 2456 Vec diagV, offdiagV; 2457 PetscScalar *a, *diagA, *offdiagA; 2458 const PetscScalar *ba, *bav; 2459 PetscInt r, j, col, ncols, *bi, *bj; 2460 Mat B = mat->B; 2461 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 2462 2463 PetscFunctionBegin; 2464 /* When a process holds entire A and other processes have no entry */ 2465 if (A->cmap->N == n) { 2466 PetscCall(VecGetArrayWrite(v, &diagA)); 2467 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV)); 2468 PetscCall(MatGetRowMax(mat->A, diagV, idx)); 2469 PetscCall(VecDestroy(&diagV)); 2470 PetscCall(VecRestoreArrayWrite(v, &diagA)); 2471 PetscFunctionReturn(PETSC_SUCCESS); 2472 } else if (n == 0) { 2473 if (m) { 2474 PetscCall(VecGetArrayWrite(v, &a)); 2475 for (r = 0; r < m; r++) { 2476 a[r] = PETSC_MIN_REAL; 2477 if (idx) idx[r] = -1; 2478 } 2479 PetscCall(VecRestoreArrayWrite(v, &a)); 2480 } 2481 PetscFunctionReturn(PETSC_SUCCESS); 2482 } 2483 2484 PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx)); 2485 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2486 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2487 PetscCall(MatGetRowMax(mat->A, diagV, diagIdx)); 2488 2489 /* Get offdiagIdx[] for implicit 0.0 */ 2490 PetscCall(MatSeqAIJGetArrayRead(B, &bav)); 2491 ba = bav; 2492 bi = b->i; 2493 bj = b->j; 2494 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2495 for (r = 0; r < m; r++) { 2496 ncols = bi[r + 1] - bi[r]; 2497 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2498 offdiagA[r] = *ba; 2499 offdiagIdx[r] = cmap[0]; 2500 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2501 offdiagA[r] = 0.0; 2502 2503 /* Find first hole in the cmap */ 2504 for (j = 0; j < ncols; j++) { 2505 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2506 if (col > j && j < cstart) { 2507 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2508 break; 2509 } else if (col > j + n && j >= cstart) { 2510 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2511 break; 2512 } 2513 } 2514 if (j == ncols && ncols < A->cmap->N - n) { 2515 /* a hole is outside compressed Bcols */ 2516 if (ncols == 0) { 2517 if (cstart) { 2518 offdiagIdx[r] = 0; 2519 } else offdiagIdx[r] = cend; 2520 } else { /* ncols > 0 */ 2521 offdiagIdx[r] = cmap[ncols - 1] + 1; 2522 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2523 } 2524 } 2525 } 2526 2527 for (j = 0; j < ncols; j++) { 2528 if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) { 2529 offdiagA[r] = *ba; 2530 offdiagIdx[r] = cmap[*bj]; 2531 } 2532 ba++; 2533 bj++; 2534 } 2535 } 2536 2537 PetscCall(VecGetArrayWrite(v, &a)); 2538 PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA)); 2539 for (r = 0; r < m; ++r) { 2540 if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) { 2541 a[r] = diagA[r]; 2542 if (idx) idx[r] = cstart + diagIdx[r]; 2543 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2544 a[r] = diagA[r]; 2545 if (idx) { 2546 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2547 idx[r] = cstart + diagIdx[r]; 2548 } else idx[r] = offdiagIdx[r]; 2549 } 2550 } else { 2551 a[r] = offdiagA[r]; 2552 if (idx) idx[r] = offdiagIdx[r]; 
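        /* the off-diagonal part (possibly the implicit zero recorded in offdiagIdx) carries the row maximum */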
2553 } 2554 } 2555 PetscCall(MatSeqAIJRestoreArrayRead(B, &bav)); 2556 PetscCall(VecRestoreArrayWrite(v, &a)); 2557 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA)); 2558 PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 2559 PetscCall(VecDestroy(&diagV)); 2560 PetscCall(VecDestroy(&offdiagV)); 2561 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2562 PetscFunctionReturn(PETSC_SUCCESS); 2563 } 2564 2565 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat, Mat *newmat) 2566 { 2567 Mat *dummy; 2568 2569 PetscFunctionBegin; 2570 PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat, MAT_DO_NOT_GET_VALUES, MAT_INITIAL_MATRIX, &dummy)); 2571 *newmat = *dummy; 2572 PetscCall(PetscFree(dummy)); 2573 PetscFunctionReturn(PETSC_SUCCESS); 2574 } 2575 2576 static PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A, const PetscScalar **values) 2577 { 2578 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2579 2580 PetscFunctionBegin; 2581 PetscCall(MatInvertBlockDiagonal(a->A, values)); 2582 A->factorerrortype = a->A->factorerrortype; 2583 PetscFunctionReturn(PETSC_SUCCESS); 2584 } 2585 2586 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x, PetscRandom rctx) 2587 { 2588 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)x->data; 2589 2590 PetscFunctionBegin; 2591 PetscCheck(x->assembled || x->preallocated, PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed"); 2592 PetscCall(MatSetRandom(aij->A, rctx)); 2593 if (x->assembled) { 2594 PetscCall(MatSetRandom(aij->B, rctx)); 2595 } else { 2596 PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B, x->cmap->rstart, x->cmap->rend, rctx)); 2597 } 2598 PetscCall(MatAssemblyBegin(x, MAT_FINAL_ASSEMBLY)); 2599 PetscCall(MatAssemblyEnd(x, MAT_FINAL_ASSEMBLY)); 2600 PetscFunctionReturn(PETSC_SUCCESS); 2601 } 2602 2603 static PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A, PetscBool sc) 2604 { 2605 PetscFunctionBegin; 2606 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2607 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2608 PetscFunctionReturn(PETSC_SUCCESS); 2609 } 2610 2611 /*@ 2612 MatMPIAIJGetNumberNonzeros - gets the number of nonzeros in the matrix on this MPI rank 2613 2614 Not Collective 2615 2616 Input Parameter: 2617 . A - the matrix 2618 2619 Output Parameter: 2620 . 
nz - the number of nonzeros 2621 2622 Level: advanced 2623 2624 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ` 2625 @*/ 2626 PetscErrorCode MatMPIAIJGetNumberNonzeros(Mat A, PetscCount *nz) 2627 { 2628 Mat_MPIAIJ *maij = (Mat_MPIAIJ *)A->data; 2629 Mat_SeqAIJ *aaij = (Mat_SeqAIJ *)maij->A->data, *baij = (Mat_SeqAIJ *)maij->B->data; 2630 PetscBool isaij; 2631 2632 PetscFunctionBegin; 2633 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIAIJ, &isaij)); 2634 PetscCheck(isaij, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)A)->type_name); 2635 *nz = aaij->i[A->rmap->n] + baij->i[A->rmap->n]; 2636 PetscFunctionReturn(PETSC_SUCCESS); 2637 } 2638 2639 /*@ 2640 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2641 2642 Collective 2643 2644 Input Parameters: 2645 + A - the matrix 2646 - sc - `PETSC_TRUE` indicates use the scalable algorithm (default is not to use the scalable algorithm) 2647 2648 Level: advanced 2649 2650 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ` 2651 @*/ 2652 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A, PetscBool sc) 2653 { 2654 PetscFunctionBegin; 2655 PetscTryMethod(A, "MatMPIAIJSetUseScalableIncreaseOverlap_C", (Mat, PetscBool), (A, sc)); 2656 PetscFunctionReturn(PETSC_SUCCESS); 2657 } 2658 2659 PetscErrorCode MatSetFromOptions_MPIAIJ(Mat A, PetscOptionItems PetscOptionsObject) 2660 { 2661 PetscBool sc = PETSC_FALSE, flg; 2662 2663 PetscFunctionBegin; 2664 PetscOptionsHeadBegin(PetscOptionsObject, "MPIAIJ options"); 2665 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2666 PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable", "Use a scalable algorithm to compute the overlap", "MatIncreaseOverlap", sc, &sc, &flg)); 2667 if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, sc)); 2668 PetscOptionsHeadEnd(); 2669 PetscFunctionReturn(PETSC_SUCCESS); 2670 } 2671 2672 static PetscErrorCode MatShift_MPIAIJ(Mat Y, PetscScalar a) 2673 { 2674 Mat_MPIAIJ *maij = (Mat_MPIAIJ *)Y->data; 2675 Mat_SeqAIJ *aij = (Mat_SeqAIJ *)maij->A->data; 2676 2677 PetscFunctionBegin; 2678 if (!Y->preallocated) { 2679 PetscCall(MatMPIAIJSetPreallocation(Y, 1, NULL, 0, NULL)); 2680 } else if (!aij->nz) { /* It does not matter if diagonals of Y only partially lie in maij->A. We just need an estimated preallocation. 
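     Preallocating a single entry per row of the diagonal block gives MatShift_Basic() below room to insert the shifted diagonal into an otherwise empty matrix; the nonew flag is saved and restored because the preallocation call would otherwise reset it.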
*/ 2681 PetscInt nonew = aij->nonew; 2682 PetscCall(MatSeqAIJSetPreallocation(maij->A, 1, NULL)); 2683 aij->nonew = nonew; 2684 } 2685 PetscCall(MatShift_Basic(Y, a)); 2686 PetscFunctionReturn(PETSC_SUCCESS); 2687 } 2688 2689 static PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A, PetscInt nblocks, const PetscInt *bsizes, PetscScalar *diag) 2690 { 2691 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2692 2693 PetscFunctionBegin; 2694 PetscCall(MatInvertVariableBlockDiagonal(a->A, nblocks, bsizes, diag)); 2695 PetscFunctionReturn(PETSC_SUCCESS); 2696 } 2697 2698 static PetscErrorCode MatEliminateZeros_MPIAIJ(Mat A, PetscBool keep) 2699 { 2700 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2701 2702 PetscFunctionBegin; 2703 PetscCall(MatEliminateZeros_SeqAIJ(a->A, keep)); // possibly keep zero diagonal coefficients 2704 PetscCall(MatEliminateZeros_SeqAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients 2705 PetscFunctionReturn(PETSC_SUCCESS); 2706 } 2707 2708 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2709 MatGetRow_MPIAIJ, 2710 MatRestoreRow_MPIAIJ, 2711 MatMult_MPIAIJ, 2712 /* 4*/ MatMultAdd_MPIAIJ, 2713 MatMultTranspose_MPIAIJ, 2714 MatMultTransposeAdd_MPIAIJ, 2715 NULL, 2716 NULL, 2717 NULL, 2718 /*10*/ NULL, 2719 NULL, 2720 NULL, 2721 MatSOR_MPIAIJ, 2722 MatTranspose_MPIAIJ, 2723 /*15*/ MatGetInfo_MPIAIJ, 2724 MatEqual_MPIAIJ, 2725 MatGetDiagonal_MPIAIJ, 2726 MatDiagonalScale_MPIAIJ, 2727 MatNorm_MPIAIJ, 2728 /*20*/ MatAssemblyBegin_MPIAIJ, 2729 MatAssemblyEnd_MPIAIJ, 2730 MatSetOption_MPIAIJ, 2731 MatZeroEntries_MPIAIJ, 2732 /*24*/ MatZeroRows_MPIAIJ, 2733 NULL, 2734 NULL, 2735 NULL, 2736 NULL, 2737 /*29*/ MatSetUp_MPI_Hash, 2738 NULL, 2739 NULL, 2740 MatGetDiagonalBlock_MPIAIJ, 2741 NULL, 2742 /*34*/ MatDuplicate_MPIAIJ, 2743 NULL, 2744 NULL, 2745 NULL, 2746 NULL, 2747 /*39*/ MatAXPY_MPIAIJ, 2748 MatCreateSubMatrices_MPIAIJ, 2749 MatIncreaseOverlap_MPIAIJ, 2750 MatGetValues_MPIAIJ, 2751 MatCopy_MPIAIJ, 2752 /*44*/ MatGetRowMax_MPIAIJ, 2753 MatScale_MPIAIJ, 2754 MatShift_MPIAIJ, 2755 MatDiagonalSet_MPIAIJ, 2756 MatZeroRowsColumns_MPIAIJ, 2757 /*49*/ MatSetRandom_MPIAIJ, 2758 MatGetRowIJ_MPIAIJ, 2759 MatRestoreRowIJ_MPIAIJ, 2760 NULL, 2761 NULL, 2762 /*54*/ MatFDColoringCreate_MPIXAIJ, 2763 NULL, 2764 MatSetUnfactored_MPIAIJ, 2765 MatPermute_MPIAIJ, 2766 NULL, 2767 /*59*/ MatCreateSubMatrix_MPIAIJ, 2768 MatDestroy_MPIAIJ, 2769 MatView_MPIAIJ, 2770 NULL, 2771 NULL, 2772 /*64*/ MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2773 NULL, 2774 NULL, 2775 NULL, 2776 MatGetRowMaxAbs_MPIAIJ, 2777 /*69*/ MatGetRowMinAbs_MPIAIJ, 2778 NULL, 2779 NULL, 2780 MatFDColoringApply_AIJ, 2781 MatSetFromOptions_MPIAIJ, 2782 MatFindZeroDiagonals_MPIAIJ, 2783 /*75*/ NULL, 2784 NULL, 2785 NULL, 2786 MatLoad_MPIAIJ, 2787 NULL, 2788 /*80*/ NULL, 2789 NULL, 2790 NULL, 2791 /*83*/ NULL, 2792 NULL, 2793 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2794 MatPtAPNumeric_MPIAIJ_MPIAIJ, 2795 NULL, 2796 NULL, 2797 /*89*/ MatBindToCPU_MPIAIJ, 2798 MatProductSetFromOptions_MPIAIJ, 2799 NULL, 2800 NULL, 2801 MatConjugate_MPIAIJ, 2802 /*94*/ NULL, 2803 MatSetValuesRow_MPIAIJ, 2804 MatRealPart_MPIAIJ, 2805 MatImaginaryPart_MPIAIJ, 2806 NULL, 2807 /*99*/ NULL, 2808 NULL, 2809 NULL, 2810 MatGetRowMin_MPIAIJ, 2811 NULL, 2812 /*104*/ MatGetSeqNonzeroStructure_MPIAIJ, 2813 NULL, 2814 MatGetGhosts_MPIAIJ, 2815 NULL, 2816 NULL, 2817 /*109*/ MatMultDiagonalBlock_MPIAIJ, 2818 NULL, 2819 NULL, 2820 NULL, 2821 MatGetMultiProcBlock_MPIAIJ, 2822 /*114*/ MatFindNonzeroRows_MPIAIJ, 2823 MatGetColumnReductions_MPIAIJ, 2824 
MatInvertBlockDiagonal_MPIAIJ, 2825 MatInvertVariableBlockDiagonal_MPIAIJ, 2826 MatCreateSubMatricesMPI_MPIAIJ, 2827 /*119*/ NULL, 2828 NULL, 2829 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2830 NULL, 2831 NULL, 2832 /*124*/ NULL, 2833 NULL, 2834 MatSetBlockSizes_MPIAIJ, 2835 NULL, 2836 MatFDColoringSetUp_MPIXAIJ, 2837 /*129*/ MatFindOffBlockDiagonalEntries_MPIAIJ, 2838 MatCreateMPIMatConcatenateSeqMat_MPIAIJ, 2839 NULL, 2840 NULL, 2841 NULL, 2842 /*134*/ MatCreateGraph_Simple_AIJ, 2843 NULL, 2844 MatEliminateZeros_MPIAIJ, 2845 MatGetRowSumAbs_MPIAIJ, 2846 NULL, 2847 /*139*/ NULL, 2848 NULL, 2849 MatCopyHashToXAIJ_MPI_Hash, 2850 MatGetCurrentMemType_MPIAIJ, 2851 NULL}; 2852 2853 static PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2854 { 2855 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2856 2857 PetscFunctionBegin; 2858 PetscCall(MatStoreValues(aij->A)); 2859 PetscCall(MatStoreValues(aij->B)); 2860 PetscFunctionReturn(PETSC_SUCCESS); 2861 } 2862 2863 static PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2864 { 2865 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2866 2867 PetscFunctionBegin; 2868 PetscCall(MatRetrieveValues(aij->A)); 2869 PetscCall(MatRetrieveValues(aij->B)); 2870 PetscFunctionReturn(PETSC_SUCCESS); 2871 } 2872 2873 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[]) 2874 { 2875 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2876 PetscMPIInt size; 2877 2878 PetscFunctionBegin; 2879 if (B->hash_active) { 2880 B->ops[0] = b->cops; 2881 B->hash_active = PETSC_FALSE; 2882 } 2883 PetscCall(PetscLayoutSetUp(B->rmap)); 2884 PetscCall(PetscLayoutSetUp(B->cmap)); 2885 2886 #if defined(PETSC_USE_CTABLE) 2887 PetscCall(PetscHMapIDestroy(&b->colmap)); 2888 #else 2889 PetscCall(PetscFree(b->colmap)); 2890 #endif 2891 PetscCall(PetscFree(b->garray)); 2892 PetscCall(VecDestroy(&b->lvec)); 2893 PetscCall(VecScatterDestroy(&b->Mvctx)); 2894 2895 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 2896 2897 MatSeqXAIJGetOptions_Private(b->B); 2898 PetscCall(MatDestroy(&b->B)); 2899 PetscCall(MatCreate(PETSC_COMM_SELF, &b->B)); 2900 PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? 
B->cmap->N : 0)); 2901 PetscCall(MatSetBlockSizesFromMats(b->B, B, B)); 2902 PetscCall(MatSetType(b->B, MATSEQAIJ)); 2903 MatSeqXAIJRestoreOptions_Private(b->B); 2904 2905 MatSeqXAIJGetOptions_Private(b->A); 2906 PetscCall(MatDestroy(&b->A)); 2907 PetscCall(MatCreate(PETSC_COMM_SELF, &b->A)); 2908 PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n)); 2909 PetscCall(MatSetBlockSizesFromMats(b->A, B, B)); 2910 PetscCall(MatSetType(b->A, MATSEQAIJ)); 2911 MatSeqXAIJRestoreOptions_Private(b->A); 2912 2913 PetscCall(MatSeqAIJSetPreallocation(b->A, d_nz, d_nnz)); 2914 PetscCall(MatSeqAIJSetPreallocation(b->B, o_nz, o_nnz)); 2915 B->preallocated = PETSC_TRUE; 2916 B->was_assembled = PETSC_FALSE; 2917 B->assembled = PETSC_FALSE; 2918 PetscFunctionReturn(PETSC_SUCCESS); 2919 } 2920 2921 static PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) 2922 { 2923 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2924 PetscBool ondiagreset, offdiagreset, memoryreset; 2925 2926 PetscFunctionBegin; 2927 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 2928 PetscCheck(B->insertmode == NOT_SET_VALUES, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot reset preallocation after setting some values but not yet calling MatAssemblyBegin()/MatAssemblyEnd()"); 2929 if (B->num_ass == 0) PetscFunctionReturn(PETSC_SUCCESS); 2930 2931 PetscCall(MatResetPreallocation_SeqAIJ_Private(b->A, &ondiagreset)); 2932 PetscCall(MatResetPreallocation_SeqAIJ_Private(b->B, &offdiagreset)); 2933 memoryreset = (PetscBool)(ondiagreset || offdiagreset); 2934 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &memoryreset, 1, MPI_C_BOOL, MPI_LOR, PetscObjectComm((PetscObject)B))); 2935 if (!memoryreset) PetscFunctionReturn(PETSC_SUCCESS); 2936 2937 PetscCall(PetscLayoutSetUp(B->rmap)); 2938 PetscCall(PetscLayoutSetUp(B->cmap)); 2939 PetscCheck(B->assembled || B->was_assembled, PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_WRONGSTATE, "Should not need to reset preallocation if the matrix was never assembled"); 2940 PetscCall(MatDisAssemble_MPIAIJ(B, PETSC_TRUE)); 2941 PetscCall(VecScatterDestroy(&b->Mvctx)); 2942 2943 B->preallocated = PETSC_TRUE; 2944 B->was_assembled = PETSC_FALSE; 2945 B->assembled = PETSC_FALSE; 2946 /* Log that the state of this object has changed; this will help guarantee that preconditioners get re-setup */ 2947 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 2948 PetscFunctionReturn(PETSC_SUCCESS); 2949 } 2950 2951 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat) 2952 { 2953 Mat mat; 2954 Mat_MPIAIJ *a, *oldmat = (Mat_MPIAIJ *)matin->data; 2955 2956 PetscFunctionBegin; 2957 *newmat = NULL; 2958 PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat)); 2959 PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N)); 2960 PetscCall(MatSetBlockSizesFromMats(mat, matin, matin)); 2961 PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name)); 2962 a = (Mat_MPIAIJ *)mat->data; 2963 2964 mat->factortype = matin->factortype; 2965 mat->assembled = matin->assembled; 2966 mat->insertmode = NOT_SET_VALUES; 2967 2968 a->size = oldmat->size; 2969 a->rank = oldmat->rank; 2970 a->donotstash = oldmat->donotstash; 2971 a->roworiented = oldmat->roworiented; 2972 a->rowindices = NULL; 2973 a->rowvalues = NULL; 2974 a->getrowactive = PETSC_FALSE; 2975 2976 PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap)); 2977 PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap)); 2978 if (matin->hash_active) { 2979 PetscCall(MatSetUp(mat)); 2980 } else { 2981 
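    /* the original is not in hash-insertion mode: copy its preallocation flag, column map, ghost column array, and local work vector, share a reference to the scatter context, and duplicate both sequential blocks */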
mat->preallocated = matin->preallocated; 2982 if (oldmat->colmap) { 2983 #if defined(PETSC_USE_CTABLE) 2984 PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap)); 2985 #else 2986 PetscCall(PetscMalloc1(mat->cmap->N, &a->colmap)); 2987 PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, mat->cmap->N)); 2988 #endif 2989 } else a->colmap = NULL; 2990 if (oldmat->garray) { 2991 PetscInt len; 2992 len = oldmat->B->cmap->n; 2993 PetscCall(PetscMalloc1(len, &a->garray)); 2994 if (len) PetscCall(PetscArraycpy(a->garray, oldmat->garray, len)); 2995 } else a->garray = NULL; 2996 2997 /* It may happen MatDuplicate is called with a non-assembled matrix 2998 In fact, MatDuplicate only requires the matrix to be preallocated 2999 This may happen inside a DMCreateMatrix_Shell */ 3000 if (oldmat->lvec) PetscCall(VecDuplicate(oldmat->lvec, &a->lvec)); 3001 if (oldmat->Mvctx) { 3002 a->Mvctx = oldmat->Mvctx; 3003 PetscCall(PetscObjectReference((PetscObject)oldmat->Mvctx)); 3004 } 3005 PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A)); 3006 PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B)); 3007 } 3008 PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist)); 3009 *newmat = mat; 3010 PetscFunctionReturn(PETSC_SUCCESS); 3011 } 3012 3013 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3014 { 3015 PetscBool isbinary, ishdf5; 3016 3017 PetscFunctionBegin; 3018 PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1); 3019 PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2); 3020 /* force binary viewer to load .info file if it has not yet done so */ 3021 PetscCall(PetscViewerSetUp(viewer)); 3022 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 3023 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5)); 3024 if (isbinary) { 3025 PetscCall(MatLoad_MPIAIJ_Binary(newMat, viewer)); 3026 } else if (ishdf5) { 3027 #if defined(PETSC_HAVE_HDF5) 3028 PetscCall(MatLoad_AIJ_HDF5(newMat, viewer)); 3029 #else 3030 SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5"); 3031 #endif 3032 } else { 3033 SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name); 3034 } 3035 PetscFunctionReturn(PETSC_SUCCESS); 3036 } 3037 3038 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer) 3039 { 3040 PetscInt header[4], M, N, m, nz, rows, cols, sum, i; 3041 PetscInt *rowidxs, *colidxs; 3042 PetscScalar *matvals; 3043 3044 PetscFunctionBegin; 3045 PetscCall(PetscViewerSetUp(viewer)); 3046 3047 /* read in matrix header */ 3048 PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT)); 3049 PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file"); 3050 M = header[1]; 3051 N = header[2]; 3052 nz = header[3]; 3053 PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M); 3054 PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N); 3055 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIAIJ"); 3056 3057 /* set block sizes from the viewer's .info file */ 3058 
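  /* (the .info file produced when the matrix was saved with a binary viewer typically records the
     block size as the option -matload_block_size, which MatLoad_Binary_BlockSizes() looks up) */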
PetscCall(MatLoad_Binary_BlockSizes(mat, viewer)); 3059 /* set global sizes if not set already */ 3060 if (mat->rmap->N < 0) mat->rmap->N = M; 3061 if (mat->cmap->N < 0) mat->cmap->N = N; 3062 PetscCall(PetscLayoutSetUp(mat->rmap)); 3063 PetscCall(PetscLayoutSetUp(mat->cmap)); 3064 3065 /* check if the matrix sizes are correct */ 3066 PetscCall(MatGetSize(mat, &rows, &cols)); 3067 PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols); 3068 3069 /* read in row lengths and build row indices */ 3070 PetscCall(MatGetLocalSize(mat, &m, NULL)); 3071 PetscCall(PetscMalloc1(m + 1, &rowidxs)); 3072 PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT)); 3073 rowidxs[0] = 0; 3074 for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i]; 3075 if (nz != PETSC_INT_MAX) { 3076 PetscCallMPI(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer))); 3077 PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum); 3078 } 3079 3080 /* read in column indices and matrix values */ 3081 PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals)); 3082 PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 3083 PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 3084 /* store matrix indices and values */ 3085 PetscCall(MatMPIAIJSetPreallocationCSR(mat, rowidxs, colidxs, matvals)); 3086 PetscCall(PetscFree(rowidxs)); 3087 PetscCall(PetscFree2(colidxs, matvals)); 3088 PetscFunctionReturn(PETSC_SUCCESS); 3089 } 3090 3091 /* Not scalable because of ISAllGather() unless getting all columns. */ 3092 static PetscErrorCode ISGetSeqIS_Private(Mat mat, IS iscol, IS *isseq) 3093 { 3094 IS iscol_local; 3095 PetscBool isstride; 3096 PetscMPIInt gisstride = 0; 3097 3098 PetscFunctionBegin; 3099 /* check if we are grabbing all columns*/ 3100 PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &isstride)); 3101 3102 if (isstride) { 3103 PetscInt start, len, mstart, mlen; 3104 PetscCall(ISStrideGetInfo(iscol, &start, NULL)); 3105 PetscCall(ISGetLocalSize(iscol, &len)); 3106 PetscCall(MatGetOwnershipRangeColumn(mat, &mstart, &mlen)); 3107 if (mstart == start && mlen - mstart == len) gisstride = 1; 3108 } 3109 3110 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &gisstride, 1, MPI_INT, MPI_MIN, PetscObjectComm((PetscObject)mat))); 3111 if (gisstride) { 3112 PetscInt N; 3113 PetscCall(MatGetSize(mat, NULL, &N)); 3114 PetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &iscol_local)); 3115 PetscCall(ISSetIdentity(iscol_local)); 3116 PetscCall(PetscInfo(mat, "Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n")); 3117 } else { 3118 PetscInt cbs; 3119 PetscCall(ISGetBlockSize(iscol, &cbs)); 3120 PetscCall(ISAllGather(iscol, &iscol_local)); 3121 PetscCall(ISSetBlockSize(iscol_local, cbs)); 3122 } 3123 3124 *isseq = iscol_local; 3125 PetscFunctionReturn(PETSC_SUCCESS); 3126 } 3127 3128 /* 3129 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local 3130 (see MatCreateSubMatrix_MPIAIJ_nonscalable) 3131 3132 Input Parameters: 3133 + mat - matrix 3134 . 
isrow - parallel row index set; its local indices are a subset of local columns of `mat`, 3135 i.e., mat->rstart <= isrow[i] < mat->rend 3136 - iscol - parallel column index set; its local indices are a subset of local columns of `mat`, 3137 i.e., mat->cstart <= iscol[i] < mat->cend 3138 3139 Output Parameters: 3140 + isrow_d - sequential row index set for retrieving mat->A 3141 . iscol_d - sequential column index set for retrieving mat->A 3142 . iscol_o - sequential column index set for retrieving mat->B 3143 - garray - column map; garray[i] indicates global location of iscol_o[i] in `iscol` 3144 */ 3145 static PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat, IS isrow, IS iscol, IS *isrow_d, IS *iscol_d, IS *iscol_o, PetscInt *garray[]) 3146 { 3147 Vec x, cmap; 3148 const PetscInt *is_idx; 3149 PetscScalar *xarray, *cmaparray; 3150 PetscInt ncols, isstart, *idx, m, rstart, *cmap1, count; 3151 Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data; 3152 Mat B = a->B; 3153 Vec lvec = a->lvec, lcmap; 3154 PetscInt i, cstart, cend, Bn = B->cmap->N; 3155 MPI_Comm comm; 3156 VecScatter Mvctx = a->Mvctx; 3157 3158 PetscFunctionBegin; 3159 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3160 PetscCall(ISGetLocalSize(iscol, &ncols)); 3161 3162 /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */ 3163 PetscCall(MatCreateVecs(mat, &x, NULL)); 3164 PetscCall(VecSet(x, -1.0)); 3165 PetscCall(VecDuplicate(x, &cmap)); 3166 PetscCall(VecSet(cmap, -1.0)); 3167 3168 /* Get start indices */ 3169 PetscCallMPI(MPI_Scan(&ncols, &isstart, 1, MPIU_INT, MPI_SUM, comm)); 3170 isstart -= ncols; 3171 PetscCall(MatGetOwnershipRangeColumn(mat, &cstart, &cend)); 3172 3173 PetscCall(ISGetIndices(iscol, &is_idx)); 3174 PetscCall(VecGetArray(x, &xarray)); 3175 PetscCall(VecGetArray(cmap, &cmaparray)); 3176 PetscCall(PetscMalloc1(ncols, &idx)); 3177 for (i = 0; i < ncols; i++) { 3178 xarray[is_idx[i] - cstart] = (PetscScalar)is_idx[i]; 3179 cmaparray[is_idx[i] - cstart] = i + isstart; /* global index of iscol[i] */ 3180 idx[i] = is_idx[i] - cstart; /* local index of iscol[i] */ 3181 } 3182 PetscCall(VecRestoreArray(x, &xarray)); 3183 PetscCall(VecRestoreArray(cmap, &cmaparray)); 3184 PetscCall(ISRestoreIndices(iscol, &is_idx)); 3185 3186 /* Get iscol_d */ 3187 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, iscol_d)); 3188 PetscCall(ISGetBlockSize(iscol, &i)); 3189 PetscCall(ISSetBlockSize(*iscol_d, i)); 3190 3191 /* Get isrow_d */ 3192 PetscCall(ISGetLocalSize(isrow, &m)); 3193 rstart = mat->rmap->rstart; 3194 PetscCall(PetscMalloc1(m, &idx)); 3195 PetscCall(ISGetIndices(isrow, &is_idx)); 3196 for (i = 0; i < m; i++) idx[i] = is_idx[i] - rstart; 3197 PetscCall(ISRestoreIndices(isrow, &is_idx)); 3198 3199 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idx, PETSC_OWN_POINTER, isrow_d)); 3200 PetscCall(ISGetBlockSize(isrow, &i)); 3201 PetscCall(ISSetBlockSize(*isrow_d, i)); 3202 3203 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3204 PetscCall(VecScatterBegin(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD)); 3205 PetscCall(VecScatterEnd(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD)); 3206 3207 PetscCall(VecDuplicate(lvec, &lcmap)); 3208 3209 PetscCall(VecScatterBegin(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD)); 3210 PetscCall(VecScatterEnd(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD)); 3211 3212 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3213 /* off-process column 
indices */ 3214 count = 0; 3215 PetscCall(PetscMalloc1(Bn, &idx)); 3216 PetscCall(PetscMalloc1(Bn, &cmap1)); 3217 3218 PetscCall(VecGetArray(lvec, &xarray)); 3219 PetscCall(VecGetArray(lcmap, &cmaparray)); 3220 for (i = 0; i < Bn; i++) { 3221 if (PetscRealPart(xarray[i]) > -1.0) { 3222 idx[count] = i; /* local column index in off-diagonal part B */ 3223 cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3224 count++; 3225 } 3226 } 3227 PetscCall(VecRestoreArray(lvec, &xarray)); 3228 PetscCall(VecRestoreArray(lcmap, &cmaparray)); 3229 3230 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_COPY_VALUES, iscol_o)); 3231 /* cannot ensure iscol_o has same blocksize as iscol! */ 3232 3233 PetscCall(PetscFree(idx)); 3234 *garray = cmap1; 3235 3236 PetscCall(VecDestroy(&x)); 3237 PetscCall(VecDestroy(&cmap)); 3238 PetscCall(VecDestroy(&lcmap)); 3239 PetscFunctionReturn(PETSC_SUCCESS); 3240 } 3241 3242 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3243 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *submat) 3244 { 3245 Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data, *asub; 3246 Mat M = NULL; 3247 MPI_Comm comm; 3248 IS iscol_d, isrow_d, iscol_o; 3249 Mat Asub = NULL, Bsub = NULL; 3250 PetscInt n, count, M_size, N_size; 3251 3252 PetscFunctionBegin; 3253 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3254 3255 if (call == MAT_REUSE_MATRIX) { 3256 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3257 PetscCall(PetscObjectQuery((PetscObject)*submat, "isrow_d", (PetscObject *)&isrow_d)); 3258 PetscCheck(isrow_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "isrow_d passed in was not used before, cannot reuse"); 3259 3260 PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_d", (PetscObject *)&iscol_d)); 3261 PetscCheck(iscol_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_d passed in was not used before, cannot reuse"); 3262 3263 PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_o", (PetscObject *)&iscol_o)); 3264 PetscCheck(iscol_o, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_o passed in was not used before, cannot reuse"); 3265 3266 /* Update diagonal and off-diagonal portions of submat */ 3267 asub = (Mat_MPIAIJ *)(*submat)->data; 3268 PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->A)); 3269 PetscCall(ISGetLocalSize(iscol_o, &n)); 3270 if (n) PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->B)); 3271 PetscCall(MatAssemblyBegin(*submat, MAT_FINAL_ASSEMBLY)); 3272 PetscCall(MatAssemblyEnd(*submat, MAT_FINAL_ASSEMBLY)); 3273 3274 } else { /* call == MAT_INITIAL_MATRIX) */ 3275 PetscInt *garray, *garray_compact; 3276 PetscInt BsubN; 3277 3278 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) 
*/ 3279 PetscCall(ISGetSeqIS_SameColDist_Private(mat, isrow, iscol, &isrow_d, &iscol_d, &iscol_o, &garray)); 3280 3281 /* Create local submatrices Asub and Bsub */ 3282 PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Asub)); 3283 PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Bsub)); 3284 3285 // Compact garray so its not of size Bn 3286 PetscCall(ISGetSize(iscol_o, &count)); 3287 PetscCall(PetscMalloc1(count, &garray_compact)); 3288 PetscCall(PetscArraycpy(garray_compact, garray, count)); 3289 3290 /* Create submatrix M */ 3291 PetscCall(ISGetSize(isrow, &M_size)); 3292 PetscCall(ISGetSize(iscol, &N_size)); 3293 PetscCall(MatCreateMPIAIJWithSeqAIJ(comm, M_size, N_size, Asub, Bsub, garray_compact, &M)); 3294 3295 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3296 asub = (Mat_MPIAIJ *)M->data; 3297 3298 PetscCall(ISGetLocalSize(iscol_o, &BsubN)); 3299 n = asub->B->cmap->N; 3300 if (BsubN > n) { 3301 /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */ 3302 const PetscInt *idx; 3303 PetscInt i, j, *idx_new, *subgarray = asub->garray; 3304 PetscCall(PetscInfo(M, "submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n", n, BsubN)); 3305 3306 PetscCall(PetscMalloc1(n, &idx_new)); 3307 j = 0; 3308 PetscCall(ISGetIndices(iscol_o, &idx)); 3309 for (i = 0; i < n; i++) { 3310 if (j >= BsubN) break; 3311 while (subgarray[i] > garray[j]) j++; 3312 3313 PetscCheck(subgarray[i] == garray[j], PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT, i, subgarray[i], j, garray[j]); 3314 idx_new[i] = idx[j++]; 3315 } 3316 PetscCall(ISRestoreIndices(iscol_o, &idx)); 3317 3318 PetscCall(ISDestroy(&iscol_o)); 3319 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx_new, PETSC_OWN_POINTER, &iscol_o)); 3320 3321 } else PetscCheck(BsubN >= n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")", BsubN, asub->B->cmap->N); 3322 3323 PetscCall(PetscFree(garray)); 3324 *submat = M; 3325 3326 /* Save isrow_d, iscol_d and iscol_o used in processor for next request */ 3327 PetscCall(PetscObjectCompose((PetscObject)M, "isrow_d", (PetscObject)isrow_d)); 3328 PetscCall(ISDestroy(&isrow_d)); 3329 3330 PetscCall(PetscObjectCompose((PetscObject)M, "iscol_d", (PetscObject)iscol_d)); 3331 PetscCall(ISDestroy(&iscol_d)); 3332 3333 PetscCall(PetscObjectCompose((PetscObject)M, "iscol_o", (PetscObject)iscol_o)); 3334 PetscCall(ISDestroy(&iscol_o)); 3335 } 3336 PetscFunctionReturn(PETSC_SUCCESS); 3337 } 3338 3339 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat) 3340 { 3341 IS iscol_local = NULL, isrow_d; 3342 PetscInt csize; 3343 PetscInt n, i, j, start, end; 3344 PetscBool sameRowDist = PETSC_FALSE, sameDist[2], tsameDist[2]; 3345 MPI_Comm comm; 3346 3347 PetscFunctionBegin; 3348 /* If isrow has same processor distribution as mat, 3349 call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */ 3350 if (call == MAT_REUSE_MATRIX) { 3351 PetscCall(PetscObjectQuery((PetscObject)*newmat, "isrow_d", (PetscObject *)&isrow_d)); 3352 if (isrow_d) { 3353 sameRowDist = PETSC_TRUE; 3354 tsameDist[1] = PETSC_TRUE; /* sameColDist */ 3355 } else { 3356 
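      /* "isrow_d" was not composed on the reused matrix, so the initial call did not take the
         same-row-and-column-distribution path; check for "SubIScol", which
         MatCreateSubMatrix_MPIAIJ_SameRowDist() composes when only the row distribution matches */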
PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_local)); 3357 if (iscol_local) { 3358 sameRowDist = PETSC_TRUE; 3359 tsameDist[1] = PETSC_FALSE; /* !sameColDist */ 3360 } 3361 } 3362 } else { 3363 /* Check if isrow has same processor distribution as mat */ 3364 sameDist[0] = PETSC_FALSE; 3365 PetscCall(ISGetLocalSize(isrow, &n)); 3366 if (!n) { 3367 sameDist[0] = PETSC_TRUE; 3368 } else { 3369 PetscCall(ISGetMinMax(isrow, &i, &j)); 3370 PetscCall(MatGetOwnershipRange(mat, &start, &end)); 3371 if (i >= start && j < end) sameDist[0] = PETSC_TRUE; 3372 } 3373 3374 /* Check if iscol has same processor distribution as mat */ 3375 sameDist[1] = PETSC_FALSE; 3376 PetscCall(ISGetLocalSize(iscol, &n)); 3377 if (!n) { 3378 sameDist[1] = PETSC_TRUE; 3379 } else { 3380 PetscCall(ISGetMinMax(iscol, &i, &j)); 3381 PetscCall(MatGetOwnershipRangeColumn(mat, &start, &end)); 3382 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3383 } 3384 3385 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3386 PetscCallMPI(MPIU_Allreduce(&sameDist, &tsameDist, 2, MPI_C_BOOL, MPI_LAND, comm)); 3387 sameRowDist = tsameDist[0]; 3388 } 3389 3390 if (sameRowDist) { 3391 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3392 /* isrow and iscol have same processor distribution as mat */ 3393 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat, isrow, iscol, call, newmat)); 3394 PetscFunctionReturn(PETSC_SUCCESS); 3395 } else { /* sameRowDist */ 3396 /* isrow has same processor distribution as mat */ 3397 if (call == MAT_INITIAL_MATRIX) { 3398 PetscBool sorted; 3399 PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local)); 3400 PetscCall(ISGetLocalSize(iscol_local, &n)); /* local size of iscol_local = global columns of newmat */ 3401 PetscCall(ISGetSize(iscol, &i)); 3402 PetscCheck(n == i, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT, n, i); 3403 3404 PetscCall(ISSorted(iscol_local, &sorted)); 3405 if (sorted) { 3406 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3407 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, iscol_local, MAT_INITIAL_MATRIX, newmat)); 3408 PetscFunctionReturn(PETSC_SUCCESS); 3409 } 3410 } else { /* call == MAT_REUSE_MATRIX */ 3411 IS iscol_sub; 3412 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub)); 3413 if (iscol_sub) { 3414 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, NULL, call, newmat)); 3415 PetscFunctionReturn(PETSC_SUCCESS); 3416 } 3417 } 3418 } 3419 } 3420 3421 /* General case: iscol -> iscol_local which has global size of iscol */ 3422 if (call == MAT_REUSE_MATRIX) { 3423 PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local)); 3424 PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 3425 } else { 3426 if (!iscol_local) PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local)); 3427 } 3428 3429 PetscCall(ISGetLocalSize(iscol, &csize)); 3430 PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat, isrow, iscol_local, csize, call, newmat)); 3431 3432 if (call == MAT_INITIAL_MATRIX) { 3433 PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local)); 3434 PetscCall(ISDestroy(&iscol_local)); 3435 } 3436 PetscFunctionReturn(PETSC_SUCCESS); 3437 } 3438 3439 /*@C 3440 MatCreateMPIAIJWithSeqAIJ - creates a `MATMPIAIJ` matrix using 
`MATSEQAIJ` matrices that contain the "diagonal" 3441 and "off-diagonal" part of the matrix in CSR format. 3442 3443 Collective 3444 3445 Input Parameters: 3446 + comm - MPI communicator 3447 . M - the global row size 3448 . N - the global column size 3449 . A - "diagonal" portion of matrix 3450 . B - if garray is `NULL`, B should be the offdiag matrix using global col ids and of size N - if garray is not `NULL`, B should be the offdiag matrix using local col ids and of size garray 3451 - garray - either `NULL` or the global index of `B` columns. If not `NULL`, it should be allocated by `PetscMalloc1()` and will be owned by `mat` thereafter. 3452 3453 Output Parameter: 3454 . mat - the matrix, with input `A` as its local diagonal matrix 3455 3456 Level: advanced 3457 3458 Notes: 3459 See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3460 3461 `A` and `B` becomes part of output mat. The user cannot use `A` and `B` anymore. 3462 3463 If `garray` is `NULL`, `B` will be compacted to use local indices. In this sense, `B`'s sparsity pattern (nonzerostate) will be changed. If `B` is a device matrix, we need to somehow also update 3464 `B`'s copy on device. We do so by increasing `B`'s nonzerostate. In use of `B` on device, device matrix types should detect this change (ref. internal routines `MatSeqAIJCUSPARSECopyToGPU()` or 3465 `MatAssemblyEnd_SeqAIJKokkos()`) and will just destroy and then recreate the device copy of `B`. It is not optimal, but is easy to implement and less hacky. To avoid this overhead, try to compute `garray` 3466 yourself, see algorithms in the private function `MatSetUpMultiply_MPIAIJ()`. 3467 3468 The `NULL`-ness of `garray` doesn't need to be collective, in other words, `garray` can be `NULL` on some processes while not on others. 3469 3470 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateMPIAIJWithSplitArrays()` 3471 @*/ 3472 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm, PetscInt M, PetscInt N, Mat A, Mat B, PetscInt *garray, Mat *mat) 3473 { 3474 PetscInt m, n; 3475 MatType mpi_mat_type; 3476 Mat_MPIAIJ *mpiaij; 3477 Mat C; 3478 3479 PetscFunctionBegin; 3480 PetscCall(MatCreate(comm, &C)); 3481 PetscCall(MatGetSize(A, &m, &n)); 3482 PetscCheck(m == B->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Am %" PetscInt_FMT " != Bm %" PetscInt_FMT, m, B->rmap->N); 3483 PetscCheck(A->rmap->bs == B->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT, A->rmap->bs, B->rmap->bs); 3484 3485 PetscCall(MatSetSizes(C, m, n, M, N)); 3486 /* Determine the type of MPI matrix that should be created from the type of matrix A, which holds the "diagonal" portion. */ 3487 PetscCall(MatGetMPIMatType_Private(A, &mpi_mat_type)); 3488 PetscCall(MatSetType(C, mpi_mat_type)); 3489 if (!garray) { 3490 const PetscScalar *ba; 3491 3492 B->nonzerostate++; 3493 PetscCall(MatSeqAIJGetArrayRead(B, &ba)); /* Since we will destroy B's device copy, we need to make sure the host copy is up to date */ 3494 PetscCall(MatSeqAIJRestoreArrayRead(B, &ba)); 3495 } 3496 3497 PetscCall(MatSetBlockSizes(C, A->rmap->bs, A->cmap->bs)); 3498 PetscCall(PetscLayoutSetUp(C->rmap)); 3499 PetscCall(PetscLayoutSetUp(C->cmap)); 3500 3501 mpiaij = (Mat_MPIAIJ *)C->data; 3502 mpiaij->A = A; 3503 mpiaij->B = B; 3504 mpiaij->garray = garray; 3505 C->preallocated = PETSC_TRUE; 3506 C->nooffprocentries = PETSC_TRUE; /* See MatAssemblyBegin_MPIAIJ. 
In effect, making MatAssemblyBegin a nop */ 3507 3508 PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 3509 PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 3510 /* MatAssemblyEnd is critical here. It sets mat->offloadmask according to A and B's, and 3511 also gets mpiaij->B compacted (if garray is NULL), with its col ids and size reduced 3512 */ 3513 PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 3514 PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE)); 3515 PetscCall(MatSetOption(C, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 3516 *mat = C; 3517 PetscFunctionReturn(PETSC_SUCCESS); 3518 } 3519 3520 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat, PetscInt, const IS[], const IS[], MatReuse, PetscBool, Mat *); 3521 3522 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat, IS isrow, IS iscol, IS iscol_local, MatReuse call, Mat *newmat) 3523 { 3524 PetscInt i, m, n, rstart, row, rend, nz, j, bs, cbs; 3525 PetscInt *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal; 3526 Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data; 3527 Mat M, Msub, B = a->B; 3528 MatScalar *aa; 3529 Mat_SeqAIJ *aij; 3530 PetscInt *garray = a->garray, *colsub, Ncols; 3531 PetscInt count, Bn = B->cmap->N, cstart = mat->cmap->rstart, cend = mat->cmap->rend; 3532 IS iscol_sub, iscmap; 3533 const PetscInt *is_idx, *cmap; 3534 PetscBool allcolumns = PETSC_FALSE; 3535 MPI_Comm comm; 3536 3537 PetscFunctionBegin; 3538 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3539 if (call == MAT_REUSE_MATRIX) { 3540 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub)); 3541 PetscCheck(iscol_sub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "SubIScol passed in was not used before, cannot reuse"); 3542 PetscCall(ISGetLocalSize(iscol_sub, &count)); 3543 3544 PetscCall(PetscObjectQuery((PetscObject)*newmat, "Subcmap", (PetscObject *)&iscmap)); 3545 PetscCheck(iscmap, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Subcmap passed in was not used before, cannot reuse"); 3546 3547 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Msub)); 3548 PetscCheck(Msub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 3549 3550 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_REUSE_MATRIX, PETSC_FALSE, &Msub)); 3551 3552 } else { /* call == MAT_INITIAL_MATRIX) */ 3553 PetscBool flg; 3554 3555 PetscCall(ISGetLocalSize(iscol, &n)); 3556 PetscCall(ISGetSize(iscol, &Ncols)); 3557 3558 /* (1) iscol -> nonscalable iscol_local */ 3559 /* Check for special case: each processor gets entire matrix columns */ 3560 PetscCall(ISIdentity(iscol_local, &flg)); 3561 if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3562 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPI_C_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 3563 if (allcolumns) { 3564 iscol_sub = iscol_local; 3565 PetscCall(PetscObjectReference((PetscObject)iscol_local)); 3566 PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &iscmap)); 3567 3568 } else { 3569 /* (2) iscol_local -> iscol_sub and iscmap. 
Implementation below requires iscol_local be sorted, it can have duplicate indices */ 3570 PetscInt *idx, *cmap1, k; 3571 PetscCall(PetscMalloc1(Ncols, &idx)); 3572 PetscCall(PetscMalloc1(Ncols, &cmap1)); 3573 PetscCall(ISGetIndices(iscol_local, &is_idx)); 3574 count = 0; 3575 k = 0; 3576 for (i = 0; i < Ncols; i++) { 3577 j = is_idx[i]; 3578 if (j >= cstart && j < cend) { 3579 /* diagonal part of mat */ 3580 idx[count] = j; 3581 cmap1[count++] = i; /* column index in submat */ 3582 } else if (Bn) { 3583 /* off-diagonal part of mat */ 3584 if (j == garray[k]) { 3585 idx[count] = j; 3586 cmap1[count++] = i; /* column index in submat */ 3587 } else if (j > garray[k]) { 3588 while (j > garray[k] && k < Bn - 1) k++; 3589 if (j == garray[k]) { 3590 idx[count] = j; 3591 cmap1[count++] = i; /* column index in submat */ 3592 } 3593 } 3594 } 3595 } 3596 PetscCall(ISRestoreIndices(iscol_local, &is_idx)); 3597 3598 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_OWN_POINTER, &iscol_sub)); 3599 PetscCall(ISGetBlockSize(iscol, &cbs)); 3600 PetscCall(ISSetBlockSize(iscol_sub, cbs)); 3601 3602 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local), count, cmap1, PETSC_OWN_POINTER, &iscmap)); 3603 } 3604 3605 /* (3) Create sequential Msub */ 3606 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_INITIAL_MATRIX, allcolumns, &Msub)); 3607 } 3608 3609 PetscCall(ISGetLocalSize(iscol_sub, &count)); 3610 aij = (Mat_SeqAIJ *)Msub->data; 3611 ii = aij->i; 3612 PetscCall(ISGetIndices(iscmap, &cmap)); 3613 3614 /* 3615 m - number of local rows 3616 Ncols - number of columns (same on all processors) 3617 rstart - first row in new global matrix generated 3618 */ 3619 PetscCall(MatGetSize(Msub, &m, NULL)); 3620 3621 if (call == MAT_INITIAL_MATRIX) { 3622 /* (4) Create parallel newmat */ 3623 PetscMPIInt rank, size; 3624 PetscInt csize; 3625 3626 PetscCallMPI(MPI_Comm_size(comm, &size)); 3627 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 3628 3629 /* 3630 Determine the number of non-zeros in the diagonal and off-diagonal 3631 portions of the matrix in order to do correct preallocation 3632 */ 3633 3634 /* first get start and end of "diagonal" columns */ 3635 PetscCall(ISGetLocalSize(iscol, &csize)); 3636 if (csize == PETSC_DECIDE) { 3637 PetscCall(ISGetSize(isrow, &mglobal)); 3638 if (mglobal == Ncols) { /* square matrix */ 3639 nlocal = m; 3640 } else { 3641 nlocal = Ncols / size + ((Ncols % size) > rank); 3642 } 3643 } else { 3644 nlocal = csize; 3645 } 3646 PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm)); 3647 rstart = rend - nlocal; 3648 PetscCheck(rank != size - 1 || rend == Ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, Ncols); 3649 3650 /* next, compute all the lengths */ 3651 jj = aij->j; 3652 PetscCall(PetscMalloc1(2 * m + 1, &dlens)); 3653 olens = dlens + m; 3654 for (i = 0; i < m; i++) { 3655 jend = ii[i + 1] - ii[i]; 3656 olen = 0; 3657 dlen = 0; 3658 for (j = 0; j < jend; j++) { 3659 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 3660 else dlen++; 3661 jj++; 3662 } 3663 olens[i] = olen; 3664 dlens[i] = dlen; 3665 } 3666 3667 PetscCall(ISGetBlockSize(isrow, &bs)); 3668 PetscCall(ISGetBlockSize(iscol, &cbs)); 3669 3670 PetscCall(MatCreate(comm, &M)); 3671 PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, Ncols)); 3672 PetscCall(MatSetBlockSizes(M, bs, cbs)); 3673 PetscCall(MatSetType(M, ((PetscObject)mat)->type_name)); 3674 
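      /* dlens/olens give the exact number of diagonal and off-diagonal nonzeros per local row,
         so the MatSetValues_MPIAIJ() loop below should not trigger any additional mallocs */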
PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens)); 3675 PetscCall(PetscFree(dlens)); 3676 3677 } else { /* call == MAT_REUSE_MATRIX */ 3678 M = *newmat; 3679 PetscCall(MatGetLocalSize(M, &i, NULL)); 3680 PetscCheck(i == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request"); 3681 PetscCall(MatZeroEntries(M)); 3682 /* 3683 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3684 rather than the slower MatSetValues(). 3685 */ 3686 M->was_assembled = PETSC_TRUE; 3687 M->assembled = PETSC_FALSE; 3688 } 3689 3690 /* (5) Set values of Msub to *newmat */ 3691 PetscCall(PetscMalloc1(count, &colsub)); 3692 PetscCall(MatGetOwnershipRange(M, &rstart, NULL)); 3693 3694 jj = aij->j; 3695 PetscCall(MatSeqAIJGetArrayRead(Msub, (const PetscScalar **)&aa)); 3696 for (i = 0; i < m; i++) { 3697 row = rstart + i; 3698 nz = ii[i + 1] - ii[i]; 3699 for (j = 0; j < nz; j++) colsub[j] = cmap[jj[j]]; 3700 PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, colsub, aa, INSERT_VALUES)); 3701 jj += nz; 3702 aa += nz; 3703 } 3704 PetscCall(MatSeqAIJRestoreArrayRead(Msub, (const PetscScalar **)&aa)); 3705 PetscCall(ISRestoreIndices(iscmap, &cmap)); 3706 3707 PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY)); 3708 PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY)); 3709 3710 PetscCall(PetscFree(colsub)); 3711 3712 /* save Msub, iscol_sub and iscmap used in processor for next request */ 3713 if (call == MAT_INITIAL_MATRIX) { 3714 *newmat = M; 3715 PetscCall(PetscObjectCompose((PetscObject)*newmat, "SubMatrix", (PetscObject)Msub)); 3716 PetscCall(MatDestroy(&Msub)); 3717 3718 PetscCall(PetscObjectCompose((PetscObject)*newmat, "SubIScol", (PetscObject)iscol_sub)); 3719 PetscCall(ISDestroy(&iscol_sub)); 3720 3721 PetscCall(PetscObjectCompose((PetscObject)*newmat, "Subcmap", (PetscObject)iscmap)); 3722 PetscCall(ISDestroy(&iscmap)); 3723 3724 if (iscol_local) { 3725 PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local)); 3726 PetscCall(ISDestroy(&iscol_local)); 3727 } 3728 } 3729 PetscFunctionReturn(PETSC_SUCCESS); 3730 } 3731 3732 /* 3733 Not great since it makes two copies of the submatrix, first an SeqAIJ 3734 in local and then by concatenating the local matrices the end result. 3735 Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3736 3737 This requires a sequential iscol with all indices. 
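   Because iscol must be gathered so that every process holds all selected column indices, the memory
   needed on each process grows with the global number of columns in the submatrix, hence "nonscalable".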
3738 */ 3739 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat) 3740 { 3741 PetscMPIInt rank, size; 3742 PetscInt i, m, n, rstart, row, rend, nz, *cwork, j, bs, cbs; 3743 PetscInt *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal; 3744 Mat M, Mreuse; 3745 MatScalar *aa, *vwork; 3746 MPI_Comm comm; 3747 Mat_SeqAIJ *aij; 3748 PetscBool colflag, allcolumns = PETSC_FALSE; 3749 3750 PetscFunctionBegin; 3751 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3752 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 3753 PetscCallMPI(MPI_Comm_size(comm, &size)); 3754 3755 /* Check for special case: each processor gets entire matrix columns */ 3756 PetscCall(ISIdentity(iscol, &colflag)); 3757 PetscCall(ISGetLocalSize(iscol, &n)); 3758 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3759 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPI_C_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 3760 3761 if (call == MAT_REUSE_MATRIX) { 3762 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse)); 3763 PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 3764 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_REUSE_MATRIX, allcolumns, &Mreuse)); 3765 } else { 3766 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_INITIAL_MATRIX, allcolumns, &Mreuse)); 3767 } 3768 3769 /* 3770 m - number of local rows 3771 n - number of columns (same on all processors) 3772 rstart - first row in new global matrix generated 3773 */ 3774 PetscCall(MatGetSize(Mreuse, &m, &n)); 3775 PetscCall(MatGetBlockSizes(Mreuse, &bs, &cbs)); 3776 if (call == MAT_INITIAL_MATRIX) { 3777 aij = (Mat_SeqAIJ *)Mreuse->data; 3778 ii = aij->i; 3779 jj = aij->j; 3780 3781 /* 3782 Determine the number of non-zeros in the diagonal and off-diagonal 3783 portions of the matrix in order to do correct preallocation 3784 */ 3785 3786 /* first get start and end of "diagonal" columns */ 3787 if (csize == PETSC_DECIDE) { 3788 PetscCall(ISGetSize(isrow, &mglobal)); 3789 if (mglobal == n) { /* square matrix */ 3790 nlocal = m; 3791 } else { 3792 nlocal = n / size + ((n % size) > rank); 3793 } 3794 } else { 3795 nlocal = csize; 3796 } 3797 PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm)); 3798 rstart = rend - nlocal; 3799 PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n); 3800 3801 /* next, compute all the lengths */ 3802 PetscCall(PetscMalloc1(2 * m + 1, &dlens)); 3803 olens = dlens + m; 3804 for (i = 0; i < m; i++) { 3805 jend = ii[i + 1] - ii[i]; 3806 olen = 0; 3807 dlen = 0; 3808 for (j = 0; j < jend; j++) { 3809 if (*jj < rstart || *jj >= rend) olen++; 3810 else dlen++; 3811 jj++; 3812 } 3813 olens[i] = olen; 3814 dlens[i] = dlen; 3815 } 3816 PetscCall(MatCreate(comm, &M)); 3817 PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, n)); 3818 PetscCall(MatSetBlockSizes(M, bs, cbs)); 3819 PetscCall(MatSetType(M, ((PetscObject)mat)->type_name)); 3820 PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens)); 3821 PetscCall(PetscFree(dlens)); 3822 } else { 3823 PetscInt ml, nl; 3824 3825 M = *newmat; 3826 PetscCall(MatGetLocalSize(M, &ml, &nl)); 3827 PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as 
request"); 3828 PetscCall(MatZeroEntries(M)); 3829 /* 3830 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3831 rather than the slower MatSetValues(). 3832 */ 3833 M->was_assembled = PETSC_TRUE; 3834 M->assembled = PETSC_FALSE; 3835 } 3836 PetscCall(MatGetOwnershipRange(M, &rstart, &rend)); 3837 aij = (Mat_SeqAIJ *)Mreuse->data; 3838 ii = aij->i; 3839 jj = aij->j; 3840 3841 /* trigger copy to CPU if needed */ 3842 PetscCall(MatSeqAIJGetArrayRead(Mreuse, (const PetscScalar **)&aa)); 3843 for (i = 0; i < m; i++) { 3844 row = rstart + i; 3845 nz = ii[i + 1] - ii[i]; 3846 cwork = jj; 3847 jj = PetscSafePointerPlusOffset(jj, nz); 3848 vwork = aa; 3849 aa = PetscSafePointerPlusOffset(aa, nz); 3850 PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES)); 3851 } 3852 PetscCall(MatSeqAIJRestoreArrayRead(Mreuse, (const PetscScalar **)&aa)); 3853 3854 PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY)); 3855 PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY)); 3856 *newmat = M; 3857 3858 /* save submatrix used in processor for next request */ 3859 if (call == MAT_INITIAL_MATRIX) { 3860 PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse)); 3861 PetscCall(MatDestroy(&Mreuse)); 3862 } 3863 PetscFunctionReturn(PETSC_SUCCESS); 3864 } 3865 3866 static PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[]) 3867 { 3868 PetscInt m, cstart, cend, j, nnz, i, d, *ld; 3869 PetscInt *d_nnz, *o_nnz, nnz_max = 0, rstart, ii, irstart; 3870 const PetscInt *JJ; 3871 PetscBool nooffprocentries; 3872 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)B->data; 3873 3874 PetscFunctionBegin; 3875 PetscCall(PetscLayoutSetUp(B->rmap)); 3876 PetscCall(PetscLayoutSetUp(B->cmap)); 3877 m = B->rmap->n; 3878 cstart = B->cmap->rstart; 3879 cend = B->cmap->rend; 3880 rstart = B->rmap->rstart; 3881 irstart = Ii[0]; 3882 3883 PetscCall(PetscCalloc2(m, &d_nnz, m, &o_nnz)); 3884 3885 if (PetscDefined(USE_DEBUG)) { 3886 for (i = 0; i < m; i++) { 3887 nnz = Ii[i + 1] - Ii[i]; 3888 JJ = PetscSafePointerPlusOffset(J, Ii[i] - irstart); 3889 PetscCheck(nnz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns", i, nnz); 3890 PetscCheck(!nnz || !(JJ[0] < 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT, i, JJ[0]); 3891 PetscCheck(!nnz || !(JJ[nnz - 1] >= B->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")", i, JJ[nnz - 1], B->cmap->N); 3892 } 3893 } 3894 3895 for (i = 0; i < m; i++) { 3896 nnz = Ii[i + 1] - Ii[i]; 3897 JJ = PetscSafePointerPlusOffset(J, Ii[i] - irstart); 3898 nnz_max = PetscMax(nnz_max, nnz); 3899 d = 0; 3900 for (j = 0; j < nnz; j++) { 3901 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3902 } 3903 d_nnz[i] = d; 3904 o_nnz[i] = nnz - d; 3905 } 3906 PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz)); 3907 PetscCall(PetscFree2(d_nnz, o_nnz)); 3908 3909 for (i = 0; i < m; i++) { 3910 ii = i + rstart; 3911 PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, Ii[i + 1] - Ii[i], PetscSafePointerPlusOffset(J, Ii[i] - irstart), PetscSafePointerPlusOffset(v, Ii[i] - irstart), INSERT_VALUES)); 3912 } 3913 nooffprocentries = B->nooffprocentries; 3914 B->nooffprocentries = PETSC_TRUE; 3915 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 3916 
PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 3917 B->nooffprocentries = nooffprocentries; 3918 3919 /* count number of entries below block diagonal */ 3920 PetscCall(PetscFree(Aij->ld)); 3921 PetscCall(PetscCalloc1(m, &ld)); 3922 Aij->ld = ld; 3923 for (i = 0; i < m; i++) { 3924 nnz = Ii[i + 1] - Ii[i]; 3925 j = 0; 3926 while (j < nnz && J[j] < cstart) j++; 3927 ld[i] = j; 3928 if (J) J += nnz; 3929 } 3930 3931 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 3932 PetscFunctionReturn(PETSC_SUCCESS); 3933 } 3934 3935 /*@ 3936 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in `MATAIJ` format 3937 (the default parallel PETSc format). 3938 3939 Collective 3940 3941 Input Parameters: 3942 + B - the matrix 3943 . i - the indices into `j` for the start of each local row (indices start with zero) 3944 . j - the column indices for each local row (indices start with zero) 3945 - v - optional values in the matrix 3946 3947 Level: developer 3948 3949 Notes: 3950 The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc; 3951 thus you CANNOT change the matrix entries by changing the values of `v` after you have 3952 called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays. 3953 3954 The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array. 3955 3956 A convenience routine for this functionality is `MatCreateMPIAIJWithArrays()`. 3957 3958 You can update the matrix with new numerical values using `MatUpdateMPIAIJWithArrays()` after this call if the column indices in `j` are sorted. 3959 3960 If you do **not** use `MatUpdateMPIAIJWithArrays()`, the column indices in `j` do not need to be sorted. If you will use 3961 `MatUpdateMPIAIJWithArrays()`, the column indices **must** be sorted. 3962 3963 The format which is used for the sparse matrix input, is equivalent to a 3964 row-major ordering.. i.e for the following matrix, the input data expected is 3965 as shown 3966 .vb 3967 1 0 0 3968 2 0 3 P0 3969 ------- 3970 4 5 6 P1 3971 3972 Process0 [P0] rows_owned=[0,1] 3973 i = {0,1,3} [size = nrow+1 = 2+1] 3974 j = {0,0,2} [size = 3] 3975 v = {1,2,3} [size = 3] 3976 3977 Process1 [P1] rows_owned=[2] 3978 i = {0,3} [size = nrow+1 = 1+1] 3979 j = {0,1,2} [size = 3] 3980 v = {4,5,6} [size = 3] 3981 .ve 3982 3983 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`, 3984 `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()`, `MatCreateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 3985 @*/ 3986 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B, const PetscInt i[], const PetscInt j[], const PetscScalar v[]) 3987 { 3988 PetscFunctionBegin; 3989 PetscTryMethod(B, "MatMPIAIJSetPreallocationCSR_C", (Mat, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, i, j, v)); 3990 PetscFunctionReturn(PETSC_SUCCESS); 3991 } 3992 3993 /*@ 3994 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in `MATMPIAIJ` format 3995 (the default parallel PETSc format). For good matrix assembly performance 3996 the user should preallocate the matrix storage by setting the parameters 3997 `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). 3998 3999 Collective 4000 4001 Input Parameters: 4002 + B - the matrix 4003 . 
d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4004 (same value is used for all local rows) 4005 . d_nnz - array containing the number of nonzeros in the various rows of the 4006 DIAGONAL portion of the local submatrix (possibly different for each row) 4007 or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `d_nz` is used to specify the nonzero structure. 4008 The size of this array is equal to the number of local rows, i.e 'm'. 4009 For matrices that will be factored, you must leave room for (and set) 4010 the diagonal entry even if it is zero. 4011 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4012 submatrix (same value is used for all local rows). 4013 - o_nnz - array containing the number of nonzeros in the various rows of the 4014 OFF-DIAGONAL portion of the local submatrix (possibly different for 4015 each row) or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `o_nz` is used to specify the nonzero 4016 structure. The size of this array is equal to the number 4017 of local rows, i.e 'm'. 4018 4019 Example Usage: 4020 Consider the following 8x8 matrix with 34 non-zero values, that is 4021 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4022 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4023 as follows 4024 4025 .vb 4026 1 2 0 | 0 3 0 | 0 4 4027 Proc0 0 5 6 | 7 0 0 | 8 0 4028 9 0 10 | 11 0 0 | 12 0 4029 ------------------------------------- 4030 13 0 14 | 15 16 17 | 0 0 4031 Proc1 0 18 0 | 19 20 21 | 0 0 4032 0 0 0 | 22 23 0 | 24 0 4033 ------------------------------------- 4034 Proc2 25 26 27 | 0 0 28 | 29 0 4035 30 0 0 | 31 32 33 | 0 34 4036 .ve 4037 4038 This can be represented as a collection of submatrices as 4039 .vb 4040 A B C 4041 D E F 4042 G H I 4043 .ve 4044 4045 Where the submatrices A,B,C are owned by proc0, D,E,F are 4046 owned by proc1, G,H,I are owned by proc2. 4047 4048 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4049 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4050 The 'M','N' parameters are 8,8, and have the same values on all procs. 4051 4052 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4053 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4054 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4055 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4056 part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ` 4057 matrix, and [DF] as another `MATSEQAIJ` matrix. 4058 4059 When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are 4060 allocated for every row of the local DIAGONAL submatrix, and `o_nz` 4061 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 4062 One way to choose `d_nz` and `o_nz` is to use the maximum number of nonzeros over 4063 the local rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4064 In this case, the values of `d_nz`, `o_nz` are 4065 .vb 4066 proc0 dnz = 2, o_nz = 2 4067 proc1 dnz = 3, o_nz = 2 4068 proc2 dnz = 1, o_nz = 4 4069 .ve 4070 We are allocating `m`*(`d_nz`+`o_nz`) storage locations for every proc. This 4071 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4072 for proc3. i.e we are using 12+15+10=37 storage locations to store 4073 34 values. 4074 4075 When `d_nnz`, `o_nnz` parameters are specified, the storage is specified 4076 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 
4077 In the above case the values for `d_nnz`, `o_nnz` are 4078 .vb 4079 proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2] 4080 proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1] 4081 proc2 d_nnz = [1,1] and o_nnz = [4,4] 4082 .ve 4083 Here the space allocated is sum of all the above values i.e 34, and 4084 hence pre-allocation is perfect. 4085 4086 Level: intermediate 4087 4088 Notes: 4089 If the *_nnz parameter is given then the *_nz parameter is ignored 4090 4091 The `MATAIJ` format, also called compressed row storage (CSR), is compatible with standard Fortran 4092 storage. The stored row and column indices begin with zero. 4093 See [Sparse Matrices](sec_matsparse) for details. 4094 4095 The parallel matrix is partitioned such that the first m0 rows belong to 4096 process 0, the next m1 rows belong to process 1, the next m2 rows belong 4097 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 4098 4099 The DIAGONAL portion of the local submatrix of a processor can be defined 4100 as the submatrix which is obtained by extraction the part corresponding to 4101 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 4102 first row that belongs to the processor, r2 is the last row belonging to 4103 the this processor, and c1-c2 is range of indices of the local part of a 4104 vector suitable for applying the matrix to. This is an mxn matrix. In the 4105 common case of a square matrix, the row and column ranges are the same and 4106 the DIAGONAL part is also square. The remaining portion of the local 4107 submatrix (mxN) constitute the OFF-DIAGONAL portion. 4108 4109 If `o_nnz` and `d_nnz` are specified, then `o_nz` and `d_nz` are ignored. 4110 4111 You can call `MatGetInfo()` to get information on how effective the preallocation was; 4112 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 4113 You can also run with the option `-info` and look for messages with the string 4114 malloc in them to see if additional memory allocation was needed. 4115 4116 .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MATMPIAIJ`, `MATAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`, 4117 `MatGetInfo()`, `PetscSplitOwnership()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4118 @*/ 4119 PetscErrorCode MatMPIAIJSetPreallocation(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[]) 4120 { 4121 PetscFunctionBegin; 4122 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 4123 PetscValidType(B, 1); 4124 PetscTryMethod(B, "MatMPIAIJSetPreallocation_C", (Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, d_nz, d_nnz, o_nz, o_nnz)); 4125 PetscFunctionReturn(PETSC_SUCCESS); 4126 } 4127 4128 /*@ 4129 MatCreateMPIAIJWithArrays - creates a `MATMPIAIJ` matrix using arrays that contain in standard 4130 CSR format for the local rows. 4131 4132 Collective 4133 4134 Input Parameters: 4135 + comm - MPI communicator 4136 . m - number of local rows (Cannot be `PETSC_DECIDE`) 4137 . n - This value should be the same as the local size used in creating the 4138 x vector for the matrix-vector product $ y = Ax$. (or `PETSC_DECIDE` to have 4139 calculated if `N` is given) For square matrices n is almost always `m`. 4140 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given) 4141 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given) 4142 . 
i - row indices (of length m+1); that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 4143 . j - global column indices 4144 - a - optional matrix values 4145 4146 Output Parameter: 4147 . mat - the matrix 4148 4149 Level: intermediate 4150 4151 Notes: 4152 The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc; 4153 thus you CANNOT change the matrix entries by changing the values of `a[]` after you have 4154 called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays. 4155 4156 The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array. 4157 4158 Once you have created the matrix you can update it with new numerical values using `MatUpdateMPIAIJWithArray()` 4159 4160 If you do **not** use `MatUpdateMPIAIJWithArray()`, the column indices in `j` do not need to be sorted. If you will use 4161 `MatUpdateMPIAIJWithArrays()`, the column indices **must** be sorted. 4162 4163 The format which is used for the sparse matrix input, is equivalent to a 4164 row-major ordering, i.e., for the following matrix, the input data expected is 4165 as shown 4166 .vb 4167 1 0 0 4168 2 0 3 P0 4169 ------- 4170 4 5 6 P1 4171 4172 Process0 [P0] rows_owned=[0,1] 4173 i = {0,1,3} [size = nrow+1 = 2+1] 4174 j = {0,0,2} [size = 3] 4175 v = {1,2,3} [size = 3] 4176 4177 Process1 [P1] rows_owned=[2] 4178 i = {0,3} [size = nrow+1 = 1+1] 4179 j = {0,1,2} [size = 3] 4180 v = {4,5,6} [size = 3] 4181 .ve 4182 4183 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4184 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4185 @*/ 4186 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat) 4187 { 4188 PetscFunctionBegin; 4189 PetscCheck(!i || !i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 4190 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4191 PetscCall(MatCreate(comm, mat)); 4192 PetscCall(MatSetSizes(*mat, m, n, M, N)); 4193 /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */ 4194 PetscCall(MatSetType(*mat, MATMPIAIJ)); 4195 PetscCall(MatMPIAIJSetPreallocationCSR(*mat, i, j, a)); 4196 PetscFunctionReturn(PETSC_SUCCESS); 4197 } 4198 4199 /*@ 4200 MatUpdateMPIAIJWithArrays - updates a `MATMPIAIJ` matrix using arrays that contain in standard 4201 CSR format for the local rows. Only the numerical values are updated the other arrays must be identical to what was passed 4202 from `MatCreateMPIAIJWithArrays()` 4203 4204 Deprecated: Use `MatUpdateMPIAIJWithArray()` 4205 4206 Collective 4207 4208 Input Parameters: 4209 + mat - the matrix 4210 . m - number of local rows (Cannot be `PETSC_DECIDE`) 4211 . n - This value should be the same as the local size used in creating the 4212 x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have 4213 calculated if N is given) For square matrices n is almost always m. 4214 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 4215 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 4216 . 
Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix 4217 . J - column indices 4218 - v - matrix values 4219 4220 Level: deprecated 4221 4222 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4223 `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4224 @*/ 4225 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[]) 4226 { 4227 PetscInt nnz, i; 4228 PetscBool nooffprocentries; 4229 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)mat->data; 4230 Mat_SeqAIJ *Ad = (Mat_SeqAIJ *)Aij->A->data; 4231 PetscScalar *ad, *ao; 4232 PetscInt ldi, Iii, md; 4233 const PetscInt *Adi = Ad->i; 4234 PetscInt *ld = Aij->ld; 4235 4236 PetscFunctionBegin; 4237 PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 4238 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4239 PetscCheck(m == mat->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()"); 4240 PetscCheck(n == mat->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()"); 4241 4242 PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad)); 4243 PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao)); 4244 4245 for (i = 0; i < m; i++) { 4246 if (PetscDefined(USE_DEBUG)) { 4247 for (PetscInt j = Ii[i] + 1; j < Ii[i + 1]; ++j) { 4248 PetscCheck(J[j] >= J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is not sorted", j - Ii[i], J[j], i); 4249 PetscCheck(J[j] != J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is identical to previous entry", j - Ii[i], J[j], i); 4250 } 4251 } 4252 nnz = Ii[i + 1] - Ii[i]; 4253 Iii = Ii[i]; 4254 ldi = ld[i]; 4255 md = Adi[i + 1] - Adi[i]; 4256 PetscCall(PetscArraycpy(ao, v + Iii, ldi)); 4257 PetscCall(PetscArraycpy(ad, v + Iii + ldi, md)); 4258 PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md)); 4259 ad += md; 4260 ao += nnz - md; 4261 } 4262 nooffprocentries = mat->nooffprocentries; 4263 mat->nooffprocentries = PETSC_TRUE; 4264 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad)); 4265 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao)); 4266 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A)); 4267 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B)); 4268 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 4269 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 4270 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 4271 mat->nooffprocentries = nooffprocentries; 4272 PetscFunctionReturn(PETSC_SUCCESS); 4273 } 4274 4275 /*@ 4276 MatUpdateMPIAIJWithArray - updates an `MATMPIAIJ` matrix using an array that contains the nonzero values 4277 4278 Collective 4279 4280 Input Parameters: 4281 + mat - the matrix 4282 - v - matrix values, stored by row 4283 4284 Level: intermediate 4285 4286 Notes: 4287 The matrix must have been obtained with `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` 4288 4289 The 
column indices in the call to `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` must have been sorted for this call to work correctly 4290 4291 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4292 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4293 @*/ 4294 PetscErrorCode MatUpdateMPIAIJWithArray(Mat mat, const PetscScalar v[]) 4295 { 4296 PetscInt nnz, i, m; 4297 PetscBool nooffprocentries; 4298 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)mat->data; 4299 Mat_SeqAIJ *Ad = (Mat_SeqAIJ *)Aij->A->data; 4300 Mat_SeqAIJ *Ao = (Mat_SeqAIJ *)Aij->B->data; 4301 PetscScalar *ad, *ao; 4302 const PetscInt *Adi = Ad->i, *Adj = Ao->i; 4303 PetscInt ldi, Iii, md; 4304 PetscInt *ld = Aij->ld; 4305 4306 PetscFunctionBegin; 4307 m = mat->rmap->n; 4308 4309 PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad)); 4310 PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao)); 4311 Iii = 0; 4312 for (i = 0; i < m; i++) { 4313 nnz = Adi[i + 1] - Adi[i] + Adj[i + 1] - Adj[i]; 4314 ldi = ld[i]; 4315 md = Adi[i + 1] - Adi[i]; 4316 PetscCall(PetscArraycpy(ad, v + Iii + ldi, md)); 4317 ad += md; 4318 if (ao) { 4319 PetscCall(PetscArraycpy(ao, v + Iii, ldi)); 4320 PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md)); 4321 ao += nnz - md; 4322 } 4323 Iii += nnz; 4324 } 4325 nooffprocentries = mat->nooffprocentries; 4326 mat->nooffprocentries = PETSC_TRUE; 4327 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad)); 4328 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao)); 4329 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A)); 4330 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B)); 4331 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 4332 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 4333 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 4334 mat->nooffprocentries = nooffprocentries; 4335 PetscFunctionReturn(PETSC_SUCCESS); 4336 } 4337 4338 /*@ 4339 MatCreateAIJ - Creates a sparse parallel matrix in `MATAIJ` format 4340 (the default parallel PETSc format). For good matrix assembly performance 4341 the user should preallocate the matrix storage by setting the parameters 4342 `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). 4343 4344 Collective 4345 4346 Input Parameters: 4347 + comm - MPI communicator 4348 . m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given) 4349 This value should be the same as the local size used in creating the 4350 y vector for the matrix-vector product y = Ax. 4351 . n - This value should be the same as the local size used in creating the 4352 x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have 4353 calculated if N is given) For square matrices n is almost always m. 4354 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 4355 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 4356 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4357 (same value is used for all local rows) 4358 . d_nnz - array containing the number of nonzeros in the various rows of the 4359 DIAGONAL portion of the local submatrix (possibly different for each row) 4360 or `NULL`, if `d_nz` is used to specify the nonzero structure. 4361 The size of this array is equal to the number of local rows, i.e 'm'. 4362 . 
o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4363 submatrix (same value is used for all local rows). 4364 - o_nnz - array containing the number of nonzeros in the various rows of the 4365 OFF-DIAGONAL portion of the local submatrix (possibly different for 4366 each row) or `NULL`, if `o_nz` is used to specify the nonzero 4367 structure. The size of this array is equal to the number 4368 of local rows, i.e 'm'. 4369 4370 Output Parameter: 4371 . A - the matrix 4372 4373 Options Database Keys: 4374 + -mat_no_inode - Do not use inodes 4375 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4376 - -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in `MatMult()` of sparse parallel matrices. 4377 See viewer types in manual of `MatView()`. Of them, ascii_matlab, draw or binary cause the `VecScatter` 4378 to be viewed as a matrix. Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one `MatMult()` call. 4379 4380 Level: intermediate 4381 4382 Notes: 4383 It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, 4384 MatXXXXSetPreallocation() paradigm instead of this routine directly. 4385 [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`] 4386 4387 If the *_nnz parameter is given then the *_nz parameter is ignored 4388 4389 The `m`,`n`,`M`,`N` parameters specify the size of the matrix, and its partitioning across 4390 processors, while `d_nz`,`d_nnz`,`o_nz`,`o_nnz` parameters specify the approximate 4391 storage requirements for this matrix. 4392 4393 If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one 4394 processor than it must be used on all processors that share the object for 4395 that argument. 4396 4397 If `m` and `n` are not `PETSC_DECIDE`, then the values determine the `PetscLayout` of the matrix and the ranges returned by 4398 `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, and `MatGetOwnershipRangesColumn()`. 4399 4400 The user MUST specify either the local or global matrix dimensions 4401 (possibly both). 4402 4403 The parallel matrix is partitioned across processors such that the 4404 first `m0` rows belong to process 0, the next `m1` rows belong to 4405 process 1, the next `m2` rows belong to process 2, etc., where 4406 `m0`, `m1`, `m2`... are the input parameter `m` on each MPI process. I.e., each MPI process stores 4407 values corresponding to [m x N] submatrix. 4408 4409 The columns are logically partitioned with the n0 columns belonging 4410 to 0th partition, the next n1 columns belonging to the next 4411 partition etc.. where n0,n1,n2... are the input parameter 'n'. 4412 4413 The DIAGONAL portion of the local submatrix on any given processor 4414 is the submatrix corresponding to the rows and columns m,n 4415 corresponding to the given processor. i.e diagonal matrix on 4416 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4417 etc. The remaining portion of the local submatrix [m x (N-n)] 4418 constitute the OFF-DIAGONAL portion. The example below better 4419 illustrates this concept. The two matrices, the DIAGONAL portion and 4420 the OFF-DIAGONAL portion are each stored as `MATSEQAIJ` matrices. 
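   A minimal call sketch for the middle process of the Example Usage below, which owns the middle three rows of the 8x8 matrix and uses the per-row counts listed there (the variable names are illustrative only), is
.vb
   PetscInt d_nnz[] = {3, 3, 2}, o_nnz[] = {2, 1, 1};
   Mat      C;
   MatCreateAIJ(PETSC_COMM_WORLD, 3, 3, 8, 8, 0, d_nnz, 0, o_nnz, &C);
.ve
   followed by the usual `MatSetValues()` and `MatAssemblyBegin()`/`MatAssemblyEnd()` calls; the `d_nz` and `o_nz` arguments passed as 0 are ignored because `d_nnz` and `o_nnz` are supplied.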
 4421 4422 For a square global matrix we define each processor's diagonal portion 4423 to be its local rows and the corresponding columns (a square submatrix); 4424 each processor's off-diagonal portion encompasses the remainder of the 4425 local matrix (a rectangular submatrix). 4426 4427 If `o_nnz` and `d_nnz` are specified, then `o_nz` and `d_nz` are ignored. 4428 4429 When calling this routine with a single process communicator, a matrix of 4430 type `MATSEQAIJ` is returned. If a matrix of type `MATMPIAIJ` is desired for this 4431 type of communicator, use the construction mechanism 4432 .vb 4433 MatCreate(..., &A); 4434 MatSetType(A, MATMPIAIJ); 4435 MatSetSizes(A, m, n, M, N); 4436 MatMPIAIJSetPreallocation(A, ...); 4437 .ve 4438 4439 By default, this format uses inodes (identical nodes) when possible. 4440 We search for consecutive rows with the same nonzero structure, thereby 4441 reusing matrix information to achieve increased efficiency. 4442 4443 Example Usage: 4444 Consider the following 8x8 matrix with 34 non-zero values that is 4445 assembled across 3 processors. Let's assume that proc0 owns 3 rows, 4446 proc1 owns 3 rows, and proc2 owns 2 rows. This division can be shown 4447 as follows 4448 4449 .vb 4450 1 2 0 | 0 3 0 | 0 4 4451 Proc0 0 5 6 | 7 0 0 | 8 0 4452 9 0 10 | 11 0 0 | 12 0 4453 ------------------------------------- 4454 13 0 14 | 15 16 17 | 0 0 4455 Proc1 0 18 0 | 19 20 21 | 0 0 4456 0 0 0 | 22 23 0 | 24 0 4457 ------------------------------------- 4458 Proc2 25 26 27 | 0 0 28 | 29 0 4459 30 0 0 | 31 32 33 | 0 34 4460 .ve 4461 4462 This can be represented as a collection of submatrices as 4463 4464 .vb 4465 A B C 4466 D E F 4467 G H I 4468 .ve 4469 4470 where the submatrices A,B,C are owned by proc0, D,E,F are 4471 owned by proc1, and G,H,I are owned by proc2. 4472 4473 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4474 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4475 The 'M','N' parameters are 8,8, and have the same values on all procs. 4476 4477 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4478 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4479 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4480 Internally, each processor stores the DIAGONAL part and the OFF-DIAGONAL 4481 part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ` 4482 matrix, and [DF] as another `MATSEQAIJ` matrix. 4483 4484 When the `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are 4485 allocated for every row of the local DIAGONAL submatrix, and `o_nz` 4486 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 4487 One way to choose `d_nz` and `o_nz` is to use the maximum number of nonzeros over 4488 the local rows for each of the local DIAGONAL and OFF-DIAGONAL submatrices. 4489 In this case, the values of `d_nz`,`o_nz` are 4490 .vb 4491 proc0 d_nz = 2, o_nz = 2 4492 proc1 d_nz = 3, o_nz = 2 4493 proc2 d_nz = 1, o_nz = 4 4494 .ve 4495 We are allocating m*(`d_nz`+`o_nz`) storage locations for every proc. This 4496 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, and 2*(1+4)=10 4497 for proc2, i.e. we are using 12+15+10=37 storage locations to store 4498 34 values. 4499 4500 When the `d_nnz`, `o_nnz` parameters are specified, the storage is specified 4501 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 
 4502 In the above case the values for d_nnz,o_nnz are 4503 .vb 4504 proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2] 4505 proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1] 4506 proc2 d_nnz = [1,1] and o_nnz = [4,4] 4507 .ve 4508 Here the space allocated is the sum of all the above values, i.e. 34, and 4509 hence the preallocation is perfect. 4510 4511 .seealso: [](ch_matrices), `Mat`, [Sparse Matrix Creation](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4512 `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()`, `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, 4513 `MatGetOwnershipRangesColumn()`, `PetscLayout` 4514 @*/ 4515 PetscErrorCode MatCreateAIJ(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A) 4516 { 4517 PetscMPIInt size; 4518 4519 PetscFunctionBegin; 4520 PetscCall(MatCreate(comm, A)); 4521 PetscCall(MatSetSizes(*A, m, n, M, N)); 4522 PetscCallMPI(MPI_Comm_size(comm, &size)); 4523 if (size > 1) { 4524 PetscCall(MatSetType(*A, MATMPIAIJ)); 4525 PetscCall(MatMPIAIJSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz)); 4526 } else { 4527 PetscCall(MatSetType(*A, MATSEQAIJ)); 4528 PetscCall(MatSeqAIJSetPreallocation(*A, d_nz, d_nnz)); 4529 } 4530 PetscFunctionReturn(PETSC_SUCCESS); 4531 } 4532 4533 /*@C 4534 MatMPIAIJGetSeqAIJ - Returns the local pieces of this distributed matrix 4535 4536 Not Collective 4537 4538 Input Parameter: 4539 . A - The `MATMPIAIJ` matrix 4540 4541 Output Parameters: 4542 + Ad - The local diagonal block as a `MATSEQAIJ` matrix 4543 . Ao - The local off-diagonal block as a `MATSEQAIJ` matrix 4544 - colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix 4545 4546 Level: intermediate 4547 4548 Note: 4549 The rows in `Ad` and `Ao` are in [0, Nr), where Nr is the number of local rows on this process. The columns 4550 in `Ad` are in [0, Nc) where Nc is the number of local columns. The columns in `Ao` are in [0, Nco), where Nco is 4551 the number of nonzero columns in the local off-diagonal piece of the matrix `A`. The array colmap maps these 4552 local column numbers to global column numbers in the original matrix. 
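   For illustration, a minimal sketch of recovering the global column of an off-diagonal entry (the variable names are placeholders) is
.vb
   Mat             Ad, Ao;
   const PetscInt *colmap;
   MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &colmap);
.ve
   after which an entry stored in local column c of `Ao` lies in global column colmap[c] of the parallel matrix `A`.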
4553 4554 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATSEQAIJ` 4555 @*/ 4556 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[]) 4557 { 4558 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 4559 PetscBool flg; 4560 4561 PetscFunctionBegin; 4562 PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &flg)); 4563 PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIAIJ matrix as input"); 4564 if (Ad) *Ad = a->A; 4565 if (Ao) *Ao = a->B; 4566 if (colmap) *colmap = a->garray; 4567 PetscFunctionReturn(PETSC_SUCCESS); 4568 } 4569 4570 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 4571 { 4572 PetscInt m, N, i, rstart, nnz, Ii; 4573 PetscInt *indx; 4574 PetscScalar *values; 4575 MatType rootType; 4576 4577 PetscFunctionBegin; 4578 PetscCall(MatGetSize(inmat, &m, &N)); 4579 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4580 PetscInt *dnz, *onz, sum, bs, cbs; 4581 4582 if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N)); 4583 /* Check sum(n) = N */ 4584 PetscCallMPI(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm)); 4585 PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N); 4586 4587 PetscCallMPI(MPI_Scan(&m, &rstart, 1, MPIU_INT, MPI_SUM, comm)); 4588 rstart -= m; 4589 4590 MatPreallocateBegin(comm, m, n, dnz, onz); 4591 for (i = 0; i < m; i++) { 4592 PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, NULL)); 4593 PetscCall(MatPreallocateSet(i + rstart, nnz, indx, dnz, onz)); 4594 PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, NULL)); 4595 } 4596 4597 PetscCall(MatCreate(comm, outmat)); 4598 PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 4599 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 4600 PetscCall(MatSetBlockSizes(*outmat, bs, cbs)); 4601 PetscCall(MatGetRootType_Private(inmat, &rootType)); 4602 PetscCall(MatSetType(*outmat, rootType)); 4603 PetscCall(MatSeqAIJSetPreallocation(*outmat, 0, dnz)); 4604 PetscCall(MatMPIAIJSetPreallocation(*outmat, 0, dnz, 0, onz)); 4605 MatPreallocateEnd(dnz, onz); 4606 PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 4607 } 4608 4609 /* numeric phase */ 4610 PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL)); 4611 for (i = 0; i < m; i++) { 4612 PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, &values)); 4613 Ii = i + rstart; 4614 PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES)); 4615 PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, &values)); 4616 } 4617 PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY)); 4618 PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY)); 4619 PetscFunctionReturn(PETSC_SUCCESS); 4620 } 4621 4622 static PetscErrorCode MatMergeSeqsToMPIDestroy(PetscCtxRt data) 4623 { 4624 MatMergeSeqsToMPI *merge = *(MatMergeSeqsToMPI **)data; 4625 4626 PetscFunctionBegin; 4627 if (!merge) PetscFunctionReturn(PETSC_SUCCESS); 4628 PetscCall(PetscFree(merge->id_r)); 4629 PetscCall(PetscFree(merge->len_s)); 4630 PetscCall(PetscFree(merge->len_r)); 4631 PetscCall(PetscFree(merge->bi)); 4632 PetscCall(PetscFree(merge->bj)); 4633 PetscCall(PetscFree(merge->buf_ri[0])); 4634 PetscCall(PetscFree(merge->buf_ri)); 4635 PetscCall(PetscFree(merge->buf_rj[0])); 4636 PetscCall(PetscFree(merge->buf_rj)); 4637 
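  /* coi, coj and owners_co are NULL unless another routine has filled them in; MatCreateMPIAIJSumSeqAIJSymbolic() initializes them to NULL */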
PetscCall(PetscFree(merge->coi)); 4638 PetscCall(PetscFree(merge->coj)); 4639 PetscCall(PetscFree(merge->owners_co)); 4640 PetscCall(PetscLayoutDestroy(&merge->rowmap)); 4641 PetscCall(PetscFree(merge)); 4642 PetscFunctionReturn(PETSC_SUCCESS); 4643 } 4644 4645 #include <../src/mat/utils/freespace.h> 4646 #include <petscbt.h> 4647 4648 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat, Mat mpimat) 4649 { 4650 MPI_Comm comm; 4651 Mat_SeqAIJ *a = (Mat_SeqAIJ *)seqmat->data; 4652 PetscMPIInt size, rank, taga, *len_s; 4653 PetscInt N = mpimat->cmap->N, i, j, *owners, *ai = a->i, *aj, m; 4654 PetscMPIInt proc, k; 4655 PetscInt **buf_ri, **buf_rj; 4656 PetscInt anzi, *bj_i, *bi, *bj, arow, bnzi, nextaj; 4657 PetscInt nrows, **buf_ri_k, **nextrow, **nextai; 4658 MPI_Request *s_waits, *r_waits; 4659 MPI_Status *status; 4660 const MatScalar *aa, *a_a; 4661 MatScalar **abuf_r, *ba_i; 4662 MatMergeSeqsToMPI *merge; 4663 PetscContainer container; 4664 4665 PetscFunctionBegin; 4666 PetscCall(PetscObjectGetComm((PetscObject)mpimat, &comm)); 4667 PetscCall(PetscLogEventBegin(MAT_Seqstompinum, seqmat, 0, 0, 0)); 4668 4669 PetscCallMPI(MPI_Comm_size(comm, &size)); 4670 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 4671 4672 PetscCall(PetscObjectQuery((PetscObject)mpimat, "MatMergeSeqsToMPI", (PetscObject *)&container)); 4673 PetscCheck(container, PetscObjectComm((PetscObject)mpimat), PETSC_ERR_PLIB, "Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic"); 4674 PetscCall(PetscContainerGetPointer(container, &merge)); 4675 PetscCall(MatSeqAIJGetArrayRead(seqmat, &a_a)); 4676 aa = a_a; 4677 4678 bi = merge->bi; 4679 bj = merge->bj; 4680 buf_ri = merge->buf_ri; 4681 buf_rj = merge->buf_rj; 4682 4683 PetscCall(PetscMalloc1(size, &status)); 4684 owners = merge->rowmap->range; 4685 len_s = merge->len_s; 4686 4687 /* send and recv matrix values */ 4688 PetscCall(PetscObjectGetNewTag((PetscObject)mpimat, &taga)); 4689 PetscCall(PetscPostIrecvScalar(comm, taga, merge->nrecv, merge->id_r, merge->len_r, &abuf_r, &r_waits)); 4690 4691 PetscCall(PetscMalloc1(merge->nsend + 1, &s_waits)); 4692 for (proc = 0, k = 0; proc < size; proc++) { 4693 if (!len_s[proc]) continue; 4694 i = owners[proc]; 4695 PetscCallMPI(MPIU_Isend(aa + ai[i], len_s[proc], MPIU_MATSCALAR, proc, taga, comm, s_waits + k)); 4696 k++; 4697 } 4698 4699 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, r_waits, status)); 4700 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, s_waits, status)); 4701 PetscCall(PetscFree(status)); 4702 4703 PetscCall(PetscFree(s_waits)); 4704 PetscCall(PetscFree(r_waits)); 4705 4706 /* insert mat values of mpimat */ 4707 PetscCall(PetscMalloc1(N, &ba_i)); 4708 PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai)); 4709 4710 for (k = 0; k < merge->nrecv; k++) { 4711 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4712 nrows = *buf_ri_k[k]; 4713 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4714 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4715 } 4716 4717 /* set values of ba */ 4718 m = merge->rowmap->n; 4719 for (i = 0; i < m; i++) { 4720 arow = owners[rank] + i; 4721 bj_i = bj + bi[i]; /* col indices of the i-th row of mpimat */ 4722 bnzi = bi[i + 1] - bi[i]; 4723 PetscCall(PetscArrayzero(ba_i, bnzi)); 4724 4725 /* add local non-zero vals of this proc's seqmat into ba */ 4726 anzi = ai[arow + 1] - ai[arow]; 4727 aj = a->j + ai[arow]; 4728 aa = a_a + 
ai[arow]; 4729 nextaj = 0; 4730 for (j = 0; nextaj < anzi; j++) { 4731 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4732 ba_i[j] += aa[nextaj++]; 4733 } 4734 } 4735 4736 /* add received vals into ba */ 4737 for (k = 0; k < merge->nrecv; k++) { /* k-th received message */ 4738 /* i-th row */ 4739 if (i == *nextrow[k]) { 4740 anzi = *(nextai[k] + 1) - *nextai[k]; 4741 aj = buf_rj[k] + *nextai[k]; 4742 aa = abuf_r[k] + *nextai[k]; 4743 nextaj = 0; 4744 for (j = 0; nextaj < anzi; j++) { 4745 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4746 ba_i[j] += aa[nextaj++]; 4747 } 4748 } 4749 nextrow[k]++; 4750 nextai[k]++; 4751 } 4752 } 4753 PetscCall(MatSetValues(mpimat, 1, &arow, bnzi, bj_i, ba_i, INSERT_VALUES)); 4754 } 4755 PetscCall(MatSeqAIJRestoreArrayRead(seqmat, &a_a)); 4756 PetscCall(MatAssemblyBegin(mpimat, MAT_FINAL_ASSEMBLY)); 4757 PetscCall(MatAssemblyEnd(mpimat, MAT_FINAL_ASSEMBLY)); 4758 4759 PetscCall(PetscFree(abuf_r[0])); 4760 PetscCall(PetscFree(abuf_r)); 4761 PetscCall(PetscFree(ba_i)); 4762 PetscCall(PetscFree3(buf_ri_k, nextrow, nextai)); 4763 PetscCall(PetscLogEventEnd(MAT_Seqstompinum, seqmat, 0, 0, 0)); 4764 PetscFunctionReturn(PETSC_SUCCESS); 4765 } 4766 4767 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, Mat *mpimat) 4768 { 4769 Mat B_mpi; 4770 Mat_SeqAIJ *a = (Mat_SeqAIJ *)seqmat->data; 4771 PetscMPIInt size, rank, tagi, tagj, *len_s, *len_si, *len_ri; 4772 PetscInt **buf_rj, **buf_ri, **buf_ri_k; 4773 PetscInt M = seqmat->rmap->n, N = seqmat->cmap->n, i, *owners, *ai = a->i, *aj = a->j; 4774 PetscInt len, *dnz, *onz, bs, cbs; 4775 PetscInt k, anzi, *bi, *bj, *lnk, nlnk, arow, bnzi; 4776 PetscInt nrows, *buf_s, *buf_si, *buf_si_i, **nextrow, **nextai; 4777 MPI_Request *si_waits, *sj_waits, *ri_waits, *rj_waits; 4778 MPI_Status *status; 4779 PetscFreeSpaceList free_space = NULL, current_space = NULL; 4780 PetscBT lnkbt; 4781 MatMergeSeqsToMPI *merge; 4782 PetscContainer container; 4783 4784 PetscFunctionBegin; 4785 PetscCall(PetscLogEventBegin(MAT_Seqstompisym, seqmat, 0, 0, 0)); 4786 4787 /* make sure it is a PETSc comm */ 4788 PetscCall(PetscCommDuplicate(comm, &comm, NULL)); 4789 PetscCallMPI(MPI_Comm_size(comm, &size)); 4790 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 4791 4792 PetscCall(PetscNew(&merge)); 4793 PetscCall(PetscMalloc1(size, &status)); 4794 4795 /* determine row ownership */ 4796 PetscCall(PetscLayoutCreate(comm, &merge->rowmap)); 4797 PetscCall(PetscLayoutSetLocalSize(merge->rowmap, m)); 4798 PetscCall(PetscLayoutSetSize(merge->rowmap, M)); 4799 PetscCall(PetscLayoutSetBlockSize(merge->rowmap, 1)); 4800 PetscCall(PetscLayoutSetUp(merge->rowmap)); 4801 PetscCall(PetscMalloc1(size, &len_si)); 4802 PetscCall(PetscMalloc1(size, &merge->len_s)); 4803 4804 m = merge->rowmap->n; 4805 owners = merge->rowmap->range; 4806 4807 /* determine the number of messages to send, their lengths */ 4808 len_s = merge->len_s; 4809 4810 len = 0; /* length of buf_si[] */ 4811 merge->nsend = 0; 4812 for (PetscMPIInt proc = 0; proc < size; proc++) { 4813 len_si[proc] = 0; 4814 if (proc == rank) { 4815 len_s[proc] = 0; 4816 } else { 4817 PetscCall(PetscMPIIntCast(owners[proc + 1] - owners[proc] + 1, &len_si[proc])); 4818 PetscCall(PetscMPIIntCast(ai[owners[proc + 1]] - ai[owners[proc]], &len_s[proc])); /* num of rows to be sent to [proc] */ 4819 } 4820 if (len_s[proc]) { 4821 merge->nsend++; 4822 nrows = 0; 4823 for (i = owners[proc]; i < owners[proc + 1]; i++) { 4824 if (ai[i + 1] > ai[i]) nrows++; 4825 } 4826 
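      /* each i-structure message carries a row count, nrows row indices, and nrows+1 row offsets, i.e. 2*(nrows+1) integers; see the buf_si packing loop below */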
PetscCall(PetscMPIIntCast(2 * (nrows + 1), &len_si[proc])); 4827 len += len_si[proc]; 4828 } 4829 } 4830 4831 /* determine the number and length of messages to receive for ij-structure */ 4832 PetscCall(PetscGatherNumberOfMessages(comm, NULL, len_s, &merge->nrecv)); 4833 PetscCall(PetscGatherMessageLengths2(comm, merge->nsend, merge->nrecv, len_s, len_si, &merge->id_r, &merge->len_r, &len_ri)); 4834 4835 /* post the Irecv of j-structure */ 4836 PetscCall(PetscCommGetNewTag(comm, &tagj)); 4837 PetscCall(PetscPostIrecvInt(comm, tagj, merge->nrecv, merge->id_r, merge->len_r, &buf_rj, &rj_waits)); 4838 4839 /* post the Isend of j-structure */ 4840 PetscCall(PetscMalloc2(merge->nsend, &si_waits, merge->nsend, &sj_waits)); 4841 4842 for (PetscMPIInt proc = 0, k = 0; proc < size; proc++) { 4843 if (!len_s[proc]) continue; 4844 i = owners[proc]; 4845 PetscCallMPI(MPIU_Isend(aj + ai[i], len_s[proc], MPIU_INT, proc, tagj, comm, sj_waits + k)); 4846 k++; 4847 } 4848 4849 /* receives and sends of j-structure are complete */ 4850 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, rj_waits, status)); 4851 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, sj_waits, status)); 4852 4853 /* send and recv i-structure */ 4854 PetscCall(PetscCommGetNewTag(comm, &tagi)); 4855 PetscCall(PetscPostIrecvInt(comm, tagi, merge->nrecv, merge->id_r, len_ri, &buf_ri, &ri_waits)); 4856 4857 PetscCall(PetscMalloc1(len + 1, &buf_s)); 4858 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4859 for (PetscMPIInt proc = 0, k = 0; proc < size; proc++) { 4860 if (!len_s[proc]) continue; 4861 /* form outgoing message for i-structure: 4862 buf_si[0]: nrows to be sent 4863 [1:nrows]: row index (global) 4864 [nrows+1:2*nrows+1]: i-structure index 4865 */ 4866 nrows = len_si[proc] / 2 - 1; 4867 buf_si_i = buf_si + nrows + 1; 4868 buf_si[0] = nrows; 4869 buf_si_i[0] = 0; 4870 nrows = 0; 4871 for (i = owners[proc]; i < owners[proc + 1]; i++) { 4872 anzi = ai[i + 1] - ai[i]; 4873 if (anzi) { 4874 buf_si_i[nrows + 1] = buf_si_i[nrows] + anzi; /* i-structure */ 4875 buf_si[nrows + 1] = i - owners[proc]; /* local row index */ 4876 nrows++; 4877 } 4878 } 4879 PetscCallMPI(MPIU_Isend(buf_si, len_si[proc], MPIU_INT, proc, tagi, comm, si_waits + k)); 4880 k++; 4881 buf_si += len_si[proc]; 4882 } 4883 4884 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, ri_waits, status)); 4885 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, si_waits, status)); 4886 4887 PetscCall(PetscInfo(seqmat, "nsend: %d, nrecv: %d\n", merge->nsend, merge->nrecv)); 4888 for (i = 0; i < merge->nrecv; i++) PetscCall(PetscInfo(seqmat, "recv len_ri=%d, len_rj=%d from [%d]\n", len_ri[i], merge->len_r[i], merge->id_r[i])); 4889 4890 PetscCall(PetscFree(len_si)); 4891 PetscCall(PetscFree(len_ri)); 4892 PetscCall(PetscFree(rj_waits)); 4893 PetscCall(PetscFree2(si_waits, sj_waits)); 4894 PetscCall(PetscFree(ri_waits)); 4895 PetscCall(PetscFree(buf_s)); 4896 PetscCall(PetscFree(status)); 4897 4898 /* compute a local seq matrix in each processor */ 4899 /* allocate bi array and free space for accumulating nonzero column info */ 4900 PetscCall(PetscMalloc1(m + 1, &bi)); 4901 bi[0] = 0; 4902 4903 /* create and initialize a linked list */ 4904 nlnk = N + 1; 4905 PetscCall(PetscLLCreate(N, N, nlnk, lnk, lnkbt)); 4906 4907 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4908 len = ai[owners[rank + 1]] - ai[owners[rank]]; 4909 PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2, len) + 1, &free_space)); 4910 4911 current_space = 
free_space; 4912 4913 /* determine symbolic info for each local row */ 4914 PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai)); 4915 4916 for (k = 0; k < merge->nrecv; k++) { 4917 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4918 nrows = *buf_ri_k[k]; 4919 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4920 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4921 } 4922 4923 MatPreallocateBegin(comm, m, n, dnz, onz); 4924 len = 0; 4925 for (i = 0; i < m; i++) { 4926 bnzi = 0; 4927 /* add local non-zero cols of this proc's seqmat into lnk */ 4928 arow = owners[rank] + i; 4929 anzi = ai[arow + 1] - ai[arow]; 4930 aj = a->j + ai[arow]; 4931 PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt)); 4932 bnzi += nlnk; 4933 /* add received col data into lnk */ 4934 for (k = 0; k < merge->nrecv; k++) { /* k-th received message */ 4935 if (i == *nextrow[k]) { /* i-th row */ 4936 anzi = *(nextai[k] + 1) - *nextai[k]; 4937 aj = buf_rj[k] + *nextai[k]; 4938 PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt)); 4939 bnzi += nlnk; 4940 nextrow[k]++; 4941 nextai[k]++; 4942 } 4943 } 4944 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4945 4946 /* if free space is not available, make more free space */ 4947 if (current_space->local_remaining < bnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi, current_space->total_array_size), &current_space)); 4948 /* copy data into free space, then initialize lnk */ 4949 PetscCall(PetscLLClean(N, N, bnzi, lnk, current_space->array, lnkbt)); 4950 PetscCall(MatPreallocateSet(i + owners[rank], bnzi, current_space->array, dnz, onz)); 4951 4952 current_space->array += bnzi; 4953 current_space->local_used += bnzi; 4954 current_space->local_remaining -= bnzi; 4955 4956 bi[i + 1] = bi[i] + bnzi; 4957 } 4958 4959 PetscCall(PetscFree3(buf_ri_k, nextrow, nextai)); 4960 4961 PetscCall(PetscMalloc1(bi[m], &bj)); 4962 PetscCall(PetscFreeSpaceContiguous(&free_space, bj)); 4963 PetscCall(PetscLLDestroy(lnk, lnkbt)); 4964 4965 /* create symbolic parallel matrix B_mpi */ 4966 PetscCall(MatGetBlockSizes(seqmat, &bs, &cbs)); 4967 PetscCall(MatCreate(comm, &B_mpi)); 4968 if (n == PETSC_DECIDE) { 4969 PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, N)); 4970 } else { 4971 PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 4972 } 4973 PetscCall(MatSetBlockSizes(B_mpi, bs, cbs)); 4974 PetscCall(MatSetType(B_mpi, MATMPIAIJ)); 4975 PetscCall(MatMPIAIJSetPreallocation(B_mpi, 0, dnz, 0, onz)); 4976 MatPreallocateEnd(dnz, onz); 4977 PetscCall(MatSetOption(B_mpi, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE)); 4978 4979 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4980 B_mpi->assembled = PETSC_FALSE; 4981 merge->bi = bi; 4982 merge->bj = bj; 4983 merge->buf_ri = buf_ri; 4984 merge->buf_rj = buf_rj; 4985 merge->coi = NULL; 4986 merge->coj = NULL; 4987 merge->owners_co = NULL; 4988 4989 PetscCall(PetscCommDestroy(&comm)); 4990 4991 /* attach the supporting struct to B_mpi for reuse */ 4992 PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container)); 4993 PetscCall(PetscContainerSetPointer(container, merge)); 4994 PetscCall(PetscContainerSetCtxDestroy(container, MatMergeSeqsToMPIDestroy)); 4995 PetscCall(PetscObjectCompose((PetscObject)B_mpi, "MatMergeSeqsToMPI", (PetscObject)container)); 4996 PetscCall(PetscContainerDestroy(&container)); 4997 *mpimat = B_mpi; 4998 4999 
PetscCall(PetscLogEventEnd(MAT_Seqstompisym, seqmat, 0, 0, 0)); 5000 PetscFunctionReturn(PETSC_SUCCESS); 5001 } 5002 5003 /*@ 5004 MatCreateMPIAIJSumSeqAIJ - Creates a `MATMPIAIJ` matrix by adding sequential 5005 matrices from each processor 5006 5007 Collective 5008 5009 Input Parameters: 5010 + comm - the communicators the parallel matrix will live on 5011 . seqmat - the input sequential matrices 5012 . m - number of local rows (or `PETSC_DECIDE`) 5013 . n - number of local columns (or `PETSC_DECIDE`) 5014 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5015 5016 Output Parameter: 5017 . mpimat - the parallel matrix generated 5018 5019 Level: advanced 5020 5021 Note: 5022 The dimensions of the sequential matrix in each processor MUST be the same. 5023 The input seqmat is included into the container `MatMergeSeqsToMPIDestroy`, and will be 5024 destroyed when `mpimat` is destroyed. Call `PetscObjectQuery()` to access `seqmat`. 5025 5026 .seealso: [](ch_matrices), `Mat`, `MatCreateAIJ()` 5027 @*/ 5028 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, MatReuse scall, Mat *mpimat) 5029 { 5030 PetscMPIInt size; 5031 5032 PetscFunctionBegin; 5033 PetscCallMPI(MPI_Comm_size(comm, &size)); 5034 if (size == 1) { 5035 PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0)); 5036 if (scall == MAT_INITIAL_MATRIX) { 5037 PetscCall(MatDuplicate(seqmat, MAT_COPY_VALUES, mpimat)); 5038 } else { 5039 PetscCall(MatCopy(seqmat, *mpimat, SAME_NONZERO_PATTERN)); 5040 } 5041 PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0)); 5042 PetscFunctionReturn(PETSC_SUCCESS); 5043 } 5044 PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0)); 5045 if (scall == MAT_INITIAL_MATRIX) PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm, seqmat, m, n, mpimat)); 5046 PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat, *mpimat)); 5047 PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0)); 5048 PetscFunctionReturn(PETSC_SUCCESS); 5049 } 5050 5051 /*@ 5052 MatAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATAIJ` matrix. 5053 5054 Not Collective 5055 5056 Input Parameter: 5057 . A - the matrix 5058 5059 Output Parameter: 5060 . A_loc - the local sequential matrix generated 5061 5062 Level: developer 5063 5064 Notes: 5065 The matrix is created by taking `A`'s local rows and putting them into a sequential matrix 5066 with `mlocal` rows and `n` columns. Where `mlocal` is obtained with `MatGetLocalSize()` and 5067 `n` is the global column count obtained with `MatGetSize()` 5068 5069 In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix. 5070 5071 For parallel matrices this creates an entirely new matrix. If the matrix is sequential it merely increases the reference count. 5072 5073 Destroy the matrix with `MatDestroy()` 5074 5075 .seealso: [](ch_matrices), `Mat`, `MatMPIAIJGetLocalMat()` 5076 @*/ 5077 PetscErrorCode MatAIJGetLocalMat(Mat A, Mat *A_loc) 5078 { 5079 PetscBool mpi; 5080 5081 PetscFunctionBegin; 5082 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &mpi)); 5083 if (mpi) { 5084 PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, A_loc)); 5085 } else { 5086 *A_loc = A; 5087 PetscCall(PetscObjectReference((PetscObject)*A_loc)); 5088 } 5089 PetscFunctionReturn(PETSC_SUCCESS); 5090 } 5091 5092 /*@ 5093 MatMPIAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix. 
5094 5095 Not Collective 5096 5097 Input Parameters: 5098 + A - the matrix 5099 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5100 5101 Output Parameter: 5102 . A_loc - the local sequential matrix generated 5103 5104 Level: developer 5105 5106 Notes: 5107 The matrix is created by taking all `A`'s local rows and putting them into a sequential 5108 matrix with `mlocal` rows and `n` columns.`mlocal` is the row count obtained with 5109 `MatGetLocalSize()` and `n` is the global column count obtained with `MatGetSize()`. 5110 5111 In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix. 5112 5113 When `A` is sequential and `MAT_INITIAL_MATRIX` is requested, the matrix returned is the diagonal part of `A` (which contains the entire matrix), 5114 with its reference count increased by one. Hence changing values of `A_loc` changes `A`. If `MAT_REUSE_MATRIX` is requested on a sequential matrix 5115 then `MatCopy`(Adiag,*`A_loc`,`SAME_NONZERO_PATTERN`) is called to fill `A_loc`. Thus one can preallocate the appropriate sequential matrix `A_loc` 5116 and then call this routine with `MAT_REUSE_MATRIX`. In this case, one can modify the values of `A_loc` without affecting the original sequential matrix. 5117 5118 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()` 5119 @*/ 5120 PetscErrorCode MatMPIAIJGetLocalMat(Mat A, MatReuse scall, Mat *A_loc) 5121 { 5122 Mat_MPIAIJ *mpimat = (Mat_MPIAIJ *)A->data; 5123 Mat_SeqAIJ *mat, *a, *b; 5124 PetscInt *ai, *aj, *bi, *bj, *cmap = mpimat->garray; 5125 const PetscScalar *aa, *ba, *aav, *bav; 5126 PetscScalar *ca, *cam; 5127 PetscMPIInt size; 5128 PetscInt am = A->rmap->n, i, j, k, cstart = A->cmap->rstart; 5129 PetscInt *ci, *cj, col, ncols_d, ncols_o, jo; 5130 PetscBool match; 5131 5132 PetscFunctionBegin; 5133 PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &match)); 5134 PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input"); 5135 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 5136 if (size == 1) { 5137 if (scall == MAT_INITIAL_MATRIX) { 5138 PetscCall(PetscObjectReference((PetscObject)mpimat->A)); 5139 *A_loc = mpimat->A; 5140 } else if (scall == MAT_REUSE_MATRIX) { 5141 PetscCall(MatCopy(mpimat->A, *A_loc, SAME_NONZERO_PATTERN)); 5142 } 5143 PetscFunctionReturn(PETSC_SUCCESS); 5144 } 5145 5146 PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0)); 5147 a = (Mat_SeqAIJ *)mpimat->A->data; 5148 b = (Mat_SeqAIJ *)mpimat->B->data; 5149 ai = a->i; 5150 aj = a->j; 5151 bi = b->i; 5152 bj = b->j; 5153 PetscCall(MatSeqAIJGetArrayRead(mpimat->A, &aav)); 5154 PetscCall(MatSeqAIJGetArrayRead(mpimat->B, &bav)); 5155 aa = aav; 5156 ba = bav; 5157 if (scall == MAT_INITIAL_MATRIX) { 5158 PetscCall(PetscMalloc1(1 + am, &ci)); 5159 ci[0] = 0; 5160 for (i = 0; i < am; i++) ci[i + 1] = ci[i] + (ai[i + 1] - ai[i]) + (bi[i + 1] - bi[i]); 5161 PetscCall(PetscMalloc1(1 + ci[am], &cj)); 5162 PetscCall(PetscMalloc1(1 + ci[am], &ca)); 5163 k = 0; 5164 for (i = 0; i < am; i++) { 5165 ncols_o = bi[i + 1] - bi[i]; 5166 ncols_d = ai[i + 1] - ai[i]; 5167 /* off-diagonal portion of A */ 5168 for (jo = 0; jo < ncols_o; jo++) { 5169 col = cmap[*bj]; 5170 if (col >= cstart) break; 5171 cj[k] = col; 5172 bj++; 5173 ca[k++] = *ba++; 5174 } 5175 /* diagonal portion of A */ 5176 for (j = 0; j < ncols_d; j++) { 5177 cj[k] = cstart + *aj++; 5178 ca[k++] = 
*aa++; 5179 } 5180 /* off-diagonal portion of A */ 5181 for (j = jo; j < ncols_o; j++) { 5182 cj[k] = cmap[*bj++]; 5183 ca[k++] = *ba++; 5184 } 5185 } 5186 /* put together the new matrix */ 5187 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, A->cmap->N, ci, cj, ca, A_loc)); 5188 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5189 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5190 mat = (Mat_SeqAIJ *)(*A_loc)->data; 5191 mat->free_a = PETSC_TRUE; 5192 mat->free_ij = PETSC_TRUE; 5193 mat->nonew = 0; 5194 } else if (scall == MAT_REUSE_MATRIX) { 5195 mat = (Mat_SeqAIJ *)(*A_loc)->data; 5196 ci = mat->i; 5197 cj = mat->j; 5198 PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &cam)); 5199 for (i = 0; i < am; i++) { 5200 /* off-diagonal portion of A */ 5201 ncols_o = bi[i + 1] - bi[i]; 5202 for (jo = 0; jo < ncols_o; jo++) { 5203 col = cmap[*bj]; 5204 if (col >= cstart) break; 5205 *cam++ = *ba++; 5206 bj++; 5207 } 5208 /* diagonal portion of A */ 5209 ncols_d = ai[i + 1] - ai[i]; 5210 for (j = 0; j < ncols_d; j++) *cam++ = *aa++; 5211 /* off-diagonal portion of A */ 5212 for (j = jo; j < ncols_o; j++) { 5213 *cam++ = *ba++; 5214 bj++; 5215 } 5216 } 5217 PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &cam)); 5218 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall); 5219 PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A, &aav)); 5220 PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B, &bav)); 5221 PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0)); 5222 PetscFunctionReturn(PETSC_SUCCESS); 5223 } 5224 5225 /*@ 5226 MatMPIAIJGetLocalMatMerge - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into a sequential matrix with 5227 mlocal rows and n columns. 
Where n is the sum of the number of columns of the diagonal and off-diagonal part 5228 5229 Not Collective 5230 5231 Input Parameters: 5232 + A - the matrix 5233 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5234 5235 Output Parameters: 5236 + glob - sequential `IS` with global indices associated with the columns of the local sequential matrix generated (can be `NULL`) 5237 - A_loc - the local sequential matrix generated 5238 5239 Level: developer 5240 5241 Note: 5242 This is different from `MatMPIAIJGetLocalMat()` since the first columns in the returning matrix are those associated with the diagonal 5243 part, then those associated with the off-diagonal part (in its local ordering) 5244 5245 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()` 5246 @*/ 5247 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A, MatReuse scall, IS *glob, Mat *A_loc) 5248 { 5249 Mat Ao, Ad; 5250 const PetscInt *cmap; 5251 PetscMPIInt size; 5252 PetscErrorCode (*f)(Mat, MatReuse, IS *, Mat *); 5253 5254 PetscFunctionBegin; 5255 PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &cmap)); 5256 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 5257 if (size == 1) { 5258 if (scall == MAT_INITIAL_MATRIX) { 5259 PetscCall(PetscObjectReference((PetscObject)Ad)); 5260 *A_loc = Ad; 5261 } else if (scall == MAT_REUSE_MATRIX) { 5262 PetscCall(MatCopy(Ad, *A_loc, SAME_NONZERO_PATTERN)); 5263 } 5264 if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad), Ad->cmap->n, Ad->cmap->rstart, 1, glob)); 5265 PetscFunctionReturn(PETSC_SUCCESS); 5266 } 5267 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJGetLocalMatMerge_C", &f)); 5268 PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0)); 5269 if (f) { 5270 PetscCall((*f)(A, scall, glob, A_loc)); 5271 } else { 5272 Mat_SeqAIJ *a = (Mat_SeqAIJ *)Ad->data; 5273 Mat_SeqAIJ *b = (Mat_SeqAIJ *)Ao->data; 5274 Mat_SeqAIJ *c; 5275 PetscInt *ai = a->i, *aj = a->j; 5276 PetscInt *bi = b->i, *bj = b->j; 5277 PetscInt *ci, *cj; 5278 const PetscScalar *aa, *ba; 5279 PetscScalar *ca; 5280 PetscInt i, j, am, dn, on; 5281 5282 PetscCall(MatGetLocalSize(Ad, &am, &dn)); 5283 PetscCall(MatGetLocalSize(Ao, NULL, &on)); 5284 PetscCall(MatSeqAIJGetArrayRead(Ad, &aa)); 5285 PetscCall(MatSeqAIJGetArrayRead(Ao, &ba)); 5286 if (scall == MAT_INITIAL_MATRIX) { 5287 PetscInt k; 5288 PetscCall(PetscMalloc1(1 + am, &ci)); 5289 PetscCall(PetscMalloc1(ai[am] + bi[am], &cj)); 5290 PetscCall(PetscMalloc1(ai[am] + bi[am], &ca)); 5291 ci[0] = 0; 5292 for (i = 0, k = 0; i < am; i++) { 5293 const PetscInt ncols_o = bi[i + 1] - bi[i]; 5294 const PetscInt ncols_d = ai[i + 1] - ai[i]; 5295 ci[i + 1] = ci[i] + ncols_o + ncols_d; 5296 /* diagonal portion of A */ 5297 for (j = 0; j < ncols_d; j++, k++) { 5298 cj[k] = *aj++; 5299 ca[k] = *aa++; 5300 } 5301 /* off-diagonal portion of A */ 5302 for (j = 0; j < ncols_o; j++, k++) { 5303 cj[k] = dn + *bj++; 5304 ca[k] = *ba++; 5305 } 5306 } 5307 /* put together the new matrix */ 5308 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, dn + on, ci, cj, ca, A_loc)); 5309 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5310 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5311 c = (Mat_SeqAIJ *)(*A_loc)->data; 5312 c->free_a = PETSC_TRUE; 5313 c->free_ij = PETSC_TRUE; 5314 c->nonew = 0; 5315 PetscCall(MatSetType(*A_loc, ((PetscObject)Ad)->type_name)); 5316 } else if (scall == MAT_REUSE_MATRIX) { 5317 PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &ca)); 5318 for (i = 0; i < am; i++) { 5319 const PetscInt ncols_d = ai[i + 1] - ai[i]; 5320 const PetscInt ncols_o = bi[i + 1] - bi[i]; 5321 /* diagonal portion of A */ 5322 for (j = 0; j < ncols_d; j++) *ca++ = *aa++; 5323 /* off-diagonal portion of A */ 5324 for (j = 0; j < ncols_o; j++) *ca++ = *ba++; 5325 } 5326 PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &ca)); 5327 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall); 5328 PetscCall(MatSeqAIJRestoreArrayRead(Ad, &aa)); 5329 PetscCall(MatSeqAIJRestoreArrayRead(Ao, &aa)); 5330 if (glob) { 5331 PetscInt cst, *gidx; 5332 5333 PetscCall(MatGetOwnershipRangeColumn(A, &cst, NULL)); 5334 PetscCall(PetscMalloc1(dn + on, &gidx)); 5335 for (i = 0; i < dn; i++) gidx[i] = cst + i; 5336 for (i = 0; i < on; i++) gidx[i + dn] = cmap[i]; 5337 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad), dn + on, gidx, PETSC_OWN_POINTER, glob)); 5338 } 5339 } 5340 PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0)); 5341 PetscFunctionReturn(PETSC_SUCCESS); 5342 } 5343 5344 /*@C 5345 MatMPIAIJGetLocalMatCondensed - Creates a `MATSEQAIJ` matrix from an `MATMPIAIJ` matrix by taking all its local rows and NON-ZERO columns 5346 5347 Not Collective 5348 5349 Input Parameters: 5350 + A - the matrix 5351 . scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5352 . row - index set of rows to extract (or `NULL`) 5353 - col - index set of columns to extract (or `NULL`) 5354 5355 Output Parameter: 5356 . A_loc - the local sequential matrix generated 5357 5358 Level: developer 5359 5360 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()` 5361 @*/ 5362 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A, MatReuse scall, IS *row, IS *col, Mat *A_loc) 5363 { 5364 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5365 PetscInt i, start, end, ncols, nzA, nzB, *cmap, imark, *idx; 5366 IS isrowa, iscola; 5367 Mat *aloc; 5368 PetscBool match; 5369 5370 PetscFunctionBegin; 5371 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &match)); 5372 PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input"); 5373 PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed, A, 0, 0, 0)); 5374 if (!row) { 5375 start = A->rmap->rstart; 5376 end = A->rmap->rend; 5377 PetscCall(ISCreateStride(PETSC_COMM_SELF, end - start, start, 1, &isrowa)); 5378 } else { 5379 isrowa = *row; 5380 } 5381 if (!col) { 5382 start = A->cmap->rstart; 5383 cmap = a->garray; 5384 nzA = a->A->cmap->n; 5385 nzB = a->B->cmap->n; 5386 PetscCall(PetscMalloc1(nzA + nzB, &idx)); 5387 ncols = 0; 5388 for (i = 0; i < nzB; i++) { 5389 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5390 else break; 5391 } 5392 imark = i; 5393 for (i = 0; i < nzA; i++) idx[ncols++] = start + i; 5394 for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; 5395 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &iscola)); 5396 } else { 5397 iscola = *col; 5398 } 5399 if (scall != MAT_INITIAL_MATRIX) { 5400 PetscCall(PetscMalloc1(1, &aloc)); 5401 aloc[0] = *A_loc; 5402 } 5403 PetscCall(MatCreateSubMatrices(A, 1, &isrowa, &iscola, scall, &aloc)); 5404 if (!col) { /* attach global id of condensed columns */ 5405 
PetscCall(PetscObjectCompose((PetscObject)aloc[0], "_petsc_GetLocalMatCondensed_iscol", (PetscObject)iscola)); 5406 } 5407 *A_loc = aloc[0]; 5408 PetscCall(PetscFree(aloc)); 5409 if (!row) PetscCall(ISDestroy(&isrowa)); 5410 if (!col) PetscCall(ISDestroy(&iscola)); 5411 PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed, A, 0, 0, 0)); 5412 PetscFunctionReturn(PETSC_SUCCESS); 5413 } 5414 5415 /* 5416 * Create a sequential AIJ matrix based on row indices. a whole column is extracted once a row is matched. 5417 * Row could be local or remote.The routine is designed to be scalable in memory so that nothing is based 5418 * on a global size. 5419 * */ 5420 static PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P, IS rows, Mat *P_oth) 5421 { 5422 Mat_MPIAIJ *p = (Mat_MPIAIJ *)P->data; 5423 Mat_SeqAIJ *pd = (Mat_SeqAIJ *)p->A->data, *po = (Mat_SeqAIJ *)p->B->data, *p_oth; 5424 PetscInt plocalsize, nrows, *ilocal, *oilocal, i, lidx, *nrcols, *nlcols, ncol; 5425 PetscMPIInt owner; 5426 PetscSFNode *iremote, *oiremote; 5427 const PetscInt *lrowindices; 5428 PetscSF sf, osf; 5429 PetscInt pcstart, *roffsets, *loffsets, *pnnz, j; 5430 PetscInt ontotalcols, dntotalcols, ntotalcols, nout; 5431 MPI_Comm comm; 5432 ISLocalToGlobalMapping mapping; 5433 const PetscScalar *pd_a, *po_a; 5434 5435 PetscFunctionBegin; 5436 PetscCall(PetscObjectGetComm((PetscObject)P, &comm)); 5437 /* plocalsize is the number of roots 5438 * nrows is the number of leaves 5439 * */ 5440 PetscCall(MatGetLocalSize(P, &plocalsize, NULL)); 5441 PetscCall(ISGetLocalSize(rows, &nrows)); 5442 PetscCall(PetscCalloc1(nrows, &iremote)); 5443 PetscCall(ISGetIndices(rows, &lrowindices)); 5444 for (i = 0; i < nrows; i++) { 5445 /* Find a remote index and an owner for a row 5446 * The row could be local or remote 5447 * */ 5448 owner = 0; 5449 lidx = 0; 5450 PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, &lidx)); 5451 iremote[i].index = lidx; 5452 iremote[i].rank = owner; 5453 } 5454 /* Create SF to communicate how many nonzero columns for each row */ 5455 PetscCall(PetscSFCreate(comm, &sf)); 5456 /* SF will figure out the number of nonzero columns for each row, and their 5457 * offsets 5458 * */ 5459 PetscCall(PetscSFSetGraph(sf, plocalsize, nrows, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 5460 PetscCall(PetscSFSetFromOptions(sf)); 5461 PetscCall(PetscSFSetUp(sf)); 5462 5463 PetscCall(PetscCalloc1(2 * (plocalsize + 1), &roffsets)); 5464 PetscCall(PetscCalloc1(2 * plocalsize, &nrcols)); 5465 PetscCall(PetscCalloc1(nrows, &pnnz)); 5466 roffsets[0] = 0; 5467 roffsets[1] = 0; 5468 for (i = 0; i < plocalsize; i++) { 5469 /* diagonal */ 5470 nrcols[i * 2 + 0] = pd->i[i + 1] - pd->i[i]; 5471 /* off-diagonal */ 5472 nrcols[i * 2 + 1] = po->i[i + 1] - po->i[i]; 5473 /* compute offsets so that we relative location for each row */ 5474 roffsets[(i + 1) * 2 + 0] = roffsets[i * 2 + 0] + nrcols[i * 2 + 0]; 5475 roffsets[(i + 1) * 2 + 1] = roffsets[i * 2 + 1] + nrcols[i * 2 + 1]; 5476 } 5477 PetscCall(PetscCalloc1(2 * nrows, &nlcols)); 5478 PetscCall(PetscCalloc1(2 * nrows, &loffsets)); 5479 /* 'r' means root, and 'l' means leaf */ 5480 PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE)); 5481 PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE)); 5482 PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE)); 5483 PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE)); 5484 PetscCall(PetscSFDestroy(&sf)); 5485 
PetscCall(PetscFree(roffsets)); 5486 PetscCall(PetscFree(nrcols)); 5487 dntotalcols = 0; 5488 ontotalcols = 0; 5489 ncol = 0; 5490 for (i = 0; i < nrows; i++) { 5491 pnnz[i] = nlcols[i * 2 + 0] + nlcols[i * 2 + 1]; 5492 ncol = PetscMax(pnnz[i], ncol); 5493 /* diagonal */ 5494 dntotalcols += nlcols[i * 2 + 0]; 5495 /* off-diagonal */ 5496 ontotalcols += nlcols[i * 2 + 1]; 5497 } 5498 /* We do not need to figure the right number of columns 5499 * since all the calculations will be done by going through the raw data 5500 * */ 5501 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, nrows, ncol, 0, pnnz, P_oth)); 5502 PetscCall(MatSetUp(*P_oth)); 5503 PetscCall(PetscFree(pnnz)); 5504 p_oth = (Mat_SeqAIJ *)(*P_oth)->data; 5505 /* diagonal */ 5506 PetscCall(PetscCalloc1(dntotalcols, &iremote)); 5507 /* off-diagonal */ 5508 PetscCall(PetscCalloc1(ontotalcols, &oiremote)); 5509 /* diagonal */ 5510 PetscCall(PetscCalloc1(dntotalcols, &ilocal)); 5511 /* off-diagonal */ 5512 PetscCall(PetscCalloc1(ontotalcols, &oilocal)); 5513 dntotalcols = 0; 5514 ontotalcols = 0; 5515 ntotalcols = 0; 5516 for (i = 0; i < nrows; i++) { 5517 owner = 0; 5518 PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, NULL)); 5519 /* Set iremote for diag matrix */ 5520 for (j = 0; j < nlcols[i * 2 + 0]; j++) { 5521 iremote[dntotalcols].index = loffsets[i * 2 + 0] + j; 5522 iremote[dntotalcols].rank = owner; 5523 /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */ 5524 ilocal[dntotalcols++] = ntotalcols++; 5525 } 5526 /* off-diagonal */ 5527 for (j = 0; j < nlcols[i * 2 + 1]; j++) { 5528 oiremote[ontotalcols].index = loffsets[i * 2 + 1] + j; 5529 oiremote[ontotalcols].rank = owner; 5530 oilocal[ontotalcols++] = ntotalcols++; 5531 } 5532 } 5533 PetscCall(ISRestoreIndices(rows, &lrowindices)); 5534 PetscCall(PetscFree(loffsets)); 5535 PetscCall(PetscFree(nlcols)); 5536 PetscCall(PetscSFCreate(comm, &sf)); 5537 /* P serves as roots and P_oth is leaves 5538 * Diag matrix 5539 * */ 5540 PetscCall(PetscSFSetGraph(sf, pd->i[plocalsize], dntotalcols, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 5541 PetscCall(PetscSFSetFromOptions(sf)); 5542 PetscCall(PetscSFSetUp(sf)); 5543 5544 PetscCall(PetscSFCreate(comm, &osf)); 5545 /* off-diagonal */ 5546 PetscCall(PetscSFSetGraph(osf, po->i[plocalsize], ontotalcols, oilocal, PETSC_OWN_POINTER, oiremote, PETSC_OWN_POINTER)); 5547 PetscCall(PetscSFSetFromOptions(osf)); 5548 PetscCall(PetscSFSetUp(osf)); 5549 PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a)); 5550 PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a)); 5551 /* operate on the matrix internal data to save memory */ 5552 PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5553 PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5554 PetscCall(MatGetOwnershipRangeColumn(P, &pcstart, NULL)); 5555 /* Convert to global indices for diag matrix */ 5556 for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] += pcstart; 5557 PetscCall(PetscSFBcastBegin(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE)); 5558 /* We want P_oth store global indices */ 5559 PetscCall(ISLocalToGlobalMappingCreate(comm, 1, p->B->cmap->n, p->garray, PETSC_COPY_VALUES, &mapping)); 5560 /* Use memory scalable approach */ 5561 PetscCall(ISLocalToGlobalMappingSetType(mapping, ISLOCALTOGLOBALMAPPINGHASH)); 5562 PetscCall(ISLocalToGlobalMappingApply(mapping, po->i[plocalsize], po->j, po->j)); 5563 PetscCall(PetscSFBcastBegin(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE)); 5564 
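  /* complete the broadcast of the diagonal column indices (temporarily shifted to global numbering above) before they are shifted back to local numbering below */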
PetscCall(PetscSFBcastEnd(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE)); 5565 /* Convert back to local indices */ 5566 for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] -= pcstart; 5567 PetscCall(PetscSFBcastEnd(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE)); 5568 nout = 0; 5569 PetscCall(ISGlobalToLocalMappingApply(mapping, IS_GTOLM_DROP, po->i[plocalsize], po->j, &nout, po->j)); 5570 PetscCheck(nout == po->i[plocalsize], comm, PETSC_ERR_ARG_INCOMP, "n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ", po->i[plocalsize], nout); 5571 PetscCall(ISLocalToGlobalMappingDestroy(&mapping)); 5572 /* Exchange values */ 5573 PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5574 PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5575 PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a)); 5576 PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a)); 5577 /* Stop PETSc from shrinking memory */ 5578 for (i = 0; i < nrows; i++) p_oth->ilen[i] = p_oth->imax[i]; 5579 PetscCall(MatAssemblyBegin(*P_oth, MAT_FINAL_ASSEMBLY)); 5580 PetscCall(MatAssemblyEnd(*P_oth, MAT_FINAL_ASSEMBLY)); 5581 /* Attach PetscSF objects to P_oth so that we can reuse it later */ 5582 PetscCall(PetscObjectCompose((PetscObject)*P_oth, "diagsf", (PetscObject)sf)); 5583 PetscCall(PetscObjectCompose((PetscObject)*P_oth, "offdiagsf", (PetscObject)osf)); 5584 PetscCall(PetscSFDestroy(&sf)); 5585 PetscCall(PetscSFDestroy(&osf)); 5586 PetscFunctionReturn(PETSC_SUCCESS); 5587 } 5588 5589 /* 5590 * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5591 * This supports MPIAIJ and MAIJ 5592 * */ 5593 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A, Mat P, PetscInt dof, MatReuse reuse, Mat *P_oth) 5594 { 5595 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *p = (Mat_MPIAIJ *)P->data; 5596 Mat_SeqAIJ *p_oth; 5597 IS rows, map; 5598 PetscHMapI hamp; 5599 PetscInt i, htsize, *rowindices, off, *mapping, key, count; 5600 MPI_Comm comm; 5601 PetscSF sf, osf; 5602 PetscBool has; 5603 5604 PetscFunctionBegin; 5605 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 5606 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, P, 0, 0)); 5607 /* If it is the first time, create an index set of off-diag nonzero columns of A, 5608 * and then create a submatrix (that often is an overlapping matrix) 5609 * */ 5610 if (reuse == MAT_INITIAL_MATRIX) { 5611 /* Use a hash table to figure out unique keys */ 5612 PetscCall(PetscHMapICreateWithSize(a->B->cmap->n, &hamp)); 5613 PetscCall(PetscCalloc1(a->B->cmap->n, &mapping)); 5614 count = 0; 5615 /* Assume that a->g is sorted, otherwise the following does not make sense */ 5616 for (i = 0; i < a->B->cmap->n; i++) { 5617 key = a->garray[i] / dof; 5618 PetscCall(PetscHMapIHas(hamp, key, &has)); 5619 if (!has) { 5620 mapping[i] = count; 5621 PetscCall(PetscHMapISet(hamp, key, count++)); 5622 } else { 5623 /* Current 'i' has the same value the previous step */ 5624 mapping[i] = count - 1; 5625 } 5626 } 5627 PetscCall(ISCreateGeneral(comm, a->B->cmap->n, mapping, PETSC_OWN_POINTER, &map)); 5628 PetscCall(PetscHMapIGetSize(hamp, &htsize)); 5629 PetscCheck(htsize == count, comm, PETSC_ERR_ARG_INCOMP, " Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT, htsize, count); 5630 PetscCall(PetscCalloc1(htsize, &rowindices)); 5631 off = 0; 5632 PetscCall(PetscHMapIGetKeys(hamp, &off, rowindices)); 5633 PetscCall(PetscHMapIDestroy(&hamp)); 5634 PetscCall(PetscSortInt(htsize, rowindices)); 5635 PetscCall(ISCreateGeneral(comm, htsize, 
rowindices, PETSC_OWN_POINTER, &rows)); 5636 /* In case, the matrix was already created but users want to recreate the matrix */ 5637 PetscCall(MatDestroy(P_oth)); 5638 PetscCall(MatCreateSeqSubMatrixWithRows_Private(P, rows, P_oth)); 5639 PetscCall(PetscObjectCompose((PetscObject)*P_oth, "aoffdiagtopothmapping", (PetscObject)map)); 5640 PetscCall(ISDestroy(&map)); 5641 PetscCall(ISDestroy(&rows)); 5642 } else if (reuse == MAT_REUSE_MATRIX) { 5643 /* If matrix was already created, we simply update values using SF objects 5644 * that as attached to the matrix earlier. 5645 */ 5646 const PetscScalar *pd_a, *po_a; 5647 5648 PetscCall(PetscObjectQuery((PetscObject)*P_oth, "diagsf", (PetscObject *)&sf)); 5649 PetscCall(PetscObjectQuery((PetscObject)*P_oth, "offdiagsf", (PetscObject *)&osf)); 5650 PetscCheck(sf && osf, comm, PETSC_ERR_ARG_NULL, "Matrix is not initialized yet"); 5651 p_oth = (Mat_SeqAIJ *)(*P_oth)->data; 5652 /* Update values in place */ 5653 PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a)); 5654 PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a)); 5655 PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5656 PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5657 PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5658 PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5659 PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a)); 5660 PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a)); 5661 } else SETERRQ(comm, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown reuse type"); 5662 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, P, 0, 0)); 5663 PetscFunctionReturn(PETSC_SUCCESS); 5664 } 5665 5666 /*@C 5667 MatGetBrowsOfAcols - Returns `IS` that contain rows of `B` that equal to nonzero columns of local `A` 5668 5669 Collective 5670 5671 Input Parameters: 5672 + A - the first matrix in `MATMPIAIJ` format 5673 . B - the second matrix in `MATMPIAIJ` format 5674 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5675 5676 Output Parameters: 5677 + rowb - On input index sets of rows of B to extract (or `NULL`), modified on output 5678 . 
colb - On input index sets of columns of B to extract (or `NULL`), modified on output 5679 - B_seq - the sequential matrix generated 5680 5681 Level: developer 5682 5683 .seealso: `Mat`, `MATMPIAIJ`, `IS`, `MatReuse` 5684 @*/ 5685 PetscErrorCode MatGetBrowsOfAcols(Mat A, Mat B, MatReuse scall, IS *rowb, IS *colb, Mat *B_seq) 5686 { 5687 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5688 PetscInt *idx, i, start, ncols, nzA, nzB, *cmap, imark; 5689 IS isrowb, iscolb; 5690 Mat *bseq = NULL; 5691 5692 PetscFunctionBegin; 5693 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 5694 A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 5695 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols, A, B, 0, 0)); 5696 5697 if (scall == MAT_INITIAL_MATRIX) { 5698 start = A->cmap->rstart; 5699 cmap = a->garray; 5700 nzA = a->A->cmap->n; 5701 nzB = a->B->cmap->n; 5702 PetscCall(PetscMalloc1(nzA + nzB, &idx)); 5703 ncols = 0; 5704 for (i = 0; i < nzB; i++) { /* row < local row index */ 5705 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5706 else break; 5707 } 5708 imark = i; 5709 for (i = 0; i < nzA; i++) idx[ncols++] = start + i; /* local rows */ 5710 for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5711 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &isrowb)); 5712 PetscCall(ISCreateStride(PETSC_COMM_SELF, B->cmap->N, 0, 1, &iscolb)); 5713 } else { 5714 PetscCheck(rowb && colb, PETSC_COMM_SELF, PETSC_ERR_SUP, "IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5715 isrowb = *rowb; 5716 iscolb = *colb; 5717 PetscCall(PetscMalloc1(1, &bseq)); 5718 bseq[0] = *B_seq; 5719 } 5720 PetscCall(MatCreateSubMatrices(B, 1, &isrowb, &iscolb, scall, &bseq)); 5721 *B_seq = bseq[0]; 5722 PetscCall(PetscFree(bseq)); 5723 if (!rowb) { 5724 PetscCall(ISDestroy(&isrowb)); 5725 } else { 5726 *rowb = isrowb; 5727 } 5728 if (!colb) { 5729 PetscCall(ISDestroy(&iscolb)); 5730 } else { 5731 *colb = iscolb; 5732 } 5733 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols, A, B, 0, 0)); 5734 PetscFunctionReturn(PETSC_SUCCESS); 5735 } 5736 5737 /* 5738 MatGetBrowsOfAoCols_MPIAIJ - Creates a `MATSEQAIJ` matrix by taking rows of B that equal to nonzero columns 5739 of the OFF-DIAGONAL portion of local A 5740 5741 Collective 5742 5743 Input Parameters: 5744 + A,B - the matrices in `MATMPIAIJ` format 5745 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5746 5747 Output Parameter: 5748 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5749 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5750 . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5751 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5752 5753 Developer Note: 5754 This directly accesses information inside the VecScatter associated with the matrix-vector product 5755 for this matrix. This is not desirable.. 
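   As an illustration (a sketch, not taken from any particular run): if this rank's off-diagonal block of A has
   garray = {2, 5}, i.e. aBn = 2 and the off-diagonal part of A only touches global columns 2 and 5, then B_oth is a
   2 by B->cmap->N sequential matrix whose local rows 0 and 1 are copies of global rows 2 and 5 of B.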
5756 5757 Level: developer 5758 5759 */ 5760 5761 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A, Mat B, MatReuse scall, PetscInt **startsj_s, PetscInt **startsj_r, MatScalar **bufa_ptr, Mat *B_oth) 5762 { 5763 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5764 VecScatter ctx; 5765 MPI_Comm comm; 5766 const PetscMPIInt *rprocs, *sprocs; 5767 PetscMPIInt nrecvs, nsends; 5768 const PetscInt *srow, *rstarts, *sstarts; 5769 PetscInt *rowlen, *bufj, *bufJ, ncols = 0, aBn = a->B->cmap->n, row, *b_othi, *b_othj, *rvalues = NULL, *svalues = NULL, *cols, sbs, rbs; 5770 PetscInt i, j, k = 0, l, ll, nrows, *rstartsj = NULL, *sstartsj, len; 5771 PetscScalar *b_otha, *bufa, *bufA, *vals = NULL; 5772 MPI_Request *reqs = NULL, *rwaits = NULL, *swaits = NULL; 5773 PetscMPIInt size, tag, rank, nreqs; 5774 5775 PetscFunctionBegin; 5776 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 5777 PetscCallMPI(MPI_Comm_size(comm, &size)); 5778 5779 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 5780 A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 5781 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, B, 0, 0)); 5782 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 5783 5784 if (size == 1) { 5785 startsj_s = NULL; 5786 bufa_ptr = NULL; 5787 *B_oth = NULL; 5788 PetscFunctionReturn(PETSC_SUCCESS); 5789 } 5790 5791 ctx = a->Mvctx; 5792 tag = ((PetscObject)ctx)->tag; 5793 5794 PetscCall(VecScatterGetRemote_Private(ctx, PETSC_TRUE /*send*/, &nsends, &sstarts, &srow, &sprocs, &sbs)); 5795 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5796 PetscCall(VecScatterGetRemoteOrdered_Private(ctx, PETSC_FALSE /*recv*/, &nrecvs, &rstarts, NULL /*indices not needed*/, &rprocs, &rbs)); 5797 PetscCall(PetscMPIIntCast(nsends + nrecvs, &nreqs)); 5798 PetscCall(PetscMalloc1(nreqs, &reqs)); 5799 rwaits = reqs; 5800 swaits = PetscSafePointerPlusOffset(reqs, nrecvs); 5801 5802 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5803 if (scall == MAT_INITIAL_MATRIX) { 5804 /* i-array */ 5805 /* post receives */ 5806 if (nrecvs) PetscCall(PetscMalloc1(rbs * (rstarts[nrecvs] - rstarts[0]), &rvalues)); /* rstarts can be NULL when nrecvs=0 */ 5807 for (i = 0; i < nrecvs; i++) { 5808 rowlen = rvalues + rstarts[i] * rbs; 5809 nrows = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of indices to be received */ 5810 PetscCallMPI(MPIU_Irecv(rowlen, nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i)); 5811 } 5812 5813 /* pack the outgoing message */ 5814 PetscCall(PetscMalloc2(nsends + 1, &sstartsj, nrecvs + 1, &rstartsj)); 5815 5816 sstartsj[0] = 0; 5817 rstartsj[0] = 0; 5818 len = 0; /* total length of j or a array to be sent */ 5819 if (nsends) { 5820 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5821 PetscCall(PetscMalloc1(sbs * (sstarts[nsends] - sstarts[0]), &svalues)); 5822 } 5823 for (i = 0; i < nsends; i++) { 5824 rowlen = svalues + (sstarts[i] - sstarts[0]) * sbs; 5825 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5826 for (j = 0; j < nrows; j++) { 5827 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5828 for (l = 0; l < sbs; l++) { 5829 PetscCall(MatGetRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); /* rowlength */ 5830 5831 rowlen[j * sbs + l] = ncols; 5832 5833 len += ncols; 5834 
PetscCall(MatRestoreRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); 5835 } 5836 k++; 5837 } 5838 PetscCallMPI(MPIU_Isend(rowlen, nrows * sbs, MPIU_INT, sprocs[i], tag, comm, swaits + i)); 5839 5840 sstartsj[i + 1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5841 } 5842 /* recvs and sends of i-array are completed */ 5843 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5844 PetscCall(PetscFree(svalues)); 5845 5846 /* allocate buffers for sending j and a arrays */ 5847 PetscCall(PetscMalloc1(len, &bufj)); 5848 PetscCall(PetscMalloc1(len, &bufa)); 5849 5850 /* create i-array of B_oth */ 5851 PetscCall(PetscMalloc1(aBn + 1, &b_othi)); 5852 5853 b_othi[0] = 0; 5854 len = 0; /* total length of j or a array to be received */ 5855 k = 0; 5856 for (i = 0; i < nrecvs; i++) { 5857 rowlen = rvalues + (rstarts[i] - rstarts[0]) * rbs; 5858 nrows = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of rows to be received */ 5859 for (j = 0; j < nrows; j++) { 5860 b_othi[k + 1] = b_othi[k] + rowlen[j]; 5861 PetscCall(PetscIntSumError(rowlen[j], len, &len)); 5862 k++; 5863 } 5864 rstartsj[i + 1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5865 } 5866 PetscCall(PetscFree(rvalues)); 5867 5868 /* allocate space for j and a arrays of B_oth */ 5869 PetscCall(PetscMalloc1(b_othi[aBn], &b_othj)); 5870 PetscCall(PetscMalloc1(b_othi[aBn], &b_otha)); 5871 5872 /* j-array */ 5873 /* post receives of j-array */ 5874 for (i = 0; i < nrecvs; i++) { 5875 nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */ 5876 PetscCallMPI(MPIU_Irecv(PetscSafePointerPlusOffset(b_othj, rstartsj[i]), nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i)); 5877 } 5878 5879 /* pack the outgoing message j-array */ 5880 if (nsends) k = sstarts[0]; 5881 for (i = 0; i < nsends; i++) { 5882 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5883 bufJ = PetscSafePointerPlusOffset(bufj, sstartsj[i]); 5884 for (j = 0; j < nrows; j++) { 5885 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5886 for (ll = 0; ll < sbs; ll++) { 5887 PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL)); 5888 for (l = 0; l < ncols; l++) *bufJ++ = cols[l]; 5889 PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL)); 5890 } 5891 } 5892 PetscCallMPI(MPIU_Isend(PetscSafePointerPlusOffset(bufj, sstartsj[i]), sstartsj[i + 1] - sstartsj[i], MPIU_INT, sprocs[i], tag, comm, swaits + i)); 5893 } 5894 5895 /* recvs and sends of j-array are completed */ 5896 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5897 } else if (scall == MAT_REUSE_MATRIX) { 5898 sstartsj = *startsj_s; 5899 rstartsj = *startsj_r; 5900 bufa = *bufa_ptr; 5901 PetscCall(MatSeqAIJGetArrayWrite(*B_oth, &b_otha)); 5902 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container"); 5903 5904 /* a-array */ 5905 /* post receives of a-array */ 5906 for (i = 0; i < nrecvs; i++) { 5907 nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */ 5908 PetscCallMPI(MPIU_Irecv(PetscSafePointerPlusOffset(b_otha, rstartsj[i]), nrows, MPIU_SCALAR, rprocs[i], tag, comm, rwaits + i)); 5909 } 5910 5911 /* pack the outgoing message a-array */ 5912 if (nsends) k = sstarts[0]; 5913 for (i = 0; i < nsends; i++) { 5914 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5915 bufA = PetscSafePointerPlusOffset(bufa, sstartsj[i]); 5916 for (j = 0; j < nrows; j++) { 5917 row = srow[k++] + B->rmap->range[rank]; /* global row idx 
*/ 5918 for (ll = 0; ll < sbs; ll++) { 5919 PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals)); 5920 for (l = 0; l < ncols; l++) *bufA++ = vals[l]; 5921 PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals)); 5922 } 5923 } 5924 PetscCallMPI(MPIU_Isend(PetscSafePointerPlusOffset(bufa, sstartsj[i]), sstartsj[i + 1] - sstartsj[i], MPIU_SCALAR, sprocs[i], tag, comm, swaits + i)); 5925 } 5926 /* recvs and sends of a-array are completed */ 5927 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5928 PetscCall(PetscFree(reqs)); 5929 5930 if (scall == MAT_INITIAL_MATRIX) { 5931 Mat_SeqAIJ *b_oth; 5932 5933 /* put together the new matrix */ 5934 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, aBn, B->cmap->N, b_othi, b_othj, b_otha, B_oth)); 5935 5936 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5937 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5938 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 5939 b_oth->free_a = PETSC_TRUE; 5940 b_oth->free_ij = PETSC_TRUE; 5941 b_oth->nonew = 0; 5942 5943 PetscCall(PetscFree(bufj)); 5944 if (!startsj_s || !bufa_ptr) { 5945 PetscCall(PetscFree2(sstartsj, rstartsj)); 5946 PetscCall(PetscFree(bufa_ptr)); 5947 } else { 5948 *startsj_s = sstartsj; 5949 *startsj_r = rstartsj; 5950 *bufa_ptr = bufa; 5951 } 5952 } else if (scall == MAT_REUSE_MATRIX) { 5953 PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth, &b_otha)); 5954 } 5955 5956 PetscCall(VecScatterRestoreRemote_Private(ctx, PETSC_TRUE, &nsends, &sstarts, &srow, &sprocs, &sbs)); 5957 PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx, PETSC_FALSE, &nrecvs, &rstarts, NULL, &rprocs, &rbs)); 5958 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, B, 0, 0)); 5959 PetscFunctionReturn(PETSC_SUCCESS); 5960 } 5961 5962 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat, MatType, MatReuse, Mat *); 5963 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat, MatType, MatReuse, Mat *); 5964 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat, MatType, MatReuse, Mat *); 5965 #if defined(PETSC_HAVE_MKL_SPARSE) 5966 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat, MatType, MatReuse, Mat *); 5967 #endif 5968 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat, MatType, MatReuse, Mat *); 5969 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *); 5970 #if defined(PETSC_HAVE_ELEMENTAL) 5971 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat, MatType, MatReuse, Mat *); 5972 #endif 5973 #if defined(PETSC_HAVE_SCALAPACK) && (defined(PETSC_USE_REAL_SINGLE) || defined(PETSC_USE_REAL_DOUBLE)) 5974 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *); 5975 #endif 5976 #if defined(PETSC_HAVE_HYPRE) 5977 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *); 5978 #endif 5979 #if defined(PETSC_HAVE_CUDA) 5980 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat, MatType, MatReuse, Mat *); 5981 #endif 5982 #if defined(PETSC_HAVE_HIP) 5983 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJHIPSPARSE(Mat, MatType, MatReuse, Mat *); 5984 #endif 5985 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 5986 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat, MatType, MatReuse, Mat *); 5987 #endif 5988 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat, MatType, MatReuse, Mat *); 5989 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *); 5990 PETSC_INTERN 
PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat); 5991 5992 /* 5993 Computes (B'*A')' since computing B*A directly is untenable 5994 5995 n p p 5996 [ ] [ ] [ ] 5997 m [ A ] * n [ B ] = m [ C ] 5998 [ ] [ ] [ ] 5999 6000 */ 6001 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A, Mat B, Mat C) 6002 { 6003 Mat At, Bt, Ct; 6004 6005 PetscFunctionBegin; 6006 PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &At)); 6007 PetscCall(MatTranspose(B, MAT_INITIAL_MATRIX, &Bt)); 6008 PetscCall(MatMatMult(Bt, At, MAT_INITIAL_MATRIX, PETSC_CURRENT, &Ct)); 6009 PetscCall(MatDestroy(&At)); 6010 PetscCall(MatDestroy(&Bt)); 6011 PetscCall(MatTransposeSetPrecursor(Ct, C)); 6012 PetscCall(MatTranspose(Ct, MAT_REUSE_MATRIX, &C)); 6013 PetscCall(MatDestroy(&Ct)); 6014 PetscFunctionReturn(PETSC_SUCCESS); 6015 } 6016 6017 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A, Mat B, PetscReal fill, Mat C) 6018 { 6019 PetscBool cisdense; 6020 6021 PetscFunctionBegin; 6022 PetscCheck(A->cmap->n == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT, A->cmap->n, B->rmap->n); 6023 PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N)); 6024 PetscCall(MatSetBlockSizesFromMats(C, A, B)); 6025 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, MATMPIDENSEHIP, "")); 6026 if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 6027 PetscCall(MatSetUp(C)); 6028 6029 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 6030 PetscFunctionReturn(PETSC_SUCCESS); 6031 } 6032 6033 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C) 6034 { 6035 Mat_Product *product = C->product; 6036 Mat A = product->A, B = product->B; 6037 6038 PetscFunctionBegin; 6039 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 6040 A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 6041 C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ; 6042 C->ops->productsymbolic = MatProductSymbolic_AB; 6043 PetscFunctionReturn(PETSC_SUCCESS); 6044 } 6045 6046 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C) 6047 { 6048 Mat_Product *product = C->product; 6049 6050 PetscFunctionBegin; 6051 if (product->type == MATPRODUCT_AB) PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C)); 6052 PetscFunctionReturn(PETSC_SUCCESS); 6053 } 6054 6055 /* 6056 Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix 6057 6058 Input Parameters: 6059 6060 j1,rowBegin1,rowEnd1,jmap1: describe the first set of nonzeros (Set1) 6061 j2,rowBegin2,rowEnd2,jmap2: describe the second set of nonzeros (Set2) 6062 6063 mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat 6064 6065 For Set1, j1[] contains column indices of the nonzeros. 6066 For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k 6067 respectively (note rowEnd1[k] is not necessarily equal to rwoBegin1[k+1]). Indices in this range of j1[] are sorted, 6068 but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1. 6069 6070 Similar for Set2. 6071 6072 This routine merges the two sets of nonzeros row by row and removes repeats. 
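   A small illustrative example with a single row (m = 1), using the output names described below: if Set1 has
   j1 = {1,1,3,4} (unique columns {1,3,4}, jmap1 = {0,2,3,4}) and Set2 has j2 = {3,5,5} (unique columns {3,5},
   jmap2 = {0,1,3}), then the merged row is {1,3,4,5}, i.e. i = {0,4} and j = {1,3,4,5}, with imap1 = {0,1,2} and
   imap2 = {1,3} recording where each set's unique nonzeros land in the merged row.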
6073 6074 Output Parameters: (memory is allocated by the caller) 6075 6076 i[],j[]: the CSR of the merged matrix, which has m rows. 6077 imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix. 6078 imap2[]: similar to imap1[], but for Set2. 6079 Note we order nonzeros row-by-row and from left to right. 6080 */ 6081 static PetscErrorCode MatMergeEntries_Internal(Mat mat, const PetscInt j1[], const PetscInt j2[], const PetscCount rowBegin1[], const PetscCount rowEnd1[], const PetscCount rowBegin2[], const PetscCount rowEnd2[], const PetscCount jmap1[], const PetscCount jmap2[], PetscCount imap1[], PetscCount imap2[], PetscInt i[], PetscInt j[]) 6082 { 6083 PetscInt r, m; /* Row index of mat */ 6084 PetscCount t, t1, t2, b1, e1, b2, e2; 6085 6086 PetscFunctionBegin; 6087 PetscCall(MatGetLocalSize(mat, &m, NULL)); 6088 t1 = t2 = t = 0; /* Count unique nonzeros of in Set1, Set1 and the merged respectively */ 6089 i[0] = 0; 6090 for (r = 0; r < m; r++) { /* Do row by row merging */ 6091 b1 = rowBegin1[r]; 6092 e1 = rowEnd1[r]; 6093 b2 = rowBegin2[r]; 6094 e2 = rowEnd2[r]; 6095 while (b1 < e1 && b2 < e2) { 6096 if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */ 6097 j[t] = j1[b1]; 6098 imap1[t1] = t; 6099 imap2[t2] = t; 6100 b1 += jmap1[t1 + 1] - jmap1[t1]; /* Jump to next unique local nonzero */ 6101 b2 += jmap2[t2 + 1] - jmap2[t2]; /* Jump to next unique remote nonzero */ 6102 t1++; 6103 t2++; 6104 t++; 6105 } else if (j1[b1] < j2[b2]) { 6106 j[t] = j1[b1]; 6107 imap1[t1] = t; 6108 b1 += jmap1[t1 + 1] - jmap1[t1]; 6109 t1++; 6110 t++; 6111 } else { 6112 j[t] = j2[b2]; 6113 imap2[t2] = t; 6114 b2 += jmap2[t2 + 1] - jmap2[t2]; 6115 t2++; 6116 t++; 6117 } 6118 } 6119 /* Merge the remaining in either j1[] or j2[] */ 6120 while (b1 < e1) { 6121 j[t] = j1[b1]; 6122 imap1[t1] = t; 6123 b1 += jmap1[t1 + 1] - jmap1[t1]; 6124 t1++; 6125 t++; 6126 } 6127 while (b2 < e2) { 6128 j[t] = j2[b2]; 6129 imap2[t2] = t; 6130 b2 += jmap2[t2 + 1] - jmap2[t2]; 6131 t2++; 6132 t++; 6133 } 6134 PetscCall(PetscIntCast(t, i + r + 1)); 6135 } 6136 PetscFunctionReturn(PETSC_SUCCESS); 6137 } 6138 6139 /* 6140 Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block 6141 6142 Input Parameters: 6143 mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m. 6144 n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[] 6145 respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n. 6146 6147 i[] is already sorted, but within a row, j[] is not sorted and might have repeats. 6148 i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting. 6149 6150 Output Parameters: 6151 j[],perm[]: the routine needs to sort j[] within each row along with perm[]. 6152 rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller. 6153 They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block, 6154 and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block. 6155 6156 Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine. 6157 Atot: number of entries belonging to the diagonal block. 
6158 Annz: number of unique nonzeros belonging to the diagonal block. 6159 Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count 6160 repeats (i.e., same 'i,j' pair). 6161 Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. More precisely, Ajmap[t+1] - Ajmap[t] 6162 is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0. 6163 6164 Atot: number of entries belonging to the diagonal block 6165 Annz: number of unique nonzeros belonging to the diagonal block. 6166 6167 Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block. 6168 6169 Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1(). 6170 */ 6171 static PetscErrorCode MatSplitEntries_Internal(Mat mat, PetscCount n, const PetscInt i[], PetscInt j[], PetscCount perm[], PetscCount rowBegin[], PetscCount rowMid[], PetscCount rowEnd[], PetscCount *Atot_, PetscCount **Aperm_, PetscCount *Annz_, PetscCount **Ajmap_, PetscCount *Btot_, PetscCount **Bperm_, PetscCount *Bnnz_, PetscCount **Bjmap_) 6172 { 6173 PetscInt cstart, cend, rstart, rend, row, col; 6174 PetscCount Atot = 0, Btot = 0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */ 6175 PetscCount Annz = 0, Bnnz = 0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */ 6176 PetscCount k, m, p, q, r, s, mid; 6177 PetscCount *Aperm, *Bperm, *Ajmap, *Bjmap; 6178 6179 PetscFunctionBegin; 6180 PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend)); 6181 PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend)); 6182 m = rend - rstart; 6183 6184 /* Skip negative rows */ 6185 for (k = 0; k < n; k++) 6186 if (i[k] >= 0) break; 6187 6188 /* Process [k,n): sort and partition each local row into diag and offdiag portions, 6189 fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz. 6190 */ 6191 while (k < n) { 6192 row = i[k]; 6193 /* Entries in [k,s) are in one row. 
Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */ 6194 for (s = k; s < n; s++) 6195 if (i[s] != row) break; 6196 6197 /* Shift diag columns to range of [-PETSC_INT_MAX, -1] */ 6198 for (p = k; p < s; p++) { 6199 if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_INT_MAX; 6200 } 6201 PetscCall(PetscSortIntWithCountArray(s - k, j + k, perm + k)); 6202 PetscCall(PetscSortedIntUpperBound(j, k, s, -1, &mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */ 6203 rowBegin[row - rstart] = k; 6204 rowMid[row - rstart] = mid; 6205 rowEnd[row - rstart] = s; 6206 PetscCheck(k == s || j[s - 1] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index %" PetscInt_FMT " is >= matrix column size %" PetscInt_FMT, j[s - 1], mat->cmap->N); 6207 6208 /* Count nonzeros of this diag/offdiag row, which might have repeats */ 6209 Atot += mid - k; 6210 Btot += s - mid; 6211 6212 /* Count unique nonzeros of this diag row */ 6213 for (p = k; p < mid;) { 6214 col = j[p]; 6215 do { 6216 j[p] += PETSC_INT_MAX; /* Revert the modified diagonal indices */ 6217 p++; 6218 } while (p < mid && j[p] == col); 6219 Annz++; 6220 } 6221 6222 /* Count unique nonzeros of this offdiag row */ 6223 for (p = mid; p < s;) { 6224 col = j[p]; 6225 do { 6226 p++; 6227 } while (p < s && j[p] == col); 6228 Bnnz++; 6229 } 6230 k = s; 6231 } 6232 6233 /* Allocation according to Atot, Btot, Annz, Bnnz */ 6234 PetscCall(PetscMalloc1(Atot, &Aperm)); 6235 PetscCall(PetscMalloc1(Btot, &Bperm)); 6236 PetscCall(PetscMalloc1(Annz + 1, &Ajmap)); 6237 PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap)); 6238 6239 /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */ 6240 Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0; 6241 for (r = 0; r < m; r++) { 6242 k = rowBegin[r]; 6243 mid = rowMid[r]; 6244 s = rowEnd[r]; 6245 PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Aperm, Atot), PetscSafePointerPlusOffset(perm, k), mid - k)); 6246 PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Bperm, Btot), PetscSafePointerPlusOffset(perm, mid), s - mid)); 6247 Atot += mid - k; 6248 Btot += s - mid; 6249 6250 /* Scan column indices in this row and find out how many repeats each unique nonzero has */ 6251 for (p = k; p < mid;) { 6252 col = j[p]; 6253 q = p; 6254 do { 6255 p++; 6256 } while (p < mid && j[p] == col); 6257 Ajmap[Annz + 1] = Ajmap[Annz] + (p - q); 6258 Annz++; 6259 } 6260 6261 for (p = mid; p < s;) { 6262 col = j[p]; 6263 q = p; 6264 do { 6265 p++; 6266 } while (p < s && j[p] == col); 6267 Bjmap[Bnnz + 1] = Bjmap[Bnnz] + (p - q); 6268 Bnnz++; 6269 } 6270 } 6271 /* Output */ 6272 *Aperm_ = Aperm; 6273 *Annz_ = Annz; 6274 *Atot_ = Atot; 6275 *Ajmap_ = Ajmap; 6276 *Bperm_ = Bperm; 6277 *Bnnz_ = Bnnz; 6278 *Btot_ = Btot; 6279 *Bjmap_ = Bjmap; 6280 PetscFunctionReturn(PETSC_SUCCESS); 6281 } 6282 6283 /* 6284 Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix 6285 6286 Input Parameters: 6287 nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[] 6288 nnz: number of unique nonzeros in the merged matrix 6289 imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix 6290 jmap[nnz1+1]: i-th nonzero in the set has jmap[i+1] - jmap[i] repeats in the set 6291 6292 Output Parameter: (memory is allocated by the caller) 6293 jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set 6294 6295 Example: 6296 nnz1 = 4 6297 nnz 
= 6 6298 imap = [1,3,4,5] 6299 jmap = [0,3,5,6,7] 6300 then, 6301 jmap_new = [0,0,3,3,5,6,7] 6302 */ 6303 static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1, PetscCount nnz, const PetscCount imap[], const PetscCount jmap[], PetscCount jmap_new[]) 6304 { 6305 PetscCount k, p; 6306 6307 PetscFunctionBegin; 6308 jmap_new[0] = 0; 6309 p = nnz; /* p loops over jmap_new[] backwards */ 6310 for (k = nnz1 - 1; k >= 0; k--) { /* k loops over imap[] */ 6311 for (; p > imap[k]; p--) jmap_new[p] = jmap[k + 1]; 6312 } 6313 for (; p >= 0; p--) jmap_new[p] = jmap[0]; 6314 PetscFunctionReturn(PETSC_SUCCESS); 6315 } 6316 6317 static PetscErrorCode MatCOOStructDestroy_MPIAIJ(PetscCtxRt data) 6318 { 6319 MatCOOStruct_MPIAIJ *coo = *(MatCOOStruct_MPIAIJ **)data; 6320 6321 PetscFunctionBegin; 6322 PetscCall(PetscSFDestroy(&coo->sf)); 6323 PetscCall(PetscFree(coo->Aperm1)); 6324 PetscCall(PetscFree(coo->Bperm1)); 6325 PetscCall(PetscFree(coo->Ajmap1)); 6326 PetscCall(PetscFree(coo->Bjmap1)); 6327 PetscCall(PetscFree(coo->Aimap2)); 6328 PetscCall(PetscFree(coo->Bimap2)); 6329 PetscCall(PetscFree(coo->Aperm2)); 6330 PetscCall(PetscFree(coo->Bperm2)); 6331 PetscCall(PetscFree(coo->Ajmap2)); 6332 PetscCall(PetscFree(coo->Bjmap2)); 6333 PetscCall(PetscFree(coo->Cperm1)); 6334 PetscCall(PetscFree2(coo->sendbuf, coo->recvbuf)); 6335 PetscCall(PetscFree(coo)); 6336 PetscFunctionReturn(PETSC_SUCCESS); 6337 } 6338 6339 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, PetscInt coo_i[], PetscInt coo_j[]) 6340 { 6341 MPI_Comm comm; 6342 PetscMPIInt rank, size; 6343 PetscInt m, n, M, N, rstart, rend, cstart, cend; /* Sizes, indices of row/col, therefore with type PetscInt */ 6344 PetscCount k, p, q, rem; /* Loop variables over coo arrays */ 6345 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)mat->data; 6346 PetscContainer container; 6347 MatCOOStruct_MPIAIJ *coo; 6348 6349 PetscFunctionBegin; 6350 PetscCall(PetscFree(mpiaij->garray)); 6351 PetscCall(VecDestroy(&mpiaij->lvec)); 6352 #if defined(PETSC_USE_CTABLE) 6353 PetscCall(PetscHMapIDestroy(&mpiaij->colmap)); 6354 #else 6355 PetscCall(PetscFree(mpiaij->colmap)); 6356 #endif 6357 PetscCall(VecScatterDestroy(&mpiaij->Mvctx)); 6358 mat->assembled = PETSC_FALSE; 6359 mat->was_assembled = PETSC_FALSE; 6360 6361 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 6362 PetscCallMPI(MPI_Comm_size(comm, &size)); 6363 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 6364 PetscCall(PetscLayoutSetUp(mat->rmap)); 6365 PetscCall(PetscLayoutSetUp(mat->cmap)); 6366 PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend)); 6367 PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend)); 6368 PetscCall(MatGetLocalSize(mat, &m, &n)); 6369 PetscCall(MatGetSize(mat, &M, &N)); 6370 6371 /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */ 6372 /* entries come first, then local rows, then remote rows. */ 6373 PetscCount n1 = coo_n, *perm1; 6374 PetscInt *i1 = coo_i, *j1 = coo_j; 6375 6376 PetscCall(PetscMalloc1(n1, &perm1)); 6377 for (k = 0; k < n1; k++) perm1[k] = k; 6378 6379 /* Manipulate indices so that entries with negative row or col indices will have smallest 6380 row indices, local entries will have greater but negative row indices, and remote entries 6381 will have positive row indices. 
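     For instance (illustrative values): with rstart = 100 and rend = 200, an entry with a negative row or column
     index gets row index PETSC_INT_MIN, a local row such as 150 becomes 150 - PETSC_INT_MAX (negative, yet greater
     than PETSC_INT_MIN), and a remote row such as 250 keeps its positive index, so sorting by row groups the three
     classes in that order.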
6382 */ 6383 for (k = 0; k < n1; k++) { 6384 if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_INT_MIN; /* e.g., -2^31, minimal to move them ahead */ 6385 else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_INT_MAX; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_INT_MAX, -1] */ 6386 else { 6387 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_USER_INPUT, "MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows"); 6388 if (mpiaij->donotstash) i1[k] = PETSC_INT_MIN; /* Ignore offproc entries as if they had negative indices */ 6389 } 6390 } 6391 6392 /* Sort by row; after that, [0,k) have ignored entries, [k,rem) have local rows and [rem,n1) have remote rows */ 6393 PetscCall(PetscSortIntWithIntCountArrayPair(n1, i1, j1, perm1)); 6394 6395 /* Advance k to the first entry we need to take care of */ 6396 for (k = 0; k < n1; k++) 6397 if (i1[k] > PETSC_INT_MIN) break; 6398 PetscCount i1start = k; 6399 6400 PetscCall(PetscSortedIntUpperBound(i1, k, n1, rend - 1 - PETSC_INT_MAX, &rem)); /* rem is upper bound of the last local row */ 6401 for (; k < rem; k++) i1[k] += PETSC_INT_MAX; /* Revert row indices of local rows*/ 6402 6403 PetscCheck(n1 == 0 || i1[n1 - 1] < M, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "COO row index %" PetscInt_FMT " is >= the matrix row size %" PetscInt_FMT, i1[n1 - 1], M); 6404 6405 /* Send remote rows to their owner */ 6406 /* Find which rows should be sent to which remote ranks*/ 6407 PetscInt nsend = 0; /* Number of MPI ranks to send data to */ 6408 PetscMPIInt *sendto; /* [nsend], storing remote ranks */ 6409 PetscInt *nentries; /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */ 6410 const PetscInt *ranges; 6411 PetscInt maxNsend = size >= 128 ? 128 : size; /* Assume max 128 neighbors; realloc when needed */ 6412 6413 PetscCall(PetscLayoutGetRanges(mat->rmap, &ranges)); 6414 PetscCall(PetscMalloc2(maxNsend, &sendto, maxNsend, &nentries)); 6415 for (k = rem; k < n1;) { 6416 PetscMPIInt owner; 6417 PetscInt firstRow, lastRow; 6418 6419 /* Locate a row range */ 6420 firstRow = i1[k]; /* first row of this owner */ 6421 PetscCall(PetscLayoutFindOwner(mat->rmap, firstRow, &owner)); 6422 lastRow = ranges[owner + 1] - 1; /* last row of this owner */ 6423 6424 /* Find the first index 'p' in [k,n) with i1[p] belonging to next owner */ 6425 PetscCall(PetscSortedIntUpperBound(i1, k, n1, lastRow, &p)); 6426 6427 /* All entries in [k,p) belong to this remote owner */ 6428 if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */ 6429 PetscMPIInt *sendto2; 6430 PetscInt *nentries2; 6431 PetscInt maxNsend2 = (maxNsend <= size / 2) ? 
maxNsend * 2 : size; 6432 6433 PetscCall(PetscMalloc2(maxNsend2, &sendto2, maxNsend2, &nentries2)); 6434 PetscCall(PetscArraycpy(sendto2, sendto, maxNsend)); 6435 PetscCall(PetscArraycpy(nentries2, nentries, maxNsend)); 6436 PetscCall(PetscFree2(sendto, nentries)); 6437 sendto = sendto2; 6438 nentries = nentries2; 6439 maxNsend = maxNsend2; 6440 } 6441 sendto[nsend] = owner; 6442 PetscCall(PetscIntCast(p - k, &nentries[nsend])); 6443 nsend++; 6444 k = p; 6445 } 6446 6447 /* Build 1st SF to know offsets on remote to send data */ 6448 PetscSF sf1; 6449 PetscInt nroots = 1, nroots2 = 0; 6450 PetscInt nleaves = nsend, nleaves2 = 0; 6451 PetscInt *offsets; 6452 PetscSFNode *iremote; 6453 6454 PetscCall(PetscSFCreate(comm, &sf1)); 6455 PetscCall(PetscMalloc1(nsend, &iremote)); 6456 PetscCall(PetscMalloc1(nsend, &offsets)); 6457 for (k = 0; k < nsend; k++) { 6458 iremote[k].rank = sendto[k]; 6459 iremote[k].index = 0; 6460 nleaves2 += nentries[k]; 6461 PetscCheck(nleaves2 >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF leaves is too large for PetscInt"); 6462 } 6463 PetscCall(PetscSFSetGraph(sf1, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 6464 PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1, MPIU_INT, PETSC_MEMTYPE_HOST, &nroots2 /*rootdata*/, PETSC_MEMTYPE_HOST, nentries /*leafdata*/, PETSC_MEMTYPE_HOST, offsets /*leafupdate*/, MPI_SUM)); 6465 PetscCall(PetscSFFetchAndOpEnd(sf1, MPIU_INT, &nroots2, nentries, offsets, MPI_SUM)); /* Would nroots2 overflow, we check offsets[] below */ 6466 PetscCall(PetscSFDestroy(&sf1)); 6467 PetscAssert(nleaves2 == n1 - rem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT, nleaves2, n1 - rem); 6468 6469 /* Build 2nd SF to send remote COOs to their owner */ 6470 PetscSF sf2; 6471 nroots = nroots2; 6472 nleaves = nleaves2; 6473 PetscCall(PetscSFCreate(comm, &sf2)); 6474 PetscCall(PetscSFSetFromOptions(sf2)); 6475 PetscCall(PetscMalloc1(nleaves, &iremote)); 6476 p = 0; 6477 for (k = 0; k < nsend; k++) { 6478 PetscCheck(offsets[k] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF roots is too large for PetscInt"); 6479 for (q = 0; q < nentries[k]; q++, p++) { 6480 iremote[p].rank = sendto[k]; 6481 PetscCall(PetscIntCast(offsets[k] + q, &iremote[p].index)); 6482 } 6483 } 6484 PetscCall(PetscSFSetGraph(sf2, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 6485 6486 /* Send the remote COOs to their owner */ 6487 PetscInt n2 = nroots, *i2, *j2; /* Buffers for received COOs from other ranks, along with a permutation array */ 6488 PetscCount *perm2; /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */ 6489 PetscCall(PetscMalloc3(n2, &i2, n2, &j2, n2, &perm2)); 6490 PetscAssert(rem == 0 || i1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null"); 6491 PetscAssert(rem == 0 || j1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null"); 6492 PetscInt *i1prem = PetscSafePointerPlusOffset(i1, rem); 6493 PetscInt *j1prem = PetscSafePointerPlusOffset(j1, rem); 6494 PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, i1prem, PETSC_MEMTYPE_HOST, i2, MPI_REPLACE)); 6495 PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, i1prem, i2, MPI_REPLACE)); 6496 PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, j1prem, PETSC_MEMTYPE_HOST, j2, MPI_REPLACE)); 6497 PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, 
j1prem, j2, MPI_REPLACE)); 6498 6499 PetscCall(PetscFree(offsets)); 6500 PetscCall(PetscFree2(sendto, nentries)); 6501 6502 /* Sort received COOs by row along with the permutation array */ 6503 for (k = 0; k < n2; k++) perm2[k] = k; 6504 PetscCall(PetscSortIntWithIntCountArrayPair(n2, i2, j2, perm2)); 6505 6506 /* sf2 only sends contiguous leafdata to contiguous rootdata. We record the permutation which will be used to fill leafdata */ 6507 PetscCount *Cperm1; 6508 PetscAssert(rem == 0 || perm1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null"); 6509 PetscCount *perm1prem = PetscSafePointerPlusOffset(perm1, rem); 6510 PetscCall(PetscMalloc1(nleaves, &Cperm1)); 6511 PetscCall(PetscArraycpy(Cperm1, perm1prem, nleaves)); 6512 6513 /* Support for HYPRE matrices, kind of a hack. 6514 Swap min column with diagonal so that diagonal values will go first */ 6515 PetscBool hypre; 6516 PetscCall(PetscStrcmp("_internal_COO_mat_for_hypre", ((PetscObject)mat)->name, &hypre)); 6517 if (hypre) { 6518 PetscInt *minj; 6519 PetscBT hasdiag; 6520 6521 PetscCall(PetscBTCreate(m, &hasdiag)); 6522 PetscCall(PetscMalloc1(m, &minj)); 6523 for (k = 0; k < m; k++) minj[k] = PETSC_INT_MAX; 6524 for (k = i1start; k < rem; k++) { 6525 if (j1[k] < cstart || j1[k] >= cend) continue; 6526 const PetscInt rindex = i1[k] - rstart; 6527 if ((j1[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex)); 6528 minj[rindex] = PetscMin(minj[rindex], j1[k]); 6529 } 6530 for (k = 0; k < n2; k++) { 6531 if (j2[k] < cstart || j2[k] >= cend) continue; 6532 const PetscInt rindex = i2[k] - rstart; 6533 if ((j2[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex)); 6534 minj[rindex] = PetscMin(minj[rindex], j2[k]); 6535 } 6536 for (k = i1start; k < rem; k++) { 6537 const PetscInt rindex = i1[k] - rstart; 6538 if (j1[k] < cstart || j1[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue; 6539 if (j1[k] == minj[rindex]) j1[k] = i1[k] + (cstart - rstart); 6540 else if ((j1[k] - cstart) == rindex) j1[k] = minj[rindex]; 6541 } 6542 for (k = 0; k < n2; k++) { 6543 const PetscInt rindex = i2[k] - rstart; 6544 if (j2[k] < cstart || j2[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue; 6545 if (j2[k] == minj[rindex]) j2[k] = i2[k] + (cstart - rstart); 6546 else if ((j2[k] - cstart) == rindex) j2[k] = minj[rindex]; 6547 } 6548 PetscCall(PetscBTDestroy(&hasdiag)); 6549 PetscCall(PetscFree(minj)); 6550 } 6551 6552 /* Split local COOs and received COOs into diag/offdiag portions */ 6553 PetscCount *rowBegin1, *rowMid1, *rowEnd1; 6554 PetscCount *Ajmap1, *Aperm1, *Bjmap1, *Bperm1; 6555 PetscCount Annz1, Bnnz1, Atot1, Btot1; 6556 PetscCount *rowBegin2, *rowMid2, *rowEnd2; 6557 PetscCount *Ajmap2, *Aperm2, *Bjmap2, *Bperm2; 6558 PetscCount Annz2, Bnnz2, Atot2, Btot2; 6559 6560 PetscCall(PetscCalloc3(m, &rowBegin1, m, &rowMid1, m, &rowEnd1)); 6561 PetscCall(PetscCalloc3(m, &rowBegin2, m, &rowMid2, m, &rowEnd2)); 6562 PetscCall(MatSplitEntries_Internal(mat, rem, i1, j1, perm1, rowBegin1, rowMid1, rowEnd1, &Atot1, &Aperm1, &Annz1, &Ajmap1, &Btot1, &Bperm1, &Bnnz1, &Bjmap1)); 6563 PetscCall(MatSplitEntries_Internal(mat, n2, i2, j2, perm2, rowBegin2, rowMid2, rowEnd2, &Atot2, &Aperm2, &Annz2, &Ajmap2, &Btot2, &Bperm2, &Bnnz2, &Bjmap2)); 6564 6565 /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */ 6566 PetscInt *Ai, *Bi; 6567 PetscInt *Aj, *Bj; 6568 6569 PetscCall(PetscMalloc1(m + 1, &Ai)); 6570 PetscCall(PetscMalloc1(m + 1, &Bi)); 6571 PetscCall(PetscMalloc1(Annz1 + Annz2, &Aj)); 
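  /* Annz1 + Annz2 and Bnnz1 + Bnnz2 are only upper bounds on the merged counts; Aj and Bj are shrunk to the exact sizes Ai[m] and Bi[m] after the merge below */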
/* Since local and remote entries might have dups, we might allocate excess memory */ 6572 PetscCall(PetscMalloc1(Bnnz1 + Bnnz2, &Bj)); 6573 6574 PetscCount *Aimap1, *Bimap1, *Aimap2, *Bimap2; 6575 PetscCall(PetscMalloc1(Annz1, &Aimap1)); 6576 PetscCall(PetscMalloc1(Bnnz1, &Bimap1)); 6577 PetscCall(PetscMalloc1(Annz2, &Aimap2)); 6578 PetscCall(PetscMalloc1(Bnnz2, &Bimap2)); 6579 6580 PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowBegin1, rowMid1, rowBegin2, rowMid2, Ajmap1, Ajmap2, Aimap1, Aimap2, Ai, Aj)); 6581 PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowMid1, rowEnd1, rowMid2, rowEnd2, Bjmap1, Bjmap2, Bimap1, Bimap2, Bi, Bj)); 6582 6583 /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we */ 6584 /* expect nonzeros in A/B most likely have local contributing entries */ 6585 PetscInt Annz = Ai[m]; 6586 PetscInt Bnnz = Bi[m]; 6587 PetscCount *Ajmap1_new, *Bjmap1_new; 6588 6589 PetscCall(PetscMalloc1(Annz + 1, &Ajmap1_new)); 6590 PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap1_new)); 6591 6592 PetscCall(ExpandJmap_Internal(Annz1, Annz, Aimap1, Ajmap1, Ajmap1_new)); 6593 PetscCall(ExpandJmap_Internal(Bnnz1, Bnnz, Bimap1, Bjmap1, Bjmap1_new)); 6594 6595 PetscCall(PetscFree(Aimap1)); 6596 PetscCall(PetscFree(Ajmap1)); 6597 PetscCall(PetscFree(Bimap1)); 6598 PetscCall(PetscFree(Bjmap1)); 6599 PetscCall(PetscFree3(rowBegin1, rowMid1, rowEnd1)); 6600 PetscCall(PetscFree3(rowBegin2, rowMid2, rowEnd2)); 6601 PetscCall(PetscFree(perm1)); 6602 PetscCall(PetscFree3(i2, j2, perm2)); 6603 6604 Ajmap1 = Ajmap1_new; 6605 Bjmap1 = Bjmap1_new; 6606 6607 /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */ 6608 if (Annz < Annz1 + Annz2) { 6609 PetscInt *Aj_new; 6610 PetscCall(PetscMalloc1(Annz, &Aj_new)); 6611 PetscCall(PetscArraycpy(Aj_new, Aj, Annz)); 6612 PetscCall(PetscFree(Aj)); 6613 Aj = Aj_new; 6614 } 6615 6616 if (Bnnz < Bnnz1 + Bnnz2) { 6617 PetscInt *Bj_new; 6618 PetscCall(PetscMalloc1(Bnnz, &Bj_new)); 6619 PetscCall(PetscArraycpy(Bj_new, Bj, Bnnz)); 6620 PetscCall(PetscFree(Bj)); 6621 Bj = Bj_new; 6622 } 6623 6624 /* Create new submatrices for on-process and off-process coupling */ 6625 PetscScalar *Aa, *Ba; 6626 MatType rtype; 6627 Mat_SeqAIJ *a, *b; 6628 PetscObjectState state; 6629 PetscCall(PetscCalloc1(Annz, &Aa)); /* Zero matrix on device */ 6630 PetscCall(PetscCalloc1(Bnnz, &Ba)); 6631 /* make Aj[] local, i.e, based off the start column of the diagonal portion */ 6632 if (cstart) { 6633 for (k = 0; k < Annz; k++) Aj[k] -= cstart; 6634 } 6635 6636 PetscCall(MatGetRootType_Private(mat, &rtype)); 6637 6638 MatSeqXAIJGetOptions_Private(mpiaij->A); 6639 PetscCall(MatDestroy(&mpiaij->A)); 6640 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, Ai, Aj, Aa, &mpiaij->A)); 6641 PetscCall(MatSetBlockSizesFromMats(mpiaij->A, mat, mat)); 6642 MatSeqXAIJRestoreOptions_Private(mpiaij->A); 6643 6644 MatSeqXAIJGetOptions_Private(mpiaij->B); 6645 PetscCall(MatDestroy(&mpiaij->B)); 6646 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, mat->cmap->N, Bi, Bj, Ba, &mpiaij->B)); 6647 PetscCall(MatSetBlockSizesFromMats(mpiaij->B, mat, mat)); 6648 MatSeqXAIJRestoreOptions_Private(mpiaij->B); 6649 6650 PetscCall(MatSetUpMultiply_MPIAIJ(mat)); 6651 mat->was_assembled = PETSC_TRUE; // was_assembled in effect means the Mvctx is built; doing so avoids redundant MatSetUpMultiply_MPIAIJ 6652 state = mpiaij->A->nonzerostate + mpiaij->B->nonzerostate; 6653 PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, 
PetscObjectComm((PetscObject)mat))); 6654 6655 a = (Mat_SeqAIJ *)mpiaij->A->data; 6656 b = (Mat_SeqAIJ *)mpiaij->B->data; 6657 a->free_a = PETSC_TRUE; 6658 a->free_ij = PETSC_TRUE; 6659 b->free_a = PETSC_TRUE; 6660 b->free_ij = PETSC_TRUE; 6661 a->maxnz = a->nz; 6662 b->maxnz = b->nz; 6663 6664 /* conversion must happen AFTER multiply setup */ 6665 PetscCall(MatConvert(mpiaij->A, rtype, MAT_INPLACE_MATRIX, &mpiaij->A)); 6666 PetscCall(MatConvert(mpiaij->B, rtype, MAT_INPLACE_MATRIX, &mpiaij->B)); 6667 PetscCall(VecDestroy(&mpiaij->lvec)); 6668 PetscCall(MatCreateVecs(mpiaij->B, &mpiaij->lvec, NULL)); 6669 6670 // Put the COO struct in a container and then attach that to the matrix 6671 PetscCall(PetscMalloc1(1, &coo)); 6672 coo->n = coo_n; 6673 coo->sf = sf2; 6674 coo->sendlen = nleaves; 6675 coo->recvlen = nroots; 6676 coo->Annz = Annz; 6677 coo->Bnnz = Bnnz; 6678 coo->Annz2 = Annz2; 6679 coo->Bnnz2 = Bnnz2; 6680 coo->Atot1 = Atot1; 6681 coo->Atot2 = Atot2; 6682 coo->Btot1 = Btot1; 6683 coo->Btot2 = Btot2; 6684 coo->Ajmap1 = Ajmap1; 6685 coo->Aperm1 = Aperm1; 6686 coo->Bjmap1 = Bjmap1; 6687 coo->Bperm1 = Bperm1; 6688 coo->Aimap2 = Aimap2; 6689 coo->Ajmap2 = Ajmap2; 6690 coo->Aperm2 = Aperm2; 6691 coo->Bimap2 = Bimap2; 6692 coo->Bjmap2 = Bjmap2; 6693 coo->Bperm2 = Bperm2; 6694 coo->Cperm1 = Cperm1; 6695 // Allocate in preallocation. If not used, it has zero cost on host 6696 PetscCall(PetscMalloc2(coo->sendlen, &coo->sendbuf, coo->recvlen, &coo->recvbuf)); 6697 PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container)); 6698 PetscCall(PetscContainerSetPointer(container, coo)); 6699 PetscCall(PetscContainerSetCtxDestroy(container, MatCOOStructDestroy_MPIAIJ)); 6700 PetscCall(PetscObjectCompose((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject)container)); 6701 PetscCall(PetscContainerDestroy(&container)); 6702 PetscFunctionReturn(PETSC_SUCCESS); 6703 } 6704 6705 static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat, const PetscScalar v[], InsertMode imode) 6706 { 6707 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)mat->data; 6708 Mat A = mpiaij->A, B = mpiaij->B; 6709 PetscScalar *Aa, *Ba; 6710 PetscScalar *sendbuf, *recvbuf; 6711 const PetscCount *Ajmap1, *Ajmap2, *Aimap2; 6712 const PetscCount *Bjmap1, *Bjmap2, *Bimap2; 6713 const PetscCount *Aperm1, *Aperm2, *Bperm1, *Bperm2; 6714 const PetscCount *Cperm1; 6715 PetscContainer container; 6716 MatCOOStruct_MPIAIJ *coo; 6717 6718 PetscFunctionBegin; 6719 PetscCall(PetscObjectQuery((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject *)&container)); 6720 PetscCheck(container, PetscObjectComm((PetscObject)mat), PETSC_ERR_PLIB, "Not found MatCOOStruct on this matrix"); 6721 PetscCall(PetscContainerGetPointer(container, &coo)); 6722 sendbuf = coo->sendbuf; 6723 recvbuf = coo->recvbuf; 6724 Ajmap1 = coo->Ajmap1; 6725 Ajmap2 = coo->Ajmap2; 6726 Aimap2 = coo->Aimap2; 6727 Bjmap1 = coo->Bjmap1; 6728 Bjmap2 = coo->Bjmap2; 6729 Bimap2 = coo->Bimap2; 6730 Aperm1 = coo->Aperm1; 6731 Aperm2 = coo->Aperm2; 6732 Bperm1 = coo->Bperm1; 6733 Bperm2 = coo->Bperm2; 6734 Cperm1 = coo->Cperm1; 6735 6736 PetscCall(MatSeqAIJGetArray(A, &Aa)); /* Might read and write matrix values */ 6737 PetscCall(MatSeqAIJGetArray(B, &Ba)); 6738 6739 /* Pack entries to be sent to remote */ 6740 for (PetscCount i = 0; i < coo->sendlen; i++) sendbuf[i] = v[Cperm1[i]]; 6741 6742 /* Send remote entries to their owner and overlap the communication with local computation */ 6743 PetscCall(PetscSFReduceWithMemTypeBegin(coo->sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, sendbuf, 
PETSC_MEMTYPE_HOST, recvbuf, MPI_REPLACE)); 6744 /* Add local entries to A and B */ 6745 for (PetscCount i = 0; i < coo->Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */ 6746 PetscScalar sum = 0.0; /* Do partial summation first to improve numerical stability */ 6747 for (PetscCount k = Ajmap1[i]; k < Ajmap1[i + 1]; k++) sum += v[Aperm1[k]]; 6748 Aa[i] = (imode == INSERT_VALUES ? 0.0 : Aa[i]) + sum; 6749 } 6750 for (PetscCount i = 0; i < coo->Bnnz; i++) { 6751 PetscScalar sum = 0.0; 6752 for (PetscCount k = Bjmap1[i]; k < Bjmap1[i + 1]; k++) sum += v[Bperm1[k]]; 6753 Ba[i] = (imode == INSERT_VALUES ? 0.0 : Ba[i]) + sum; 6754 } 6755 PetscCall(PetscSFReduceEnd(coo->sf, MPIU_SCALAR, sendbuf, recvbuf, MPI_REPLACE)); 6756 6757 /* Add received remote entries to A and B */ 6758 for (PetscCount i = 0; i < coo->Annz2; i++) { 6759 for (PetscCount k = Ajmap2[i]; k < Ajmap2[i + 1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]]; 6760 } 6761 for (PetscCount i = 0; i < coo->Bnnz2; i++) { 6762 for (PetscCount k = Bjmap2[i]; k < Bjmap2[i + 1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]]; 6763 } 6764 PetscCall(MatSeqAIJRestoreArray(A, &Aa)); 6765 PetscCall(MatSeqAIJRestoreArray(B, &Ba)); 6766 PetscFunctionReturn(PETSC_SUCCESS); 6767 } 6768 6769 /*MC 6770 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 6771 6772 Options Database Keys: 6773 . -mat_type mpiaij - sets the matrix type to `MATMPIAIJ` during a call to `MatSetFromOptions()` 6774 6775 Level: beginner 6776 6777 Notes: 6778 `MatSetValues()` may be called for this matrix type with a `NULL` argument for the numerical values, 6779 in this case the values associated with the rows and columns one passes in are set to zero 6780 in the matrix 6781 6782 `MatSetOptions`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. 
In this no 6783 space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored 6784 6785 .seealso: [](ch_matrices), `Mat`, `MATSEQAIJ`, `MATAIJ`, `MatCreateAIJ()` 6786 M*/ 6787 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 6788 { 6789 Mat_MPIAIJ *b; 6790 PetscMPIInt size; 6791 6792 PetscFunctionBegin; 6793 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 6794 6795 PetscCall(PetscNew(&b)); 6796 B->data = (void *)b; 6797 B->ops[0] = MatOps_Values; 6798 B->assembled = PETSC_FALSE; 6799 B->insertmode = NOT_SET_VALUES; 6800 b->size = size; 6801 6802 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank)); 6803 6804 /* build cache for off array entries formed */ 6805 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash)); 6806 6807 b->donotstash = PETSC_FALSE; 6808 b->colmap = NULL; 6809 b->garray = NULL; 6810 b->roworiented = PETSC_TRUE; 6811 6812 /* stuff used for matrix vector multiply */ 6813 b->lvec = NULL; 6814 b->Mvctx = NULL; 6815 6816 /* stuff for MatGetRow() */ 6817 b->rowindices = NULL; 6818 b->rowvalues = NULL; 6819 b->getrowactive = PETSC_FALSE; 6820 6821 /* flexible pointer used in CUSPARSE classes */ 6822 b->spptr = NULL; 6823 6824 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetUseScalableIncreaseOverlap_C", MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ)); 6825 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIAIJ)); 6826 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIAIJ)); 6827 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_MPIAIJ)); 6828 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocation_C", MatMPIAIJSetPreallocation_MPIAIJ)); 6829 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetPreallocation_C", MatResetPreallocation_MPIAIJ)); 6830 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetHash_C", MatResetHash_MPIAIJ)); 6831 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocationCSR_C", MatMPIAIJSetPreallocationCSR_MPIAIJ)); 6832 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIAIJ)); 6833 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijperm_C", MatConvert_MPIAIJ_MPIAIJPERM)); 6834 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijsell_C", MatConvert_MPIAIJ_MPIAIJSELL)); 6835 #if defined(PETSC_HAVE_CUDA) 6836 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcusparse_C", MatConvert_MPIAIJ_MPIAIJCUSPARSE)); 6837 #endif 6838 #if defined(PETSC_HAVE_HIP) 6839 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijhipsparse_C", MatConvert_MPIAIJ_MPIAIJHIPSPARSE)); 6840 #endif 6841 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 6842 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijkokkos_C", MatConvert_MPIAIJ_MPIAIJKokkos)); 6843 #endif 6844 #if defined(PETSC_HAVE_MKL_SPARSE) 6845 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijmkl_C", MatConvert_MPIAIJ_MPIAIJMKL)); 6846 #endif 6847 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcrl_C", MatConvert_MPIAIJ_MPIAIJCRL)); 6848 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpibaij_C", MatConvert_MPIAIJ_MPIBAIJ)); 6849 
PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisbaij_C", MatConvert_MPIAIJ_MPISBAIJ)); 6850 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense)); 6851 #if defined(PETSC_HAVE_ELEMENTAL) 6852 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_elemental_C", MatConvert_MPIAIJ_Elemental)); 6853 #endif 6854 #if defined(PETSC_HAVE_SCALAPACK) && (defined(PETSC_USE_REAL_SINGLE) || defined(PETSC_USE_REAL_DOUBLE)) 6855 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_scalapack_C", MatConvert_AIJ_ScaLAPACK)); 6856 #endif 6857 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_is_C", MatConvert_XAIJ_IS)); 6858 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisell_C", MatConvert_MPIAIJ_MPISELL)); 6859 #if defined(PETSC_HAVE_HYPRE) 6860 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_hypre_C", MatConvert_AIJ_HYPRE)); 6861 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", MatProductSetFromOptions_Transpose_AIJ_AIJ)); 6862 #endif 6863 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_is_mpiaij_C", MatProductSetFromOptions_IS_XAIJ)); 6864 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpiaij_C", MatProductSetFromOptions_MPIAIJ)); 6865 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetPreallocationCOO_C", MatSetPreallocationCOO_MPIAIJ)); 6866 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetValuesCOO_C", MatSetValuesCOO_MPIAIJ)); 6867 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIAIJ)); 6868 PetscFunctionReturn(PETSC_SUCCESS); 6869 } 6870 6871 /*@ 6872 MatCreateMPIAIJWithSplitArrays - creates a `MATMPIAIJ` matrix using arrays that contain the "diagonal" 6873 and "off-diagonal" part of the matrix in CSR format. 6874 6875 Collective 6876 6877 Input Parameters: 6878 + comm - MPI communicator 6879 . m - number of local rows (Cannot be `PETSC_DECIDE`) 6880 . n - This value should be the same as the local size used in creating the 6881 x vector for the matrix-vector product $y = Ax$. (or `PETSC_DECIDE` to have 6882 calculated if `N` is given) For square matrices `n` is almost always `m`. 6883 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given) 6884 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given) 6885 . i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 6886 . j - column indices, which must be local, i.e., based off the start column of the diagonal portion 6887 . a - matrix values 6888 . oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix 6889 . oj - column indices, which must be global, representing global columns in the `MATMPIAIJ` matrix 6890 - oa - matrix values 6891 6892 Output Parameter: 6893 . mat - the matrix 6894 6895 Level: advanced 6896 6897 Notes: 6898 The `i`, `j`, and `a` arrays ARE NOT copied by this routine into the internal format used by PETSc (even in Fortran). The user 6899 must free the arrays once the matrix has been destroyed and not before. 
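   As a small illustration of the split format: for the 2 x 2 global matrix with entries {{2,-1},{-1,2}} distributed over two ranks,
   each owning one row and one column, rank 0 would pass i = {0,1}, j = {0}, a = {2} for its "diagonal" block (local column indices)
   and oi = {0,1}, oj = {1}, oa = {-1} for its "off-diagonal" block (global column indices); rank 1 is analogous with oj = {0}.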
6900 6901 The `i` and `j` indices are 0 based 6902 6903 See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix 6904 6905 This sets local rows and cannot be used to set off-processor values. 6906 6907 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 6908 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 6909 not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because 6910 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 6911 keep track of the underlying array. Use `MatSetOption`(A,`MAT_NO_OFF_PROC_ENTRIES`,`PETSC_TRUE`) to disable all 6912 communication if it is known that only local entries will be set. 6913 6914 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 6915 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()` 6916 @*/ 6917 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt i[], PetscInt j[], PetscScalar a[], PetscInt oi[], PetscInt oj[], PetscScalar oa[], Mat *mat) 6918 { 6919 Mat_MPIAIJ *maij; 6920 6921 PetscFunctionBegin; 6922 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 6923 PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 6924 PetscCheck(oi[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "oi (row indices) must start with 0"); 6925 PetscCall(MatCreate(comm, mat)); 6926 PetscCall(MatSetSizes(*mat, m, n, M, N)); 6927 PetscCall(MatSetType(*mat, MATMPIAIJ)); 6928 maij = (Mat_MPIAIJ *)(*mat)->data; 6929 6930 (*mat)->preallocated = PETSC_TRUE; 6931 6932 PetscCall(PetscLayoutSetUp((*mat)->rmap)); 6933 PetscCall(PetscLayoutSetUp((*mat)->cmap)); 6934 6935 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, i, j, a, &maij->A)); 6936 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, (*mat)->cmap->N, oi, oj, oa, &maij->B)); 6937 6938 PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 6939 PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY)); 6940 PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY)); 6941 PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE)); 6942 PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 6943 PetscFunctionReturn(PETSC_SUCCESS); 6944 } 6945 6946 typedef struct { 6947 Mat *mp; /* intermediate products */ 6948 PetscBool *mptmp; /* is the intermediate product temporary ? */ 6949 PetscInt cp; /* number of intermediate products */ 6950 6951 /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */ 6952 PetscInt *startsj_s, *startsj_r; 6953 PetscScalar *bufa; 6954 Mat P_oth; 6955 6956 /* may take advantage of merging product->B */ 6957 Mat Bloc; /* B-local by merging diag and off-diag */ 6958 6959 /* cusparse does not have support to split between symbolic and numeric phases. 
6960 When api_user is true, we don't need to update the numerical values 6961 of the temporary storage */ 6962 PetscBool reusesym; 6963 6964 /* support for COO values insertion */ 6965 PetscScalar *coo_v, *coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */ 6966 PetscInt **own; /* own[i] points to address of on-process COO indices for Mat mp[i] */ 6967 PetscInt **off; /* off[i] points to address of off-process COO indices for Mat mp[i] */ 6968 PetscBool hasoffproc; /* if true, have off-process values insertion (i.e. AtB or PtAP) */ 6969 PetscSF sf; /* used for non-local values insertion and memory malloc */ 6970 PetscMemType mtype; 6971 6972 /* customization */ 6973 PetscBool abmerge; 6974 PetscBool P_oth_bind; 6975 } MatMatMPIAIJBACKEND; 6976 6977 static PetscErrorCode MatProductCtxDestroy_MatMatMPIAIJBACKEND(void **data) 6978 { 6979 MatMatMPIAIJBACKEND *mmdata = *(MatMatMPIAIJBACKEND **)data; 6980 PetscInt i; 6981 6982 PetscFunctionBegin; 6983 PetscCall(PetscFree2(mmdata->startsj_s, mmdata->startsj_r)); 6984 PetscCall(PetscFree(mmdata->bufa)); 6985 PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_v)); 6986 PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_w)); 6987 PetscCall(MatDestroy(&mmdata->P_oth)); 6988 PetscCall(MatDestroy(&mmdata->Bloc)); 6989 PetscCall(PetscSFDestroy(&mmdata->sf)); 6990 for (i = 0; i < mmdata->cp; i++) PetscCall(MatDestroy(&mmdata->mp[i])); 6991 PetscCall(PetscFree2(mmdata->mp, mmdata->mptmp)); 6992 PetscCall(PetscFree(mmdata->own[0])); 6993 PetscCall(PetscFree(mmdata->own)); 6994 PetscCall(PetscFree(mmdata->off[0])); 6995 PetscCall(PetscFree(mmdata->off)); 6996 PetscCall(PetscFree(mmdata)); 6997 PetscFunctionReturn(PETSC_SUCCESS); 6998 } 6999 7000 /* Copy selected n entries with indices in idx[] of A to v[].
7001 If idx is NULL, copy the whole data array of A to v[] 7002 */ 7003 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[]) 7004 { 7005 PetscErrorCode (*f)(Mat, PetscInt, const PetscInt[], PetscScalar[]); 7006 7007 PetscFunctionBegin; 7008 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJCopySubArray_C", &f)); 7009 if (f) { 7010 PetscCall((*f)(A, n, idx, v)); 7011 } else { 7012 const PetscScalar *vv; 7013 7014 PetscCall(MatSeqAIJGetArrayRead(A, &vv)); 7015 if (n && idx) { 7016 PetscScalar *w = v; 7017 const PetscInt *oi = idx; 7018 PetscInt j; 7019 7020 for (j = 0; j < n; j++) *w++ = vv[*oi++]; 7021 } else { 7022 PetscCall(PetscArraycpy(v, vv, n)); 7023 } 7024 PetscCall(MatSeqAIJRestoreArrayRead(A, &vv)); 7025 } 7026 PetscFunctionReturn(PETSC_SUCCESS); 7027 } 7028 7029 static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C) 7030 { 7031 MatMatMPIAIJBACKEND *mmdata; 7032 PetscInt i, n_d, n_o; 7033 7034 PetscFunctionBegin; 7035 MatCheckProduct(C, 1); 7036 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 7037 mmdata = (MatMatMPIAIJBACKEND *)C->product->data; 7038 if (!mmdata->reusesym) { /* update temporary matrices */ 7039 if (mmdata->P_oth) PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A, C->product->B, MAT_REUSE_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7040 if (mmdata->Bloc) PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B, MAT_REUSE_MATRIX, NULL, &mmdata->Bloc)); 7041 } 7042 mmdata->reusesym = PETSC_FALSE; 7043 7044 for (i = 0; i < mmdata->cp; i++) { 7045 PetscCheck(mmdata->mp[i]->ops->productnumeric, PetscObjectComm((PetscObject)mmdata->mp[i]), PETSC_ERR_PLIB, "Missing numeric op for %s", MatProductTypes[mmdata->mp[i]->product->type]); 7046 PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i])); 7047 } 7048 for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) { 7049 PetscInt noff; 7050 7051 PetscCall(PetscIntCast(mmdata->off[i + 1] - mmdata->off[i], &noff)); 7052 if (mmdata->mptmp[i]) continue; 7053 if (noff) { 7054 PetscInt nown; 7055 7056 PetscCall(PetscIntCast(mmdata->own[i + 1] - mmdata->own[i], &nown)); 7057 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], noff, mmdata->off[i], mmdata->coo_w + n_o)); 7058 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], nown, mmdata->own[i], mmdata->coo_v + n_d)); 7059 n_o += noff; 7060 n_d += nown; 7061 } else { 7062 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mmdata->mp[i]->data; 7063 7064 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], mm->nz, NULL, mmdata->coo_v + n_d)); 7065 n_d += mm->nz; 7066 } 7067 } 7068 if (mmdata->hasoffproc) { /* offprocess insertion */ 7069 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d)); 7070 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d)); 7071 } 7072 PetscCall(MatSetValuesCOO(C, mmdata->coo_v, INSERT_VALUES)); 7073 PetscFunctionReturn(PETSC_SUCCESS); 7074 } 7075 7076 /* Support for Pt * A, A * P, or Pt * A * P */ 7077 #define MAX_NUMBER_INTERMEDIATE 4 7078 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C) 7079 { 7080 Mat_Product *product = C->product; 7081 Mat A, P, mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */ 7082 Mat_MPIAIJ *a, *p; 7083 MatMatMPIAIJBACKEND *mmdata; 7084 ISLocalToGlobalMapping P_oth_l2g = NULL; 7085 IS glob = NULL; 7086 const char *prefix; 7087 char pprefix[256]; 7088 const PetscInt *globidx, *P_oth_idx; 7089 PetscInt i, j, cp, m, 
n, M, N, *coo_i, *coo_j; 7090 PetscCount ncoo, ncoo_d, ncoo_o, ncoo_oown; 7091 PetscInt cmapt[MAX_NUMBER_INTERMEDIATE], rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. */ 7092 /* type-0: consecutive, start from 0; type-1: consecutive with */ 7093 /* a base offset; type-2: sparse with a local to global map table */ 7094 const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE], *rmapa[MAX_NUMBER_INTERMEDIATE]; /* col/row local to global map array (table) for type-2 map type */ 7095 7096 MatProductType ptype; 7097 PetscBool mptmp[MAX_NUMBER_INTERMEDIATE], hasoffproc = PETSC_FALSE, iscuda, iship, iskokk; 7098 PetscMPIInt size; 7099 7100 PetscFunctionBegin; 7101 MatCheckProduct(C, 1); 7102 PetscCheck(!product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 7103 ptype = product->type; 7104 if (product->A->symmetric == PETSC_BOOL3_TRUE && ptype == MATPRODUCT_AtB) { 7105 ptype = MATPRODUCT_AB; 7106 product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE; 7107 } 7108 switch (ptype) { 7109 case MATPRODUCT_AB: 7110 A = product->A; 7111 P = product->B; 7112 m = A->rmap->n; 7113 n = P->cmap->n; 7114 M = A->rmap->N; 7115 N = P->cmap->N; 7116 hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */ 7117 break; 7118 case MATPRODUCT_AtB: 7119 P = product->A; 7120 A = product->B; 7121 m = P->cmap->n; 7122 n = A->cmap->n; 7123 M = P->cmap->N; 7124 N = A->cmap->N; 7125 hasoffproc = PETSC_TRUE; 7126 break; 7127 case MATPRODUCT_PtAP: 7128 A = product->A; 7129 P = product->B; 7130 m = P->cmap->n; 7131 n = P->cmap->n; 7132 M = P->cmap->N; 7133 N = P->cmap->N; 7134 hasoffproc = PETSC_TRUE; 7135 break; 7136 default: 7137 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]); 7138 } 7139 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C), &size)); 7140 if (size == 1) hasoffproc = PETSC_FALSE; 7141 7142 /* defaults */ 7143 for (i = 0; i < MAX_NUMBER_INTERMEDIATE; i++) { 7144 mp[i] = NULL; 7145 mptmp[i] = PETSC_FALSE; 7146 rmapt[i] = -1; 7147 cmapt[i] = -1; 7148 rmapa[i] = NULL; 7149 cmapa[i] = NULL; 7150 } 7151 7152 /* customization */ 7153 PetscCall(PetscNew(&mmdata)); 7154 mmdata->reusesym = product->api_user; 7155 if (ptype == MATPRODUCT_AB) { 7156 if (product->api_user) { 7157 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatMult", "Mat"); 7158 PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL)); 7159 PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7160 PetscOptionsEnd(); 7161 } else { 7162 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat"); 7163 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL)); 7164 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7165 PetscOptionsEnd(); 7166 } 7167 } else if (ptype == MATPRODUCT_PtAP) { 7168 if (product->api_user) { 7169 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatPtAP", "Mat"); 7170 PetscCall(PetscOptionsBool("-matptap_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, 
&mmdata->P_oth_bind, NULL)); 7171 PetscOptionsEnd(); 7172 } else { 7173 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat"); 7174 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7175 PetscOptionsEnd(); 7176 } 7177 } 7178 a = (Mat_MPIAIJ *)A->data; 7179 p = (Mat_MPIAIJ *)P->data; 7180 PetscCall(MatSetSizes(C, m, n, M, N)); 7181 PetscCall(PetscLayoutSetUp(C->rmap)); 7182 PetscCall(PetscLayoutSetUp(C->cmap)); 7183 PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 7184 PetscCall(MatGetOptionsPrefix(C, &prefix)); 7185 7186 cp = 0; 7187 switch (ptype) { 7188 case MATPRODUCT_AB: /* A * P */ 7189 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7190 7191 /* A_diag * P_local (merged or not) */ 7192 if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */ 7193 /* P is product->B */ 7194 PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7195 PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp])); 7196 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7197 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7198 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7199 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7200 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7201 mp[cp]->product->api_user = product->api_user; 7202 PetscCall(MatProductSetFromOptions(mp[cp])); 7203 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7204 PetscCall(ISGetIndices(glob, &globidx)); 7205 rmapt[cp] = 1; 7206 cmapt[cp] = 2; 7207 cmapa[cp] = globidx; 7208 mptmp[cp] = PETSC_FALSE; 7209 cp++; 7210 } else { /* A_diag * P_diag and A_diag * P_off */ 7211 PetscCall(MatProductCreate(a->A, p->A, NULL, &mp[cp])); 7212 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7213 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7214 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7215 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7216 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7217 mp[cp]->product->api_user = product->api_user; 7218 PetscCall(MatProductSetFromOptions(mp[cp])); 7219 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7220 rmapt[cp] = 1; 7221 cmapt[cp] = 1; 7222 mptmp[cp] = PETSC_FALSE; 7223 cp++; 7224 PetscCall(MatProductCreate(a->A, p->B, NULL, &mp[cp])); 7225 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7226 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7227 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7228 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7229 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7230 mp[cp]->product->api_user = product->api_user; 7231 PetscCall(MatProductSetFromOptions(mp[cp])); 7232 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7233 rmapt[cp] = 1; 7234 cmapt[cp] = 2; 7235 cmapa[cp] = p->garray; 7236 mptmp[cp] = PETSC_FALSE; 7237 cp++; 7238 } 7239 7240 /* A_off * P_other */ 7241 if (mmdata->P_oth) { 7242 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); /* make P_oth use local col ids */ 7243 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx)); 7244 PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)a->B)->type_name)); 7245 
PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind)); 7246 PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp])); 7247 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7248 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7249 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7250 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7251 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7252 mp[cp]->product->api_user = product->api_user; 7253 PetscCall(MatProductSetFromOptions(mp[cp])); 7254 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7255 rmapt[cp] = 1; 7256 cmapt[cp] = 2; 7257 cmapa[cp] = P_oth_idx; 7258 mptmp[cp] = PETSC_FALSE; 7259 cp++; 7260 } 7261 break; 7262 7263 case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */ 7264 /* A is product->B */ 7265 PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7266 if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */ 7267 PetscCall(MatProductCreate(mmdata->Bloc, mmdata->Bloc, NULL, &mp[cp])); 7268 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7269 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7270 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7271 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7272 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7273 mp[cp]->product->api_user = product->api_user; 7274 PetscCall(MatProductSetFromOptions(mp[cp])); 7275 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7276 PetscCall(ISGetIndices(glob, &globidx)); 7277 rmapt[cp] = 2; 7278 rmapa[cp] = globidx; 7279 cmapt[cp] = 2; 7280 cmapa[cp] = globidx; 7281 mptmp[cp] = PETSC_FALSE; 7282 cp++; 7283 } else { 7284 PetscCall(MatProductCreate(p->A, mmdata->Bloc, NULL, &mp[cp])); 7285 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7286 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7287 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7288 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7289 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7290 mp[cp]->product->api_user = product->api_user; 7291 PetscCall(MatProductSetFromOptions(mp[cp])); 7292 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7293 PetscCall(ISGetIndices(glob, &globidx)); 7294 rmapt[cp] = 1; 7295 cmapt[cp] = 2; 7296 cmapa[cp] = globidx; 7297 mptmp[cp] = PETSC_FALSE; 7298 cp++; 7299 PetscCall(MatProductCreate(p->B, mmdata->Bloc, NULL, &mp[cp])); 7300 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7301 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7302 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7303 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7304 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7305 mp[cp]->product->api_user = product->api_user; 7306 PetscCall(MatProductSetFromOptions(mp[cp])); 7307 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7308 rmapt[cp] = 2; 7309 rmapa[cp] = p->garray; 7310 cmapt[cp] = 2; 7311 cmapa[cp] = globidx; 7312 mptmp[cp] = PETSC_FALSE; 7313 cp++; 7314 } 7315 break; 7316 case MATPRODUCT_PtAP: 7317 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7318 /* P is product->B */ 7319 PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7320 PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp])); 7321 
PetscCall(MatProductSetType(mp[cp], MATPRODUCT_PtAP)); 7322 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7323 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7324 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7325 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7326 mp[cp]->product->api_user = product->api_user; 7327 PetscCall(MatProductSetFromOptions(mp[cp])); 7328 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7329 PetscCall(ISGetIndices(glob, &globidx)); 7330 rmapt[cp] = 2; 7331 rmapa[cp] = globidx; 7332 cmapt[cp] = 2; 7333 cmapa[cp] = globidx; 7334 mptmp[cp] = PETSC_FALSE; 7335 cp++; 7336 if (mmdata->P_oth) { 7337 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); 7338 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx)); 7339 PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)a->B)->type_name)); 7340 PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind)); 7341 PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp])); 7342 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7343 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7344 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7345 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7346 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7347 mp[cp]->product->api_user = product->api_user; 7348 PetscCall(MatProductSetFromOptions(mp[cp])); 7349 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7350 mptmp[cp] = PETSC_TRUE; 7351 cp++; 7352 PetscCall(MatProductCreate(mmdata->Bloc, mp[1], NULL, &mp[cp])); 7353 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7354 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7355 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7356 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7357 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7358 mp[cp]->product->api_user = product->api_user; 7359 PetscCall(MatProductSetFromOptions(mp[cp])); 7360 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7361 rmapt[cp] = 2; 7362 rmapa[cp] = globidx; 7363 cmapt[cp] = 2; 7364 cmapa[cp] = P_oth_idx; 7365 mptmp[cp] = PETSC_FALSE; 7366 cp++; 7367 } 7368 break; 7369 default: 7370 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]); 7371 } 7372 /* sanity check */ 7373 if (size > 1) 7374 for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected offproc map type for product %" PetscInt_FMT, i); 7375 7376 PetscCall(PetscMalloc2(cp, &mmdata->mp, cp, &mmdata->mptmp)); 7377 for (i = 0; i < cp; i++) { 7378 mmdata->mp[i] = mp[i]; 7379 mmdata->mptmp[i] = mptmp[i]; 7380 } 7381 mmdata->cp = cp; 7382 C->product->data = mmdata; 7383 C->product->destroy = MatProductCtxDestroy_MatMatMPIAIJBACKEND; 7384 C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND; 7385 7386 /* memory type */ 7387 mmdata->mtype = PETSC_MEMTYPE_HOST; 7388 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iscuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, "")); 7389 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iship, MATSEQAIJHIPSPARSE, MATMPIAIJHIPSPARSE, "")); 7390 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iskokk, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, "")); 7391 if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA; 7392 else if (iship) mmdata->mtype = PETSC_MEMTYPE_HIP; 7393 else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS; 7394 7395 /* prepare 
coo coordinates for values insertion */ 7396 7397 /* count total nonzeros of those intermediate seqaij Mats 7398 ncoo_d: # of nonzeros of matrices that do not have offproc entries 7399 ncoo_o: # of nonzeros (of matrices that might have offproc entries) that will be inserted on remote procs 7400 ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally 7401 */ 7402 for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) { 7403 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7404 if (mptmp[cp]) continue; 7405 if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scattered to all processes (might include self) */ 7406 const PetscInt *rmap = rmapa[cp]; 7407 const PetscInt mr = mp[cp]->rmap->n; 7408 const PetscInt rs = C->rmap->rstart; 7409 const PetscInt re = C->rmap->rend; 7410 const PetscInt *ii = mm->i; 7411 for (i = 0; i < mr; i++) { 7412 const PetscInt gr = rmap[i]; 7413 const PetscInt nz = ii[i + 1] - ii[i]; 7414 if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */ 7415 else ncoo_oown += nz; /* this row is local */ 7416 } 7417 } else ncoo_d += mm->nz; 7418 } 7419 7420 /* 7421 ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc 7422 7423 ncoo = ncoo_d + ncoo_oown + ncoo2, where ncoo2 is the number of nonzeros inserted into this proc by other procs. 7424 7425 off[0] points to a big index array, which is shared by off[1,2,...]. Similarly, for own[0]. 7426 7427 off[p]: points to the segment for matrix mp[p], storing locations of nonzeros that mp[p] will insert on other procs 7428 own[p]: points to the segment for matrix mp[p], storing locations of nonzeros that mp[p] will insert locally 7429 so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others. 7430 7431 coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc. 7432 Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaining part stores i of nonzeros this proc will receive.
7433 */ 7434 PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->off)); /* +1 to make a csr-like data structure */ 7435 PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->own)); 7436 7437 /* gather (i,j) of nonzeros inserted by remote procs */ 7438 if (hasoffproc) { 7439 PetscSF msf; 7440 PetscInt ncoo2, *coo_i2, *coo_j2; 7441 7442 PetscCall(PetscMalloc1(ncoo_o, &mmdata->off[0])); 7443 PetscCall(PetscMalloc1(ncoo_oown, &mmdata->own[0])); 7444 PetscCall(PetscMalloc2(ncoo_o, &coo_i, ncoo_o, &coo_j)); /* to collect (i,j) of entries to be sent to others */ 7445 7446 for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) { 7447 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7448 PetscInt *idxoff = mmdata->off[cp]; 7449 PetscInt *idxown = mmdata->own[cp]; 7450 if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */ 7451 const PetscInt *rmap = rmapa[cp]; 7452 const PetscInt *cmap = cmapa[cp]; 7453 const PetscInt *ii = mm->i; 7454 PetscInt *coi = coo_i + ncoo_o; 7455 PetscInt *coj = coo_j + ncoo_o; 7456 const PetscInt mr = mp[cp]->rmap->n; 7457 const PetscInt rs = C->rmap->rstart; 7458 const PetscInt re = C->rmap->rend; 7459 const PetscInt cs = C->cmap->rstart; 7460 for (i = 0; i < mr; i++) { 7461 const PetscInt *jj = mm->j + ii[i]; 7462 const PetscInt gr = rmap[i]; 7463 const PetscInt nz = ii[i + 1] - ii[i]; 7464 if (gr < rs || gr >= re) { /* this is an offproc row */ 7465 for (j = ii[i]; j < ii[i + 1]; j++) { 7466 *coi++ = gr; 7467 *idxoff++ = j; 7468 } 7469 if (!cmapt[cp]) { /* already global */ 7470 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7471 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7472 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7473 } else { /* offdiag */ 7474 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7475 } 7476 ncoo_o += nz; 7477 } else { /* this is a local row */ 7478 for (j = ii[i]; j < ii[i + 1]; j++) *idxown++ = j; 7479 } 7480 } 7481 } 7482 mmdata->off[cp + 1] = idxoff; 7483 mmdata->own[cp + 1] = idxown; 7484 } 7485 7486 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf)); 7487 PetscInt incoo_o; 7488 PetscCall(PetscIntCast(ncoo_o, &incoo_o)); 7489 PetscCall(PetscSFSetGraphLayout(mmdata->sf, C->rmap, incoo_o /*nleaves*/, NULL /*ilocal*/, PETSC_OWN_POINTER, coo_i)); 7490 PetscCall(PetscSFGetMultiSF(mmdata->sf, &msf)); 7491 PetscCall(PetscSFGetGraph(msf, &ncoo2 /*nroots*/, NULL, NULL, NULL)); 7492 ncoo = ncoo_d + ncoo_oown + ncoo2; 7493 PetscCall(PetscMalloc2(ncoo, &coo_i2, ncoo, &coo_j2)); 7494 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */ 7495 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); 7496 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown)); 7497 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown)); 7498 PetscCall(PetscFree2(coo_i, coo_j)); 7499 /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */ 7500 PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo_o * sizeof(PetscScalar), (void **)&mmdata->coo_w)); 7501 coo_i = coo_i2; 7502 coo_j = coo_j2; 7503 } else { /* no offproc values insertion */ 7504 ncoo = ncoo_d; 7505 PetscCall(PetscMalloc2(ncoo, &coo_i, ncoo, &coo_j)); 7506 7507 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf)); 7508 PetscCall(PetscSFSetGraph(mmdata->sf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER)); 7509 PetscCall(PetscSFSetUp(mmdata->sf)); 7510 } 7511 
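  /* at this point coo_i[]/coo_j[] have room for all ncoo entries owned by this process: the (i,j) received from other
     ranks (if any) were gathered into the tail above, and the locally generated (i,j) fill the leading part in the loop below */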
mmdata->hasoffproc = hasoffproc; 7512 7513 /* gather (i,j) of nonzeros inserted locally */ 7514 for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) { 7515 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7516 PetscInt *coi = coo_i + ncoo_d; 7517 PetscInt *coj = coo_j + ncoo_d; 7518 const PetscInt *jj = mm->j; 7519 const PetscInt *ii = mm->i; 7520 const PetscInt *cmap = cmapa[cp]; 7521 const PetscInt *rmap = rmapa[cp]; 7522 const PetscInt mr = mp[cp]->rmap->n; 7523 const PetscInt rs = C->rmap->rstart; 7524 const PetscInt re = C->rmap->rend; 7525 const PetscInt cs = C->cmap->rstart; 7526 7527 if (mptmp[cp]) continue; 7528 if (rmapt[cp] == 1) { /* consecutive rows */ 7529 /* fill coo_i */ 7530 for (i = 0; i < mr; i++) { 7531 const PetscInt gr = i + rs; 7532 for (j = ii[i]; j < ii[i + 1]; j++) coi[j] = gr; 7533 } 7534 /* fill coo_j */ 7535 if (!cmapt[cp]) { /* type-0, already global */ 7536 PetscCall(PetscArraycpy(coj, jj, mm->nz)); 7537 } else if (cmapt[cp] == 1) { /* type-1, local to global for consecutive columns of C */ 7538 for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */ 7539 } else { /* type-2, local to global for sparse columns */ 7540 for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]]; 7541 } 7542 ncoo_d += mm->nz; 7543 } else if (rmapt[cp] == 2) { /* sparse rows */ 7544 for (i = 0; i < mr; i++) { 7545 const PetscInt *jj = mm->j + ii[i]; 7546 const PetscInt gr = rmap[i]; 7547 const PetscInt nz = ii[i + 1] - ii[i]; 7548 if (gr >= rs && gr < re) { /* local rows */ 7549 for (j = ii[i]; j < ii[i + 1]; j++) *coi++ = gr; 7550 if (!cmapt[cp]) { /* type-0, already global */ 7551 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7552 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7553 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7554 } else { /* type-2, local to global for sparse columns */ 7555 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7556 } 7557 ncoo_d += nz; 7558 } 7559 } 7560 } 7561 } 7562 if (glob) PetscCall(ISRestoreIndices(glob, &globidx)); 7563 PetscCall(ISDestroy(&glob)); 7564 if (P_oth_l2g) PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g, &P_oth_idx)); 7565 PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g)); 7566 /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */ 7567 PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo * sizeof(PetscScalar), (void **)&mmdata->coo_v)); 7568 7569 /* set block sizes */ 7570 A = product->A; 7571 P = product->B; 7572 switch (ptype) { 7573 case MATPRODUCT_PtAP: 7574 PetscCall(MatSetBlockSizes(C, P->cmap->bs, P->cmap->bs)); 7575 break; 7576 case MATPRODUCT_RARt: 7577 PetscCall(MatSetBlockSizes(C, P->rmap->bs, P->rmap->bs)); 7578 break; 7579 case MATPRODUCT_ABC: 7580 PetscCall(MatSetBlockSizesFromMats(C, A, product->C)); 7581 break; 7582 case MATPRODUCT_AB: 7583 PetscCall(MatSetBlockSizesFromMats(C, A, P)); 7584 break; 7585 case MATPRODUCT_AtB: 7586 PetscCall(MatSetBlockSizes(C, A->cmap->bs, P->cmap->bs)); 7587 break; 7588 case MATPRODUCT_ABt: 7589 PetscCall(MatSetBlockSizes(C, A->rmap->bs, P->rmap->bs)); 7590 break; 7591 default: 7592 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for ProductType %s", MatProductTypes[ptype]); 7593 } 7594 7595 /* preallocate with COO data */ 7596 PetscCall(MatSetPreallocationCOO(C, ncoo, coo_i, coo_j)); 7597 PetscCall(PetscFree2(coo_i, coo_j)); 7598 PetscFunctionReturn(PETSC_SUCCESS); 7599 } 7600 7601 PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat) 7602 { 7603 Mat_Product *product = 
mat->product; 7604 #if defined(PETSC_HAVE_DEVICE) 7605 PetscBool match = PETSC_FALSE; 7606 PetscBool usecpu = PETSC_FALSE; 7607 #else 7608 PetscBool match = PETSC_TRUE; 7609 #endif 7610 7611 PetscFunctionBegin; 7612 MatCheckProduct(mat, 1); 7613 #if defined(PETSC_HAVE_DEVICE) 7614 if (!product->A->boundtocpu && !product->B->boundtocpu) PetscCall(PetscObjectTypeCompare((PetscObject)product->B, ((PetscObject)product->A)->type_name, &match)); 7615 if (match) { /* we can always fallback to the CPU if requested */ 7616 switch (product->type) { 7617 case MATPRODUCT_AB: 7618 if (product->api_user) { 7619 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatMatMult", "Mat"); 7620 PetscCall(PetscOptionsBool("-matmatmult_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL)); 7621 PetscOptionsEnd(); 7622 } else { 7623 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AB", "Mat"); 7624 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL)); 7625 PetscOptionsEnd(); 7626 } 7627 break; 7628 case MATPRODUCT_AtB: 7629 if (product->api_user) { 7630 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatTransposeMatMult", "Mat"); 7631 PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL)); 7632 PetscOptionsEnd(); 7633 } else { 7634 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AtB", "Mat"); 7635 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL)); 7636 PetscOptionsEnd(); 7637 } 7638 break; 7639 case MATPRODUCT_PtAP: 7640 if (product->api_user) { 7641 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatPtAP", "Mat"); 7642 PetscCall(PetscOptionsBool("-matptap_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL)); 7643 PetscOptionsEnd(); 7644 } else { 7645 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_PtAP", "Mat"); 7646 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL)); 7647 PetscOptionsEnd(); 7648 } 7649 break; 7650 default: 7651 break; 7652 } 7653 match = (PetscBool)!usecpu; 7654 } 7655 #endif 7656 if (match) { 7657 switch (product->type) { 7658 case MATPRODUCT_AB: 7659 case MATPRODUCT_AtB: 7660 case MATPRODUCT_PtAP: 7661 mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND; 7662 break; 7663 default: 7664 break; 7665 } 7666 } 7667 /* fallback to MPIAIJ ops */ 7668 if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat)); 7669 PetscFunctionReturn(PETSC_SUCCESS); 7670 } 7671 7672 /* 7673 Produces a set of block column indices of the matrix row, one for each block represented in the original row 7674 7675 n - the number of block indices in cc[] 7676 cc - the block indices (must be large enough to contain the indices) 7677 */ 7678 static inline PetscErrorCode MatCollapseRow(Mat Amat, PetscInt row, PetscInt bs, PetscInt *n, PetscInt *cc) 7679 { 7680 PetscInt cnt = -1, nidx, j; 7681 const PetscInt *idx; 7682 7683 PetscFunctionBegin; 7684 PetscCall(MatGetRow(Amat, row, &nidx, &idx, NULL)); 7685 if (nidx) { 7686 cnt = 0; 7687 cc[cnt] = idx[0] / bs; 7688 for (j = 1; j < nidx; j++) { 7689 if (cc[cnt] < idx[j] / bs) cc[++cnt] = idx[j] / bs; 7690 } 7691 } 7692 
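  /* e.g. (illustrative values): with bs = 2, a row with column indices {0, 1, 4, 5, 6} collapses to the block indices {0, 2, 3} */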
PetscCall(MatRestoreRow(Amat, row, &nidx, &idx, NULL)); 7693 *n = cnt + 1; 7694 PetscFunctionReturn(PETSC_SUCCESS); 7695 } 7696 7697 /* 7698 Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows 7699 7700 ncollapsed - the number of block indices 7701 collapsed - the block indices (must be large enough to contain the indices) 7702 */ 7703 static inline PetscErrorCode MatCollapseRows(Mat Amat, PetscInt start, PetscInt bs, PetscInt *w0, PetscInt *w1, PetscInt *w2, PetscInt *ncollapsed, PetscInt **collapsed) 7704 { 7705 PetscInt i, nprev, *cprev = w0, ncur = 0, *ccur = w1, *merged = w2, *cprevtmp; 7706 7707 PetscFunctionBegin; 7708 PetscCall(MatCollapseRow(Amat, start, bs, &nprev, cprev)); 7709 for (i = start + 1; i < start + bs; i++) { 7710 PetscCall(MatCollapseRow(Amat, i, bs, &ncur, ccur)); 7711 PetscCall(PetscMergeIntArray(nprev, cprev, ncur, ccur, &nprev, &merged)); 7712 cprevtmp = cprev; 7713 cprev = merged; 7714 merged = cprevtmp; 7715 } 7716 *ncollapsed = nprev; 7717 if (collapsed) *collapsed = cprev; 7718 PetscFunctionReturn(PETSC_SUCCESS); 7719 } 7720 7721 /* 7722 MatCreateGraph_Simple_AIJ - create simple scalar matrix (graph) from potentially blocked matrix 7723 7724 Input Parameter: 7725 . Amat - matrix 7726 - symmetrize - make the result symmetric 7727 + scale - scale with diagonal 7728 7729 Output Parameter: 7730 . a_Gmat - output scalar graph >= 0 7731 7732 */ 7733 PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, PetscReal filter, PetscInt index_size, PetscInt index[], Mat *a_Gmat) 7734 { 7735 PetscInt Istart, Iend, Ii, jj, kk, ncols, nloc, NN, MM, bs; 7736 MPI_Comm comm; 7737 Mat Gmat; 7738 PetscBool ismpiaij, isseqaij; 7739 Mat a, b, c; 7740 MatType jtype; 7741 7742 PetscFunctionBegin; 7743 PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm)); 7744 PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend)); 7745 PetscCall(MatGetSize(Amat, &MM, &NN)); 7746 PetscCall(MatGetBlockSize(Amat, &bs)); 7747 nloc = (Iend - Istart) / bs; 7748 7749 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATSEQAIJ, &isseqaij)); 7750 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATMPIAIJ, &ismpiaij)); 7751 PetscCheck(isseqaij || ismpiaij, comm, PETSC_ERR_USER, "Require (MPI)AIJ matrix type"); 7752 7753 /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */ 7754 /* A solution consists in providing a new API, MatAIJGetCollapsedAIJ, and each class can provide a fast 7755 implementation */ 7756 if (bs > 1) { 7757 PetscCall(MatGetType(Amat, &jtype)); 7758 PetscCall(MatCreate(comm, &Gmat)); 7759 PetscCall(MatSetType(Gmat, jtype)); 7760 PetscCall(MatSetSizes(Gmat, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE)); 7761 PetscCall(MatSetBlockSizes(Gmat, 1, 1)); 7762 if (isseqaij || ((Mat_MPIAIJ *)Amat->data)->garray) { 7763 PetscInt *d_nnz, *o_nnz; 7764 MatScalar *aa, val, *AA; 7765 PetscInt *aj, *ai, *AJ, nc, nmax = 0; 7766 7767 if (isseqaij) { 7768 a = Amat; 7769 b = NULL; 7770 } else { 7771 Mat_MPIAIJ *d = (Mat_MPIAIJ *)Amat->data; 7772 a = d->A; 7773 b = d->B; 7774 } 7775 PetscCall(PetscInfo(Amat, "New bs>1 Graph. nloc=%" PetscInt_FMT "\n", nloc)); 7776 PetscCall(PetscMalloc2(nloc, &d_nnz, (isseqaij ? 0 : nloc), &o_nnz)); 7777 for (c = a, kk = 0; c && kk < 2; c = b, kk++) { 7778 PetscInt *nnz = (c == a) ? 
d_nnz : o_nnz; 7779 const PetscInt *cols1, *cols2; 7780 7781 for (PetscInt brow = 0, nc1, nc2, ok = 1; brow < nloc * bs; brow += bs) { // block rows 7782 PetscCall(MatGetRow(c, brow, &nc2, &cols2, NULL)); 7783 nnz[brow / bs] = nc2 / bs; 7784 if (nc2 % bs) ok = 0; 7785 if (nnz[brow / bs] > nmax) nmax = nnz[brow / bs]; 7786 for (PetscInt ii = 1; ii < bs; ii++) { // check for non-dense blocks 7787 PetscCall(MatGetRow(c, brow + ii, &nc1, &cols1, NULL)); 7788 if (nc1 != nc2) ok = 0; 7789 else { 7790 for (PetscInt jj = 0; jj < nc1 && ok == 1; jj++) { 7791 if (cols1[jj] != cols2[jj]) ok = 0; 7792 if (cols1[jj] % bs != jj % bs) ok = 0; 7793 } 7794 } 7795 PetscCall(MatRestoreRow(c, brow + ii, &nc1, &cols1, NULL)); 7796 } 7797 PetscCall(MatRestoreRow(c, brow, &nc2, &cols2, NULL)); 7798 if (!ok) { 7799 PetscCall(PetscFree2(d_nnz, o_nnz)); 7800 PetscCall(PetscInfo(Amat, "Found sparse blocks - revert to slow method\n")); 7801 goto old_bs; 7802 } 7803 } 7804 } 7805 PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz)); 7806 PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz)); 7807 PetscCall(PetscFree2(d_nnz, o_nnz)); 7808 PetscCall(PetscMalloc2(nmax, &AA, nmax, &AJ)); 7809 // diag 7810 for (PetscInt brow = 0, n, grow; brow < nloc * bs; brow += bs) { // block rows 7811 Mat_SeqAIJ *aseq = (Mat_SeqAIJ *)a->data; 7812 7813 ai = aseq->i; 7814 n = ai[brow + 1] - ai[brow]; 7815 aj = aseq->j + ai[brow]; 7816 for (PetscInt k = 0; k < n; k += bs) { // block columns 7817 AJ[k / bs] = aj[k] / bs + Istart / bs; // diag starts at (Istart,Istart) 7818 val = 0; 7819 if (index_size == 0) { 7820 for (PetscInt ii = 0; ii < bs; ii++) { // rows in block 7821 aa = aseq->a + ai[brow + ii] + k; 7822 for (PetscInt jj = 0; jj < bs; jj++) { // columns in block 7823 val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm 7824 } 7825 } 7826 } else { // use (index,index) value if provided 7827 for (PetscInt iii = 0; iii < index_size; iii++) { // rows in block 7828 PetscInt ii = index[iii]; 7829 aa = aseq->a + ai[brow + ii] + k; 7830 for (PetscInt jjj = 0; jjj < index_size; jjj++) { // columns in block 7831 PetscInt jj = index[jjj]; 7832 val += PetscAbs(PetscRealPart(aa[jj])); 7833 } 7834 } 7835 } 7836 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%" PetscInt_FMT ") >= nmax (%" PetscInt_FMT ")", k / bs, nmax); 7837 AA[k / bs] = val; 7838 } 7839 grow = Istart / bs + brow / bs; 7840 PetscCall(MatSetValues(Gmat, 1, &grow, n / bs, AJ, AA, ADD_VALUES)); 7841 } 7842 // off-diag 7843 if (ismpiaij) { 7844 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Amat->data; 7845 const PetscScalar *vals; 7846 const PetscInt *cols, *garray = aij->garray; 7847 7848 PetscCheck(garray, PETSC_COMM_SELF, PETSC_ERR_USER, "No garray ?"); 7849 for (PetscInt brow = 0, grow; brow < nloc * bs; brow += bs) { // block rows 7850 PetscCall(MatGetRow(b, brow, &ncols, &cols, NULL)); 7851 for (PetscInt k = 0, cidx = 0; k < ncols; k += bs, cidx++) { 7852 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs >= nmax"); 7853 AA[k / bs] = 0; 7854 AJ[cidx] = garray[cols[k]] / bs; 7855 } 7856 nc = ncols / bs; 7857 PetscCall(MatRestoreRow(b, brow, &ncols, &cols, NULL)); 7858 if (index_size == 0) { 7859 for (PetscInt ii = 0; ii < bs; ii++) { // rows in block 7860 PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals)); 7861 for (PetscInt k = 0; k < ncols; k += bs) { 7862 for (PetscInt jj = 0; jj < bs; jj++) { // cols in block 7863 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%" PetscInt_FMT ") >= nmax (%" PetscInt_FMT ")", k / bs, nmax); 7864 AA[k / bs] 
+= PetscAbs(PetscRealPart(vals[k + jj])); 7865 } 7866 } 7867 PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals)); 7868 } 7869 } else { // use (index,index) value if provided 7870 for (PetscInt iii = 0; iii < index_size; iii++) { // rows in block 7871 PetscInt ii = index[iii]; 7872 PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals)); 7873 for (PetscInt k = 0; k < ncols; k += bs) { 7874 for (PetscInt jjj = 0; jjj < index_size; jjj++) { // cols in block 7875 PetscInt jj = index[jjj]; 7876 AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj])); 7877 } 7878 } 7879 PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals)); 7880 } 7881 } 7882 grow = Istart / bs + brow / bs; 7883 PetscCall(MatSetValues(Gmat, 1, &grow, nc, AJ, AA, ADD_VALUES)); 7884 } 7885 } 7886 PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY)); 7887 PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY)); 7888 PetscCall(PetscFree2(AA, AJ)); 7889 } else { 7890 const PetscScalar *vals; 7891 const PetscInt *idx; 7892 PetscInt *d_nnz, *o_nnz, *w0, *w1, *w2; 7893 old_bs: 7894 /* 7895 Determine the preallocation needed for the scalar matrix derived from the vector matrix. 7896 */ 7897 PetscCall(PetscInfo(Amat, "OLD bs>1 CreateGraph\n")); 7898 PetscCall(PetscMalloc2(nloc, &d_nnz, (isseqaij ? 0 : nloc), &o_nnz)); 7899 if (isseqaij) { 7900 PetscInt max_d_nnz; 7901 7902 /* 7903 Determine exact preallocation count for (sequential) scalar matrix 7904 */ 7905 PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat, &max_d_nnz)); 7906 max_d_nnz = PetscMin(nloc, bs * max_d_nnz); 7907 PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2)); 7908 for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) PetscCall(MatCollapseRows(Amat, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL)); 7909 PetscCall(PetscFree3(w0, w1, w2)); 7910 } else if (ismpiaij) { 7911 Mat Daij, Oaij; 7912 const PetscInt *garray; 7913 PetscInt max_d_nnz; 7914 7915 PetscCall(MatMPIAIJGetSeqAIJ(Amat, &Daij, &Oaij, &garray)); 7916 /* 7917 Determine exact preallocation count for diagonal block portion of scalar matrix 7918 */ 7919 PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij, &max_d_nnz)); 7920 max_d_nnz = PetscMin(nloc, bs * max_d_nnz); 7921 PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2)); 7922 for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) PetscCall(MatCollapseRows(Daij, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL)); 7923 PetscCall(PetscFree3(w0, w1, w2)); 7924 /* 7925 Over estimate (usually grossly over), preallocation count for off-diagonal portion of scalar matrix 7926 */ 7927 for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) { 7928 o_nnz[jj] = 0; 7929 for (kk = 0; kk < bs; kk++) { /* rows that get collapsed to a single row */ 7930 PetscCall(MatGetRow(Oaij, Ii + kk, &ncols, NULL, NULL)); 7931 o_nnz[jj] += ncols; 7932 PetscCall(MatRestoreRow(Oaij, Ii + kk, &ncols, NULL, NULL)); 7933 } 7934 if (o_nnz[jj] > (NN / bs - nloc)) o_nnz[jj] = NN / bs - nloc; 7935 } 7936 } else SETERRQ(comm, PETSC_ERR_USER, "Require AIJ matrix type"); 7937 /* get scalar copy (norms) of matrix */ 7938 PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz)); 7939 PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz)); 7940 PetscCall(PetscFree2(d_nnz, o_nnz)); 7941 for (Ii = Istart; Ii < Iend; Ii++) { 7942 PetscInt dest_row = Ii / bs; 7943 7944 PetscCall(MatGetRow(Amat, Ii, &ncols, &idx, &vals)); 7945 for (jj = 0; jj < ncols; jj++) { 7946 PetscInt dest_col = idx[jj] / bs; 7947 PetscScalar sv = PetscAbs(PetscRealPart(vals[jj])); 7948 7949 PetscCall(MatSetValues(Gmat, 1, 
&dest_row, 1, &dest_col, &sv, ADD_VALUES)); 7950 } 7951 PetscCall(MatRestoreRow(Amat, Ii, &ncols, &idx, &vals)); 7952 } 7953 PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY)); 7954 PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY)); 7955 } 7956 } else { 7957 if (symmetrize || filter >= 0 || scale) PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat)); 7958 else { 7959 Gmat = Amat; 7960 PetscCall(PetscObjectReference((PetscObject)Gmat)); 7961 } 7962 if (isseqaij) { 7963 a = Gmat; 7964 b = NULL; 7965 } else { 7966 Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data; 7967 a = d->A; 7968 b = d->B; 7969 } 7970 if (filter >= 0 || scale) { 7971 /* take absolute value of each entry */ 7972 for (c = a, kk = 0; c && kk < 2; c = b, kk++) { 7973 MatInfo info; 7974 PetscScalar *avals; 7975 7976 PetscCall(MatGetInfo(c, MAT_LOCAL, &info)); 7977 PetscCall(MatSeqAIJGetArray(c, &avals)); 7978 for (int jj = 0; jj < info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]); 7979 PetscCall(MatSeqAIJRestoreArray(c, &avals)); 7980 } 7981 } 7982 } 7983 if (symmetrize) { 7984 PetscBool isset, issym; 7985 7986 PetscCall(MatIsSymmetricKnown(Amat, &isset, &issym)); 7987 if (!isset || !issym) { 7988 Mat matTrans; 7989 7990 PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans)); 7991 PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric == PETSC_BOOL3_TRUE ? SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN)); 7992 PetscCall(MatDestroy(&matTrans)); 7993 } 7994 PetscCall(MatSetOption(Gmat, MAT_SYMMETRIC, PETSC_TRUE)); 7995 } else if (Amat != Gmat) PetscCall(MatPropagateSymmetryOptions(Amat, Gmat)); 7996 if (scale) { 7997 /* scale c for all diagonal values = 1 or -1 */ 7998 Vec diag; 7999 8000 PetscCall(MatCreateVecs(Gmat, &diag, NULL)); 8001 PetscCall(MatGetDiagonal(Gmat, diag)); 8002 PetscCall(VecReciprocal(diag)); 8003 PetscCall(VecSqrtAbs(diag)); 8004 PetscCall(MatDiagonalScale(Gmat, diag, diag)); 8005 PetscCall(VecDestroy(&diag)); 8006 } 8007 PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view")); 8008 if (filter >= 0) { 8009 PetscCall(MatFilter(Gmat, filter, PETSC_TRUE, PETSC_TRUE)); 8010 PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_filter_graph_view")); 8011 } 8012 *a_Gmat = Gmat; 8013 PetscFunctionReturn(PETSC_SUCCESS); 8014 } 8015 8016 PETSC_INTERN PetscErrorCode MatGetCurrentMemType_MPIAIJ(Mat A, PetscMemType *memtype) 8017 { 8018 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)A->data; 8019 PetscMemType mD = PETSC_MEMTYPE_HOST, mO = PETSC_MEMTYPE_HOST; 8020 8021 PetscFunctionBegin; 8022 if (mpiaij->A) PetscCall(MatGetCurrentMemType(mpiaij->A, &mD)); 8023 if (mpiaij->B) PetscCall(MatGetCurrentMemType(mpiaij->B, &mO)); 8024 *memtype = (mD == mO) ? mD : PETSC_MEMTYPE_HOST; 8025 PetscFunctionReturn(PETSC_SUCCESS); 8026 } 8027 8028 /* 8029 Special version for direct calls from Fortran 8030 */ 8031 8032 /* Change these macros so can be used in void function */ 8033 /* Identical to PetscCallVoid, except it assigns to *_ierr */ 8034 #undef PetscCall 8035 #define PetscCall(...) \ 8036 do { \ 8037 PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \ 8038 if (PetscUnlikely(ierr_msv_mpiaij)) { \ 8039 *_ierr = PetscError(PETSC_COMM_SELF, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr_msv_mpiaij, PETSC_ERROR_REPEAT, " "); \ 8040 return; \ 8041 } \ 8042 } while (0) 8043 8044 #undef SETERRQ 8045 #define SETERRQ(comm, ierr, ...) 
\ 8046 do { \ 8047 *_ierr = PetscError(comm, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr, PETSC_ERROR_INITIAL, __VA_ARGS__); \ 8048 return; \ 8049 } while (0) 8050 8051 #if defined(PETSC_HAVE_FORTRAN_CAPS) 8052 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 8053 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 8054 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 8055 #else 8056 #endif 8057 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat, PetscInt *mm, const PetscInt im[], PetscInt *mn, const PetscInt in[], const PetscScalar v[], InsertMode *maddv, PetscErrorCode *_ierr) 8058 { 8059 Mat mat = *mmat; 8060 PetscInt m = *mm, n = *mn; 8061 InsertMode addv = *maddv; 8062 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 8063 PetscScalar value; 8064 8065 MatCheckPreallocated(mat, 1); 8066 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 8067 else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values"); 8068 { 8069 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend; 8070 PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col; 8071 PetscBool roworiented = aij->roworiented; 8072 8073 /* Some Variables required in the macro */ 8074 Mat A = aij->A; 8075 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 8076 PetscInt *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j; 8077 MatScalar *aa; 8078 PetscBool ignorezeroentries = ((a->ignorezeroentries && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 8079 Mat B = aij->B; 8080 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 8081 PetscInt *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n; 8082 MatScalar *ba; 8083 /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we 8084 * cannot use "#if defined" inside a macro. 
*/ 8085 PETSC_UNUSED PetscBool inserted = PETSC_FALSE; 8086 8087 PetscInt *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2; 8088 PetscInt nonew = a->nonew; 8089 MatScalar *ap1, *ap2; 8090 8091 PetscFunctionBegin; 8092 PetscCall(MatSeqAIJGetArray(A, &aa)); 8093 PetscCall(MatSeqAIJGetArray(B, &ba)); 8094 for (i = 0; i < m; i++) { 8095 if (im[i] < 0) continue; 8096 PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1); 8097 if (im[i] >= rstart && im[i] < rend) { 8098 row = im[i] - rstart; 8099 lastcol1 = -1; 8100 rp1 = aj + ai[row]; 8101 ap1 = aa + ai[row]; 8102 rmax1 = aimax[row]; 8103 nrow1 = ailen[row]; 8104 low1 = 0; 8105 high1 = nrow1; 8106 lastcol2 = -1; 8107 rp2 = bj + bi[row]; 8108 ap2 = ba + bi[row]; 8109 rmax2 = bimax[row]; 8110 nrow2 = bilen[row]; 8111 low2 = 0; 8112 high2 = nrow2; 8113 8114 for (j = 0; j < n; j++) { 8115 if (roworiented) value = v[i * n + j]; 8116 else value = v[i + j * m]; 8117 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 8118 if (in[j] >= cstart && in[j] < cend) { 8119 col = in[j] - cstart; 8120 MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]); 8121 } else if (in[j] < 0) continue; 8122 else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) { 8123 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1); 8124 } else { 8125 if (mat->was_assembled) { 8126 if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 8127 #if defined(PETSC_USE_CTABLE) 8128 PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); 8129 col--; 8130 #else 8131 col = aij->colmap[in[j]] - 1; 8132 #endif 8133 if (col < 0 && !((Mat_SeqAIJ *)aij->A->data)->nonew) { 8134 PetscCall(MatDisAssemble_MPIAIJ(mat, PETSC_FALSE)); 8135 col = in[j]; 8136 /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 8137 B = aij->B; 8138 b = (Mat_SeqAIJ *)B->data; 8139 bimax = b->imax; 8140 bi = b->i; 8141 bilen = b->ilen; 8142 bj = b->j; 8143 rp2 = bj + bi[row]; 8144 ap2 = ba + bi[row]; 8145 rmax2 = bimax[row]; 8146 nrow2 = bilen[row]; 8147 low2 = 0; 8148 high2 = nrow2; 8149 bm = aij->B->rmap->n; 8150 ba = b->a; 8151 inserted = PETSC_FALSE; 8152 } 8153 } else col = in[j]; 8154 MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]); 8155 } 8156 } 8157 } else if (!aij->donotstash) { 8158 if (roworiented) { 8159 PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 8160 } else { 8161 PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 8162 } 8163 } 8164 } 8165 PetscCall(MatSeqAIJRestoreArray(A, &aa)); 8166 PetscCall(MatSeqAIJRestoreArray(B, &ba)); 8167 } 8168 PetscFunctionReturnVoid(); 8169 } 8170 8171 /* Undefining these here since they were redefined from their original definition above! No 8172 * other PETSc functions should be defined past this point, as it is impossible to recover the 8173 * original definitions */ 8174 #undef PetscCall 8175 #undef SETERRQ 8176