1 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 2 #include <petsc/private/vecimpl.h> 3 #include <petsc/private/sfimpl.h> 4 #include <petsc/private/isimpl.h> 5 #include <petscblaslapack.h> 6 #include <petscsf.h> 7 #include <petsc/private/hashmapi.h> 8 9 /* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and MatAssemblyEnd_MPI_Hash() */ 10 #define TYPE AIJ 11 #define TYPE_AIJ 12 #include "../src/mat/impls/aij/mpi/mpihashmat.h" 13 #undef TYPE 14 #undef TYPE_AIJ 15 16 static PetscErrorCode MatReset_MPIAIJ(Mat mat) 17 { 18 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 19 20 PetscFunctionBegin; 21 PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N)); 22 PetscCall(MatStashDestroy_Private(&mat->stash)); 23 PetscCall(VecDestroy(&aij->diag)); 24 PetscCall(MatDestroy(&aij->A)); 25 PetscCall(MatDestroy(&aij->B)); 26 #if defined(PETSC_USE_CTABLE) 27 PetscCall(PetscHMapIDestroy(&aij->colmap)); 28 #else 29 PetscCall(PetscFree(aij->colmap)); 30 #endif 31 PetscCall(PetscFree(aij->garray)); 32 PetscCall(VecDestroy(&aij->lvec)); 33 PetscCall(VecScatterDestroy(&aij->Mvctx)); 34 PetscCall(PetscFree2(aij->rowvalues, aij->rowindices)); 35 PetscCall(PetscFree(aij->ld)); 36 PetscFunctionReturn(PETSC_SUCCESS); 37 } 38 39 static PetscErrorCode MatResetHash_MPIAIJ(Mat mat) 40 { 41 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 42 /* Save the nonzero states of the component matrices because those are what are used to determine 43 the nonzero state of mat */ 44 PetscObjectState Astate = aij->A->nonzerostate, Bstate = aij->B->nonzerostate; 45 46 PetscFunctionBegin; 47 PetscCall(MatReset_MPIAIJ(mat)); 48 PetscCall(MatSetUp_MPI_Hash(mat)); 49 aij->A->nonzerostate = ++Astate, aij->B->nonzerostate = ++Bstate; 50 PetscFunctionReturn(PETSC_SUCCESS); 51 } 52 53 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 54 { 55 PetscFunctionBegin; 56 PetscCall(MatReset_MPIAIJ(mat)); 57 58 PetscCall(PetscFree(mat->data)); 59 
60 /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */ 61 PetscCall(PetscObjectCompose((PetscObject)mat, "MatMergeSeqsToMPI", NULL)); 62 63 PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL)); 64 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL)); 65 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL)); 66 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatIsTranspose_C", NULL)); 67 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocation_C", NULL)); 68 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetPreallocation_C", NULL)); 69 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetHash_C", NULL)); 70 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocationCSR_C", NULL)); 71 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL)); 72 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpibaij_C", NULL)); 73 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisbaij_C", NULL)); 74 #if defined(PETSC_HAVE_CUDA) 75 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcusparse_C", NULL)); 76 #endif 77 #if defined(PETSC_HAVE_HIP) 78 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijhipsparse_C", NULL)); 79 #endif 80 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 81 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijkokkos_C", NULL)); 82 #endif 83 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL)); 84 #if defined(PETSC_HAVE_ELEMENTAL) 85 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_elemental_C", NULL)); 86 #endif 87 #if defined(PETSC_HAVE_SCALAPACK) && (defined(PETSC_USE_REAL_SINGLE) || defined(PETSC_USE_REAL_DOUBLE)) 88 PetscCall(PetscObjectComposeFunction((PetscObject)mat, 
"MatConvert_mpiaij_scalapack_C", NULL)); 89 #endif 90 #if defined(PETSC_HAVE_HYPRE) 91 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_hypre_C", NULL)); 92 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", NULL)); 93 #endif 94 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL)); 95 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_is_mpiaij_C", NULL)); 96 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpiaij_C", NULL)); 97 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetUseScalableIncreaseOverlap_C", NULL)); 98 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijperm_C", NULL)); 99 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijsell_C", NULL)); 100 #if defined(PETSC_HAVE_MKL_SPARSE) 101 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijmkl_C", NULL)); 102 #endif 103 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcrl_C", NULL)); 104 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL)); 105 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisell_C", NULL)); 106 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetPreallocationCOO_C", NULL)); 107 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetValuesCOO_C", NULL)); 108 PetscFunctionReturn(PETSC_SUCCESS); 109 } 110 111 static PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) 112 { 113 Mat B; 114 115 PetscFunctionBegin; 116 PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &B)); 117 PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject)B)); 118 
PetscCall(MatGetRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done)); 119 PetscCall(MatDestroy(&B)); 120 PetscFunctionReturn(PETSC_SUCCESS); 121 } 122 123 static PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) 124 { 125 Mat B; 126 127 PetscFunctionBegin; 128 PetscCall(PetscObjectQuery((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject *)&B)); 129 PetscCall(MatRestoreRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done)); 130 PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", NULL)); 131 PetscFunctionReturn(PETSC_SUCCESS); 132 } 133 134 /*MC 135 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 136 137 This matrix type is identical to` MATSEQAIJ` when constructed with a single process communicator, 138 and `MATMPIAIJ` otherwise. As a result, for single process communicators, 139 `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported 140 for communicators controlling multiple processes. It is recommended that you call both of 141 the above preallocation routines for simplicity. 142 143 Options Database Key: 144 . -mat_type aij - sets the matrix type to `MATAIJ` during a call to `MatSetFromOptions()` 145 146 Developer Note: 147 Level: beginner 148 149 Subclasses include `MATAIJCUSPARSE`, `MATAIJPERM`, `MATAIJSELL`, `MATAIJMKL`, `MATAIJCRL`, `MATAIJKOKKOS`,and also automatically switches over to use inodes when 150 enough exist. 151 152 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateAIJ()`, `MatCreateSeqAIJ()`, `MATSEQAIJ`, `MATMPIAIJ` 153 M*/ 154 155 /*MC 156 MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices. 157 158 This matrix type is identical to `MATSEQAIJCRL` when constructed with a single process communicator, 159 and `MATMPIAIJCRL` otherwise. 
As a result, for single process communicators,
   `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Key:
. -mat_type aijcrl - sets the matrix type to `MATMPIAIJCRL` during a call to `MatSetFromOptions()`

   Level: beginner

.seealso: [](ch_matrices), `Mat`, `MatCreateMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL`
M*/

/* Binds/unbinds the component matrices (and their work vectors) of an MPIAIJ matrix to the CPU */
static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A, PetscBool flg)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) || defined(PETSC_HAVE_VIENNACL)
  A->boundtocpu = flg;
#endif
  if (a->A) PetscCall(MatBindToCPU(a->A, flg));
  if (a->B) PetscCall(MatBindToCPU(a->B, flg));

  /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
   * This maybe seems a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
   * to differ from the parent matrix.
   */
  if (a->lvec) PetscCall(VecBindToCPU(a->lvec, flg));
  if (a->diag) PetscCall(VecBindToCPU(a->diag, flg));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Propagates block sizes to the component matrices; B always gets column block size 1 since its columns are the scattered ghost columns */
static PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data;

  PetscFunctionBegin;
  if (mat->A) {
    PetscCall(MatSetBlockSizes(mat->A, rbs, cbs));
    PetscCall(MatSetBlockSizes(mat->B, rbs, 1));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Creates an IS of the global indices of all local rows that contain at least one
  stored nonzero value (in either the diagonal or off-diagonal part).
  Returns *keptrows = NULL on every rank when no rank has an empty/zero row.
*/
static PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M, IS *keptrows)
{
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ *)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ *)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ *)mat->B->data;
  const PetscInt  *ia, *ib;
  const MatScalar *aa, *bb, *aav, *bav;
  PetscInt         na, nb, i, j, *rows, cnt = 0, n0rows;
  PetscInt         m = M->rmap->n, rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = NULL;

  ia = a->i;
  ib = b->i;
  PetscCall(MatSeqAIJGetArrayRead(mat->A, &aav));
  PetscCall(MatSeqAIJGetArrayRead(mat->B, &bav));
  /* first pass: count the local rows whose stored entries are all (structurally or numerically) zero */
  for (i = 0; i < m; i++) {
    na = ia[i + 1] - ia[i];
    nb = ib[i + 1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = aav + ia[i];
    for (j = 0; j < na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = PetscSafePointerPlusOffset(bav, ib[i]);
    for (j = 0; j < nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
  ok1:;
  }
  /* n0rows = total number of zero rows across all ranks; if none exist there is nothing to report */
  PetscCallMPI(MPIU_Allreduce(&cnt, &n0rows, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)M)));
  if (!n0rows) {
    PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
    PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
    PetscFunctionReturn(PETSC_SUCCESS);
  }
  PetscCall(PetscMalloc1(M->rmap->n - cnt, &rows));
  cnt = 0;
  /* second pass: collect the global indices of the rows that do have a nonzero */
  for (i = 0; i < m; i++) {
    na = ia[i + 1] - ia[i];
    nb = ib[i + 1] - ib[i];
    if (!na && !nb) continue;
    aa = aav + ia[i];
    for (j = 0; j < na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = PetscSafePointerPlusOffset(bav, ib[i]);
    for (j = 0; j < nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
  ok2:;
  }
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), cnt, rows, PETSC_OWN_POINTER, keptrows));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Sets the diagonal; when the layouts are congruent and the matrix is assembled only the diagonal block needs to be touched */
static PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y, Vec D, InsertMode is)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Y->data;
  PetscBool   cong;

  PetscFunctionBegin;
  PetscCall(MatHasCongruentLayouts(Y, &cong));
  if (Y->assembled && cong) {
    PetscCall(MatDiagonalSet(aij->A, D, is));
  } else {
    PetscCall(MatDiagonalSet_Default(Y, D, is));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Returns an IS of the global indices of local rows with a missing or zero diagonal entry.
   The diagonal entries live in aij->A because the row and column ownership ranges coincide there. */
static PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M, IS *zrows)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)M->data;
  PetscInt    i, rstart, nrows, *rows;

  PetscFunctionBegin;
  *zrows = NULL;
  PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A, &nrows, &rows));
  PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
  /* translate local row numbers to global */
  for (i = 0; i < nrows; i++) rows[i] += rstart;
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), nrows, rows, PETSC_OWN_POINTER, zrows));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Computes a per-column reduction (1/2/inf norm, sum or mean of real/imaginary parts)
  over all rows of the parallel matrix; reductions[] must have length = global number of columns.
  Local contributions are accumulated into work[] indexed by global column, then combined with an Allreduce.
*/
static PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A, PetscInt type, PetscReal *reductions)
{
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)A->data;
  PetscInt           i, m, n, *garray = aij->garray;
  Mat_SeqAIJ        *a_aij = (Mat_SeqAIJ *)aij->A->data;
  Mat_SeqAIJ        *b_aij = (Mat_SeqAIJ *)aij->B->data;
  PetscReal         *work;
  const PetscScalar *dummy;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A, &m, &n));
  PetscCall(PetscCalloc1(n, &work));
  /* get/restore pairs with discarded result: presumably forces up-to-date host values before
     a_aij->a/b_aij->a are read directly below -- NOTE(review): confirm this is the intent */
  PetscCall(MatSeqAIJGetArrayRead(aij->A, &dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &dummy));
  PetscCall(MatSeqAIJGetArrayRead(aij->B, &dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &dummy));
  if (type == NORM_2) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i] * a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i] * b_aij->a[i]);
  } else if (type == NORM_1) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]);
  } else if (type == NORM_INFINITY) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]), work[garray[b_aij->j[i]]]);
  } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]);
  } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]);
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]);
  } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
  /* combine per-rank contributions: max for the inf-norm, sum for everything else */
  if (type == NORM_INFINITY) {
    PetscCallMPI(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
  } else {
    PetscCallMPI(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscCall(PetscFree(work));
  if (type == NORM_2) {
    for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < n; i++) reductions[i] /= m;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Returns an IS of the global indices of local rows that have an entry outside the
  diagonal block: either off the block diagonal of A, or any entry in B.
*/
static PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A, IS *is)
{
  Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
  IS              sis, gis;
  const PetscInt *isis, *igis;
  PetscInt        n, *iis, nsis, ngis, rstart, i;

  PetscFunctionBegin;
  PetscCall(MatFindOffBlockDiagonalEntries(a->A, &sis));
  PetscCall(MatFindNonzeroRows(a->B, &gis));
  PetscCall(ISGetSize(gis, &ngis));
  PetscCall(ISGetSize(sis, &nsis));
  PetscCall(ISGetIndices(sis, &isis));
  PetscCall(ISGetIndices(gis, &igis));

  /* merge the two (local-index) lists, remove duplicates, then shift to global indices */
  PetscCall(PetscMalloc1(ngis + nsis, &iis));
  PetscCall(PetscArraycpy(iis, igis, ngis));
  PetscCall(PetscArraycpy(iis + ngis, isis, nsis));
  n = ngis + nsis;
  PetscCall(PetscSortRemoveDupsInt(&n, iis));
  PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
  for (i = 0; i < n; i++) iis[i] += rstart;
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A), n, iis, PETSC_OWN_POINTER, is));

  PetscCall(ISRestoreIndices(sis, &isis));
  PetscCall(ISRestoreIndices(gis, &igis));
  PetscCall(ISDestroy(&sis));
  PetscCall(ISDestroy(&gis));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it it is not scalable (each process
  has an order N integer array) but is fast to access.
*/
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt    n = aij->B->cmap->n, i;

  PetscFunctionBegin;
  PetscCheck(!n || aij->garray, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  /* store global+1 -> local+1 so that 0 (the hash-map "missing" value) is never a valid entry */
  PetscCall(PetscHMapICreateWithSize(n, &aij->colmap));
  for (i = 0; i < n; i++) PetscCall(PetscHMapISet(aij->colmap, aij->garray[i] + 1, i + 1));
#else
  /* dense array of length global-number-of-columns; 0 means "column not present in B" */
  PetscCall(PetscCalloc1(mat->cmap->N + 1, &aij->colmap));
  for (i = 0; i < n; i++) aij->colmap[aij->garray[i]] = i + 1;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Inserts/adds one value into the diagonal block A inside MatSetValues_MPIAIJ().
  Relies on many live local variables of the caller (rp1, ap1, low1/high1, nonew, ...).
  Does a biased binary search followed by a linear scan; if the entry is absent and
  insertion is allowed, reallocates the row (MatSeqXAIJReallocateAIJ) and shifts
  the tail of the row up by one slot.
*/
#define MatSetValues_SeqAIJ_A_Private(row, col, value, addv, orow, ocol) \
  do { \
    if (col <= lastcol1) low1 = 0; \
    else high1 = nrow1; \
    lastcol1 = col; \
    while (high1 - low1 > 5) { \
      t = (low1 + high1) / 2; \
      if (rp1[t] > col) high1 = t; \
      else low1 = t; \
    } \
    for (_i = low1; _i < high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap1[_i] += value; \
          /* Not sure LogFlops will slow down the code or not */ \
          (void)PetscLogFlops(1.0); \
        } else ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries && row != col) { \
      low1  = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    if (nonew == 1) { \
      low1  = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A, am, 1, nrow1, row, col, rmax1, aa, ai, aj, rp1, ap1, aimax, nonew, MatScalar); \
    N = nrow1++ - 1; \
    a->nz++; \
    high1++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp1 + _i + 1, rp1 + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap1 + _i + 1, ap1 + _i, N - _i + 1)); \
    rp1[_i] = col; \
    ap1[_i] = value; \
  a_noinsert:; \
    ailen[row] = nrow1; \
  } while (0)

/* Same as MatSetValues_SeqAIJ_A_Private() but for the off-diagonal block B (rp2/ap2/low2/high2 etc.) */
#define MatSetValues_SeqAIJ_B_Private(row, col, value, addv, orow, ocol) \
  do { \
    if (col <= lastcol2) low2 = 0; \
    else high2 = nrow2; \
    lastcol2 = col; \
    while (high2 - low2 > 5) { \
      t = (low2 + high2) / 2; \
      if (rp2[t] > col) high2 = t; \
      else low2 = t; \
    } \
    for (_i = low2; _i < high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap2[_i] += value; \
          (void)PetscLogFlops(1.0); \
        } else ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) { \
      low2  = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    if (nonew == 1) { \
      low2  = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B, bm, 1, nrow2, row, col, rmax2, ba, bi, bj, rp2, ap2, bimax, nonew, MatScalar); \
    N = nrow2++ - 1; \
    b->nz++; \
    high2++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp2 + _i + 1, rp2 + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap2 + _i + 1, ap2 + _i, N - _i + 1)); \
    rp2[_i] = col; \
    ap2[_i] = value; \
  b_noinsert:; \
    bilen[row] = nrow2; \
  } while (0)

/*
  Overwrites one whole (locally owned) row with the values in v[], which must be ordered
  by global column: entries left of the diagonal block, then the diagonal block, then the rest.
*/
static PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A, PetscInt row, const PetscScalar v[])
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ  *a = (Mat_SeqAIJ *)mat->A->data, *b = (Mat_SeqAIJ *)mat->B->data;
  PetscInt     l, *garray = mat->garray, diag;
  PetscScalar *aa, *ba;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  PetscCall(MatGetOwnershipRange(A, &diag, NULL));
  row = row - diag; /* convert to local row index */
  for (l = 0; l < b->i[row + 1] - b->i[row]; l++) {
    if (garray[b->j[b->i[row] + l]] > diag) break;
  }
  if (l) {
    PetscCall(MatSeqAIJGetArray(mat->B, &ba));
    PetscCall(PetscArraycpy(ba + b->i[row], v, l));
    PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
  }

  /* diagonal part */
  if (a->i[row + 1] - a->i[row]) {
    PetscCall(MatSeqAIJGetArray(mat->A, &aa));
    PetscCall(PetscArraycpy(aa + a->i[row], v + l, a->i[row + 1] - a->i[row]));
    PetscCall(MatSeqAIJRestoreArray(mat->A, &aa));
  }

  /* right of diagonal part */
  if (b->i[row + 1] - b->i[row] - l) {
    PetscCall(MatSeqAIJGetArray(mat->B, &ba));
    PetscCall(PetscArraycpy(ba + b->i[row] + l, v + l + a->i[row + 1] - a->i[row], b->i[row + 1] - b->i[row] - l));
    PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatSetValues for MPIAIJ: locally owned rows are inserted directly into the diagonal
  block A or the off-diagonal block B (via the _Private macros above); rows owned by
  other processes are buffered in the stash and communicated at assembly time.
*/
PetscErrorCode MatSetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIAIJ *aij   = (Mat_MPIAIJ *)mat->data;
  PetscScalar value = 0.0;
  PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
  PetscBool   roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat         A = aij->A;
  Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
  PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
  PetscBool   ignorezeroentries = a->ignorezeroentries;
  Mat         B                 = aij->B;
  Mat_SeqAIJ *b                 = (Mat_SeqAIJ *)B->data;
  PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
  MatScalar  *aa, *ba;
  PetscInt   *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
  PetscInt    nonew;
  MatScalar  *ap1, *ap2;

  PetscFunctionBegin;
  PetscCall(MatSeqAIJGetArray(A, &aa));
  PetscCall(MatSeqAIJGetArray(B, &ba));
  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue; /* negative rows are ignored by convention */
    PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    if (im[i] >= rstart && im[i] < rend) {
      /* locally owned row: set up the per-row search state used by the insertion macros */
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = PetscSafePointerPlusOffset(aj, ai[row]);
      ap1      = PetscSafePointerPlusOffset(aa, ai[row]);
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = PetscSafePointerPlusOffset(bj, bi[row]);
      ap2      = PetscSafePointerPlusOffset(ba, bi[row]);
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j = 0; j < n; j++) {
        if (v) value = roworiented ? v[i * n + j] : v[i + j * m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
        if (in[j] >= cstart && in[j] < cend) {
          /* column in the diagonal block */
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
        } else if (in[j] < 0) {
          continue;
        } else {
          PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          if (mat->was_assembled) {
            if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); /* map global col ids to local ones */
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ *)aij->B->data)->nonew) { /* col < 0 means in[j] is a new col for B */
              PetscCall(MatDisAssemble_MPIAIJ(mat, PETSC_FALSE));  /* Change aij->B from reduced/local format to expanded/global format */
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ *)B->data;
              bimax = b->imax;
              bi    = b->i;
              bilen = b->ilen;
              bj    = b->j;
              ba    = b->a;
              rp2   = PetscSafePointerPlusOffset(bj, bi[row]);
              ap2   = PetscSafePointerPlusOffset(ba, bi[row]);
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
              PetscCheck(1 == ((Mat_SeqAIJ *)aij->B->data)->nonew, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
              PetscCall(PetscInfo(mat, "Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n", (double)PetscRealPart(value), im[i], in[j]));
            }
          } else col = in[j];
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
        }
      }
    } else {
      /* off-process row: stash it for communication during assembly */
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i * n), (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i), m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
  }
  PetscCall(MatSeqAIJRestoreArray(A, &aa)); /* aa, bb might have been free'd due to reallocation above. But we don't access them here */
  PetscCall(MatSeqAIJRestoreArray(B, &ba));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
  The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
  No off-processor parts off the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  Mat         A   = aij->A; /* diagonal part of the matrix */
  Mat         B   = aij->B; /* off-diagonal part of the matrix */
  Mat_SeqAIJ *a   = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJ *b   = (Mat_SeqAIJ *)B->data;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, col;
  PetscInt   *ailen = a->ilen, *aj = a->j;
  PetscInt   *bilen = b->ilen, *bj = b->j;
  PetscInt    am          = aij->A->rmap->n, j;
  PetscInt    diag_so_far = 0, dnz;
  PetscInt    offd_so_far = 0, onz;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j = 0; j < am; j++) {
    dnz = onz = 0;
    /* Iterate over all non-zero columns of the current row */
    for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[diag_so_far++] = mat_j[col] - cstart; /* diagonal block stores local column indices */
        dnz++;
      } else { /* off-diagonal entries */
        bj[offd_so_far++] = mat_j[col]; /* still global here; compacted later during assembly */
        onz++;
      }
    }
    ailen[j] = dnz;
    bilen[j] = onz;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
  The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
No off-processor parts of the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ.
  Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
  would not be true and the more complex MatSetValues_MPIAIJ has to be used.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[], const PetscScalar mat_a[])
{
  Mat_MPIAIJ *aij  = (Mat_MPIAIJ *)mat->data;
  Mat         A    = aij->A; /* diagonal part of the matrix */
  Mat         B    = aij->B; /* off-diagonal part of the matrix */
  Mat_SeqAIJ *aijd = (Mat_SeqAIJ *)aij->A->data, *aijo = (Mat_SeqAIJ *)aij->B->data;
  Mat_SeqAIJ *a    = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJ *b    = (Mat_SeqAIJ *)B->data;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend;
  PetscInt   *ailen = a->ilen, *aj = a->j;
  PetscInt   *bilen = b->ilen, *bj = b->j;
  PetscInt    am = aij->A->rmap->n, j;
  PetscInt   *full_diag_i = aijd->i, *full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
  PetscInt    col, dnz_row, onz_row, rowstart_diag, rowstart_offd;
  PetscScalar *aa = a->a, *ba = b->a;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j = 0; j < am; j++) {
    dnz_row = onz_row = 0;
    rowstart_offd     = full_offd_i[j];
    rowstart_diag     = full_diag_i[j];
    /* Iterate over all non-zero columns of the current row */
    for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[rowstart_diag + dnz_row] = mat_j[col] - cstart;
        aa[rowstart_diag + dnz_row] = mat_a[col];
        dnz_row++;
      } else { /* off-diagonal entries */
        bj[rowstart_offd + onz_row] = mat_j[col];
        ba[rowstart_offd + onz_row] = mat_a[col];
        onz_row++;
      }
    }
    ailen[j] = dnz_row;
    bilen[j] = onz_row;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Retrieves entries of locally owned rows. Off-process rows are not supported;
  requesting a column that is not stored in B yields 0.0.
*/
static PetscErrorCode MatGetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
    PetscCheck(idxm[i] >= rstart && idxm[i] < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported, row requested %" PetscInt_FMT " range [%" PetscInt_FMT " %" PetscInt_FMT ")", idxm[i], rstart, rend);
    row = idxm[i] - rstart;
    for (j = 0; j < n; j++) {
      if (idxn[j] < 0) continue; /* negative column */
      PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
      if (idxn[j] >= cstart && idxn[j] < cend) {
        col = idxn[j] - cstart;
        PetscCall(MatGetValues(aij->A, 1, &row, 1, &col, v + i * n + j));
      } else {
        if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
        PetscCall(PetscHMapIGetWithDefault(aij->colmap, idxn[j] + 1, 0, &col));
        col--;
#else
        col = aij->colmap[idxn[j]] - 1;
#endif
        /* column not stored in B (or stale colmap entry) -> value is structurally zero */
        if ((col < 0) || (aij->garray[col] != idxn[j])) *(v + i * n + j) = 0.0;
        else PetscCall(MatGetValues(aij->B, 1, &row, 1, &col, v + i * n + j));
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Starts communication of the stashed off-process entries (no-op when stashing is disabled) */
static PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt    nstash, reallocs;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
  PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
  PetscCall(PetscInfo(mat, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Completes assembly: drains the stash into local blocks, assembles A and B, and
   handles global disassembly agreement for B's column compression */
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *)mat->data;
  PetscMPIInt  n;
  PetscInt     i, j, rstart, ncols, flg;
  PetscInt    *row, *col;
  PetscBool    all_assembled;
  PetscScalar *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
  /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
  if (mat->boundtocpu) {
    PetscCall(MatBindToCPU(aij->A, PETSC_TRUE));
    PetscCall(MatBindToCPU(aij->B, PETSC_TRUE));
  }
#endif
  PetscCall(MatAssemblyBegin(aij->A, mode));
  PetscCall(MatAssemblyEnd(aij->A, mode));

  /* determine if any process has disassembled, if so we must
     also disassemble ourself, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no process disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ *)aij->B->data)->nonew) {
    PetscCallMPI(MPIU_Allreduce(&mat->was_assembled, &all_assembled, 1, MPI_C_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !all_assembled) { /* mat on this rank has reduced off-diag B with local col ids, but globally it does not */
      PetscCall(MatDisAssemble_MPIAIJ(mat, PETSC_FALSE));
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIAIJ(mat));
  PetscCall(MatSetOption(aij->B, MAT_USE_INODES, PETSC_FALSE));
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscCall(MatAssemblyBegin(aij->B, mode));
  PetscCall(MatAssemblyEnd(aij->B, mode));

  PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));

  aij->rowvalues
= NULL;

  PetscCall(VecDestroy(&aij->diag));

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ *)aij->A->data)->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
  }
#if defined(PETSC_HAVE_DEVICE)
  mat->offloadmask = PETSC_OFFLOAD_BOTH;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatZeroEntries_MPIAIJ - zero all stored values by zeroing both sequential blocks. */
static PetscErrorCode MatZeroEntries_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatZeroEntries(l->A));
  PetscCall(MatZeroEntries(l->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatZeroRows_MPIAIJ - zero the given (global) rows, optionally placing diag on the diagonal
   and fixing the right-hand side b so that x keeps its values in those rows. */
static PetscErrorCode MatZeroRows_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data;
  PetscInt   *lrows;
  PetscInt    r, len;
  PetscBool   cong;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows));
  PetscCall(MatHasCongruentLayouts(A, &cong));
  /* fix right-hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArray(b, &bb));
    /* b_i = diag * x_i on each zeroed local row */
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArray(b, &bb));
  }

  if (diag != 0.0 && cong) {
    /* congruent layouts: the diagonal entry lives in the A block */
    PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we
allow for new insertion */
    Mat_SeqAIJ *aijA = (Mat_SeqAIJ *)mat->A->data;
    Mat_SeqAIJ *aijB = (Mat_SeqAIJ *)mat->B->data;
    PetscInt    nnwA, nnwB;
    PetscBool   nnzA, nnzB;

    /* save nonew so it can be restored after forcing insertions below */
    nnwA = aijA->nonew;
    nnwB = aijB->nonew;
    nnzA = aijA->keepnonzeropattern;
    nnzB = aijB->keepnonzeropattern;
    if (!nnzA) {
      PetscCall(PetscInfo(mat->A, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n"));
      aijA->nonew = 0;
    }
    if (!nnzB) {
      PetscCall(PetscInfo(mat->B, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n"));
      aijB->nonew = 0;
    }
    /* Must zero here before the next loop */
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      /* rectangular case: rows past the last column have no diagonal entry */
      if (row >= A->cmap->N) continue;
      PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
    }
    aijA->nonew = nnwA;
    aijB->nonew = nnwB;
  } else {
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  }
  PetscCall(PetscFree(lrows));
  PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ *)mat->A->data)->keepnonzeropattern || !((Mat_SeqAIJ *)mat->A->data)->nonew) {
    PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate;
    PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatZeroRowsColumns_MPIAIJ - zero the given rows AND the matching columns, placing diag
   on the diagonal and (optionally) updating b from x for the eliminated columns. */
static PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIAIJ *l =
(Mat_MPIAIJ *)A->data;
  PetscInt           n = A->rmap->n;
  PetscInt           i, j, r, m, len = 0;
  PetscInt          *lrows, *owners = A->rmap->range;
  PetscMPIInt        p = 0;
  PetscSFNode       *rrows;
  PetscSF            sf;
  const PetscScalar *xx;
  PetscScalar       *bb, *mask, *aij_a;
  Vec                xmask, lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ *)l->B->data;
  const PetscInt    *aj, *ii, *ridx;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  PetscCall(PetscMalloc1(n, &lrows));
  for (r = 0; r < n; ++r) lrows[r] = -1;
  PetscCall(PetscMalloc1(N, &rrows));
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N);
    if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */
      PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p));
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
  /* Collect flags for rows to be zeroed */
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFDestroy(&sf));
  /* Compress and put in row numbers: lrows[0..len) become the local row indices to zero */
  for (r = 0; r < n; ++r)
    if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b));
  /* handle off-diagonal part of matrix: build a 0/1 mask over ghost columns via a scatter */
  PetscCall(MatCreateVecs(A, &xmask, NULL));
  PetscCall(VecDuplicate(l->lvec, &lmask));
  PetscCall(VecGetArray(xmask, &bb));
  for (i = 0; i < len; i++) bb[lrows[i]] = 1;
  PetscCall(VecRestoreArray(xmask, &bb));
  PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecDestroy(&xmask));
  if (x && b) { /* this code is buggy when the row and column layout don't match */
    PetscBool cong;

    PetscCall(MatHasCongruentLayouts(A, &cong));
    PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
    /* bring ghost values of x local so eliminated columns can be folded into b */
    PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecGetArrayRead(l->lvec, &xx));
    PetscCall(VecGetArray(b, &bb));
  }
  PetscCall(VecGetArray(lmask, &mask));
  /* remove zeroed rows of off-diagonal matrix */
  PetscCall(MatSeqAIJGetArray(l->B, &aij_a));
  ii = aij->i;
  for (i = 0; i < len; i++) PetscCall(PetscArrayzero(PetscSafePointerPlusOffset(aij_a, ii[lrows[i]]), ii[lrows[i] + 1] - ii[lrows[i]]));
  /* loop over all elements of off process part of matrix zeroing removed columns*/
  if (aij->compressedrow.use) {
    m    = aij->compressedrow.nrows;
    ii   = aij->compressedrow.i;
    ridx = aij->compressedrow.rindex;
    for (i = 0; i < m; i++) {
      n  = ii[i + 1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];

      for (j = 0; j < n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          /* column is being eliminated: move its contribution to the rhs, then zero it */
          if (b) bb[*ridx] -= *aa * xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
      ridx++;
    }
  } else { /* do not use compressed row format */
    m = l->B->rmap->n;
    for (i = 0; i < m; i++) {
      n  = ii[i + 1] - ii[i];
      aj = aij->j + ii[i];
      aa = aij_a + ii[i];
      for (j = 0; j < n; j++) {
        if (PetscAbsScalar(mask[*aj])) {
          if (b) bb[i] -= *aa * xx[*aj];
          *aa = 0.0;
        }
        aa++;
        aj++;
      }
    }
  }
  if (x && b) {
    PetscCall(VecRestoreArray(b, &bb));
    PetscCall(VecRestoreArrayRead(l->lvec, &xx));
  }
  PetscCall(MatSeqAIJRestoreArray(l->B, &aij_a));
  PetscCall(VecRestoreArray(lmask, &mask));
  PetscCall(VecDestroy(&lmask));
  PetscCall(PetscFree(lrows));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqAIJ *)l->A->data)->nonew) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatMult_MPIAIJ - yy = A*xx; overlaps the ghost-value scatter with the diagonal-block multiply. */
static PetscErrorCode MatMult_MPIAIJ(Mat A, Vec xx, Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  PetscInt    nt;
  VecScatter  Mvctx = a->Mvctx;

  PetscFunctionBegin;
  PetscCall(VecGetLocalSize(xx, &nt));
  PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")", A->cmap->n, nt);
  PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscUseTypeMethod(a->A, mult, xx, yy);
  PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscUseTypeMethod(a->B, multadd, a->lvec, yy, yy);
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatMultDiagonalBlock_MPIAIJ - delegate to the local diagonal block. */
static PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A, Vec bb, Vec xx)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatMultDiagonalBlock(a->A, bb, xx));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatMultAdd_MPIAIJ - zz = yy + A*xx, with the same scatter/compute overlap as MatMult. */
static PetscErrorCode MatMultAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIAIJ *a     = (Mat_MPIAIJ *)A->data;
  VecScatter  Mvctx = a->Mvctx;

  PetscFunctionBegin;
  PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
  PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatMultTranspose_MPIAIJ - yy = A^T*xx; local transpose products then a reverse scatter-add. */
static PetscErrorCode MatMultTranspose_MPIAIJ(Mat A, Vec xx, Vec yy)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  /* do nondiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
  /* do local part */
  PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
  /* add partial results together */
  PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
  PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatIsTranspose_MPIAIJ - set *f to PETSC_TRUE if Bmat equals Amat^T to within tol.
   Cheap local test on the diagonal blocks first; only if that passes is the expensive
   off-diagonal comparison (via MatCreateSubMatrices) performed. */
static PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat, Mat Bmat, PetscReal tol, PetscBool *f)
{
  MPI_Comm    comm;
  Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)Amat->data, *Bij = (Mat_MPIAIJ *)Bmat->data;
  Mat         Adia = Aij->A, Bdia = Bij->A, Aoff, Boff, *Aoffs, *Boffs;
  IS          Me, Notme;
  PetscInt    M, N, first, last, *notme, i;
  PetscBool   lf;
  PetscMPIInt size;

  PetscFunctionBegin;
  /* Easy test: symmetric diagonal block */
  PetscCall(MatIsTranspose(Adia, Bdia, tol, &lf));
  PetscCallMPI(MPIU_Allreduce(&lf, f, 1, MPI_C_BOOL, MPI_LAND, PetscObjectComm((PetscObject)Amat)));
  if (!*f) PetscFunctionReturn(PETSC_SUCCESS);
  PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (size == 1) PetscFunctionReturn(PETSC_SUCCESS);

  /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix.
*/
  PetscCall(MatGetSize(Amat, &M, &N));
  PetscCall(MatGetOwnershipRange(Amat, &first, &last));
  /* notme = all global rows NOT owned by this rank */
  PetscCall(PetscMalloc1(N - last + first, &notme));
  for (i = 0; i < first; i++) notme[i] = i;
  for (i = last; i < M; i++) notme[i - last + first] = i;
  PetscCall(ISCreateGeneral(MPI_COMM_SELF, N - last + first, notme, PETSC_COPY_VALUES, &Notme));
  PetscCall(ISCreateStride(MPI_COMM_SELF, last - first, first, 1, &Me));
  PetscCall(MatCreateSubMatrices(Amat, 1, &Me, &Notme, MAT_INITIAL_MATRIX, &Aoffs));
  Aoff = Aoffs[0];
  PetscCall(MatCreateSubMatrices(Bmat, 1, &Notme, &Me, MAT_INITIAL_MATRIX, &Boffs));
  Boff = Boffs[0];
  PetscCall(MatIsTranspose(Aoff, Boff, tol, f));
  PetscCall(MatDestroyMatrices(1, &Aoffs));
  PetscCall(MatDestroyMatrices(1, &Boffs));
  PetscCall(ISDestroy(&Me));
  PetscCall(ISDestroy(&Notme));
  PetscCall(PetscFree(notme));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatMultTransposeAdd_MPIAIJ - zz = yy + A^T*xx, same structure as MatMultTranspose. */
static PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  /* do nondiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
  /* do local part */
  PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
  /* add partial results together */
  PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
  PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
static PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A, Vec v)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCheck(A->rmap->N == A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
  PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "row partition must equal col partition");
  PetscCall(MatGetDiagonal(a->A, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatScale_MPIAIJ - scale both sequential blocks by aa. */
static PetscErrorCode MatScale_MPIAIJ(Mat A, PetscScalar aa)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatScale(a->A, aa));
  PetscCall(MatScale(a->B, aa));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatView_MPIAIJ_Binary - write the parallel matrix in PETSc's binary format:
   header, per-row lengths, global column indices, then values, with each row's
   entries emitted in ascending global column order (B-left, A, B-right). */
static PetscErrorCode MatView_MPIAIJ_Binary(Mat mat, PetscViewer viewer)
{
  Mat_MPIAIJ        *aij    = (Mat_MPIAIJ *)mat->data;
  Mat_SeqAIJ        *A      = (Mat_SeqAIJ *)aij->A->data;
  Mat_SeqAIJ        *B      = (Mat_SeqAIJ *)aij->B->data;
  const PetscInt    *garray = aij->garray;
  const PetscScalar *aa, *ba;
  PetscInt           header[4], M, N, m, rs, cs, cnt, i, ja, jb;
  PetscInt64         nz, hnz;
  PetscInt          *rowlens;
  PetscInt          *colidxs;
  PetscScalar       *matvals;
  PetscMPIInt        rank;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  M  = mat->rmap->N;
  N  = mat->cmap->N;
  m  = mat->rmap->n;
  rs = mat->rmap->rstart;
  cs = mat->cmap->rstart;
  nz = A->nz + B->nz;

  /* write matrix header */
  header[0] = MAT_FILE_CLASSID;
  header[1] = M;
  header[2] = N;
  PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_INT64, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
  /* only rank 0 writes the header, so only it needs the global count */
  if (rank == 0) PetscCall(PetscIntCast(hnz, &header[3]));
  PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));

  /* fill in and store row lengths */
  PetscCall(PetscMalloc1(m, &rowlens));
  for (i = 0; i < m; i++) rowlens[i] = A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i];
  PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
  PetscCall(PetscFree(rowlens));

  /* fill in and store column
indices */ 1224 PetscCall(PetscMalloc1(nz, &colidxs)); 1225 for (cnt = 0, i = 0; i < m; i++) { 1226 for (jb = B->i[i]; jb < B->i[i + 1]; jb++) { 1227 if (garray[B->j[jb]] > cs) break; 1228 colidxs[cnt++] = garray[B->j[jb]]; 1229 } 1230 for (ja = A->i[i]; ja < A->i[i + 1]; ja++) colidxs[cnt++] = A->j[ja] + cs; 1231 for (; jb < B->i[i + 1]; jb++) colidxs[cnt++] = garray[B->j[jb]]; 1232 } 1233 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz); 1234 PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 1235 PetscCall(PetscFree(colidxs)); 1236 1237 /* fill in and store nonzero values */ 1238 PetscCall(MatSeqAIJGetArrayRead(aij->A, &aa)); 1239 PetscCall(MatSeqAIJGetArrayRead(aij->B, &ba)); 1240 PetscCall(PetscMalloc1(nz, &matvals)); 1241 for (cnt = 0, i = 0; i < m; i++) { 1242 for (jb = B->i[i]; jb < B->i[i + 1]; jb++) { 1243 if (garray[B->j[jb]] > cs) break; 1244 matvals[cnt++] = ba[jb]; 1245 } 1246 for (ja = A->i[i]; ja < A->i[i + 1]; ja++) matvals[cnt++] = aa[ja]; 1247 for (; jb < B->i[i + 1]; jb++) matvals[cnt++] = ba[jb]; 1248 } 1249 PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &aa)); 1250 PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &ba)); 1251 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz); 1252 PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 1253 PetscCall(PetscFree(matvals)); 1254 1255 /* write block size option to the viewer's .info file */ 1256 PetscCall(MatView_Binary_BlockSizes(mat, viewer)); 1257 PetscFunctionReturn(PETSC_SUCCESS); 1258 } 1259 1260 #include <petscdraw.h> 1261 static PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer) 1262 { 1263 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1264 PetscMPIInt rank = aij->rank, size = aij->size; 1265 PetscBool 
isdraw, isascii, isbinary; 1266 PetscViewer sviewer; 1267 PetscViewerFormat format; 1268 1269 PetscFunctionBegin; 1270 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1271 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii)); 1272 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1273 if (isascii) { 1274 PetscCall(PetscViewerGetFormat(viewer, &format)); 1275 if (format == PETSC_VIEWER_LOAD_BALANCE) { 1276 PetscInt i, nmax = 0, nmin = PETSC_INT_MAX, navg = 0, *nz, nzlocal = ((Mat_SeqAIJ *)aij->A->data)->nz + ((Mat_SeqAIJ *)aij->B->data)->nz; 1277 PetscCall(PetscMalloc1(size, &nz)); 1278 PetscCallMPI(MPI_Allgather(&nzlocal, 1, MPIU_INT, nz, 1, MPIU_INT, PetscObjectComm((PetscObject)mat))); 1279 for (i = 0; i < size; i++) { 1280 nmax = PetscMax(nmax, nz[i]); 1281 nmin = PetscMin(nmin, nz[i]); 1282 navg += nz[i]; 1283 } 1284 PetscCall(PetscFree(nz)); 1285 navg = navg / size; 1286 PetscCall(PetscViewerASCIIPrintf(viewer, "Load Balance - Nonzeros: Min %" PetscInt_FMT " avg %" PetscInt_FMT " max %" PetscInt_FMT "\n", nmin, navg, nmax)); 1287 PetscFunctionReturn(PETSC_SUCCESS); 1288 } 1289 PetscCall(PetscViewerGetFormat(viewer, &format)); 1290 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1291 MatInfo info; 1292 PetscInt *inodes = NULL; 1293 1294 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 1295 PetscCall(MatGetInfo(mat, MAT_LOCAL, &info)); 1296 PetscCall(MatInodeGetInodeSizes(aij->A, NULL, &inodes, NULL)); 1297 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 1298 if (!inodes) { 1299 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, 1300 info.memory)); 1301 } else { 1302 PetscCall( 1303 PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows 
%" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, info.memory)); 1304 } 1305 PetscCall(MatGetInfo(aij->A, MAT_LOCAL, &info)); 1306 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used)); 1307 PetscCall(MatGetInfo(aij->B, MAT_LOCAL, &info)); 1308 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used)); 1309 PetscCall(PetscViewerFlush(viewer)); 1310 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 1311 PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n")); 1312 PetscCall(VecScatterView(aij->Mvctx, viewer)); 1313 PetscFunctionReturn(PETSC_SUCCESS); 1314 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1315 PetscInt inodecount, inodelimit, *inodes; 1316 PetscCall(MatInodeGetInodeSizes(aij->A, &inodecount, &inodes, &inodelimit)); 1317 if (inodes) { 1318 PetscCall(PetscViewerASCIIPrintf(viewer, "using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n", inodecount, inodelimit)); 1319 } else { 1320 PetscCall(PetscViewerASCIIPrintf(viewer, "not using I-node (on process 0) routines\n")); 1321 } 1322 PetscFunctionReturn(PETSC_SUCCESS); 1323 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1324 PetscFunctionReturn(PETSC_SUCCESS); 1325 } 1326 } else if (isbinary) { 1327 if (size == 1) { 1328 PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name)); 1329 PetscCall(MatView(aij->A, viewer)); 1330 } else { 1331 PetscCall(MatView_MPIAIJ_Binary(mat, viewer)); 1332 } 1333 PetscFunctionReturn(PETSC_SUCCESS); 1334 } else if (isascii && size == 1) { 1335 PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name)); 1336 PetscCall(MatView(aij->A, viewer)); 1337 
PetscFunctionReturn(PETSC_SUCCESS); 1338 } else if (isdraw) { 1339 PetscDraw draw; 1340 PetscBool isnull; 1341 PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); 1342 PetscCall(PetscDrawIsNull(draw, &isnull)); 1343 if (isnull) PetscFunctionReturn(PETSC_SUCCESS); 1344 } 1345 1346 { /* assemble the entire matrix onto first processor */ 1347 Mat A = NULL, Av; 1348 IS isrow, iscol; 1349 1350 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->rmap->N : 0, 0, 1, &isrow)); 1351 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->cmap->N : 0, 0, 1, &iscol)); 1352 PetscCall(MatCreateSubMatrix(mat, isrow, iscol, MAT_INITIAL_MATRIX, &A)); 1353 PetscCall(MatMPIAIJGetSeqAIJ(A, &Av, NULL, NULL)); 1354 /* The commented code uses MatCreateSubMatrices instead */ 1355 /* 1356 Mat *AA, A = NULL, Av; 1357 IS isrow,iscol; 1358 1359 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow)); 1360 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? 
mat->cmap->N : 0,0,1,&iscol));
      PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA));
      if (rank == 0) {
        PetscCall(PetscObjectReference((PetscObject)AA[0]));
        A  = AA[0];
        Av = AA[0];
      }
      PetscCall(MatDestroySubMatrices(1,&AA));
    */
    PetscCall(ISDestroy(&iscol));
    PetscCall(ISDestroy(&isrow));
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    if (rank == 0) {
      if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)Av, ((PetscObject)mat)->name));
      PetscCall(MatView_SeqAIJ(Av, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatView_MPIAIJ - top-level view dispatcher; supported viewer types are handled by
   MatView_MPIAIJ_ASCIIorDraworSocket, all others are silently ignored. */
PetscErrorCode MatView_MPIAIJ(Mat mat, PetscViewer viewer)
{
  PetscBool isascii, isdraw, issocket, isbinary;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
  if (isascii || isdraw || isbinary || issocket) PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat, viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatSOR_MPIAIJ - block Jacobi with local SOR sweeps: each outer iteration scatters the
   current ghost values, folds the off-diagonal contribution into the rhs (bb1 = bb - B*x),
   then runs the requested local sweep on the diagonal block.  Also implements the
   Eisenstat trick variant.  Only the "local" sweep flags are supported in parallel. */
static PetscErrorCode MatSOR_MPIAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
{
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data;
  Vec         bb1 = NULL;
  PetscBool   hasop;

  PetscFunctionBegin;
  if (flag == SOR_APPLY_UPPER) {
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  /* bb1 is needed whenever some iteration uses a nonzero initial guess */
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) PetscCall(VecDuplicate(bb, &bb1));

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }

    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_EISENSTAT) {
    Vec xx1;

    PetscCall(VecDuplicate(bb, &xx1));
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx));

    PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
    /* cache the diagonal; reused on subsequent calls */
    if (!mat->diag) {
      PetscCall(MatCreateVecs(matin, &mat->diag, NULL));
      PetscCall(MatGetDiagonal(matin, mat->diag));
    }
    PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop));
    if (hasop) {
      PetscCall(MatMultDiagonalBlock(matin, xx, bb1));
    } else {
      PetscCall(VecPointwiseMult(bb1, mat->diag, xx));
    }
    PetscCall(VecAYPX(bb1, (omega - 2.0) / omega, bb));

    PetscCall(MatMultAdd(mat->B, mat->lvec, bb1, bb1));

    /* local sweep */
    PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1));
    PetscCall(VecAXPY(xx, 1.0, xx1));
    PetscCall(VecDestroy(&xx1));
  } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel SOR not supported");

  PetscCall(VecDestroy(&bb1));

  matin->factorerrortype = mat->A->factorerrortype;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatPermute_MPIAIJ - *B = P*A*Q for row permutation rowp and column permutation colp;
   inverts the permutations with star forests, counts the permuted diagonal/off-diagonal
   nonzeros per row, preallocates, and inserts the permuted entries. */
static PetscErrorCode MatPermute_MPIAIJ(Mat A, IS rowp, IS colp, Mat *B)
{
  Mat             aA, aB, Aperm;
  const PetscInt *rwant, *cwant, *gcols, *ai, *bi, *aj, *bj;
  PetscScalar    *aa, *ba;
  PetscInt        i, j, m, n, ng, anz, bnz, *dnnz, *onnz, *tdnnz, *tonnz, *rdest, *cdest, *work, *gcdest;
  PetscSF         rowsf, sf;
  IS              parcolp = NULL;
  PetscBool       done;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(A, &m, &n));
  PetscCall(ISGetIndices(rowp, &rwant));
  PetscCall(ISGetIndices(colp, &cwant));
  PetscCall(PetscMalloc3(PetscMax(m, n), &work, m, &rdest, n, &cdest));

  /* Invert row permutation to find out where my rows should go */
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &rowsf));
  PetscCall(PetscSFSetGraphLayout(rowsf, A->rmap, A->rmap->n, NULL, PETSC_OWN_POINTER, rwant));
  PetscCall(PetscSFSetFromOptions(rowsf));
  for (i = 0; i < m; i++) work[i] = A->rmap->rstart + i;
  PetscCall(PetscSFReduceBegin(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));

  /* Invert column permutation to find out where my columns should go */
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraphLayout(sf, A->cmap, A->cmap->n, NULL, PETSC_OWN_POINTER, cwant));
  PetscCall(PetscSFSetFromOptions(sf));
  for (i = 0; i < n; i++) work[i] = A->cmap->rstart + i;
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, work, cdest, MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, work, cdest, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));

  PetscCall(ISRestoreIndices(rowp, &rwant));
  PetscCall(ISRestoreIndices(colp, &cwant));
  PetscCall(MatMPIAIJGetSeqAIJ(A, &aA, &aB, &gcols));

  /* Find out where my gcols should go */
  PetscCall(MatGetSize(aB, NULL, &ng));
  PetscCall(PetscMalloc1(ng, &gcdest));
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraphLayout(sf, A->cmap, ng, NULL, PETSC_OWN_POINTER, gcols));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFBcastBegin(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));

  /* count permuted diagonal (dnnz) and off-diagonal (onnz) nonzeros per local row */
  PetscCall(PetscCalloc4(m, &dnnz, m, &onnz, m, &tdnnz, m, &tonnz));
  PetscCall(MatGetRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
  PetscCall(MatGetRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
  for (i = 0; i < m; i++) {
    PetscInt    row = rdest[i];
    PetscMPIInt rowner;
    PetscCall(PetscLayoutFindOwner(A->rmap, row, &rowner));
    for (j = ai[i]; j < ai[i + 1]; j++) {
      PetscInt    col = cdest[aj[j]];
      PetscMPIInt cowner;
      PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); /* Could build an index for the columns to eliminate this search */
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
    for (j = bi[i]; j < bi[i + 1]; j++) {
      PetscInt    col = gcdest[bj[j]];
      PetscMPIInt cowner;
      PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner));
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
  }
  /* ship the counts to the ranks that own the permuted rows */
  PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&rowsf));

  PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A), A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N, 0, tdnnz, 0, tonnz, &Aperm));
  PetscCall(MatSeqAIJGetArray(aA, &aa));
  PetscCall(MatSeqAIJGetArray(aB, &ba));
  for (i = 0; i < m; i++) {
    PetscInt *acols = dnnz, *bcols = onnz; /* Repurpose now-unneeded arrays */
    PetscInt  j0, rowlen;
    rowlen = ai[i + 1] - ai[i];
    for (j0 = j = 0; j < rowlen; j0 = j) { /* rowlen could be larger than number of rows m, so sum in batches */
      for (; j < PetscMin(rowlen, j0 + m); j++) acols[j - j0] = cdest[aj[ai[i] + j]];
      PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, acols, aa + ai[i] + j0, INSERT_VALUES));
    }
    rowlen = bi[i + 1] - bi[i];
    for (j0 = j = 0; j < rowlen; j0 = j) {
      for (; j < PetscMin(rowlen, j0 + m); j++) bcols[j - j0] = gcdest[bj[bi[i] + j]];
      PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, bcols, ba + bi[i] + j0, INSERT_VALUES));
    }
  }
  PetscCall(MatAssemblyBegin(Aperm, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(Aperm, MAT_FINAL_ASSEMBLY));
  PetscCall(MatRestoreRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
  PetscCall(MatRestoreRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
  PetscCall(MatSeqAIJRestoreArray(aA, &aa));
  PetscCall(MatSeqAIJRestoreArray(aB, &ba));
  PetscCall(PetscFree4(dnnz, onnz, tdnnz, tonnz));
  PetscCall(PetscFree3(work, rdest, cdest));
  PetscCall(PetscFree(gcdest));
  if (parcolp) PetscCall(ISDestroy(&colp));
  *B = Aperm;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatGetGhosts_MPIAIJ - return the number of ghost columns (columns of B) and, optionally,
   their global indices (garray). */
static PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[])
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatGetSize(aij->B, NULL, nghosts));
  if (ghosts) *ghosts = aij->garray;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatGetInfo_MPIAIJ - accumulate local info from the A and B blocks into isend[]
   (used below for the global reductions). */
static PetscErrorCode MatGetInfo_MPIAIJ(Mat matin, MatInfoType flag, MatInfo *info)
{
  Mat_MPIAIJ    *mat = (Mat_MPIAIJ *)matin->data;
  Mat            A = mat->A, B = mat->B;
  PetscLogDouble isend[5], irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;
  PetscCall(MatGetInfo(A, MAT_LOCAL, info));

  isend[0] = info->nz_used;
  isend[1] = info->nz_allocated;
  isend[2] = info->nz_unneeded;
  isend[3] = info->memory;
  isend[4] = info->mallocs;

  PetscCall(MatGetInfo(B, MAT_LOCAL, info));

  isend[0] += info->nz_used;
  isend[1] += info->nz_allocated;
  isend[2] += info->nz_unneeded;
  isend[3] += info->memory;
  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Forward most options to both sequential blocks; stash- and symmetry-related
   options are recorded on the parallel matrix itself. */
PetscErrorCode MatSetOption_MPIAIJ(Mat A, MatOption op, PetscBool flg)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
  case MAT_FORM_EXPLICIT_TRANSPOSE:
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A, 1);
    a->roworiented = flg;

    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  /* Symmetry flags are handled directly by MatSetOption() and they
     don't affect preallocation */
  case MAT_SPD:
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
  case MAT_SPD_ETERNAL:
    /* if the diagonal matrix is square it inherits some of the properties above */
    if (a->A && A->rmap->n == A->cmap->n) PetscCall(MatSetOption(a->A, op, flg));
    break;
  case MAT_SUBMAT_SINGLEIS:
    A->submat_singleis = flg;
    break;
  default:
    break;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Return one locally-owned row of the parallel matrix, merging the diagonal (A) and
   off-diagonal (B) parts into a single list sorted by global column number.
   Assumes A and B rows are individually sorted; off-diagonal columns split into the
   part left of the diagonal block (imark entries) and the part right of it. */
PetscErrorCode MatGetRow_MPIAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)matin->data;
  PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
  PetscInt     i, *cworkA, *cworkB, **pcA, **pcB, cstart = matin->cmap->rstart;
  PetscInt     nztot, nzA, nzB, lrow, rstart = matin->rmap->rstart, rend = matin->rmap->rend;
  PetscInt    *cmap, *idx_p;

  PetscFunctionBegin;
  PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ *)mat->A->data, *Ba = (Mat_SeqAIJ *)mat->B->data;
    PetscInt    max = 1, tmp;
    for (i = 0; i < matin->rmap->n; i++) {
      tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    PetscCall(PetscMalloc2(max, &mat->rowvalues, max, &mat->rowindices));
  }

  PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows");
  lrow = row - rstart;

  pvA = &vworkA;
  pcA = &cworkA;
  pvB = &vworkB;
  pcB = &cworkB;
  /* only request the pieces the caller asked for */
  if (!v) {
    pvA = NULL;
    pvB = NULL;
  }
  if (!idx) {
    pcA = NULL;
    if (!v) pcB = NULL;
  }
  PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        /* copy B entries whose global column precedes the diagonal block */
        for (i = 0; i < nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
        for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i]];
        } else {
          /* v was not requested, so imark must be recomputed from the columns */
          for (i = 0; i < nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart + cworkA[i];
        for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = NULL;
      if (v) *v = NULL;
    }
  }
  *nz = nztot;
  PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Companion to MatGetRow_MPIAIJ: just clears the "row active" flag (the work
   buffers are owned by the matrix and freed at destroy time). */
PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCheck(aij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first");
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Parallel matrix norms: Frobenius via local sums + allreduce, 1-norm via per-column
   absolute sums assembled into a parallel vector, infinity-norm via per-row sums. */
static PetscErrorCode MatNorm_MPIAIJ(Mat mat, NormType type, PetscReal *norm)
{
  Mat_MPIAIJ      *aij = (Mat_MPIAIJ *)mat->data;
  Mat_SeqAIJ      *amat = (Mat_SeqAIJ *)aij->A->data, *bmat = (Mat_SeqAIJ *)aij->B->data;
  PetscInt         i, j;
  PetscReal        sum = 0.0;
  const MatScalar *v, *amata, *bmata;

  PetscFunctionBegin;
  if (aij->size == 1) {
    PetscCall(MatNorm(aij->A, type, norm));
  } else {
    PetscCall(MatSeqAIJGetArrayRead(aij->A, &amata));
    PetscCall(MatSeqAIJGetArrayRead(aij->B, &bmata));
    if (type == NORM_FROBENIUS) {
      v = amata;
      for (i = 0; i < amat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      v = bmata;
      for (i = 0; i < bmat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      PetscCallMPI(MPIU_Allreduce(&sum, norm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      *norm = PetscSqrtReal(*norm);
      PetscCall(PetscLogFlops(2.0 * amat->nz + 2.0 * bmat->nz));
    } else if (type == NORM_1) { /* max column norm */
      Vec          col, bcol;
      PetscScalar *array;
      PetscInt    *jj, *garray = aij->garray;

      PetscCall(MatCreateVecs(mat, &col, NULL));
      PetscCall(VecSet(col, 0.0));
      PetscCall(VecGetArrayWrite(col, &array));
      v  = amata;
      jj = amat->j;
      for (j = 0; j < amat->nz; j++) array[*jj++] += PetscAbsScalar(*v++);
      PetscCall(VecRestoreArrayWrite(col, &array));
      PetscCall(MatCreateVecs(aij->B, &bcol, NULL));
PetscCall(VecSet(bcol, 0.0));
      PetscCall(VecGetArrayWrite(bcol, &array));
      v  = bmata;
      jj = bmat->j;
      for (j = 0; j < bmat->nz; j++) array[*jj++] += PetscAbsScalar(*v++);
      /* scatter the off-diagonal column sums onto the owning processes via garray */
      PetscCall(VecSetValues(col, aij->B->cmap->n, garray, array, ADD_VALUES));
      PetscCall(VecRestoreArrayWrite(bcol, &array));
      PetscCall(VecDestroy(&bcol));
      PetscCall(VecAssemblyBegin(col));
      PetscCall(VecAssemblyEnd(col));
      PetscCall(VecNorm(col, NORM_INFINITY, norm));
      PetscCall(VecDestroy(&col));
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      for (j = 0; j < aij->A->rmap->n; j++) {
        v   = PetscSafePointerPlusOffset(amata, amat->i[j]);
        sum = 0.0;
        for (i = 0; i < amat->i[j + 1] - amat->i[j]; i++) {
          sum += PetscAbsScalar(*v);
          v++;
        }
        v = PetscSafePointerPlusOffset(bmata, bmat->i[j]);
        for (i = 0; i < bmat->i[j + 1] - bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v);
          v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      PetscCallMPI(MPIU_Allreduce(&ntemp, norm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat)));
      PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
    } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for two norm");
    PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &amata));
    PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &bmata));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Explicit parallel transpose. The diagonal block is transposed locally (fast path);
   the off-diagonal block is inserted entry-by-entry with MatSetValues since its
   columns live on other processes. */
static PetscErrorCode MatTranspose_MPIAIJ(Mat A, MatReuse reuse, Mat *matout)
{
  Mat_MPIAIJ      *a = (Mat_MPIAIJ *)A->data, *b;
  Mat_SeqAIJ      *Aloc = (Mat_SeqAIJ *)a->A->data, *Bloc = (Mat_SeqAIJ *)a->B->data, *sub_B_diag;
  PetscInt         M = A->rmap->N, N = A->cmap->N, ma, na, mb, nb, row, *cols, *cols_tmp, *B_diag_ilen, i, ncol, A_diag_ncol;
  const PetscInt  *ai, *aj, *bi, *bj, *B_diag_i;
  Mat              B, A_diag, *B_diag;
  const MatScalar *pbv, *bv;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
  ma = A->rmap->n;
  na = A->cmap->n;
  mb = a->B->rmap->n;
  nb = a->B->cmap->n;
  ai = Aloc->i;
  aj = Aloc->j;
  bi = Bloc->i;
  bj = Bloc->j;
  if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
    PetscInt            *d_nnz, *g_nnz, *o_nnz;
    PetscSFNode         *oloc;
    PETSC_UNUSED PetscSF sf;

    PetscCall(PetscMalloc4(na, &d_nnz, na, &o_nnz, nb, &g_nnz, nb, &oloc));
    /* compute d_nnz for preallocation */
    PetscCall(PetscArrayzero(d_nnz, na));
    for (i = 0; i < ai[ma]; i++) d_nnz[aj[i]]++;
    /* compute local off-diagonal contributions */
    PetscCall(PetscArrayzero(g_nnz, nb));
    for (i = 0; i < bi[ma]; i++) g_nnz[bj[i]]++;
    /* map those to global */
    PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
    PetscCall(PetscSFSetGraphLayout(sf, A->cmap, nb, NULL, PETSC_USE_POINTER, a->garray));
    PetscCall(PetscSFSetFromOptions(sf));
    PetscCall(PetscArrayzero(o_nnz, na));
    PetscCall(PetscSFReduceBegin(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
    PetscCall(PetscSFReduceEnd(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
    PetscCall(PetscSFDestroy(&sf));

    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
    PetscCall(MatSetBlockSizes(B, A->cmap->bs, A->rmap->bs));
    PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
    PetscCall(PetscFree4(d_nnz, o_nnz, g_nnz, oloc));
  } else {
    B = *matout;
    PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
  }

  b           = (Mat_MPIAIJ *)B->data;
  A_diag      = a->A;
  B_diag      = &b->A;
  sub_B_diag  = (Mat_SeqAIJ *)(*B_diag)->data;
  A_diag_ncol = A_diag->cmap->N;
  B_diag_ilen = sub_B_diag->ilen;
  B_diag_i    = sub_B_diag->i;

  /* Set ilen for diagonal of B */
  for (i = 0; i < A_diag_ncol; i++) B_diag_ilen[i] = B_diag_i[i + 1] - B_diag_i[i];

  /* Transpose the diagonal part of the matrix. In contrast to the off-diagonal part, this can be done
     very quickly (=without using MatSetValues), because all writes are local. */
  PetscCall(MatTransposeSetPrecursor(A_diag, *B_diag));
  PetscCall(MatTranspose(A_diag, MAT_REUSE_MATRIX, B_diag));

  /* copy over the B part */
  PetscCall(PetscMalloc1(bi[mb], &cols));
  PetscCall(MatSeqAIJGetArrayRead(a->B, &bv));
  pbv = bv;
  row = A->rmap->rstart;
  for (i = 0; i < bi[mb]; i++) cols[i] = a->garray[bj[i]];
  cols_tmp = cols;
  for (i = 0; i < mb; i++) {
    ncol = bi[i + 1] - bi[i];
    /* a row of the source becomes a column of the transpose */
    PetscCall(MatSetValues(B, ncol, cols_tmp, 1, &row, pbv, INSERT_VALUES));
    row++;
    if (pbv) pbv += ncol;
    if (cols_tmp) cols_tmp += ncol;
  }
  PetscCall(PetscFree(cols));
  PetscCall(MatSeqAIJRestoreArrayRead(a->B, &bv));

  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    /* MAT_INPLACE_MATRIX: replace A's guts with the transpose */
    PetscCall(MatHeaderMerge(A, &B));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Scale rows by ll and columns by rr: diag(ll) * mat * diag(rr). The scatter of rr
   into the ghost vector is overlapped with the local scaling work. */
static PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat, Vec ll, Vec rr)
{
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  Mat         a = aij->A, b = aij->B;
  PetscInt    s1, s2, s3;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(mat, &s2, &s3));
  if (rr) {
    PetscCall(VecGetLocalSize(rr, &s1));
    PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size");
    /* Overlap communication with computation.
*/ 1990 PetscCall(VecScatterBegin(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1991 } 1992 if (ll) { 1993 PetscCall(VecGetLocalSize(ll, &s1)); 1994 PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size"); 1995 PetscUseTypeMethod(b, diagonalscale, ll, NULL); 1996 } 1997 /* scale the diagonal block */ 1998 PetscUseTypeMethod(a, diagonalscale, ll, rr); 1999 2000 if (rr) { 2001 /* Do a scatter end and then right scale the off-diagonal block */ 2002 PetscCall(VecScatterEnd(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2003 PetscUseTypeMethod(b, diagonalscale, NULL, aij->lvec); 2004 } 2005 PetscFunctionReturn(PETSC_SUCCESS); 2006 } 2007 2008 static PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2009 { 2010 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2011 2012 PetscFunctionBegin; 2013 PetscCall(MatSetUnfactored(a->A)); 2014 PetscFunctionReturn(PETSC_SUCCESS); 2015 } 2016 2017 static PetscErrorCode MatEqual_MPIAIJ(Mat A, Mat B, PetscBool *flag) 2018 { 2019 Mat_MPIAIJ *matB = (Mat_MPIAIJ *)B->data, *matA = (Mat_MPIAIJ *)A->data; 2020 Mat a, b, c, d; 2021 PetscBool flg; 2022 2023 PetscFunctionBegin; 2024 a = matA->A; 2025 b = matA->B; 2026 c = matB->A; 2027 d = matB->B; 2028 2029 PetscCall(MatEqual(a, c, &flg)); 2030 if (flg) PetscCall(MatEqual(b, d, &flg)); 2031 PetscCallMPI(MPIU_Allreduce(&flg, flag, 1, MPI_C_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A))); 2032 PetscFunctionReturn(PETSC_SUCCESS); 2033 } 2034 2035 static PetscErrorCode MatCopy_MPIAIJ(Mat A, Mat B, MatStructure str) 2036 { 2037 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2038 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2039 2040 PetscFunctionBegin; 2041 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
*/
  if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
    /* because of the column compression in the off-processor part of the matrix a->B,
       the number of columns in a->B and b->B may be different, hence we cannot call
       the MatCopy() directly on the two parts. If need be, we can provide a more
       efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
       then copying the submatrices */
    PetscCall(MatCopy_Basic(A, B, str));
  } else {
    PetscCall(MatCopy(a->A, b->A, str));
    PetscCall(MatCopy(a->B, b->B, str));
  }
  PetscCall(PetscObjectStateIncrease((PetscObject)B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   Computes the number of nonzeros per row needed for preallocation when X and Y
   have different nonzero structure. Merges the (sorted) global column lists of
   row i of X and Y, counting the union.
*/
PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m, const PetscInt *xi, const PetscInt *xj, const PetscInt *xltog, const PetscInt *yi, const PetscInt *yj, const PetscInt *yltog, PetscInt *nnz)
{
  PetscInt i, j, k, nzx, nzy;

  PetscFunctionBegin;
  /* Set the number of nonzeros in the new matrix */
  for (i = 0; i < m; i++) {
    const PetscInt *xjj = PetscSafePointerPlusOffset(xj, xi[i]), *yjj = PetscSafePointerPlusOffset(yj, yi[i]);
    nzx    = xi[i + 1] - xi[i];
    nzy    = yi[i + 1] - yi[i];
    nnz[i] = 0;
    for (j = 0, k = 0; j < nzx; j++) {                                /* Point in X */
      for (; k < nzy && yltog[yjj[k]] < xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
      if (k < nzy && yltog[yjj[k]] == xltog[xjj[j]]) k++;             /* Skip duplicate */
      nnz[i]++;
    }
    for (; k < nzy; k++) nnz[i]++; /* remaining Y-only entries */
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz)
{
  PetscInt    m = Y->rmap->N; /* Y is a sequential (off-diagonal) block, so rmap->N is its local row count */
  Mat_SeqAIJ *x = (Mat_SeqAIJ *)X->data;
  Mat_SeqAIJ *y = (Mat_SeqAIJ *)Y->data;

  PetscFunctionBegin;
  PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Y = a*X + Y. Same pattern: blockwise AXPY. Subset pattern: generic fallback.
   Different pattern: preallocate the union structure, then rebuild Y. */
static PetscErrorCode MatAXPY_MPIAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
{
  Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data, *yy = (Mat_MPIAIJ *)Y->data;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscCall(MatAXPY(yy->A, a, xx->A, str));
    PetscCall(MatAXPY(yy->B, a, xx->B, str));
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    PetscCall(MatAXPY_Basic(Y, a, X, str));
  } else {
    Mat       B;
    PetscInt *nnz_d, *nnz_o;

    PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
    PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
    PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
    PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
    PetscCall(MatSetLayouts(B, Y->rmap, Y->cmap));
    PetscCall(MatSetType(B, ((PetscObject)Y)->type_name));
    PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A, xx->A, nnz_d));
    PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
    PetscCall(MatMPIAIJSetPreallocation(B, 0, nnz_d, 0, nnz_o));
    PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
    /* B replaces Y's internals; Y keeps its header/identity */
    PetscCall(MatHeaderMerge(Y, &B));
    PetscCall(PetscFree(nnz_d));
    PetscCall(PetscFree(nnz_o));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat);

/* Complex-conjugate all entries; a no-op in real builds. */
static PetscErrorCode MatConjugate_MPIAIJ(Mat mat)
{
  PetscFunctionBegin;
  if (PetscDefined(USE_COMPLEX)) {
    Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

    PetscCall(MatConjugate_SeqAIJ(aij->A));
    PetscCall(MatConjugate_SeqAIJ(aij->B));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Keep only the real part of every entry. */
static PetscErrorCode MatRealPart_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatRealPart(a->A));
  PetscCall(MatRealPart(a->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Keep only the imaginary part of every entry. */
static PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatImaginaryPart(a->A));
  PetscCall(MatImaginaryPart(a->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Per-row maximum magnitude over both blocks; idx (optional) receives the global
   column of the winner, with ties broken toward the smaller global column. */
static PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *a = (Mat_MPIAIJ *)A->data;
  PetscInt           i, *idxb = NULL, m = A->rmap->n;
  PetscScalar       *vv;
  Vec                vB, vA;
  const PetscScalar *va, *vb;

  PetscFunctionBegin;
  PetscCall(MatCreateVecs(a->A, NULL, &vA));
  PetscCall(MatGetRowMaxAbs(a->A, vA, idx));

  PetscCall(VecGetArrayRead(vA, &va));
  if (idx) {
    /* convert the diagonal block's local column indices to global */
    for (i = 0; i < m; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  PetscCall(MatCreateVecs(a->B, NULL, &vB));
  PetscCall(PetscMalloc1(m, &idxb));
  PetscCall(MatGetRowMaxAbs(a->B, vB, idxb));

  PetscCall(VecGetArrayWrite(v, &vv));
  PetscCall(VecGetArrayRead(vB, &vb));
  for (i = 0; i < m; i++) {
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
      vv[i] = vb[i];
      if (idx) idx[i] = a->garray[idxb[i]];
    } else {
      vv[i] = va[i];
      if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) idx[i] = a->garray[idxb[i]];
    }
  }
  PetscCall(VecRestoreArrayWrite(v, &vv));
  PetscCall(VecRestoreArrayRead(vA, &va));
  PetscCall(VecRestoreArrayRead(vB, &vb));
  PetscCall(PetscFree(idxb));
  PetscCall(VecDestroy(&vA));
  PetscCall(VecDestroy(&vB));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Per-row sum of absolute values: sum of the two blocks' row sums. */
static PetscErrorCode MatGetRowSumAbs_MPIAIJ(Mat A, Vec v)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  Vec         vB, vA;

  PetscFunctionBegin;
  PetscCall(MatCreateVecs(a->A, NULL, &vA));
  PetscCall(MatGetRowSumAbs(a->A, vA));
  PetscCall(MatCreateVecs(a->B, NULL, &vB));
  PetscCall(MatGetRowSumAbs(a->B, vB));
  PetscCall(VecAXPY(vA, 1.0, vB));
  PetscCall(VecDestroy(&vB));
  PetscCall(VecCopy(vA, v));
  PetscCall(VecDestroy(&vA));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Per-row minimum magnitude; must account for the implicit zeros in the compressed
   off-diagonal block (a sparse off-diagonal row implies a 0.0 entry somewhere). */
static PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
  PetscInt           m = A->rmap->n, n = A->cmap->n;
  PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray;
  PetscInt          *diagIdx, *offdiagIdx;
  Vec                diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt           r, j, col, ncols, *bi, *bj;
  Mat                B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMinAbs(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (n == 0) {
    /* this rank owns no columns: every row's min magnitude is the implicit 0.0 */
    if (m) {
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = 0.0;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx));

  /* Get
offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r]   = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    /* now scan the stored entries for a magnitude below the running minimum */
    for (j = 0; j < ncols; j++) {
      if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {
        offdiagA[r]   = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  /* merge diagonal-block and off-diagonal-block minima; ties pick the smaller column */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Per-row minimum (by real part); same implicit-zero handling for the compressed
   off-diagonal block as MatGetRowMinAbs_MPIAIJ above. */
static PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
  PetscInt           m = A->rmap->n, n = A->cmap->n;
  PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray;
  PetscInt          *diagIdx, *offdiagIdx;
  Vec                diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt           r, j, col, ncols, *bi, *bj;
  Mat                B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMin(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (n == 0) {
    if (m) {
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = PETSC_MAX_REAL;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscCalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMin(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r]   = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    for (j = 0; j < ncols; j++) {
      if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {
        offdiagA[r]   = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Per-row maximum (by real part); implicit zeros in the compressed off-diagonal block
   are handled exactly as in MatGetRowMin_MPIAIJ. (Function continues past this chunk.) */
static PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
  PetscInt           m = A->rmap->n, n = A->cmap->n;
  PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray;
  PetscInt          *diagIdx, *offdiagIdx;
  Vec                diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt           r, j, col, ncols, *bi, *bj;
  Mat                B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMax(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (n == 0) {
    if (m) {
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = PETSC_MIN_REAL;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMax(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r]   = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    for (j = 0; j < ncols; j++) {
      if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) {
        offdiagA[r]   = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA)); 2560 PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 2561 PetscCall(VecDestroy(&diagV)); 2562 PetscCall(VecDestroy(&offdiagV)); 2563 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2564 PetscFunctionReturn(PETSC_SUCCESS); 2565 } 2566 2567 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat, Mat *newmat) 2568 { 2569 Mat *dummy; 2570 2571 PetscFunctionBegin; 2572 PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat, MAT_DO_NOT_GET_VALUES, MAT_INITIAL_MATRIX, &dummy)); 2573 *newmat = *dummy; 2574 PetscCall(PetscFree(dummy)); 2575 PetscFunctionReturn(PETSC_SUCCESS); 2576 } 2577 2578 static PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A, const PetscScalar **values) 2579 { 2580 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2581 2582 PetscFunctionBegin; 2583 PetscCall(MatInvertBlockDiagonal(a->A, values)); 2584 A->factorerrortype = a->A->factorerrortype; 2585 PetscFunctionReturn(PETSC_SUCCESS); 2586 } 2587 2588 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x, PetscRandom rctx) 2589 { 2590 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)x->data; 2591 2592 PetscFunctionBegin; 2593 PetscCheck(x->assembled || x->preallocated, PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed"); 2594 PetscCall(MatSetRandom(aij->A, rctx)); 2595 if (x->assembled) { 2596 PetscCall(MatSetRandom(aij->B, rctx)); 2597 } else { 2598 PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B, x->cmap->rstart, x->cmap->rend, rctx)); 2599 } 2600 PetscCall(MatAssemblyBegin(x, MAT_FINAL_ASSEMBLY)); 2601 PetscCall(MatAssemblyEnd(x, MAT_FINAL_ASSEMBLY)); 2602 PetscFunctionReturn(PETSC_SUCCESS); 2603 } 2604 2605 static PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A, PetscBool sc) 2606 { 2607 PetscFunctionBegin; 2608 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2609 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2610 
PetscFunctionReturn(PETSC_SUCCESS); 2611 } 2612 2613 /*@ 2614 MatMPIAIJGetNumberNonzeros - gets the number of nonzeros in the matrix on this MPI rank 2615 2616 Not Collective 2617 2618 Input Parameter: 2619 . A - the matrix 2620 2621 Output Parameter: 2622 . nz - the number of nonzeros 2623 2624 Level: advanced 2625 2626 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ` 2627 @*/ 2628 PetscErrorCode MatMPIAIJGetNumberNonzeros(Mat A, PetscCount *nz) 2629 { 2630 Mat_MPIAIJ *maij = (Mat_MPIAIJ *)A->data; 2631 Mat_SeqAIJ *aaij = (Mat_SeqAIJ *)maij->A->data, *baij = (Mat_SeqAIJ *)maij->B->data; 2632 PetscBool isaij; 2633 2634 PetscFunctionBegin; 2635 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIAIJ, &isaij)); 2636 PetscCheck(isaij, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)A)->type_name); 2637 *nz = aaij->i[A->rmap->n] + baij->i[A->rmap->n]; 2638 PetscFunctionReturn(PETSC_SUCCESS); 2639 } 2640 2641 /*@ 2642 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2643 2644 Collective 2645 2646 Input Parameters: 2647 + A - the matrix 2648 - sc - `PETSC_TRUE` indicates use the scalable algorithm (default is not to use the scalable algorithm) 2649 2650 Level: advanced 2651 2652 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ` 2653 @*/ 2654 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A, PetscBool sc) 2655 { 2656 PetscFunctionBegin; 2657 PetscTryMethod(A, "MatMPIAIJSetUseScalableIncreaseOverlap_C", (Mat, PetscBool), (A, sc)); 2658 PetscFunctionReturn(PETSC_SUCCESS); 2659 } 2660 2661 PetscErrorCode MatSetFromOptions_MPIAIJ(Mat A, PetscOptionItems PetscOptionsObject) 2662 { 2663 PetscBool sc = PETSC_FALSE, flg; 2664 2665 PetscFunctionBegin; 2666 PetscOptionsHeadBegin(PetscOptionsObject, "MPIAIJ options"); 2667 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2668 PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable", "Use 
a scalable algorithm to compute the overlap", "MatIncreaseOverlap", sc, &sc, &flg)); 2669 if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, sc)); 2670 PetscOptionsHeadEnd(); 2671 PetscFunctionReturn(PETSC_SUCCESS); 2672 } 2673 2674 static PetscErrorCode MatShift_MPIAIJ(Mat Y, PetscScalar a) 2675 { 2676 Mat_MPIAIJ *maij = (Mat_MPIAIJ *)Y->data; 2677 Mat_SeqAIJ *aij = (Mat_SeqAIJ *)maij->A->data; 2678 2679 PetscFunctionBegin; 2680 if (!Y->preallocated) { 2681 PetscCall(MatMPIAIJSetPreallocation(Y, 1, NULL, 0, NULL)); 2682 } else if (!aij->nz) { /* It does not matter if diagonals of Y only partially lie in maij->A. We just need an estimated preallocation. */ 2683 PetscInt nonew = aij->nonew; 2684 PetscCall(MatSeqAIJSetPreallocation(maij->A, 1, NULL)); 2685 aij->nonew = nonew; 2686 } 2687 PetscCall(MatShift_Basic(Y, a)); 2688 PetscFunctionReturn(PETSC_SUCCESS); 2689 } 2690 2691 static PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A, PetscInt nblocks, const PetscInt *bsizes, PetscScalar *diag) 2692 { 2693 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2694 2695 PetscFunctionBegin; 2696 PetscCall(MatInvertVariableBlockDiagonal(a->A, nblocks, bsizes, diag)); 2697 PetscFunctionReturn(PETSC_SUCCESS); 2698 } 2699 2700 static PetscErrorCode MatEliminateZeros_MPIAIJ(Mat A, PetscBool keep) 2701 { 2702 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2703 2704 PetscFunctionBegin; 2705 PetscCall(MatEliminateZeros_SeqAIJ(a->A, keep)); // possibly keep zero diagonal coefficients 2706 PetscCall(MatEliminateZeros_SeqAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients 2707 PetscFunctionReturn(PETSC_SUCCESS); 2708 } 2709 2710 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2711 MatGetRow_MPIAIJ, 2712 MatRestoreRow_MPIAIJ, 2713 MatMult_MPIAIJ, 2714 /* 4*/ MatMultAdd_MPIAIJ, 2715 MatMultTranspose_MPIAIJ, 2716 MatMultTransposeAdd_MPIAIJ, 2717 NULL, 2718 NULL, 2719 NULL, 2720 /*10*/ NULL, 2721 NULL, 2722 NULL, 2723 MatSOR_MPIAIJ, 2724 MatTranspose_MPIAIJ, 2725 
/*15*/ MatGetInfo_MPIAIJ, 2726 MatEqual_MPIAIJ, 2727 MatGetDiagonal_MPIAIJ, 2728 MatDiagonalScale_MPIAIJ, 2729 MatNorm_MPIAIJ, 2730 /*20*/ MatAssemblyBegin_MPIAIJ, 2731 MatAssemblyEnd_MPIAIJ, 2732 MatSetOption_MPIAIJ, 2733 MatZeroEntries_MPIAIJ, 2734 /*24*/ MatZeroRows_MPIAIJ, 2735 NULL, 2736 NULL, 2737 NULL, 2738 NULL, 2739 /*29*/ MatSetUp_MPI_Hash, 2740 NULL, 2741 NULL, 2742 MatGetDiagonalBlock_MPIAIJ, 2743 NULL, 2744 /*34*/ MatDuplicate_MPIAIJ, 2745 NULL, 2746 NULL, 2747 NULL, 2748 NULL, 2749 /*39*/ MatAXPY_MPIAIJ, 2750 MatCreateSubMatrices_MPIAIJ, 2751 MatIncreaseOverlap_MPIAIJ, 2752 MatGetValues_MPIAIJ, 2753 MatCopy_MPIAIJ, 2754 /*44*/ MatGetRowMax_MPIAIJ, 2755 MatScale_MPIAIJ, 2756 MatShift_MPIAIJ, 2757 MatDiagonalSet_MPIAIJ, 2758 MatZeroRowsColumns_MPIAIJ, 2759 /*49*/ MatSetRandom_MPIAIJ, 2760 MatGetRowIJ_MPIAIJ, 2761 MatRestoreRowIJ_MPIAIJ, 2762 NULL, 2763 NULL, 2764 /*54*/ MatFDColoringCreate_MPIXAIJ, 2765 NULL, 2766 MatSetUnfactored_MPIAIJ, 2767 MatPermute_MPIAIJ, 2768 NULL, 2769 /*59*/ MatCreateSubMatrix_MPIAIJ, 2770 MatDestroy_MPIAIJ, 2771 MatView_MPIAIJ, 2772 NULL, 2773 NULL, 2774 /*64*/ MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2775 NULL, 2776 NULL, 2777 NULL, 2778 MatGetRowMaxAbs_MPIAIJ, 2779 /*69*/ MatGetRowMinAbs_MPIAIJ, 2780 NULL, 2781 NULL, 2782 MatFDColoringApply_AIJ, 2783 MatSetFromOptions_MPIAIJ, 2784 MatFindZeroDiagonals_MPIAIJ, 2785 /*75*/ NULL, 2786 NULL, 2787 NULL, 2788 MatLoad_MPIAIJ, 2789 NULL, 2790 /*80*/ NULL, 2791 NULL, 2792 NULL, 2793 /*83*/ NULL, 2794 NULL, 2795 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2796 MatPtAPNumeric_MPIAIJ_MPIAIJ, 2797 NULL, 2798 NULL, 2799 /*89*/ MatBindToCPU_MPIAIJ, 2800 MatProductSetFromOptions_MPIAIJ, 2801 NULL, 2802 NULL, 2803 MatConjugate_MPIAIJ, 2804 /*94*/ NULL, 2805 MatSetValuesRow_MPIAIJ, 2806 MatRealPart_MPIAIJ, 2807 MatImaginaryPart_MPIAIJ, 2808 NULL, 2809 /*99*/ NULL, 2810 NULL, 2811 NULL, 2812 MatGetRowMin_MPIAIJ, 2813 NULL, 2814 /*104*/ MatGetSeqNonzeroStructure_MPIAIJ, 2815 NULL, 2816 
MatGetGhosts_MPIAIJ, 2817 NULL, 2818 NULL, 2819 /*109*/ MatMultDiagonalBlock_MPIAIJ, 2820 NULL, 2821 NULL, 2822 NULL, 2823 MatGetMultiProcBlock_MPIAIJ, 2824 /*114*/ MatFindNonzeroRows_MPIAIJ, 2825 MatGetColumnReductions_MPIAIJ, 2826 MatInvertBlockDiagonal_MPIAIJ, 2827 MatInvertVariableBlockDiagonal_MPIAIJ, 2828 MatCreateSubMatricesMPI_MPIAIJ, 2829 /*119*/ NULL, 2830 NULL, 2831 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2832 NULL, 2833 NULL, 2834 /*124*/ NULL, 2835 NULL, 2836 MatSetBlockSizes_MPIAIJ, 2837 NULL, 2838 MatFDColoringSetUp_MPIXAIJ, 2839 /*129*/ MatFindOffBlockDiagonalEntries_MPIAIJ, 2840 MatCreateMPIMatConcatenateSeqMat_MPIAIJ, 2841 NULL, 2842 NULL, 2843 NULL, 2844 /*134*/ MatCreateGraph_Simple_AIJ, 2845 NULL, 2846 MatEliminateZeros_MPIAIJ, 2847 MatGetRowSumAbs_MPIAIJ, 2848 NULL, 2849 /*139*/ NULL, 2850 NULL, 2851 MatCopyHashToXAIJ_MPI_Hash, 2852 MatGetCurrentMemType_MPIAIJ, 2853 NULL}; 2854 2855 static PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2856 { 2857 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2858 2859 PetscFunctionBegin; 2860 PetscCall(MatStoreValues(aij->A)); 2861 PetscCall(MatStoreValues(aij->B)); 2862 PetscFunctionReturn(PETSC_SUCCESS); 2863 } 2864 2865 static PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2866 { 2867 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2868 2869 PetscFunctionBegin; 2870 PetscCall(MatRetrieveValues(aij->A)); 2871 PetscCall(MatRetrieveValues(aij->B)); 2872 PetscFunctionReturn(PETSC_SUCCESS); 2873 } 2874 2875 PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[]) 2876 { 2877 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2878 PetscMPIInt size; 2879 2880 PetscFunctionBegin; 2881 if (B->hash_active) { 2882 B->ops[0] = b->cops; 2883 B->hash_active = PETSC_FALSE; 2884 } 2885 PetscCall(PetscLayoutSetUp(B->rmap)); 2886 PetscCall(PetscLayoutSetUp(B->cmap)); 2887 2888 #if defined(PETSC_USE_CTABLE) 2889 PetscCall(PetscHMapIDestroy(&b->colmap)); 2890 #else 
2891 PetscCall(PetscFree(b->colmap)); 2892 #endif 2893 PetscCall(PetscFree(b->garray)); 2894 PetscCall(VecDestroy(&b->lvec)); 2895 PetscCall(VecScatterDestroy(&b->Mvctx)); 2896 2897 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 2898 2899 MatSeqXAIJGetOptions_Private(b->B); 2900 PetscCall(MatDestroy(&b->B)); 2901 PetscCall(MatCreate(PETSC_COMM_SELF, &b->B)); 2902 PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0)); 2903 PetscCall(MatSetBlockSizesFromMats(b->B, B, B)); 2904 PetscCall(MatSetType(b->B, MATSEQAIJ)); 2905 MatSeqXAIJRestoreOptions_Private(b->B); 2906 2907 MatSeqXAIJGetOptions_Private(b->A); 2908 PetscCall(MatDestroy(&b->A)); 2909 PetscCall(MatCreate(PETSC_COMM_SELF, &b->A)); 2910 PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n)); 2911 PetscCall(MatSetBlockSizesFromMats(b->A, B, B)); 2912 PetscCall(MatSetType(b->A, MATSEQAIJ)); 2913 MatSeqXAIJRestoreOptions_Private(b->A); 2914 2915 PetscCall(MatSeqAIJSetPreallocation(b->A, d_nz, d_nnz)); 2916 PetscCall(MatSeqAIJSetPreallocation(b->B, o_nz, o_nnz)); 2917 B->preallocated = PETSC_TRUE; 2918 B->was_assembled = PETSC_FALSE; 2919 B->assembled = PETSC_FALSE; 2920 PetscFunctionReturn(PETSC_SUCCESS); 2921 } 2922 2923 static PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) 2924 { 2925 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2926 PetscBool ondiagreset, offdiagreset, memoryreset; 2927 2928 PetscFunctionBegin; 2929 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 2930 PetscCheck(B->insertmode == NOT_SET_VALUES, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot reset preallocation after setting some values but not yet calling MatAssemblyBegin()/MatAssemblyEnd()"); 2931 if (B->num_ass == 0) PetscFunctionReturn(PETSC_SUCCESS); 2932 2933 PetscCall(MatResetPreallocation_SeqAIJ_Private(b->A, &ondiagreset)); 2934 PetscCall(MatResetPreallocation_SeqAIJ_Private(b->B, &offdiagreset)); 2935 memoryreset = (PetscBool)(ondiagreset || 
offdiagreset); 2936 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &memoryreset, 1, MPI_C_BOOL, MPI_LOR, PetscObjectComm((PetscObject)B))); 2937 if (!memoryreset) PetscFunctionReturn(PETSC_SUCCESS); 2938 2939 PetscCall(PetscLayoutSetUp(B->rmap)); 2940 PetscCall(PetscLayoutSetUp(B->cmap)); 2941 PetscCheck(B->assembled || B->was_assembled, PetscObjectComm((PetscObject)B), PETSC_ERR_ARG_WRONGSTATE, "Should not need to reset preallocation if the matrix was never assembled"); 2942 PetscCall(MatDisAssemble_MPIAIJ(B, PETSC_TRUE)); 2943 PetscCall(VecScatterDestroy(&b->Mvctx)); 2944 2945 B->preallocated = PETSC_TRUE; 2946 B->was_assembled = PETSC_FALSE; 2947 B->assembled = PETSC_FALSE; 2948 /* Log that the state of this object has changed; this will help guarantee that preconditioners get re-setup */ 2949 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 2950 PetscFunctionReturn(PETSC_SUCCESS); 2951 } 2952 2953 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat) 2954 { 2955 Mat mat; 2956 Mat_MPIAIJ *a, *oldmat = (Mat_MPIAIJ *)matin->data; 2957 2958 PetscFunctionBegin; 2959 *newmat = NULL; 2960 PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat)); 2961 PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N)); 2962 PetscCall(MatSetBlockSizesFromMats(mat, matin, matin)); 2963 PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name)); 2964 a = (Mat_MPIAIJ *)mat->data; 2965 2966 mat->factortype = matin->factortype; 2967 mat->assembled = matin->assembled; 2968 mat->insertmode = NOT_SET_VALUES; 2969 2970 a->size = oldmat->size; 2971 a->rank = oldmat->rank; 2972 a->donotstash = oldmat->donotstash; 2973 a->roworiented = oldmat->roworiented; 2974 a->rowindices = NULL; 2975 a->rowvalues = NULL; 2976 a->getrowactive = PETSC_FALSE; 2977 2978 PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap)); 2979 PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap)); 2980 if (matin->hash_active) { 2981 
PetscCall(MatSetUp(mat)); 2982 } else { 2983 mat->preallocated = matin->preallocated; 2984 if (oldmat->colmap) { 2985 #if defined(PETSC_USE_CTABLE) 2986 PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap)); 2987 #else 2988 PetscCall(PetscMalloc1(mat->cmap->N, &a->colmap)); 2989 PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, mat->cmap->N)); 2990 #endif 2991 } else a->colmap = NULL; 2992 if (oldmat->garray) { 2993 PetscInt len; 2994 len = oldmat->B->cmap->n; 2995 PetscCall(PetscMalloc1(len, &a->garray)); 2996 if (len) PetscCall(PetscArraycpy(a->garray, oldmat->garray, len)); 2997 } else a->garray = NULL; 2998 2999 /* It may happen MatDuplicate is called with a non-assembled matrix 3000 In fact, MatDuplicate only requires the matrix to be preallocated 3001 This may happen inside a DMCreateMatrix_Shell */ 3002 if (oldmat->lvec) PetscCall(VecDuplicate(oldmat->lvec, &a->lvec)); 3003 if (oldmat->Mvctx) { 3004 a->Mvctx = oldmat->Mvctx; 3005 PetscCall(PetscObjectReference((PetscObject)oldmat->Mvctx)); 3006 } 3007 PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A)); 3008 PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B)); 3009 } 3010 PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist)); 3011 *newmat = mat; 3012 PetscFunctionReturn(PETSC_SUCCESS); 3013 } 3014 3015 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3016 { 3017 PetscBool isbinary, ishdf5; 3018 3019 PetscFunctionBegin; 3020 PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1); 3021 PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2); 3022 /* force binary viewer to load .info file if it has not yet done so */ 3023 PetscCall(PetscViewerSetUp(viewer)); 3024 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 3025 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5)); 3026 if (isbinary) { 3027 PetscCall(MatLoad_MPIAIJ_Binary(newMat, viewer)); 3028 } else if (ishdf5) { 3029 #if 
defined(PETSC_HAVE_HDF5) 3030 PetscCall(MatLoad_AIJ_HDF5(newMat, viewer)); 3031 #else 3032 SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5"); 3033 #endif 3034 } else { 3035 SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name); 3036 } 3037 PetscFunctionReturn(PETSC_SUCCESS); 3038 } 3039 3040 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer) 3041 { 3042 PetscInt header[4], M, N, m, nz, rows, cols, sum, i; 3043 PetscInt *rowidxs, *colidxs; 3044 PetscScalar *matvals; 3045 3046 PetscFunctionBegin; 3047 PetscCall(PetscViewerSetUp(viewer)); 3048 3049 /* read in matrix header */ 3050 PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT)); 3051 PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file"); 3052 M = header[1]; 3053 N = header[2]; 3054 nz = header[3]; 3055 PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M); 3056 PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N); 3057 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIAIJ"); 3058 3059 /* set block sizes from the viewer's .info file */ 3060 PetscCall(MatLoad_Binary_BlockSizes(mat, viewer)); 3061 /* set global sizes if not set already */ 3062 if (mat->rmap->N < 0) mat->rmap->N = M; 3063 if (mat->cmap->N < 0) mat->cmap->N = N; 3064 PetscCall(PetscLayoutSetUp(mat->rmap)); 3065 PetscCall(PetscLayoutSetUp(mat->cmap)); 3066 3067 /* check if the matrix sizes are correct */ 3068 PetscCall(MatGetSize(mat, &rows, &cols)); 3069 
PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols); 3070 3071 /* read in row lengths and build row indices */ 3072 PetscCall(MatGetLocalSize(mat, &m, NULL)); 3073 PetscCall(PetscMalloc1(m + 1, &rowidxs)); 3074 PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT)); 3075 rowidxs[0] = 0; 3076 for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i]; 3077 if (nz != PETSC_INT_MAX) { 3078 PetscCallMPI(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer))); 3079 PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum); 3080 } 3081 3082 /* read in column indices and matrix values */ 3083 PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals)); 3084 PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 3085 PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 3086 /* store matrix indices and values */ 3087 PetscCall(MatMPIAIJSetPreallocationCSR(mat, rowidxs, colidxs, matvals)); 3088 PetscCall(PetscFree(rowidxs)); 3089 PetscCall(PetscFree2(colidxs, matvals)); 3090 PetscFunctionReturn(PETSC_SUCCESS); 3091 } 3092 3093 /* Not scalable because of ISAllGather() unless getting all columns. 
*/ 3094 static PetscErrorCode ISGetSeqIS_Private(Mat mat, IS iscol, IS *isseq) 3095 { 3096 IS iscol_local; 3097 PetscBool isstride; 3098 PetscMPIInt gisstride = 0; 3099 3100 PetscFunctionBegin; 3101 /* check if we are grabbing all columns*/ 3102 PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &isstride)); 3103 3104 if (isstride) { 3105 PetscInt start, len, mstart, mlen; 3106 PetscCall(ISStrideGetInfo(iscol, &start, NULL)); 3107 PetscCall(ISGetLocalSize(iscol, &len)); 3108 PetscCall(MatGetOwnershipRangeColumn(mat, &mstart, &mlen)); 3109 if (mstart == start && mlen - mstart == len) gisstride = 1; 3110 } 3111 3112 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &gisstride, 1, MPI_INT, MPI_MIN, PetscObjectComm((PetscObject)mat))); 3113 if (gisstride) { 3114 PetscInt N; 3115 PetscCall(MatGetSize(mat, NULL, &N)); 3116 PetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &iscol_local)); 3117 PetscCall(ISSetIdentity(iscol_local)); 3118 PetscCall(PetscInfo(mat, "Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n")); 3119 } else { 3120 PetscInt cbs; 3121 PetscCall(ISGetBlockSize(iscol, &cbs)); 3122 PetscCall(ISAllGather(iscol, &iscol_local)); 3123 PetscCall(ISSetBlockSize(iscol_local, cbs)); 3124 } 3125 3126 *isseq = iscol_local; 3127 PetscFunctionReturn(PETSC_SUCCESS); 3128 } 3129 3130 /* 3131 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local 3132 (see MatCreateSubMatrix_MPIAIJ_nonscalable) 3133 3134 Input Parameters: 3135 + mat - matrix 3136 . isrow - parallel row index set; its local indices are a subset of local columns of `mat`, 3137 i.e., mat->rstart <= isrow[i] < mat->rend 3138 - iscol - parallel column index set; its local indices are a subset of local columns of `mat`, 3139 i.e., mat->cstart <= iscol[i] < mat->cend 3140 3141 Output Parameters: 3142 + isrow_d - sequential row index set for retrieving mat->A 3143 . iscol_d - sequential column index set for retrieving mat->A 3144 . 
iscol_o - sequential column index set for retrieving mat->B
- garray - column map; garray[i] indicates global location of iscol_o[i] in `iscol`
*/
static PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat, IS isrow, IS iscol, IS *isrow_d, IS *iscol_d, IS *iscol_o, PetscInt *garray[])
{
  Vec             x, cmap;
  const PetscInt *is_idx;
  PetscScalar    *xarray, *cmaparray;
  PetscInt        ncols, isstart, *idx, m, rstart, *cmap1, count;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ *)mat->data;
  Mat             B = a->B;
  Vec             lvec = a->lvec, lcmap;
  PetscInt        i, cstart, cend, Bn = B->cmap->N;
  MPI_Comm        comm;
  VecScatter      Mvctx = a->Mvctx;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  PetscCall(ISGetLocalSize(iscol, &ncols));

  /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
  /* x marks selected columns (entry = its global column), cmap carries each selected
     column's position within iscol; both are scattered below so ghost columns can be
     classified without any global communication of index sets */
  PetscCall(MatCreateVecs(mat, &x, NULL));
  PetscCall(VecSet(x, -1.0));
  PetscCall(VecDuplicate(x, &cmap));
  PetscCall(VecSet(cmap, -1.0));

  /* Get start indices: exclusive prefix sum gives this rank's offset within the global iscol */
  PetscCallMPI(MPI_Scan(&ncols, &isstart, 1, MPIU_INT, MPI_SUM, comm));
  isstart -= ncols;
  PetscCall(MatGetOwnershipRangeColumn(mat, &cstart, &cend));

  PetscCall(ISGetIndices(iscol, &is_idx));
  PetscCall(VecGetArray(x, &xarray));
  PetscCall(VecGetArray(cmap, &cmaparray));
  PetscCall(PetscMalloc1(ncols, &idx));
  for (i = 0; i < ncols; i++) {
    xarray[is_idx[i] - cstart]    = (PetscScalar)is_idx[i];
    cmaparray[is_idx[i] - cstart] = i + isstart;        /* global index of iscol[i] */
    idx[i]                        = is_idx[i] - cstart; /* local index of iscol[i]  */
  }
  PetscCall(VecRestoreArray(x, &xarray));
  PetscCall(VecRestoreArray(cmap, &cmaparray));
  PetscCall(ISRestoreIndices(iscol, &is_idx));

  /* Get iscol_d: local (diagonal-block) column indices; IS takes ownership of idx */
  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, iscol_d));
  PetscCall(ISGetBlockSize(iscol, &i));
  PetscCall(ISSetBlockSize(*iscol_d, i));

  /* Get isrow_d: local row indices relative to this rank's row start */
  PetscCall(ISGetLocalSize(isrow, &m));
  rstart = mat->rmap->rstart;
  PetscCall(PetscMalloc1(m, &idx));
  PetscCall(ISGetIndices(isrow, &is_idx));
  for (i = 0; i < m; i++) idx[i] = is_idx[i] - rstart;
  PetscCall(ISRestoreIndices(isrow, &is_idx));

  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idx, PETSC_OWN_POINTER, isrow_d));
  PetscCall(ISGetBlockSize(isrow, &i));
  PetscCall(ISSetBlockSize(*isrow_d, i));

  /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
  PetscCall(VecScatterBegin(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));

  PetscCall(VecDuplicate(lvec, &lcmap));

  PetscCall(VecScatterBegin(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));

  /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
  /* off-process column indices: a ghost column is selected iff its scattered x entry
     is not the -1 padding */
  count = 0;
  PetscCall(PetscMalloc1(Bn, &idx));
  PetscCall(PetscMalloc1(Bn, &cmap1));

  PetscCall(VecGetArray(lvec, &xarray));
  PetscCall(VecGetArray(lcmap, &cmaparray));
  for (i = 0; i < Bn; i++) {
    if (PetscRealPart(xarray[i]) > -1.0) {
      idx[count]   = i;                                     /* local column index in off-diagonal part B */
      cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */
      count++;
    }
  }
  PetscCall(VecRestoreArray(lvec, &xarray));
  PetscCall(VecRestoreArray(lcmap, &cmaparray));

  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_COPY_VALUES, iscol_o));
  /* cannot ensure iscol_o has same blocksize as iscol! */

  PetscCall(PetscFree(idx));
  /* caller takes ownership of cmap1 (only first `count` entries are meaningful) */
  *garray = cmap1;

  PetscCall(VecDestroy(&x));
  PetscCall(VecDestroy(&cmap));
  PetscCall(VecDestroy(&lcmap));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *submat)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data, *asub;
  Mat         M = NULL;
  MPI_Comm    comm;
  IS          iscol_d, isrow_d, iscol_o;
  Mat         Asub = NULL, Bsub = NULL;
  PetscInt    n, count, M_size, N_size;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));

  if (call == MAT_REUSE_MATRIX) {
    /* Retrieve isrow_d, iscol_d and iscol_o from submat (composed on it during the
       MAT_INITIAL_MATRIX call below) */
    PetscCall(PetscObjectQuery((PetscObject)*submat, "isrow_d", (PetscObject *)&isrow_d));
    PetscCheck(isrow_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "isrow_d passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_d", (PetscObject *)&iscol_d));
    PetscCheck(iscol_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_d passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_o", (PetscObject *)&iscol_o));
    PetscCheck(iscol_o, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_o passed in was not used before, cannot reuse");

    /* Update diagonal and off-diagonal portions of submat */
    asub = (Mat_MPIAIJ *)(*submat)->data;
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->A));
    PetscCall(ISGetLocalSize(iscol_o, &n));
    if (n) PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->B));
    PetscCall(MatAssemblyBegin(*submat, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(*submat,
                             MAT_FINAL_ASSEMBLY));

  } else { /* call == MAT_INITIAL_MATRIX) */
    PetscInt *garray, *garray_compact;
    PetscInt  BsubN;

    /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
    PetscCall(ISGetSeqIS_SameColDist_Private(mat, isrow, iscol, &isrow_d, &iscol_d, &iscol_o, &garray));

    /* Create local submatrices Asub and Bsub */
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Asub));
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Bsub));

    // Compact garray so its not of size Bn
    PetscCall(ISGetSize(iscol_o, &count));
    PetscCall(PetscMalloc1(count, &garray_compact));
    PetscCall(PetscArraycpy(garray_compact, garray, count));

    /* Create submatrix M; M takes ownership of Asub/Bsub and garray_compact */
    PetscCall(ISGetSize(isrow, &M_size));
    PetscCall(ISGetSize(iscol, &N_size));
    PetscCall(MatCreateMPIAIJWithSeqAIJ(comm, M_size, N_size, Asub, Bsub, garray_compact, &M));

    /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
    asub = (Mat_MPIAIJ *)M->data;

    PetscCall(ISGetLocalSize(iscol_o, &BsubN));
    n = asub->B->cmap->N;
    if (BsubN > n) {
      /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
      const PetscInt *idx;
      PetscInt        i, j, *idx_new, *subgarray = asub->garray;
      PetscCall(PetscInfo(M, "submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n", n, BsubN));

      /* Walk the two sorted column maps in lockstep, keeping only the iscol_o entries
         whose global column survived in the condensed submatrix */
      PetscCall(PetscMalloc1(n, &idx_new));
      j = 0;
      PetscCall(ISGetIndices(iscol_o, &idx));
      for (i = 0; i < n; i++) {
        if (j >= BsubN) break;
        while (subgarray[i] > garray[j]) j++;

        PetscCheck(subgarray[i] == garray[j], PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT, i, subgarray[i], j, garray[j]);
        idx_new[i] = idx[j++];
      }
      PetscCall(ISRestoreIndices(iscol_o, &idx));

      PetscCall(ISDestroy(&iscol_o));
      PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx_new, PETSC_OWN_POINTER, &iscol_o));

    } else PetscCheck(BsubN >= n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")", BsubN, asub->B->cmap->N);

    PetscCall(PetscFree(garray));
    *submat = M;

    /* Save isrow_d, iscol_d and iscol_o used in processor for next request; the
       composed references keep them alive after the local ISDestroy() calls */
    PetscCall(PetscObjectCompose((PetscObject)M, "isrow_d", (PetscObject)isrow_d));
    PetscCall(ISDestroy(&isrow_d));

    PetscCall(PetscObjectCompose((PetscObject)M, "iscol_d", (PetscObject)iscol_d));
    PetscCall(ISDestroy(&iscol_d));

    PetscCall(PetscObjectCompose((PetscObject)M, "iscol_o", (PetscObject)iscol_o));
    PetscCall(ISDestroy(&iscol_o));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
{
  IS        iscol_local = NULL, isrow_d;
  PetscInt  csize;
  PetscInt  n, i, j, start, end;
  PetscBool sameRowDist = PETSC_FALSE, sameDist[2], tsameDist[2];
  MPI_Comm  comm;

  PetscFunctionBegin;
  /* If isrow has same processor distribution as mat,
     call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "isrow_d", (PetscObject *)&isrow_d));
    if (isrow_d) {
      sameRowDist  = PETSC_TRUE;
      tsameDist[1] = PETSC_TRUE; /* sameColDist */
    } else {
      PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_local));
      if (iscol_local) {
        sameRowDist  = PETSC_TRUE;
        tsameDist[1] = PETSC_FALSE; /* !sameColDist */
      }
    }
  } else {
    /* Check if isrow has same
processor distribution as mat */ 3366 sameDist[0] = PETSC_FALSE; 3367 PetscCall(ISGetLocalSize(isrow, &n)); 3368 if (!n) { 3369 sameDist[0] = PETSC_TRUE; 3370 } else { 3371 PetscCall(ISGetMinMax(isrow, &i, &j)); 3372 PetscCall(MatGetOwnershipRange(mat, &start, &end)); 3373 if (i >= start && j < end) sameDist[0] = PETSC_TRUE; 3374 } 3375 3376 /* Check if iscol has same processor distribution as mat */ 3377 sameDist[1] = PETSC_FALSE; 3378 PetscCall(ISGetLocalSize(iscol, &n)); 3379 if (!n) { 3380 sameDist[1] = PETSC_TRUE; 3381 } else { 3382 PetscCall(ISGetMinMax(iscol, &i, &j)); 3383 PetscCall(MatGetOwnershipRangeColumn(mat, &start, &end)); 3384 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3385 } 3386 3387 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3388 PetscCallMPI(MPIU_Allreduce(&sameDist, &tsameDist, 2, MPI_C_BOOL, MPI_LAND, comm)); 3389 sameRowDist = tsameDist[0]; 3390 } 3391 3392 if (sameRowDist) { 3393 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3394 /* isrow and iscol have same processor distribution as mat */ 3395 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat, isrow, iscol, call, newmat)); 3396 PetscFunctionReturn(PETSC_SUCCESS); 3397 } else { /* sameRowDist */ 3398 /* isrow has same processor distribution as mat */ 3399 if (call == MAT_INITIAL_MATRIX) { 3400 PetscBool sorted; 3401 PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local)); 3402 PetscCall(ISGetLocalSize(iscol_local, &n)); /* local size of iscol_local = global columns of newmat */ 3403 PetscCall(ISGetSize(iscol, &i)); 3404 PetscCheck(n == i, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT, n, i); 3405 3406 PetscCall(ISSorted(iscol_local, &sorted)); 3407 if (sorted) { 3408 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3409 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, iscol_local, MAT_INITIAL_MATRIX, newmat)); 3410 
PetscFunctionReturn(PETSC_SUCCESS); 3411 } 3412 } else { /* call == MAT_REUSE_MATRIX */ 3413 IS iscol_sub; 3414 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub)); 3415 if (iscol_sub) { 3416 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, NULL, call, newmat)); 3417 PetscFunctionReturn(PETSC_SUCCESS); 3418 } 3419 } 3420 } 3421 } 3422 3423 /* General case: iscol -> iscol_local which has global size of iscol */ 3424 if (call == MAT_REUSE_MATRIX) { 3425 PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local)); 3426 PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 3427 } else { 3428 if (!iscol_local) PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local)); 3429 } 3430 3431 PetscCall(ISGetLocalSize(iscol, &csize)); 3432 PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat, isrow, iscol_local, csize, call, newmat)); 3433 3434 if (call == MAT_INITIAL_MATRIX) { 3435 PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local)); 3436 PetscCall(ISDestroy(&iscol_local)); 3437 } 3438 PetscFunctionReturn(PETSC_SUCCESS); 3439 } 3440 3441 /*@C 3442 MatCreateMPIAIJWithSeqAIJ - creates a `MATMPIAIJ` matrix using `MATSEQAIJ` matrices that contain the "diagonal" 3443 and "off-diagonal" part of the matrix in CSR format. 3444 3445 Collective 3446 3447 Input Parameters: 3448 + comm - MPI communicator 3449 . M - the global row size 3450 . N - the global column size 3451 . A - "diagonal" portion of matrix 3452 . B - if garray is `NULL`, B should be the offdiag matrix using global col ids and of size N - if garray is not `NULL`, B should be the offdiag matrix using local col ids and of size garray 3453 - garray - either `NULL` or the global index of `B` columns. If not `NULL`, it should be allocated by `PetscMalloc1()` and will be owned by `mat` thereafter. 3454 3455 Output Parameter: 3456 . 
mat - the matrix, with input `A` as its local diagonal matrix 3457 3458 Level: advanced 3459 3460 Notes: 3461 See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3462 3463 `A` and `B` becomes part of output mat. The user cannot use `A` and `B` anymore. 3464 3465 If `garray` is `NULL`, `B` will be compacted to use local indices. In this sense, `B`'s sparsity pattern (nonzerostate) will be changed. If `B` is a device matrix, we need to somehow also update 3466 `B`'s copy on device. We do so by increasing `B`'s nonzerostate. In use of `B` on device, device matrix types should detect this change (ref. internal routines `MatSeqAIJCUSPARSECopyToGPU()` or 3467 `MatAssemblyEnd_SeqAIJKokkos()`) and will just destroy and then recreate the device copy of `B`. It is not optimal, but is easy to implement and less hacky. To avoid this overhead, try to compute `garray` 3468 yourself, see algorithms in the private function `MatSetUpMultiply_MPIAIJ()`. 3469 3470 The `NULL`-ness of `garray` doesn't need to be collective, in other words, `garray` can be `NULL` on some processes while not on others. 
3471 3472 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateMPIAIJWithSplitArrays()` 3473 @*/ 3474 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm, PetscInt M, PetscInt N, Mat A, Mat B, PetscInt *garray, Mat *mat) 3475 { 3476 PetscInt m, n; 3477 MatType mpi_mat_type; 3478 Mat_MPIAIJ *mpiaij; 3479 Mat C; 3480 3481 PetscFunctionBegin; 3482 PetscCall(MatCreate(comm, &C)); 3483 PetscCall(MatGetSize(A, &m, &n)); 3484 PetscCheck(m == B->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Am %" PetscInt_FMT " != Bm %" PetscInt_FMT, m, B->rmap->N); 3485 PetscCheck(A->rmap->bs == B->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT, A->rmap->bs, B->rmap->bs); 3486 3487 PetscCall(MatSetSizes(C, m, n, M, N)); 3488 /* Determine the type of MPI matrix that should be created from the type of matrix A, which holds the "diagonal" portion. */ 3489 PetscCall(MatGetMPIMatType_Private(A, &mpi_mat_type)); 3490 PetscCall(MatSetType(C, mpi_mat_type)); 3491 if (!garray) { 3492 const PetscScalar *ba; 3493 3494 B->nonzerostate++; 3495 PetscCall(MatSeqAIJGetArrayRead(B, &ba)); /* Since we will destroy B's device copy, we need to make sure the host copy is up to date */ 3496 PetscCall(MatSeqAIJRestoreArrayRead(B, &ba)); 3497 } 3498 3499 PetscCall(MatSetBlockSizes(C, A->rmap->bs, A->cmap->bs)); 3500 PetscCall(PetscLayoutSetUp(C->rmap)); 3501 PetscCall(PetscLayoutSetUp(C->cmap)); 3502 3503 mpiaij = (Mat_MPIAIJ *)C->data; 3504 mpiaij->A = A; 3505 mpiaij->B = B; 3506 mpiaij->garray = garray; 3507 C->preallocated = PETSC_TRUE; 3508 C->nooffprocentries = PETSC_TRUE; /* See MatAssemblyBegin_MPIAIJ. In effect, making MatAssemblyBegin a nop */ 3509 3510 PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 3511 PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 3512 /* MatAssemblyEnd is critical here. 
     It sets mat->offloadmask according to A and B's, and
     also gets mpiaij->B compacted (if garray is NULL), with its col ids and size reduced
  */
  PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY));
  PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
  PetscCall(MatSetOption(C, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  *mat = C;
  PetscFunctionReturn(PETSC_SUCCESS);
}

extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat, PetscInt, const IS[], const IS[], MatReuse, PetscBool, Mat *);

/* Extract a parallel submatrix when isrow has the same processor distribution as mat.
   iscol_local (the sequential gather of iscol) must be sorted, though it may contain duplicates;
   the caller (MatCreateSubMatrix_MPIAIJ) checks this before dispatching here. */
PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat, IS isrow, IS iscol, IS iscol_local, MatReuse call, Mat *newmat)
{
  PetscInt        i, m, n, rstart, row, rend, nz, j, bs, cbs;
  PetscInt       *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ *)mat->data;
  Mat             M, Msub, B = a->B;
  MatScalar      *aa;
  Mat_SeqAIJ     *aij;
  PetscInt       *garray = a->garray, *colsub, Ncols;
  PetscInt        count, Bn = B->cmap->N, cstart = mat->cmap->rstart, cend = mat->cmap->rend;
  IS              iscol_sub, iscmap;
  const PetscInt *is_idx, *cmap;
  PetscBool       allcolumns = PETSC_FALSE;
  MPI_Comm        comm;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  if (call == MAT_REUSE_MATRIX) {
    /* The pieces composed onto *newmat by the MAT_INITIAL_MATRIX branch are required for reuse */
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
    PetscCheck(iscol_sub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "SubIScol passed in was not used before, cannot reuse");
    PetscCall(ISGetLocalSize(iscol_sub, &count));

    PetscCall(PetscObjectQuery((PetscObject)*newmat, "Subcmap", (PetscObject *)&iscmap));
    PetscCheck(iscmap, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Subcmap passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Msub));
    PetscCheck(Msub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");

    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_REUSE_MATRIX, PETSC_FALSE, &Msub));

  } else { /* call == MAT_INITIAL_MATRIX) */
    PetscBool flg;

    PetscCall(ISGetLocalSize(iscol, &n));
    PetscCall(ISGetSize(iscol, &Ncols));

    /* (1) iscol -> nonscalable iscol_local */
    /* Check for special case: each processor gets entire matrix columns */
    PetscCall(ISIdentity(iscol_local, &flg));
    if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
    /* collective decision: allcolumns must agree on all ranks */
    PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPI_C_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (allcolumns) {
      iscol_sub = iscol_local;
      PetscCall(PetscObjectReference((PetscObject)iscol_local));
      PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &iscmap));

    } else {
      /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
      PetscInt *idx, *cmap1, k;
      PetscCall(PetscMalloc1(Ncols, &idx));
      PetscCall(PetscMalloc1(Ncols, &cmap1));
      PetscCall(ISGetIndices(iscol_local, &is_idx));
      count = 0;
      k     = 0;
      for (i = 0; i < Ncols; i++) {
        j = is_idx[i];
        if (j >= cstart && j < cend) {
          /* diagonal part of mat */
          idx[count]     = j;
          cmap1[count++] = i; /* column index in submat */
        } else if (Bn) {
          /* off-diagonal part of mat; garray is scanned monotonically, which is why iscol_local must be sorted */
          if (j == garray[k]) {
            idx[count]     = j;
            cmap1[count++] = i; /* column index in submat */
          } else if (j > garray[k]) {
            while (j > garray[k] && k < Bn - 1) k++;
            if (j == garray[k]) {
              idx[count]     = j;
              cmap1[count++] = i; /* column index in submat */
            }
          }
        }
      }
      PetscCall(ISRestoreIndices(iscol_local, &is_idx));

      /* idx and cmap1 ownership passes to the new index sets */
      PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_OWN_POINTER, &iscol_sub));
      PetscCall(ISGetBlockSize(iscol, &cbs));
      PetscCall(ISSetBlockSize(iscol_sub, cbs));

      PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local), count, cmap1, PETSC_OWN_POINTER, &iscmap));
    }

    /* (3) Create sequential Msub */
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_INITIAL_MATRIX, allcolumns, &Msub));
  }

  PetscCall(ISGetLocalSize(iscol_sub, &count));
  aij = (Mat_SeqAIJ *)Msub->data;
  ii  = aij->i;
  PetscCall(ISGetIndices(iscmap, &cmap));

  /*
      m - number of local rows
      Ncols - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetSize(Msub, &m, NULL));

  if (call == MAT_INITIAL_MATRIX) {
    /* (4) Create parallel newmat */
    PetscMPIInt rank, size;
    PetscInt    csize;

    PetscCallMPI(MPI_Comm_size(comm, &size));
    PetscCallMPI(MPI_Comm_rank(comm, &rank));

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    PetscCall(ISGetLocalSize(iscol, &csize));
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow, &mglobal));
      if (mglobal == Ncols) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = Ncols / size + ((Ncols % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
    rstart = rend - nlocal;
    PetscCheck(rank != size - 1 || rend == Ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, Ncols);

    /* next, compute all the lengths */
    jj = aij->j;
    PetscCall(PetscMalloc1(2 * m + 1, &dlens)); /* single allocation holds both dlens and olens */
    olens = dlens + m;
    for (i = 0; i < m; i++) {
      jend = ii[i + 1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j = 0; j < jend; j++) {
        if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }

    PetscCall(ISGetBlockSize(isrow, &bs));
    PetscCall(ISGetBlockSize(iscol, &cbs));

    PetscCall(MatCreate(comm, &M));
    PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, Ncols));
    PetscCall(MatSetBlockSizes(M, bs, cbs));
    PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
    PetscCall(PetscFree(dlens));

  } else { /* call == MAT_REUSE_MATRIX */
    M = *newmat;
    PetscCall(MatGetLocalSize(M, &i, NULL));
    PetscCheck(i == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
        The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
        rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }

  /* (5) Set values of Msub to *newmat */
  PetscCall(PetscMalloc1(count, &colsub));
  PetscCall(MatGetOwnershipRange(M, &rstart, NULL));

  /* walk Msub's CSR arrays row by row, translating local column ids through cmap */
  jj = aij->j;
  PetscCall(MatSeqAIJGetArrayRead(Msub, (const PetscScalar **)&aa));
  for (i = 0; i < m; i++) {
    row = rstart + i;
    nz  = ii[i + 1] - ii[i];
    for (j = 0; j < nz; j++) colsub[j] = cmap[jj[j]];
    PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, colsub, aa, INSERT_VALUES));
    jj += nz;
    aa += nz;
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Msub, (const PetscScalar **)&aa));
  PetscCall(ISRestoreIndices(iscmap, &cmap));

  PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));

  PetscCall(PetscFree(colsub));

  /* save Msub, iscol_sub and iscmap used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    *newmat = M;
    PetscCall(PetscObjectCompose((PetscObject)*newmat, "SubMatrix", (PetscObject)Msub));
    PetscCall(MatDestroy(&Msub));

    PetscCall(PetscObjectCompose((PetscObject)*newmat, "SubIScol", (PetscObject)iscol_sub));
    PetscCall(ISDestroy(&iscol_sub));

    PetscCall(PetscObjectCompose((PetscObject)*newmat, "Subcmap", (PetscObject)iscmap));
    PetscCall(ISDestroy(&iscmap));

    if (iscol_local) {
      PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
      PetscCall(ISDestroy(&iscol_local));
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
    Not great since it makes two copies of the submatrix, first an SeqAIJ
    in local and then by concatenating the local matrices the end result.
    Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()

    This requires a sequential iscol with all indices.
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat)
{
  PetscMPIInt rank, size;
  PetscInt    i, m, n, rstart, row, rend, nz, *cwork, j, bs, cbs;
  PetscInt   *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
  Mat         M, Mreuse;
  MatScalar  *aa, *vwork;
  MPI_Comm    comm;
  Mat_SeqAIJ *aij;
  PetscBool   colflag, allcolumns = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));

  /* Check for special case: each processor gets entire matrix columns.
     The decision is made collective by the allreduce below. */
  PetscCall(ISIdentity(iscol, &colflag));
  PetscCall(ISGetLocalSize(iscol, &n));
  if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
  PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPI_C_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));

  if (call == MAT_REUSE_MATRIX) {
    /* The sequential submatrix saved on *newmat by a prior MAT_INITIAL_MATRIX call is refilled in place */
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse));
    PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_REUSE_MATRIX, allcolumns, &Mreuse));
  } else {
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_INITIAL_MATRIX, allcolumns, &Mreuse));
  }

  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetSize(Mreuse, &m, &n));
  PetscCall(MatGetBlockSizes(Mreuse, &bs, &cbs));
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ *)Mreuse->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow, &mglobal));
      if (mglobal == n) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = n / size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    /* prefix-sum the local column counts to get this rank's [rstart, rend) column ownership */
    PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
    rstart = rend - nlocal;
    PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n);

    /* next, compute all the lengths */
    PetscCall(PetscMalloc1(2 * m + 1, &dlens)); /* single allocation holds both dlens and olens */
    olens = dlens + m;
    for (i = 0; i < m; i++) {
      jend = ii[i + 1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j = 0; j < jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    PetscCall(MatCreate(comm, &M));
    PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, n));
    PetscCall(MatSetBlockSizes(M, bs, cbs));
    PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
    PetscCall(PetscFree(dlens));
  } else {
    PetscInt ml, nl;

    M = *newmat;
    PetscCall(MatGetLocalSize(M, &ml, &nl));
    PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
        The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
        rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  PetscCall(MatGetOwnershipRange(M, &rstart, &rend));
  aij = (Mat_SeqAIJ *)Mreuse->data;
  ii  = aij->i;
  jj  = aij->j;

  /* trigger copy to CPU if needed */
  PetscCall(MatSeqAIJGetArrayRead(Mreuse, (const PetscScalar **)&aa));
  for (i = 0; i < m; i++) {
    row = rstart + i;
    nz  = ii[i + 1] - ii[i];
    cwork = jj;
    jj    = PetscSafePointerPlusOffset(jj, nz);
    vwork = aa;
    aa    = PetscSafePointerPlusOffset(aa, nz);
    PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES));
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Mreuse, (const PetscScalar **)&aa));

  PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse));
    PetscCall(MatDestroy(&Mreuse));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Implementation of MatMPIAIJSetPreallocationCSR(): counts diagonal/off-diagonal nonzeros per local
   row from the CSR arrays, preallocates, inserts the values, assembles, and records the number of
   entries below the block diagonal per row in Aij->ld. */
static PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[])
{
  PetscInt        m, cstart, cend, j, nnz, i, d, *ld;
  PetscInt       *d_nnz, *o_nnz, nnz_max = 0, rstart, ii, irstart;
  const PetscInt *JJ;
  PetscBool       nooffprocentries;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)B->data;

  PetscFunctionBegin;
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  m       = B->rmap->n;
  cstart  = B->cmap->rstart;
  cend    = B->cmap->rend;
  rstart  = B->rmap->rstart;
  irstart = Ii[0]; /* Ii need not start at 0; offsets below are taken relative to Ii[0] */

  PetscCall(PetscCalloc2(m, &d_nnz, m, &o_nnz));

  /* validate the CSR structure in debug builds only */
  if (PetscDefined(USE_DEBUG)) {
    for (i = 0; i < m; i++) {
      nnz = Ii[i + 1] - Ii[i];
      JJ  = PetscSafePointerPlusOffset(J, Ii[i] - irstart);
      PetscCheck(nnz >= 0,
                 PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns", i, nnz);
      PetscCheck(!nnz || !(JJ[0] < 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT, i, JJ[0]);
      PetscCheck(!nnz || !(JJ[nnz - 1] >= B->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")", i, JJ[nnz - 1], B->cmap->N);
    }
  }

  /* count diagonal (columns in [cstart, cend)) vs off-diagonal entries per row for preallocation */
  for (i = 0; i < m; i++) {
    nnz = Ii[i + 1] - Ii[i];
    JJ  = PetscSafePointerPlusOffset(J, Ii[i] - irstart);
    nnz_max = PetscMax(nnz_max, nnz);
    d       = 0;
    for (j = 0; j < nnz; j++) {
      if (cstart <= JJ[j] && JJ[j] < cend) d++;
    }
    d_nnz[i] = d;
    o_nnz[i] = nnz - d;
  }
  PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
  PetscCall(PetscFree2(d_nnz, o_nnz));

  for (i = 0; i < m; i++) {
    ii = i + rstart;
    PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, Ii[i + 1] - Ii[i], PetscSafePointerPlusOffset(J, Ii[i] - irstart), PetscSafePointerPlusOffset(v, Ii[i] - irstart), INSERT_VALUES));
  }
  /* all rows inserted are locally owned, so off-process communication can be skipped for this assembly */
  nooffprocentries    = B->nooffprocentries;
  B->nooffprocentries = PETSC_TRUE;
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  B->nooffprocentries = nooffprocentries;

  /* count number of entries below block diagonal */
  PetscCall(PetscFree(Aij->ld));
  PetscCall(PetscCalloc1(m, &ld));
  Aij->ld = ld;
  /* NOTE(review): J is advanced row by row here and the `if (J)` guard implies J may be NULL;
     that is only safe when every row is empty (nnz == 0 for all i), since J[j] is dereferenced
     whenever nnz > 0 — confirm the callers' contract */
  for (i = 0; i < m; i++) {
    nnz = Ii[i + 1] - Ii[i];
    j   = 0;
    while (j < nnz && J[j] < cstart) j++;
    ld[i] = j;
    if (J) J += nnz;
  }

  PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in `MATAIJ` format
  (the default parallel PETSc format).

  Collective

  Input Parameters:
+ B - the matrix
. i - the indices into `j` for the start of each local row (indices start with zero)
. j - the column indices for each local row (indices start with zero)
- v - optional values in the matrix

  Level: developer

  Notes:
  The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc;
  thus you CANNOT change the matrix entries by changing the values of `v` after you have
  called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.

  The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array.

  A convenience routine for this functionality is `MatCreateMPIAIJWithArrays()`.

  You can update the matrix with new numerical values using `MatUpdateMPIAIJWithArrays()` after this call if the column indices in `j` are sorted.

  If you do **not** use `MatUpdateMPIAIJWithArrays()`, the column indices in `j` do not need to be sorted. If you will use
  `MatUpdateMPIAIJWithArrays()`, the column indices **must** be sorted.

  The format which is used for the sparse matrix input, is equivalent to a
  row-major ordering, i.e. for the following matrix, the input data expected is
  as shown
.vb
        1 0 0
        2 0 3     P0
       -------
        4 5 6     P1

     Process0 [P0] rows_owned=[0,1]
        i =  {0,1,3}  [size = nrow+1  = 2+1]
        j =  {0,0,2}  [size = 3]
        v =  {1,2,3}  [size = 3]

     Process1 [P1] rows_owned=[2]
        i =  {0,3}    [size = nrow+1  = 1+1]
        j =  {0,1,2}  [size = 3]
        v =  {4,5,6}  [size = 3]
.ve

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`,
          `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()`, `MatCreateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
@*/
PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
{
  PetscFunctionBegin;
  /* dispatch to the type-specific implementation (MatMPIAIJSetPreallocationCSR_MPIAIJ for MPIAIJ);
     PetscTryMethod is a no-op for types that do not provide the method */
  PetscTryMethod(B, "MatMPIAIJSetPreallocationCSR_C", (Mat, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, i, j, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in `MATMPIAIJ` format
  (the default parallel PETSc format).  For good matrix assembly performance
  the user should preallocate the matrix storage by setting the parameters
  `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).

  Collective

  Input Parameters:
+ B - the matrix
. d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix
  (same value is used for all local rows)
. d_nnz - array containing the number of nonzeros in the various rows of the
  DIAGONAL portion of the local submatrix (possibly different for each row)
  or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `d_nz` is used to specify the nonzero structure.
  The size of this array is equal to the number of local rows, i.e 'm'.
4011 For matrices that will be factored, you must leave room for (and set) 4012 the diagonal entry even if it is zero. 4013 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4014 submatrix (same value is used for all local rows). 4015 - o_nnz - array containing the number of nonzeros in the various rows of the 4016 OFF-DIAGONAL portion of the local submatrix (possibly different for 4017 each row) or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `o_nz` is used to specify the nonzero 4018 structure. The size of this array is equal to the number 4019 of local rows, i.e 'm'. 4020 4021 Example Usage: 4022 Consider the following 8x8 matrix with 34 non-zero values, that is 4023 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4024 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4025 as follows 4026 4027 .vb 4028 1 2 0 | 0 3 0 | 0 4 4029 Proc0 0 5 6 | 7 0 0 | 8 0 4030 9 0 10 | 11 0 0 | 12 0 4031 ------------------------------------- 4032 13 0 14 | 15 16 17 | 0 0 4033 Proc1 0 18 0 | 19 20 21 | 0 0 4034 0 0 0 | 22 23 0 | 24 0 4035 ------------------------------------- 4036 Proc2 25 26 27 | 0 0 28 | 29 0 4037 30 0 0 | 31 32 33 | 0 34 4038 .ve 4039 4040 This can be represented as a collection of submatrices as 4041 .vb 4042 A B C 4043 D E F 4044 G H I 4045 .ve 4046 4047 Where the submatrices A,B,C are owned by proc0, D,E,F are 4048 owned by proc1, G,H,I are owned by proc2. 4049 4050 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4051 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4052 The 'M','N' parameters are 8,8, and have the same values on all procs. 4053 4054 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4055 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4056 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4057 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4058 part as `MATSEQAIJ` matrices. 
For example, proc1 will store [E] as a `MATSEQAIJ` 4059 matrix, and [DF] as another `MATSEQAIJ` matrix. 4060 4061 When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are 4062 allocated for every row of the local DIAGONAL submatrix, and `o_nz` 4063 storage locations are allocated for every row of the OFF-DIAGONAL submatrix. 4064 One way to choose `d_nz` and `o_nz` is to use the maximum number of nonzeros over 4065 the local rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices. 4066 In this case, the values of `d_nz`, `o_nz` are 4067 .vb 4068 proc0 dnz = 2, o_nz = 2 4069 proc1 dnz = 3, o_nz = 2 4070 proc2 dnz = 1, o_nz = 4 4071 .ve 4072 We are allocating `m`*(`d_nz`+`o_nz`) storage locations for every proc. This 4073 translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10 4074 for proc3. i.e we are using 12+15+10=37 storage locations to store 4075 34 values. 4076 4077 When `d_nnz`, `o_nnz` parameters are specified, the storage is specified 4078 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4079 In the above case the values for `d_nnz`, `o_nnz` are 4080 .vb 4081 proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2] 4082 proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1] 4083 proc2 d_nnz = [1,1] and o_nnz = [4,4] 4084 .ve 4085 Here the space allocated is sum of all the above values i.e 34, and 4086 hence pre-allocation is perfect. 4087 4088 Level: intermediate 4089 4090 Notes: 4091 If the *_nnz parameter is given then the *_nz parameter is ignored 4092 4093 The `MATAIJ` format, also called compressed row storage (CSR), is compatible with standard Fortran 4094 storage. The stored row and column indices begin with zero. 4095 See [Sparse Matrices](sec_matsparse) for details. 4096 4097 The parallel matrix is partitioned such that the first m0 rows belong to 4098 process 0, the next m1 rows belong to process 1, the next m2 rows belong 4099 to process 2 etc.. where m0,m1,m2... are the input parameter 'm'. 
  The DIAGONAL portion of the local submatrix of a processor can be defined
  as the submatrix which is obtained by extracting the part corresponding to
  the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
  first row that belongs to the processor, r2 is the last row belonging to
  this processor, and c1-c2 is the range of indices of the local part of a
  vector suitable for applying the matrix to. This is an mxn matrix. In the
  common case of a square matrix, the row and column ranges are the same and
  the DIAGONAL part is also square. The remaining portion of the local
  submatrix (mxN) constitutes the OFF-DIAGONAL portion.

  If `o_nnz` and `d_nnz` are specified, then `o_nz` and `d_nz` are ignored.

  You can call `MatGetInfo()` to get information on how effective the preallocation was;
  for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
  You can also run with the option `-info` and look for messages with the string
  malloc in them to see if additional memory allocation was needed.

.seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MATMPIAIJ`, `MATAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`,
          `MatGetInfo()`, `PetscSplitOwnership()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
@*/
PetscErrorCode MatMPIAIJSetPreallocation(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidType(B, 1);
  /* Dispatch to the type-specific implementation registered under "MatMPIAIJSetPreallocation_C";
     PetscTryMethod() is a no-op when the matrix type does not provide that method */
  PetscTryMethod(B, "MatMPIAIJSetPreallocation_C", (Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, d_nz, d_nnz, o_nz, o_nnz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatCreateMPIAIJWithArrays - creates a `MATMPIAIJ` matrix using arrays that contain the local rows in standard
  CSR format.
4133 4134 Collective 4135 4136 Input Parameters: 4137 + comm - MPI communicator 4138 . m - number of local rows (Cannot be `PETSC_DECIDE`) 4139 . n - This value should be the same as the local size used in creating the 4140 x vector for the matrix-vector product $ y = Ax$. (or `PETSC_DECIDE` to have 4141 calculated if `N` is given) For square matrices n is almost always `m`. 4142 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given) 4143 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given) 4144 . i - row indices (of length m+1); that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 4145 . j - global column indices 4146 - a - optional matrix values 4147 4148 Output Parameter: 4149 . mat - the matrix 4150 4151 Level: intermediate 4152 4153 Notes: 4154 The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc; 4155 thus you CANNOT change the matrix entries by changing the values of `a[]` after you have 4156 called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays. 4157 4158 The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array. 4159 4160 Once you have created the matrix you can update it with new numerical values using `MatUpdateMPIAIJWithArray()` 4161 4162 If you do **not** use `MatUpdateMPIAIJWithArray()`, the column indices in `j` do not need to be sorted. If you will use 4163 `MatUpdateMPIAIJWithArrays()`, the column indices **must** be sorted. 
  The format which is used for the sparse matrix input, is equivalent to a
  row-major ordering, i.e., for the following matrix, the input data expected is
  as shown
.vb
        1 0 0
        2 0 3     P0
       -------
        4 5 6     P1

     Process0 [P0] rows_owned=[0,1]
        i =  {0,1,3}  [size = nrow+1  = 2+1]
        j =  {0,0,2}  [size = 3]
        v =  {1,2,3}  [size = 3]

     Process1 [P1] rows_owned=[2]
        i =  {0,3}    [size = nrow+1  = 1+1]
        j =  {0,1,2}  [size = 3]
        v =  {4,5,6}  [size = 3]
.ve

.seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
          `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
@*/
PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
{
  PetscFunctionBegin;
  /* i[] holds offsets into the local j[]/a[] arrays, so its first entry must be 0 */
  PetscCheck(!i || !i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatSetSizes(*mat, m, n, M, N));
  /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */
  PetscCall(MatSetType(*mat, MATMPIAIJ));
  /* copies the CSR arrays into the matrix (see Notes: the arrays are NOT retained by reference) */
  PetscCall(MatMPIAIJSetPreallocationCSR(*mat, i, j, a));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatUpdateMPIAIJWithArrays - updates a `MATMPIAIJ` matrix using arrays that contain the local rows in standard
  CSR format.
  Only the numerical values are updated; the other arrays must be identical to what was passed
  from `MatCreateMPIAIJWithArrays()`

  Deprecated: Use `MatUpdateMPIAIJWithArray()`

  Collective

  Input Parameters:
+ mat - the matrix
. m   - number of local rows (Cannot be `PETSC_DECIDE`)
. n   - This value should be the same as the local size used in creating the
        x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
        calculated if N is given) For square matrices n is almost always m.
. M   - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
. N   - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
. Ii  - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix
. J   - column indices
- v   - matrix values

  Level: deprecated

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
          `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
@*/
PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[])
{
  PetscInt        nnz, i;
  PetscBool       nooffprocentries;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)mat->data;
  Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ *)Aij->A->data;
  PetscScalar    *ad, *ao;
  PetscInt        ldi, Iii, md;
  const PetscInt *Adi = Ad->i;  /* row offsets of the local diagonal block */
  PetscInt       *ld  = Aij->ld;

  PetscFunctionBegin;
  PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCheck(m == mat->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()");
  PetscCheck(n == mat->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()");

  PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad));
  PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao));

  for (i = 0; i < m; i++) {
    if (PetscDefined(USE_DEBUG)) {
      /* the splitting copy below relies on each row's column indices being sorted and unique; verify in debug builds */
      for (PetscInt j = Ii[i] + 1; j < Ii[i + 1]; ++j) {
        PetscCheck(J[j] >= J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is not sorted", j - Ii[i], J[j], i);
        PetscCheck(J[j] != J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is identical to previous entry", j - Ii[i], J[j], i);
      }
    }
    nnz = Ii[i + 1] - Ii[i];   /* total number of entries in row i of v[] */
    Iii = Ii[i];               /* offset of row i in v[] */
    ldi = ld[i];               /* entries of row i that lie before (left of) the diagonal block */
    md  = Adi[i + 1] - Adi[i]; /* entries of row i that belong to the diagonal block */
    /* split row i of v[] into: off-diagonal entries left of the diagonal block,
       the diagonal-block entries, then the remaining off-diagonal entries */
    PetscCall(PetscArraycpy(ao, v + Iii, ldi));
    PetscCall(PetscArraycpy(ad, v + Iii + ldi, md));
    PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md));
    ad += md;
    ao += nnz - md;
  }
  /* values were written directly into the local blocks, so assembly needs no off-process communication */
  nooffprocentries      = mat->nooffprocentries;
  mat->nooffprocentries = PETSC_TRUE;
  PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad));
  PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao));
  PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
  PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
  mat->nooffprocentries = nooffprocentries;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatUpdateMPIAIJWithArray - updates a `MATMPIAIJ` matrix using an array that contains the
  nonzero values

  Collective

  Input Parameters:
+ mat - the matrix
- v   - matrix values, stored by row

  Level: intermediate

  Notes:
  The matrix must have been obtained with `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()`

  The column indices in the call to `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` must have been sorted for this call to work correctly

.seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
          `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()`
@*/
PetscErrorCode MatUpdateMPIAIJWithArray(Mat mat, const PetscScalar v[])
{
  PetscInt        nnz, i, m;
  PetscBool       nooffprocentries;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)mat->data;
  Mat_SeqAIJ     *Ad  = (Mat_SeqAIJ *)Aij->A->data;
  Mat_SeqAIJ     *Ao  = (Mat_SeqAIJ *)Aij->B->data;
  PetscScalar    *ad, *ao;
  const PetscInt *Adi = Ad->i, *Adj = Ao->i; /* NOTE: despite its name, Adj holds Ao's row offsets (off-diagonal block) */
  PetscInt        ldi, Iii, md;
  PetscInt       *ld = Aij->ld;

  PetscFunctionBegin;
  m = mat->rmap->n;

  PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad));
  PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao));
  Iii = 0; /* running offset of the current row in v[] */
  for (i = 0; i < m; i++) {
    /* row i of v[] holds the diagonal-block entries plus the off-diagonal entries, in column order */
    nnz = Adi[i + 1] - Adi[i] + Adj[i + 1] - Adj[i];
    ldi = ld[i];               /* entries of row i that lie before (left of) the diagonal block */
    md  = Adi[i + 1] - Adi[i]; /* entries of row i that belong to the diagonal block */
    PetscCall(PetscArraycpy(ad, v + Iii + ldi, md));
    ad += md;
    if (ao) { /* NOTE(review): ao appears to be NULL when the off-diagonal block is empty -- confirm */
      PetscCall(PetscArraycpy(ao, v + Iii, ldi));
      PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md));
      ao += nnz - md;
    }
    Iii += nnz;
  }
  /* values were written directly into the local blocks, so assembly needs no off-process communication */
  nooffprocentries      = mat->nooffprocentries;
  mat->nooffprocentries = PETSC_TRUE;
  PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad));
  PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao));
  PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A));
  PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B));
  PetscCall(PetscObjectStateIncrease((PetscObject)mat));
  PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
  mat->nooffprocentries = nooffprocentries;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatCreateAIJ - Creates a sparse parallel matrix in `MATAIJ` format
  (the default parallel PETSc format).  For good matrix assembly performance
  the user should preallocate the matrix storage by setting the parameters
  `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`).

  Collective

  Input Parameters:
+ comm  - MPI communicator
. m     - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
          This value should be the same as the local size used in creating the
          y vector for the matrix-vector product y = Ax.
. n     - This value should be the same as the local size used in creating the
          x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
          calculated if N is given) For square matrices n is almost always m.
. M     - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
. N     - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
. d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
          (same value is used for all local rows)
. d_nnz - array containing the number of nonzeros in the various rows of the
          DIAGONAL portion of the local submatrix (possibly different for each row)
          or `NULL`, if `d_nz` is used to specify the nonzero structure.
          The size of this array is equal to the number of local rows, i.e 'm'.
. o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
          submatrix (same value is used for all local rows).
- o_nnz - array containing the number of nonzeros in the various rows of the
          OFF-DIAGONAL portion of the local submatrix (possibly different for
          each row) or `NULL`, if `o_nz` is used to specify the nonzero
          structure. The size of this array is equal to the number
          of local rows, i.e 'm'.

  Output Parameter:
. A - the matrix

  Options Database Keys:
+ -mat_no_inode                     - Do not use inodes
. -mat_inode_limit <limit>          - Sets inode limit (max limit=5)
- -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in `MatMult()` of sparse parallel matrices.
        See viewer types in manual of `MatView()`. Of them, ascii_matlab, draw or binary cause the `VecScatter`
        to be viewed as a matrix. Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one `MatMult()` call.

  Level: intermediate

  Notes:
  It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
  MatXXXXSetPreallocation() paradigm instead of this routine directly.
  [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`]

  If the *_nnz parameter is given then the *_nz parameter is ignored

  The `m`,`n`,`M`,`N` parameters specify the size of the matrix, and its partitioning across
  processors, while `d_nz`,`d_nnz`,`o_nz`,`o_nnz` parameters specify the approximate
  storage requirements for this matrix.

  If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one
  processor then it must be used on all processors that share the object for
  that argument.

  If `m` and `n` are not `PETSC_DECIDE`, then the values determine the `PetscLayout` of the matrix and the ranges returned by
  `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, and `MatGetOwnershipRangesColumn()`.
4401 4402 The user MUST specify either the local or global matrix dimensions 4403 (possibly both). 4404 4405 The parallel matrix is partitioned across processors such that the 4406 first `m0` rows belong to process 0, the next `m1` rows belong to 4407 process 1, the next `m2` rows belong to process 2, etc., where 4408 `m0`, `m1`, `m2`... are the input parameter `m` on each MPI process. I.e., each MPI process stores 4409 values corresponding to [m x N] submatrix. 4410 4411 The columns are logically partitioned with the n0 columns belonging 4412 to 0th partition, the next n1 columns belonging to the next 4413 partition etc.. where n0,n1,n2... are the input parameter 'n'. 4414 4415 The DIAGONAL portion of the local submatrix on any given processor 4416 is the submatrix corresponding to the rows and columns m,n 4417 corresponding to the given processor. i.e diagonal matrix on 4418 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4419 etc. The remaining portion of the local submatrix [m x (N-n)] 4420 constitute the OFF-DIAGONAL portion. The example below better 4421 illustrates this concept. The two matrices, the DIAGONAL portion and 4422 the OFF-DIAGONAL portion are each stored as `MATSEQAIJ` matrices. 4423 4424 For a square global matrix we define each processor's diagonal portion 4425 to be its local rows and the corresponding columns (a square submatrix); 4426 each processor's off-diagonal portion encompasses the remainder of the 4427 local matrix (a rectangular submatrix). 4428 4429 If `o_nnz`, `d_nnz` are specified, then `o_nz`, and `d_nz` are ignored. 4430 4431 When calling this routine with a single process communicator, a matrix of 4432 type `MATSEQAIJ` is returned. 
If a matrix of type `MATMPIAIJ` is desired for this 4433 type of communicator, use the construction mechanism 4434 .vb 4435 MatCreate(..., &A); 4436 MatSetType(A, MATMPIAIJ); 4437 MatSetSizes(A, m, n, M, N); 4438 MatMPIAIJSetPreallocation(A, ...); 4439 .ve 4440 4441 By default, this format uses inodes (identical nodes) when possible. 4442 We search for consecutive rows with the same nonzero structure, thereby 4443 reusing matrix information to achieve increased efficiency. 4444 4445 Example Usage: 4446 Consider the following 8x8 matrix with 34 non-zero values, that is 4447 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4448 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4449 as follows 4450 4451 .vb 4452 1 2 0 | 0 3 0 | 0 4 4453 Proc0 0 5 6 | 7 0 0 | 8 0 4454 9 0 10 | 11 0 0 | 12 0 4455 ------------------------------------- 4456 13 0 14 | 15 16 17 | 0 0 4457 Proc1 0 18 0 | 19 20 21 | 0 0 4458 0 0 0 | 22 23 0 | 24 0 4459 ------------------------------------- 4460 Proc2 25 26 27 | 0 0 28 | 29 0 4461 30 0 0 | 31 32 33 | 0 34 4462 .ve 4463 4464 This can be represented as a collection of submatrices as 4465 4466 .vb 4467 A B C 4468 D E F 4469 G H I 4470 .ve 4471 4472 Where the submatrices A,B,C are owned by proc0, D,E,F are 4473 owned by proc1, G,H,I are owned by proc2. 4474 4475 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4476 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4477 The 'M','N' parameters are 8,8, and have the same values on all procs. 4478 4479 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4480 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4481 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4482 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4483 part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ` 4484 matrix, and [DF] as another SeqAIJ matrix. 
  When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
  allocated for every row of the local DIAGONAL submatrix, and `o_nz`
  storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
  One way to choose `d_nz` and `o_nz` is to use the maximum number of nonzeros over
  the local rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
  In this case, the values of `d_nz`,`o_nz` are
.vb
     proc0  d_nz = 2, o_nz = 2
     proc1  d_nz = 3, o_nz = 2
     proc2  d_nz = 1, o_nz = 4
.ve
  We are allocating m*(`d_nz`+`o_nz`) storage locations for every proc. This
  translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
  for proc2, i.e. we are using 12+15+10=37 storage locations to store
  34 values.

  When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
  for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
  In the above case the values for d_nnz,o_nnz are
.vb
     proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2 d_nnz = [1,1]   and o_nnz = [4,4]
.ve
  Here the space allocated is the sum of all the above values, i.e. 34, and
  hence pre-allocation is perfect.
4512 4513 .seealso: [](ch_matrices), `Mat`, [Sparse Matrix Creation](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4514 `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()`, `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, 4515 `MatGetOwnershipRangesColumn()`, `PetscLayout` 4516 @*/ 4517 PetscErrorCode MatCreateAIJ(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A) 4518 { 4519 PetscMPIInt size; 4520 4521 PetscFunctionBegin; 4522 PetscCall(MatCreate(comm, A)); 4523 PetscCall(MatSetSizes(*A, m, n, M, N)); 4524 PetscCallMPI(MPI_Comm_size(comm, &size)); 4525 if (size > 1) { 4526 PetscCall(MatSetType(*A, MATMPIAIJ)); 4527 PetscCall(MatMPIAIJSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz)); 4528 } else { 4529 PetscCall(MatSetType(*A, MATSEQAIJ)); 4530 PetscCall(MatSeqAIJSetPreallocation(*A, d_nz, d_nnz)); 4531 } 4532 PetscFunctionReturn(PETSC_SUCCESS); 4533 } 4534 4535 /*@C 4536 MatMPIAIJGetSeqAIJ - Returns the local pieces of this distributed matrix 4537 4538 Not Collective 4539 4540 Input Parameter: 4541 . A - The `MATMPIAIJ` matrix 4542 4543 Output Parameters: 4544 + Ad - The local diagonal block as a `MATSEQAIJ` matrix 4545 . Ao - The local off-diagonal block as a `MATSEQAIJ` matrix 4546 - colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix 4547 4548 Level: intermediate 4549 4550 Note: 4551 The rows in `Ad` and `Ao` are in [0, Nr), where Nr is the number of local rows on this process. The columns 4552 in `Ad` are in [0, Nc) where Nc is the number of local columns. The columns are `Ao` are in [0, Nco), where Nco is 4553 the number of nonzero columns in the local off-diagonal piece of the matrix `A`. The array colmap maps these 4554 local column numbers to global column numbers in the original matrix. 
4555 4556 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATSEQAIJ` 4557 @*/ 4558 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[]) 4559 { 4560 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 4561 PetscBool flg; 4562 4563 PetscFunctionBegin; 4564 PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &flg)); 4565 PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIAIJ matrix as input"); 4566 if (Ad) *Ad = a->A; 4567 if (Ao) *Ao = a->B; 4568 if (colmap) *colmap = a->garray; 4569 PetscFunctionReturn(PETSC_SUCCESS); 4570 } 4571 4572 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 4573 { 4574 PetscInt m, N, i, rstart, nnz, Ii; 4575 PetscInt *indx; 4576 PetscScalar *values; 4577 MatType rootType; 4578 4579 PetscFunctionBegin; 4580 PetscCall(MatGetSize(inmat, &m, &N)); 4581 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4582 PetscInt *dnz, *onz, sum, bs, cbs; 4583 4584 if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N)); 4585 /* Check sum(n) = N */ 4586 PetscCallMPI(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm)); 4587 PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N); 4588 4589 PetscCallMPI(MPI_Scan(&m, &rstart, 1, MPIU_INT, MPI_SUM, comm)); 4590 rstart -= m; 4591 4592 MatPreallocateBegin(comm, m, n, dnz, onz); 4593 for (i = 0; i < m; i++) { 4594 PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, NULL)); 4595 PetscCall(MatPreallocateSet(i + rstart, nnz, indx, dnz, onz)); 4596 PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, NULL)); 4597 } 4598 4599 PetscCall(MatCreate(comm, outmat)); 4600 PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 4601 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 
4602 PetscCall(MatSetBlockSizes(*outmat, bs, cbs)); 4603 PetscCall(MatGetRootType_Private(inmat, &rootType)); 4604 PetscCall(MatSetType(*outmat, rootType)); 4605 PetscCall(MatSeqAIJSetPreallocation(*outmat, 0, dnz)); 4606 PetscCall(MatMPIAIJSetPreallocation(*outmat, 0, dnz, 0, onz)); 4607 MatPreallocateEnd(dnz, onz); 4608 PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 4609 } 4610 4611 /* numeric phase */ 4612 PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL)); 4613 for (i = 0; i < m; i++) { 4614 PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, &values)); 4615 Ii = i + rstart; 4616 PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES)); 4617 PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, &values)); 4618 } 4619 PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY)); 4620 PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY)); 4621 PetscFunctionReturn(PETSC_SUCCESS); 4622 } 4623 4624 static PetscErrorCode MatMergeSeqsToMPIDestroy(void **data) 4625 { 4626 MatMergeSeqsToMPI *merge = (MatMergeSeqsToMPI *)*data; 4627 4628 PetscFunctionBegin; 4629 if (!merge) PetscFunctionReturn(PETSC_SUCCESS); 4630 PetscCall(PetscFree(merge->id_r)); 4631 PetscCall(PetscFree(merge->len_s)); 4632 PetscCall(PetscFree(merge->len_r)); 4633 PetscCall(PetscFree(merge->bi)); 4634 PetscCall(PetscFree(merge->bj)); 4635 PetscCall(PetscFree(merge->buf_ri[0])); 4636 PetscCall(PetscFree(merge->buf_ri)); 4637 PetscCall(PetscFree(merge->buf_rj[0])); 4638 PetscCall(PetscFree(merge->buf_rj)); 4639 PetscCall(PetscFree(merge->coi)); 4640 PetscCall(PetscFree(merge->coj)); 4641 PetscCall(PetscFree(merge->owners_co)); 4642 PetscCall(PetscLayoutDestroy(&merge->rowmap)); 4643 PetscCall(PetscFree(merge)); 4644 PetscFunctionReturn(PETSC_SUCCESS); 4645 } 4646 4647 #include <../src/mat/utils/freespace.h> 4648 #include <petscbt.h> 4649 4650 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat, Mat mpimat) 4651 { 4652 MPI_Comm comm; 4653 Mat_SeqAIJ *a = 
(Mat_SeqAIJ *)seqmat->data; 4654 PetscMPIInt size, rank, taga, *len_s; 4655 PetscInt N = mpimat->cmap->N, i, j, *owners, *ai = a->i, *aj, m; 4656 PetscMPIInt proc, k; 4657 PetscInt **buf_ri, **buf_rj; 4658 PetscInt anzi, *bj_i, *bi, *bj, arow, bnzi, nextaj; 4659 PetscInt nrows, **buf_ri_k, **nextrow, **nextai; 4660 MPI_Request *s_waits, *r_waits; 4661 MPI_Status *status; 4662 const MatScalar *aa, *a_a; 4663 MatScalar **abuf_r, *ba_i; 4664 MatMergeSeqsToMPI *merge; 4665 PetscContainer container; 4666 4667 PetscFunctionBegin; 4668 PetscCall(PetscObjectGetComm((PetscObject)mpimat, &comm)); 4669 PetscCall(PetscLogEventBegin(MAT_Seqstompinum, seqmat, 0, 0, 0)); 4670 4671 PetscCallMPI(MPI_Comm_size(comm, &size)); 4672 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 4673 4674 PetscCall(PetscObjectQuery((PetscObject)mpimat, "MatMergeSeqsToMPI", (PetscObject *)&container)); 4675 PetscCheck(container, PetscObjectComm((PetscObject)mpimat), PETSC_ERR_PLIB, "Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic"); 4676 PetscCall(PetscContainerGetPointer(container, (void **)&merge)); 4677 PetscCall(MatSeqAIJGetArrayRead(seqmat, &a_a)); 4678 aa = a_a; 4679 4680 bi = merge->bi; 4681 bj = merge->bj; 4682 buf_ri = merge->buf_ri; 4683 buf_rj = merge->buf_rj; 4684 4685 PetscCall(PetscMalloc1(size, &status)); 4686 owners = merge->rowmap->range; 4687 len_s = merge->len_s; 4688 4689 /* send and recv matrix values */ 4690 PetscCall(PetscObjectGetNewTag((PetscObject)mpimat, &taga)); 4691 PetscCall(PetscPostIrecvScalar(comm, taga, merge->nrecv, merge->id_r, merge->len_r, &abuf_r, &r_waits)); 4692 4693 PetscCall(PetscMalloc1(merge->nsend + 1, &s_waits)); 4694 for (proc = 0, k = 0; proc < size; proc++) { 4695 if (!len_s[proc]) continue; 4696 i = owners[proc]; 4697 PetscCallMPI(MPIU_Isend(aa + ai[i], len_s[proc], MPIU_MATSCALAR, proc, taga, comm, s_waits + k)); 4698 k++; 4699 } 4700 4701 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, r_waits, status)); 4702 if (merge->nsend) 
PetscCallMPI(MPI_Waitall(merge->nsend, s_waits, status)); 4703 PetscCall(PetscFree(status)); 4704 4705 PetscCall(PetscFree(s_waits)); 4706 PetscCall(PetscFree(r_waits)); 4707 4708 /* insert mat values of mpimat */ 4709 PetscCall(PetscMalloc1(N, &ba_i)); 4710 PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai)); 4711 4712 for (k = 0; k < merge->nrecv; k++) { 4713 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4714 nrows = *buf_ri_k[k]; 4715 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4716 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4717 } 4718 4719 /* set values of ba */ 4720 m = merge->rowmap->n; 4721 for (i = 0; i < m; i++) { 4722 arow = owners[rank] + i; 4723 bj_i = bj + bi[i]; /* col indices of the i-th row of mpimat */ 4724 bnzi = bi[i + 1] - bi[i]; 4725 PetscCall(PetscArrayzero(ba_i, bnzi)); 4726 4727 /* add local non-zero vals of this proc's seqmat into ba */ 4728 anzi = ai[arow + 1] - ai[arow]; 4729 aj = a->j + ai[arow]; 4730 aa = a_a + ai[arow]; 4731 nextaj = 0; 4732 for (j = 0; nextaj < anzi; j++) { 4733 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4734 ba_i[j] += aa[nextaj++]; 4735 } 4736 } 4737 4738 /* add received vals into ba */ 4739 for (k = 0; k < merge->nrecv; k++) { /* k-th received message */ 4740 /* i-th row */ 4741 if (i == *nextrow[k]) { 4742 anzi = *(nextai[k] + 1) - *nextai[k]; 4743 aj = buf_rj[k] + *nextai[k]; 4744 aa = abuf_r[k] + *nextai[k]; 4745 nextaj = 0; 4746 for (j = 0; nextaj < anzi; j++) { 4747 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4748 ba_i[j] += aa[nextaj++]; 4749 } 4750 } 4751 nextrow[k]++; 4752 nextai[k]++; 4753 } 4754 } 4755 PetscCall(MatSetValues(mpimat, 1, &arow, bnzi, bj_i, ba_i, INSERT_VALUES)); 4756 } 4757 PetscCall(MatSeqAIJRestoreArrayRead(seqmat, &a_a)); 4758 PetscCall(MatAssemblyBegin(mpimat, MAT_FINAL_ASSEMBLY)); 4759 
  PetscCall(MatAssemblyEnd(mpimat, MAT_FINAL_ASSEMBLY));

  /* release the receive buffers and the per-message cursors used above */
  PetscCall(PetscFree(abuf_r[0]));
  PetscCall(PetscFree(abuf_r));
  PetscCall(PetscFree(ba_i));
  PetscCall(PetscFree3(buf_ri_k, nextrow, nextai));
  PetscCall(PetscLogEventEnd(MAT_Seqstompinum, seqmat, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatCreateMPIAIJSumSeqAIJSymbolic - determines the nonzero structure of the MATMPIAIJ matrix
  that results from summing the (identically sized) sequential matrices held on each rank, and
  creates the (unassembled) parallel matrix with that preallocation.

  The exchanged ij-structure and the row layout are stashed in a MatMergeSeqsToMPI context that
  is composed with the result under the key "MatMergeSeqsToMPI" so that
  MatCreateMPIAIJSumSeqAIJNumeric() can reuse them to fill in the values.
*/
PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, Mat *mpimat)
{
  Mat                B_mpi;
  Mat_SeqAIJ        *a = (Mat_SeqAIJ *)seqmat->data;
  PetscMPIInt        size, rank, tagi, tagj, *len_s, *len_si, *len_ri;
  PetscInt         **buf_rj, **buf_ri, **buf_ri_k;
  PetscInt           M = seqmat->rmap->n, N = seqmat->cmap->n, i, *owners, *ai = a->i, *aj = a->j;
  PetscInt           len, *dnz, *onz, bs, cbs;
  PetscInt           k, anzi, *bi, *bj, *lnk, nlnk, arow, bnzi;
  PetscInt           nrows, *buf_s, *buf_si, *buf_si_i, **nextrow, **nextai;
  MPI_Request       *si_waits, *sj_waits, *ri_waits, *rj_waits;
  MPI_Status        *status;
  PetscFreeSpaceList free_space = NULL, current_space = NULL;
  PetscBT            lnkbt;
  MatMergeSeqsToMPI *merge;
  PetscContainer     container;

  PetscFunctionBegin;
  PetscCall(PetscLogEventBegin(MAT_Seqstompisym, seqmat, 0, 0, 0));

  /* make sure it is a PETSc comm */
  PetscCall(PetscCommDuplicate(comm, &comm, NULL));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));

  PetscCall(PetscNew(&merge));
  PetscCall(PetscMalloc1(size, &status));

  /* determine row ownership */
  PetscCall(PetscLayoutCreate(comm, &merge->rowmap));
  PetscCall(PetscLayoutSetLocalSize(merge->rowmap, m));
  PetscCall(PetscLayoutSetSize(merge->rowmap, M));
  PetscCall(PetscLayoutSetBlockSize(merge->rowmap, 1));
  PetscCall(PetscLayoutSetUp(merge->rowmap));
  PetscCall(PetscMalloc1(size, &len_si));
  PetscCall(PetscMalloc1(size, &merge->len_s));

  m      = merge->rowmap->n;
  owners = merge->rowmap->range;

  /* determine the number of messages to send, their lengths */
  len_s = merge->len_s;

  len          = 0; /* length of buf_si[] */
  merge->nsend = 0;
  for (PetscMPIInt proc = 0; proc < size; proc++) {
    len_si[proc] = 0;
    if (proc == rank) {
      len_s[proc] = 0;
    } else {
      PetscCall(PetscMPIIntCast(owners[proc + 1] - owners[proc] + 1, &len_si[proc]));
      PetscCall(PetscMPIIntCast(ai[owners[proc + 1]] - ai[owners[proc]], &len_s[proc])); /* num of rows to be sent to [proc] */
    }
    if (len_s[proc]) {
      merge->nsend++;
      nrows = 0;
      /* count only rows with at least one nonzero; empty rows are not sent */
      for (i = owners[proc]; i < owners[proc + 1]; i++) {
        if (ai[i + 1] > ai[i]) nrows++;
      }
      PetscCall(PetscMPIIntCast(2 * (nrows + 1), &len_si[proc]));
      len += len_si[proc];
    }
  }

  /* determine the number and length of messages to receive for ij-structure */
  PetscCall(PetscGatherNumberOfMessages(comm, NULL, len_s, &merge->nrecv));
  PetscCall(PetscGatherMessageLengths2(comm, merge->nsend, merge->nrecv, len_s, len_si, &merge->id_r, &merge->len_r, &len_ri));

  /* post the Irecv of j-structure */
  PetscCall(PetscCommGetNewTag(comm, &tagj));
  PetscCall(PetscPostIrecvInt(comm, tagj, merge->nrecv, merge->id_r, merge->len_r, &buf_rj, &rj_waits));

  /* post the Isend of j-structure */
  PetscCall(PetscMalloc2(merge->nsend, &si_waits, merge->nsend, &sj_waits));

  for (PetscMPIInt proc = 0, k = 0; proc < size; proc++) {
    if (!len_s[proc]) continue;
    i = owners[proc];
    PetscCallMPI(MPIU_Isend(aj + ai[i], len_s[proc], MPIU_INT, proc, tagj, comm, sj_waits + k));
    k++;
  }

  /* receives and sends of j-structure are complete */
  if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, rj_waits, status));
  if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, sj_waits, status));

  /* send and recv i-structure */
  PetscCall(PetscCommGetNewTag(comm, &tagi));
  PetscCall(PetscPostIrecvInt(comm, tagi, merge->nrecv, merge->id_r, len_ri, &buf_ri, &ri_waits));

  PetscCall(PetscMalloc1(len + 1, &buf_s));
  buf_si = buf_s; /* points to the beginning of k-th msg to be sent */
  for (PetscMPIInt proc = 0, k = 0; proc < size; proc++) {
    if (!len_s[proc]) continue;
    /* form outgoing message for i-structure:
         buf_si[0]:                 nrows to be sent
               [1:nrows]:           row index (global)
               [nrows+1:2*nrows+1]: i-structure index
    */
    nrows       = len_si[proc] / 2 - 1;
    buf_si_i    = buf_si + nrows + 1;
    buf_si[0]   = nrows;
    buf_si_i[0] = 0;
    nrows       = 0;
    for (i = owners[proc]; i < owners[proc + 1]; i++) {
      anzi = ai[i + 1] - ai[i];
      if (anzi) {
        buf_si_i[nrows + 1] = buf_si_i[nrows] + anzi; /* i-structure */
        buf_si[nrows + 1]   = i - owners[proc];       /* local row index */
        nrows++;
      }
    }
    PetscCallMPI(MPIU_Isend(buf_si, len_si[proc], MPIU_INT, proc, tagi, comm, si_waits + k));
    k++;
    buf_si += len_si[proc];
  }

  if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, ri_waits, status));
  if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, si_waits, status));

  PetscCall(PetscInfo(seqmat, "nsend: %d, nrecv: %d\n", merge->nsend, merge->nrecv));
  for (i = 0; i < merge->nrecv; i++) PetscCall(PetscInfo(seqmat, "recv len_ri=%d, len_rj=%d from [%d]\n", len_ri[i], merge->len_r[i], merge->id_r[i]));

  /* communication is done; free the send-side scratch (receive buffers live on in 'merge') */
  PetscCall(PetscFree(len_si));
  PetscCall(PetscFree(len_ri));
  PetscCall(PetscFree(rj_waits));
  PetscCall(PetscFree2(si_waits, sj_waits));
  PetscCall(PetscFree(ri_waits));
  PetscCall(PetscFree(buf_s));
  PetscCall(PetscFree(status));

  /* compute a local seq matrix in each processor */
  /* allocate bi array and free space for accumulating nonzero column info */
  PetscCall(PetscMalloc1(m + 1, &bi));
  bi[0] = 0;

  /* create and initialize a linked list */
  nlnk = N + 1;
  PetscCall(PetscLLCreate(N, N, nlnk, lnk, lnkbt));

  /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */
  len = ai[owners[rank + 1]] - ai[owners[rank]];
  PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2, len) + 1, &free_space));

  current_space = free_space;

  /* determine symbolic info for each local row */
  PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai));

  for (k = 0; k < merge->nrecv; k++) {
    buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */
    nrows       = *buf_ri_k[k];
    nextrow[k]  = buf_ri_k[k] + 1;           /* next row number of k-th recved i-structure */
    nextai[k]   = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */
  }

  MatPreallocateBegin(comm, m, n, dnz, onz);
  len = 0;
  for (i = 0; i < m; i++) {
    bnzi = 0;
    /* add local non-zero cols of this proc's seqmat into lnk */
    arow = owners[rank] + i;
    anzi = ai[arow + 1] - ai[arow];
    aj   = a->j + ai[arow];
    PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt));
    bnzi += nlnk;
    /* add received col data into lnk */
    for (k = 0; k < merge->nrecv; k++) { /* k-th received message */
      if (i == *nextrow[k]) {            /* i-th row */
        anzi = *(nextai[k] + 1) - *nextai[k];
        aj   = buf_rj[k] + *nextai[k];
        PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt));
        bnzi += nlnk;
        nextrow[k]++;
        nextai[k]++;
      }
    }
    if (len < bnzi) len = bnzi; /* =max(bnzi) */

    /* if free space is not available, make more free space */
    if (current_space->local_remaining < bnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi, current_space->total_array_size), &current_space));
    /* copy data into free space, then initialize lnk */
    PetscCall(PetscLLClean(N, N, bnzi, lnk, current_space->array, lnkbt));
    PetscCall(MatPreallocateSet(i + owners[rank], bnzi, current_space->array, dnz, onz));

    current_space->array += bnzi;
    current_space->local_used += bnzi;
    current_space->local_remaining -= bnzi;

    bi[i + 1] = bi[i] + bnzi;
  }

  PetscCall(PetscFree3(buf_ri_k, nextrow, nextai));

  PetscCall(PetscMalloc1(bi[m], &bj));
  PetscCall(PetscFreeSpaceContiguous(&free_space, bj));
  PetscCall(PetscLLDestroy(lnk, lnkbt));

  /* create symbolic parallel matrix B_mpi */
  PetscCall(MatGetBlockSizes(seqmat, &bs, &cbs));
  PetscCall(MatCreate(comm, &B_mpi));
  if (n == PETSC_DECIDE) {
    PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, N));
  } else {
    PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
  }
  PetscCall(MatSetBlockSizes(B_mpi, bs, cbs));
  PetscCall(MatSetType(B_mpi, MATMPIAIJ));
  PetscCall(MatMPIAIJSetPreallocation(B_mpi, 0, dnz, 0, onz));
  MatPreallocateEnd(dnz, onz);
  PetscCall(MatSetOption(B_mpi, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE));

  /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */
  B_mpi->assembled = PETSC_FALSE;
  merge->bi        = bi;
  merge->bj        = bj;
  merge->buf_ri    = buf_ri;
  merge->buf_rj    = buf_rj;
  merge->coi       = NULL;
  merge->coj       = NULL;
  merge->owners_co = NULL;

  PetscCall(PetscCommDestroy(&comm));

  /* attach the supporting struct to B_mpi for reuse */
  PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container));
  PetscCall(PetscContainerSetPointer(container, merge));
  PetscCall(PetscContainerSetCtxDestroy(container, MatMergeSeqsToMPIDestroy));
  PetscCall(PetscObjectCompose((PetscObject)B_mpi, "MatMergeSeqsToMPI", (PetscObject)container));
  PetscCall(PetscContainerDestroy(&container));
  *mpimat = B_mpi;

  PetscCall(PetscLogEventEnd(MAT_Seqstompisym, seqmat, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
MatCreateMPIAIJSumSeqAIJ - Creates a `MATMPIAIJ` matrix by adding sequential 5007 matrices from each processor 5008 5009 Collective 5010 5011 Input Parameters: 5012 + comm - the communicators the parallel matrix will live on 5013 . seqmat - the input sequential matrices 5014 . m - number of local rows (or `PETSC_DECIDE`) 5015 . n - number of local columns (or `PETSC_DECIDE`) 5016 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5017 5018 Output Parameter: 5019 . mpimat - the parallel matrix generated 5020 5021 Level: advanced 5022 5023 Note: 5024 The dimensions of the sequential matrix in each processor MUST be the same. 5025 The input seqmat is included into the container `MatMergeSeqsToMPIDestroy`, and will be 5026 destroyed when `mpimat` is destroyed. Call `PetscObjectQuery()` to access `seqmat`. 5027 5028 .seealso: [](ch_matrices), `Mat`, `MatCreateAIJ()` 5029 @*/ 5030 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, MatReuse scall, Mat *mpimat) 5031 { 5032 PetscMPIInt size; 5033 5034 PetscFunctionBegin; 5035 PetscCallMPI(MPI_Comm_size(comm, &size)); 5036 if (size == 1) { 5037 PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0)); 5038 if (scall == MAT_INITIAL_MATRIX) { 5039 PetscCall(MatDuplicate(seqmat, MAT_COPY_VALUES, mpimat)); 5040 } else { 5041 PetscCall(MatCopy(seqmat, *mpimat, SAME_NONZERO_PATTERN)); 5042 } 5043 PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0)); 5044 PetscFunctionReturn(PETSC_SUCCESS); 5045 } 5046 PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0)); 5047 if (scall == MAT_INITIAL_MATRIX) PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm, seqmat, m, n, mpimat)); 5048 PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat, *mpimat)); 5049 PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0)); 5050 PetscFunctionReturn(PETSC_SUCCESS); 5051 } 5052 5053 /*@ 5054 MatAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATAIJ` matrix. 
5055 5056 Not Collective 5057 5058 Input Parameter: 5059 . A - the matrix 5060 5061 Output Parameter: 5062 . A_loc - the local sequential matrix generated 5063 5064 Level: developer 5065 5066 Notes: 5067 The matrix is created by taking `A`'s local rows and putting them into a sequential matrix 5068 with `mlocal` rows and `n` columns. Where `mlocal` is obtained with `MatGetLocalSize()` and 5069 `n` is the global column count obtained with `MatGetSize()` 5070 5071 In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix. 5072 5073 For parallel matrices this creates an entirely new matrix. If the matrix is sequential it merely increases the reference count. 5074 5075 Destroy the matrix with `MatDestroy()` 5076 5077 .seealso: [](ch_matrices), `Mat`, `MatMPIAIJGetLocalMat()` 5078 @*/ 5079 PetscErrorCode MatAIJGetLocalMat(Mat A, Mat *A_loc) 5080 { 5081 PetscBool mpi; 5082 5083 PetscFunctionBegin; 5084 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &mpi)); 5085 if (mpi) { 5086 PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, A_loc)); 5087 } else { 5088 *A_loc = A; 5089 PetscCall(PetscObjectReference((PetscObject)*A_loc)); 5090 } 5091 PetscFunctionReturn(PETSC_SUCCESS); 5092 } 5093 5094 /*@ 5095 MatMPIAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix. 5096 5097 Not Collective 5098 5099 Input Parameters: 5100 + A - the matrix 5101 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5102 5103 Output Parameter: 5104 . A_loc - the local sequential matrix generated 5105 5106 Level: developer 5107 5108 Notes: 5109 The matrix is created by taking all `A`'s local rows and putting them into a sequential 5110 matrix with `mlocal` rows and `n` columns.`mlocal` is the row count obtained with 5111 `MatGetLocalSize()` and `n` is the global column count obtained with `MatGetSize()`. 5112 5113 In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix. 

  When `A` is sequential and `MAT_INITIAL_MATRIX` is requested, the matrix returned is the diagonal part of `A` (which contains the entire matrix),
  with its reference count increased by one. Hence changing values of `A_loc` changes `A`. If `MAT_REUSE_MATRIX` is requested on a sequential matrix
  then `MatCopy`(Adiag,*`A_loc`,`SAME_NONZERO_PATTERN`) is called to fill `A_loc`. Thus one can preallocate the appropriate sequential matrix `A_loc`
  and then call this routine with `MAT_REUSE_MATRIX`. In this case, one can modify the values of `A_loc` without affecting the original sequential matrix.

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()`
@*/
PetscErrorCode MatMPIAIJGetLocalMat(Mat A, MatReuse scall, Mat *A_loc)
{
  Mat_MPIAIJ        *mpimat = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ        *mat, *a, *b;
  PetscInt          *ai, *aj, *bi, *bj, *cmap = mpimat->garray;
  const PetscScalar *aa, *ba, *aav, *bav;
  PetscScalar       *ca, *cam;
  PetscMPIInt        size;
  PetscInt           am = A->rmap->n, i, j, k, cstart = A->cmap->rstart;
  PetscInt          *ci, *cj, col, ncols_d, ncols_o, jo;
  PetscBool          match;

  PetscFunctionBegin;
  /* PetscStrbeginswith() so derived types whose type name begins with "mpiaij" are accepted too */
  PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &match));
  PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  if (size == 1) {
    /* uniprocessor: the diagonal block IS the whole matrix (see Notes above) */
    if (scall == MAT_INITIAL_MATRIX) {
      PetscCall(PetscObjectReference((PetscObject)mpimat->A));
      *A_loc = mpimat->A;
    } else if (scall == MAT_REUSE_MATRIX) {
      PetscCall(MatCopy(mpimat->A, *A_loc, SAME_NONZERO_PATTERN));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0));
  a  = (Mat_SeqAIJ *)mpimat->A->data;
  b  = (Mat_SeqAIJ *)mpimat->B->data;
  ai = a->i;
  aj = a->j;
  bi = b->i;
  bj = b->j;
  PetscCall(MatSeqAIJGetArrayRead(mpimat->A, &aav));
  PetscCall(MatSeqAIJGetArrayRead(mpimat->B, &bav));
  /* aa/ba (and aj/bj) are advanced as running cursors through the two value/column arrays */
  aa = aav;
  ba = bav;
  if (scall == MAT_INITIAL_MATRIX) {
    /* merged row i has all diag entries plus all off-diag entries, in global column order:
       off-diag columns < cstart, then diag columns, then off-diag columns >= cstart */
    PetscCall(PetscMalloc1(1 + am, &ci));
    ci[0] = 0;
    for (i = 0; i < am; i++) ci[i + 1] = ci[i] + (ai[i + 1] - ai[i]) + (bi[i + 1] - bi[i]);
    PetscCall(PetscMalloc1(1 + ci[am], &cj));
    PetscCall(PetscMalloc1(1 + ci[am], &ca));
    k = 0;
    for (i = 0; i < am; i++) {
      ncols_o = bi[i + 1] - bi[i];
      ncols_d = ai[i + 1] - ai[i];
      /* off-diagonal portion of A (global columns left of the diagonal block) */
      for (jo = 0; jo < ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        cj[k] = col;
        bj++;
        ca[k++] = *ba++;
      }
      /* diagonal portion of A */
      for (j = 0; j < ncols_d; j++) {
        cj[k]   = cstart + *aj++;
        ca[k++] = *aa++;
      }
      /* off-diagonal portion of A (remaining columns right of the diagonal block) */
      for (j = jo; j < ncols_o; j++) {
        cj[k]   = cmap[*bj++];
        ca[k++] = *ba++;
      }
    }
    /* put together the new matrix */
    PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, A->cmap->N, ci, cj, ca, A_loc));
    /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */
    /* Since these are PETSc arrays, change flags to free them as necessary. */
    mat          = (Mat_SeqAIJ *)(*A_loc)->data;
    mat->free_a  = PETSC_TRUE;
    mat->free_ij = PETSC_TRUE;
    mat->nonew   = 0;
  } else if (scall == MAT_REUSE_MATRIX) {
    /* structure already set: only the values are refreshed, in the same interleaved order */
    mat = (Mat_SeqAIJ *)(*A_loc)->data;
    ci  = mat->i;
    cj  = mat->j;
    PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &cam));
    for (i = 0; i < am; i++) {
      /* off-diagonal portion of A */
      ncols_o = bi[i + 1] - bi[i];
      for (jo = 0; jo < ncols_o; jo++) {
        col = cmap[*bj];
        if (col >= cstart) break;
        *cam++ = *ba++;
        bj++;
      }
      /* diagonal portion of A */
      ncols_d = ai[i + 1] - ai[i];
      for (j = 0; j < ncols_d; j++) *cam++ = *aa++;
      /* off-diagonal portion of A */
      for (j = jo; j < ncols_o; j++) {
        *cam++ = *ba++;
        bj++;
      }
    }
    PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &cam));
  } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall);
  PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A, &aav));
  PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B, &bav));
  PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMPIAIJGetLocalMatMerge - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into a sequential matrix with
  mlocal rows and n columns.
Where n is the sum of the number of columns of the diagonal and off-diagonal part 5230 5231 Not Collective 5232 5233 Input Parameters: 5234 + A - the matrix 5235 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5236 5237 Output Parameters: 5238 + glob - sequential `IS` with global indices associated with the columns of the local sequential matrix generated (can be `NULL`) 5239 - A_loc - the local sequential matrix generated 5240 5241 Level: developer 5242 5243 Note: 5244 This is different from `MatMPIAIJGetLocalMat()` since the first columns in the returning matrix are those associated with the diagonal 5245 part, then those associated with the off-diagonal part (in its local ordering) 5246 5247 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()` 5248 @*/ 5249 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A, MatReuse scall, IS *glob, Mat *A_loc) 5250 { 5251 Mat Ao, Ad; 5252 const PetscInt *cmap; 5253 PetscMPIInt size; 5254 PetscErrorCode (*f)(Mat, MatReuse, IS *, Mat *); 5255 5256 PetscFunctionBegin; 5257 PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &cmap)); 5258 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 5259 if (size == 1) { 5260 if (scall == MAT_INITIAL_MATRIX) { 5261 PetscCall(PetscObjectReference((PetscObject)Ad)); 5262 *A_loc = Ad; 5263 } else if (scall == MAT_REUSE_MATRIX) { 5264 PetscCall(MatCopy(Ad, *A_loc, SAME_NONZERO_PATTERN)); 5265 } 5266 if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad), Ad->cmap->n, Ad->cmap->rstart, 1, glob)); 5267 PetscFunctionReturn(PETSC_SUCCESS); 5268 } 5269 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJGetLocalMatMerge_C", &f)); 5270 PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0)); 5271 if (f) { 5272 PetscCall((*f)(A, scall, glob, A_loc)); 5273 } else { 5274 Mat_SeqAIJ *a = (Mat_SeqAIJ *)Ad->data; 5275 Mat_SeqAIJ *b = (Mat_SeqAIJ *)Ao->data; 5276 Mat_SeqAIJ *c; 5277 
PetscInt *ai = a->i, *aj = a->j; 5278 PetscInt *bi = b->i, *bj = b->j; 5279 PetscInt *ci, *cj; 5280 const PetscScalar *aa, *ba; 5281 PetscScalar *ca; 5282 PetscInt i, j, am, dn, on; 5283 5284 PetscCall(MatGetLocalSize(Ad, &am, &dn)); 5285 PetscCall(MatGetLocalSize(Ao, NULL, &on)); 5286 PetscCall(MatSeqAIJGetArrayRead(Ad, &aa)); 5287 PetscCall(MatSeqAIJGetArrayRead(Ao, &ba)); 5288 if (scall == MAT_INITIAL_MATRIX) { 5289 PetscInt k; 5290 PetscCall(PetscMalloc1(1 + am, &ci)); 5291 PetscCall(PetscMalloc1(ai[am] + bi[am], &cj)); 5292 PetscCall(PetscMalloc1(ai[am] + bi[am], &ca)); 5293 ci[0] = 0; 5294 for (i = 0, k = 0; i < am; i++) { 5295 const PetscInt ncols_o = bi[i + 1] - bi[i]; 5296 const PetscInt ncols_d = ai[i + 1] - ai[i]; 5297 ci[i + 1] = ci[i] + ncols_o + ncols_d; 5298 /* diagonal portion of A */ 5299 for (j = 0; j < ncols_d; j++, k++) { 5300 cj[k] = *aj++; 5301 ca[k] = *aa++; 5302 } 5303 /* off-diagonal portion of A */ 5304 for (j = 0; j < ncols_o; j++, k++) { 5305 cj[k] = dn + *bj++; 5306 ca[k] = *ba++; 5307 } 5308 } 5309 /* put together the new matrix */ 5310 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, dn + on, ci, cj, ca, A_loc)); 5311 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5312 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5313 c = (Mat_SeqAIJ *)(*A_loc)->data; 5314 c->free_a = PETSC_TRUE; 5315 c->free_ij = PETSC_TRUE; 5316 c->nonew = 0; 5317 PetscCall(MatSetType(*A_loc, ((PetscObject)Ad)->type_name)); 5318 } else if (scall == MAT_REUSE_MATRIX) { 5319 PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &ca)); 5320 for (i = 0; i < am; i++) { 5321 const PetscInt ncols_d = ai[i + 1] - ai[i]; 5322 const PetscInt ncols_o = bi[i + 1] - bi[i]; 5323 /* diagonal portion of A */ 5324 for (j = 0; j < ncols_d; j++) *ca++ = *aa++; 5325 /* off-diagonal portion of A */ 5326 for (j = 0; j < ncols_o; j++) *ca++ = *ba++; 5327 } 5328 PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &ca)); 5329 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall); 5330 PetscCall(MatSeqAIJRestoreArrayRead(Ad, &aa)); 5331 PetscCall(MatSeqAIJRestoreArrayRead(Ao, &aa)); 5332 if (glob) { 5333 PetscInt cst, *gidx; 5334 5335 PetscCall(MatGetOwnershipRangeColumn(A, &cst, NULL)); 5336 PetscCall(PetscMalloc1(dn + on, &gidx)); 5337 for (i = 0; i < dn; i++) gidx[i] = cst + i; 5338 for (i = 0; i < on; i++) gidx[i + dn] = cmap[i]; 5339 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad), dn + on, gidx, PETSC_OWN_POINTER, glob)); 5340 } 5341 } 5342 PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0)); 5343 PetscFunctionReturn(PETSC_SUCCESS); 5344 } 5345 5346 /*@C 5347 MatMPIAIJGetLocalMatCondensed - Creates a `MATSEQAIJ` matrix from an `MATMPIAIJ` matrix by taking all its local rows and NON-ZERO columns 5348 5349 Not Collective 5350 5351 Input Parameters: 5352 + A - the matrix 5353 . scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5354 . row - index set of rows to extract (or `NULL`) 5355 - col - index set of columns to extract (or `NULL`) 5356 5357 Output Parameter: 5358 . 
A_loc - the local sequential matrix generated

  Level: developer

.seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`
@*/
PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A, MatReuse scall, IS *row, IS *col, Mat *A_loc)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  PetscInt    i, start, end, ncols, nzA, nzB, *cmap, imark, *idx;
  IS          isrowa, iscola;
  Mat        *aloc;
  PetscBool   match;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &match));
  PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input");
  PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed, A, 0, 0, 0));
  if (!row) {
    /* default row set: all locally owned rows */
    start = A->rmap->rstart;
    end   = A->rmap->rend;
    PetscCall(ISCreateStride(PETSC_COMM_SELF, end - start, start, 1, &isrowa));
  } else {
    isrowa = *row;
  }
  if (!col) {
    /* default column set: the nonzero columns, in global order — off-diag columns left of the
       diagonal block, then the owned columns, then the remaining off-diag columns (garray is sorted) */
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    PetscCall(PetscMalloc1(nzA + nzB, &idx));
    ncols = 0;
    for (i = 0; i < nzB; i++) {
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i = 0; i < nzA; i++) idx[ncols++] = start + i;
    for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i];
    PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &iscola));
  } else {
    iscola = *col;
  }
  if (scall != MAT_INITIAL_MATRIX) {
    /* MatCreateSubMatrices() expects an array of matrices on reuse */
    PetscCall(PetscMalloc1(1, &aloc));
    aloc[0] = *A_loc;
  }
  PetscCall(MatCreateSubMatrices(A, 1, &isrowa, &iscola, scall, &aloc));
  if (!col) { /* attach global id of condensed columns */
    PetscCall(PetscObjectCompose((PetscObject)aloc[0], "_petsc_GetLocalMatCondensed_iscol", (PetscObject)iscola));
  }
  *A_loc = aloc[0];
  PetscCall(PetscFree(aloc));
  if (!row) PetscCall(ISDestroy(&isrowa));
  if (!col) PetscCall(ISDestroy(&iscola));
  PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed, A, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
 * Create a sequential AIJ matrix based on row indices: a whole row is extracted once its index is matched.
 * Rows could be local or remote. The routine is designed to be scalable in memory so that nothing is based
 * on a global size.
 * */
static PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P, IS rows, Mat *P_oth)
{
  Mat_MPIAIJ            *p  = (Mat_MPIAIJ *)P->data;
  Mat_SeqAIJ            *pd = (Mat_SeqAIJ *)p->A->data, *po = (Mat_SeqAIJ *)p->B->data, *p_oth;
  PetscInt               plocalsize, nrows, *ilocal, *oilocal, i, lidx, *nrcols, *nlcols, ncol;
  PetscMPIInt            owner;
  PetscSFNode           *iremote, *oiremote;
  const PetscInt        *lrowindices;
  PetscSF                sf, osf;
  PetscInt               pcstart, *roffsets, *loffsets, *pnnz, j;
  PetscInt               ontotalcols, dntotalcols, ntotalcols, nout;
  MPI_Comm               comm;
  ISLocalToGlobalMapping mapping;
  const PetscScalar     *pd_a, *po_a;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)P, &comm));
  /* plocalsize is the number of roots
   * nrows is the number of leaves
   * */
  PetscCall(MatGetLocalSize(P, &plocalsize, NULL));
  PetscCall(ISGetLocalSize(rows, &nrows));
  PetscCall(PetscCalloc1(nrows, &iremote));
  PetscCall(ISGetIndices(rows, &lrowindices));
  for (i = 0; i < nrows; i++) {
    /* Find a remote index and an owner for a row
     * The row could be local or remote
     * */
    owner = 0;
    lidx  = 0;
    PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, &lidx));
    iremote[i].index = lidx;
    iremote[i].rank  = owner;
  }
  /* Create SF to communicate how many nonzero columns for each row */
  PetscCall(PetscSFCreate(comm, &sf));
  /* SF will figure out the number of nonzero columns for each row, and their
   * offsets
   * */
  PetscCall(PetscSFSetGraph(sf, plocalsize, nrows, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFSetUp(sf));

  /* per-root counts/offsets are stored interleaved: [i*2+0] = diagonal part, [i*2+1] = off-diagonal part */
  PetscCall(PetscCalloc1(2 * (plocalsize + 1), &roffsets));
  PetscCall(PetscCalloc1(2 * plocalsize, &nrcols));
  PetscCall(PetscCalloc1(nrows, &pnnz));
  roffsets[0] = 0;
  roffsets[1] = 0;
  for (i = 0; i < plocalsize; i++) {
    /* diagonal */
    nrcols[i * 2 + 0] = pd->i[i + 1] - pd->i[i];
    /* off-diagonal */
    nrcols[i * 2 + 1] = po->i[i + 1] - po->i[i];
    /* compute offsets so that we have the relative location for each row */
    roffsets[(i + 1) * 2 + 0] = roffsets[i * 2 + 0] + nrcols[i * 2 + 0];
    roffsets[(i + 1) * 2 + 1] = roffsets[i * 2 + 1] + nrcols[i * 2 + 1];
  }
  PetscCall(PetscCalloc1(2 * nrows, &nlcols));
  PetscCall(PetscCalloc1(2 * nrows, &loffsets));
  /* 'r' means root, and 'l' means leaf */
  PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));
  PetscCall(PetscFree(roffsets));
  PetscCall(PetscFree(nrcols));
  dntotalcols = 0;
  ontotalcols = 0;
  ncol        = 0;
  for (i = 0; i < nrows; i++) {
    pnnz[i] = nlcols[i * 2 + 0] + nlcols[i * 2 + 1];
    ncol    = PetscMax(pnnz[i], ncol);
    /* diagonal */
    dntotalcols += nlcols[i * 2 + 0];
    /* off-diagonal */
    ontotalcols += nlcols[i * 2 + 1];
  }
  /* We do not need to figure the right number of columns
   * since all the calculations will be done by going through the raw data
   * */
  PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, nrows, ncol, 0, pnnz, P_oth));
  PetscCall(MatSetUp(*P_oth));
  PetscCall(PetscFree(pnnz));
  p_oth = (Mat_SeqAIJ *)(*P_oth)->data;
  /* build two SF graphs (one per part) that gather each requested row's entries into P_oth */
  /* diagonal */
  PetscCall(PetscCalloc1(dntotalcols, &iremote));
  /* off-diagonal */
  PetscCall(PetscCalloc1(ontotalcols, &oiremote));
  /* diagonal */
  PetscCall(PetscCalloc1(dntotalcols, &ilocal));
  /* off-diagonal */
  PetscCall(PetscCalloc1(ontotalcols, &oilocal));
  dntotalcols = 0;
  ontotalcols = 0;
  ntotalcols  = 0;
  for (i = 0; i < nrows; i++) {
    owner = 0;
    PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, NULL));
    /* Set iremote for diag matrix */
    for (j = 0; j < nlcols[i * 2 + 0]; j++) {
      iremote[dntotalcols].index = loffsets[i * 2 + 0] + j;
      iremote[dntotalcols].rank  = owner;
      /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */
      ilocal[dntotalcols++] = ntotalcols++;
    }
    /* off-diagonal */
    for (j = 0; j < nlcols[i * 2 + 1]; j++) {
      oiremote[ontotalcols].index = loffsets[i * 2 + 1] + j;
      oiremote[ontotalcols].rank  = owner;
      oilocal[ontotalcols++]      = ntotalcols++;
    }
  }
  PetscCall(ISRestoreIndices(rows, &lrowindices));
  PetscCall(PetscFree(loffsets));
  PetscCall(PetscFree(nlcols));
  PetscCall(PetscSFCreate(comm, &sf));
  /* P serves as roots and P_oth is leaves
   * Diag matrix
   * */
  PetscCall(PetscSFSetGraph(sf, pd->i[plocalsize], dntotalcols, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFSetUp(sf));

  PetscCall(PetscSFCreate(comm, &osf));
  /* off-diagonal */
  PetscCall(PetscSFSetGraph(osf, po->i[plocalsize], ontotalcols, oilocal, PETSC_OWN_POINTER, oiremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFSetFromOptions(osf));
  PetscCall(PetscSFSetUp(osf));
  PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a));
  PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a));
  /* operate on the matrix internal data to save memory */
  PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
  PetscCall(MatGetOwnershipRangeColumn(P, &pcstart, NULL));
  /* Convert to global indices for diag matrix; the shift is undone below once the bcast completes */
  for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] += pcstart;
  PetscCall(PetscSFBcastBegin(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
  /* We want P_oth store global indices */
  PetscCall(ISLocalToGlobalMappingCreate(comm, 1, p->B->cmap->n, p->garray, PETSC_COPY_VALUES, &mapping));
  /* Use memory scalable approach */
  PetscCall(ISLocalToGlobalMappingSetType(mapping, ISLOCALTOGLOBALMAPPINGHASH));
  /* po->j is translated in place to global indices, and mapped back to local below */
  PetscCall(ISLocalToGlobalMappingApply(mapping, po->i[plocalsize], po->j, po->j));
  PetscCall(PetscSFBcastBegin(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE));
  /* Convert back to local indices */
  for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] -= pcstart;
  PetscCall(PetscSFBcastEnd(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE));
  nout = 0;
  PetscCall(ISGlobalToLocalMappingApply(mapping, IS_GTOLM_DROP, po->i[plocalsize], po->j, &nout, po->j));
  PetscCheck(nout == po->i[plocalsize], comm, PETSC_ERR_ARG_INCOMP, "n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ", po->i[plocalsize], nout);
  PetscCall(ISLocalToGlobalMappingDestroy(&mapping));
  /* Exchange values */
  PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE));
  PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a));
  PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a));
  /* Stop PETSc from shrinking memory */
  for (i = 0; i < nrows; i++) p_oth->ilen[i] = p_oth->imax[i];
  PetscCall(MatAssemblyBegin(*P_oth, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*P_oth, MAT_FINAL_ASSEMBLY));
  /* Attach PetscSF objects to P_oth so that we can reuse it later */
  PetscCall(PetscObjectCompose((PetscObject)*P_oth, "diagsf", (PetscObject)sf));
  PetscCall(PetscObjectCompose((PetscObject)*P_oth, "offdiagsf", (PetscObject)osf));
  PetscCall(PetscSFDestroy(&sf));
  PetscCall(PetscSFDestroy(&osf));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
 * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A
 * This supports MPIAIJ and MAIJ
 * */
PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A, Mat P, PetscInt dof, MatReuse reuse, Mat *P_oth)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *p = (Mat_MPIAIJ *)P->data;
  Mat_SeqAIJ *p_oth;
  IS          rows, map;
  PetscHMapI  hamp;
  PetscInt    i, htsize, *rowindices, off, *mapping, key, count;
  MPI_Comm    comm;
  PetscSF     sf, osf;
  PetscBool   has;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, P, 0, 0));
  /* If it is the first time, create an index set of off-diag nonzero columns of A,
   * and then create a submatrix (that often is an overlapping matrix)
   * */
  if (reuse == MAT_INITIAL_MATRIX) {
    /* Use a hash table to figure out unique keys */
    PetscCall(PetscHMapICreateWithSize(a->B->cmap->n, &hamp));
    PetscCall(PetscCalloc1(a->B->cmap->n, &mapping));
    count = 0;
    /* Assume that a->garray is sorted, otherwise the following does not make sense */
    for (i = 0; i < a->B->cmap->n; i++) {
      key = a->garray[i] / dof; /* dof > 1 collapses MAIJ component columns onto one key */
      PetscCall(PetscHMapIHas(hamp, key, &has));
      if (!has) {
        mapping[i] = count;
        PetscCall(PetscHMapISet(hamp, key, count++));
      } else {
        /* Current 'i' has the same value the previous step */
        mapping[i] = count - 1;
      }
    }
    PetscCall(ISCreateGeneral(comm, a->B->cmap->n, mapping, PETSC_OWN_POINTER, &map));
PetscCall(PetscHMapIGetSize(hamp, &htsize)); 5631 PetscCheck(htsize == count, comm, PETSC_ERR_ARG_INCOMP, " Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT, htsize, count); 5632 PetscCall(PetscCalloc1(htsize, &rowindices)); 5633 off = 0; 5634 PetscCall(PetscHMapIGetKeys(hamp, &off, rowindices)); 5635 PetscCall(PetscHMapIDestroy(&hamp)); 5636 PetscCall(PetscSortInt(htsize, rowindices)); 5637 PetscCall(ISCreateGeneral(comm, htsize, rowindices, PETSC_OWN_POINTER, &rows)); 5638 /* In case, the matrix was already created but users want to recreate the matrix */ 5639 PetscCall(MatDestroy(P_oth)); 5640 PetscCall(MatCreateSeqSubMatrixWithRows_Private(P, rows, P_oth)); 5641 PetscCall(PetscObjectCompose((PetscObject)*P_oth, "aoffdiagtopothmapping", (PetscObject)map)); 5642 PetscCall(ISDestroy(&map)); 5643 PetscCall(ISDestroy(&rows)); 5644 } else if (reuse == MAT_REUSE_MATRIX) { 5645 /* If matrix was already created, we simply update values using SF objects 5646 * that as attached to the matrix earlier. 
5647 */ 5648 const PetscScalar *pd_a, *po_a; 5649 5650 PetscCall(PetscObjectQuery((PetscObject)*P_oth, "diagsf", (PetscObject *)&sf)); 5651 PetscCall(PetscObjectQuery((PetscObject)*P_oth, "offdiagsf", (PetscObject *)&osf)); 5652 PetscCheck(sf && osf, comm, PETSC_ERR_ARG_NULL, "Matrix is not initialized yet"); 5653 p_oth = (Mat_SeqAIJ *)(*P_oth)->data; 5654 /* Update values in place */ 5655 PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a)); 5656 PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a)); 5657 PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5658 PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5659 PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5660 PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5661 PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a)); 5662 PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a)); 5663 } else SETERRQ(comm, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown reuse type"); 5664 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, P, 0, 0)); 5665 PetscFunctionReturn(PETSC_SUCCESS); 5666 } 5667 5668 /*@C 5669 MatGetBrowsOfAcols - Returns `IS` that contain rows of `B` that equal to nonzero columns of local `A` 5670 5671 Collective 5672 5673 Input Parameters: 5674 + A - the first matrix in `MATMPIAIJ` format 5675 . B - the second matrix in `MATMPIAIJ` format 5676 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5677 5678 Output Parameters: 5679 + rowb - On input index sets of rows of B to extract (or `NULL`), modified on output 5680 . 
colb - On input index sets of columns of B to extract (or `NULL`), modified on output
- B_seq - the sequential matrix generated

  Level: developer

.seealso: `Mat`, `MATMPIAIJ`, `IS`, `MatReuse`
@*/
PetscErrorCode MatGetBrowsOfAcols(Mat A, Mat B, MatReuse scall, IS *rowb, IS *colb, Mat *B_seq)
{
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  PetscInt   *idx, i, start, ncols, nzA, nzB, *cmap, imark;
  IS          isrowb, iscolb;
  Mat        *bseq = NULL;

  PetscFunctionBegin;
  PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
            A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
  PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols, A, B, 0, 0));

  if (scall == MAT_INITIAL_MATRIX) {
    /* Build the row IS: garray entries below the diagonal range, then all local
       rows, then garray entries above it -- kept in ascending global order */
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    PetscCall(PetscMalloc1(nzA + nzB, &idx));
    ncols = 0;
    for (i = 0; i < nzB; i++) { /* row < local row index */
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i = 0; i < nzA; i++) idx[ncols++] = start + i;   /* local rows */
    for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
    PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &isrowb));
    PetscCall(ISCreateStride(PETSC_COMM_SELF, B->cmap->N, 0, 1, &iscolb)); /* all columns of B */
  } else {
    PetscCheck(rowb && colb, PETSC_COMM_SELF, PETSC_ERR_SUP, "IS rowb and colb must be provided for MAT_REUSE_MATRIX");
    isrowb = *rowb;
    iscolb = *colb;
    PetscCall(PetscMalloc1(1, &bseq));
    bseq[0] = *B_seq;
  }
  PetscCall(MatCreateSubMatrices(B, 1, &isrowb, &iscolb, scall, &bseq));
  *B_seq = bseq[0];
  PetscCall(PetscFree(bseq));
  /* Either hand the index sets back to the caller for reuse, or destroy them */
  if (!rowb) {
    PetscCall(ISDestroy(&isrowb));
  } else {
    *rowb = isrowb;
  }
  if (!colb) {
    PetscCall(ISDestroy(&iscolb));
  } else {
    *colb = iscolb;
  }
  PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols, A, B, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatGetBrowsOfAoCols_MPIAIJ - Creates a `MATSEQAIJ` matrix by taking rows of B that equal to nonzero columns
  of the OFF-DIAGONAL portion of local A

  Collective

  Input Parameters:
+ A,B - the matrices in `MATMPIAIJ` format
- scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX`

  Output Parameters:
+ startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
. startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
. bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
- B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N

  Developer Note:
  This directly accesses information inside the VecScatter associated with the matrix-vector product
  for this matrix. This is not desirable..
5758 5759 Level: developer 5760 5761 */ 5762 5763 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A, Mat B, MatReuse scall, PetscInt **startsj_s, PetscInt **startsj_r, MatScalar **bufa_ptr, Mat *B_oth) 5764 { 5765 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5766 VecScatter ctx; 5767 MPI_Comm comm; 5768 const PetscMPIInt *rprocs, *sprocs; 5769 PetscMPIInt nrecvs, nsends; 5770 const PetscInt *srow, *rstarts, *sstarts; 5771 PetscInt *rowlen, *bufj, *bufJ, ncols = 0, aBn = a->B->cmap->n, row, *b_othi, *b_othj, *rvalues = NULL, *svalues = NULL, *cols, sbs, rbs; 5772 PetscInt i, j, k = 0, l, ll, nrows, *rstartsj = NULL, *sstartsj, len; 5773 PetscScalar *b_otha, *bufa, *bufA, *vals = NULL; 5774 MPI_Request *reqs = NULL, *rwaits = NULL, *swaits = NULL; 5775 PetscMPIInt size, tag, rank, nreqs; 5776 5777 PetscFunctionBegin; 5778 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 5779 PetscCallMPI(MPI_Comm_size(comm, &size)); 5780 5781 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 5782 A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 5783 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, B, 0, 0)); 5784 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 5785 5786 if (size == 1) { 5787 startsj_s = NULL; 5788 bufa_ptr = NULL; 5789 *B_oth = NULL; 5790 PetscFunctionReturn(PETSC_SUCCESS); 5791 } 5792 5793 ctx = a->Mvctx; 5794 tag = ((PetscObject)ctx)->tag; 5795 5796 PetscCall(VecScatterGetRemote_Private(ctx, PETSC_TRUE /*send*/, &nsends, &sstarts, &srow, &sprocs, &sbs)); 5797 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5798 PetscCall(VecScatterGetRemoteOrdered_Private(ctx, PETSC_FALSE /*recv*/, &nrecvs, &rstarts, NULL /*indices not needed*/, &rprocs, &rbs)); 5799 
PetscCall(PetscMPIIntCast(nsends + nrecvs, &nreqs)); 5800 PetscCall(PetscMalloc1(nreqs, &reqs)); 5801 rwaits = reqs; 5802 swaits = PetscSafePointerPlusOffset(reqs, nrecvs); 5803 5804 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5805 if (scall == MAT_INITIAL_MATRIX) { 5806 /* i-array */ 5807 /* post receives */ 5808 if (nrecvs) PetscCall(PetscMalloc1(rbs * (rstarts[nrecvs] - rstarts[0]), &rvalues)); /* rstarts can be NULL when nrecvs=0 */ 5809 for (i = 0; i < nrecvs; i++) { 5810 rowlen = rvalues + rstarts[i] * rbs; 5811 nrows = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of indices to be received */ 5812 PetscCallMPI(MPIU_Irecv(rowlen, nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i)); 5813 } 5814 5815 /* pack the outgoing message */ 5816 PetscCall(PetscMalloc2(nsends + 1, &sstartsj, nrecvs + 1, &rstartsj)); 5817 5818 sstartsj[0] = 0; 5819 rstartsj[0] = 0; 5820 len = 0; /* total length of j or a array to be sent */ 5821 if (nsends) { 5822 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5823 PetscCall(PetscMalloc1(sbs * (sstarts[nsends] - sstarts[0]), &svalues)); 5824 } 5825 for (i = 0; i < nsends; i++) { 5826 rowlen = svalues + (sstarts[i] - sstarts[0]) * sbs; 5827 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5828 for (j = 0; j < nrows; j++) { 5829 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5830 for (l = 0; l < sbs; l++) { 5831 PetscCall(MatGetRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); /* rowlength */ 5832 5833 rowlen[j * sbs + l] = ncols; 5834 5835 len += ncols; 5836 PetscCall(MatRestoreRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); 5837 } 5838 k++; 5839 } 5840 PetscCallMPI(MPIU_Isend(rowlen, nrows * sbs, MPIU_INT, sprocs[i], tag, comm, swaits + i)); 5841 5842 sstartsj[i + 1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5843 } 5844 /* recvs and sends of i-array are completed */ 5845 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5846 
PetscCall(PetscFree(svalues)); 5847 5848 /* allocate buffers for sending j and a arrays */ 5849 PetscCall(PetscMalloc1(len, &bufj)); 5850 PetscCall(PetscMalloc1(len, &bufa)); 5851 5852 /* create i-array of B_oth */ 5853 PetscCall(PetscMalloc1(aBn + 1, &b_othi)); 5854 5855 b_othi[0] = 0; 5856 len = 0; /* total length of j or a array to be received */ 5857 k = 0; 5858 for (i = 0; i < nrecvs; i++) { 5859 rowlen = rvalues + (rstarts[i] - rstarts[0]) * rbs; 5860 nrows = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of rows to be received */ 5861 for (j = 0; j < nrows; j++) { 5862 b_othi[k + 1] = b_othi[k] + rowlen[j]; 5863 PetscCall(PetscIntSumError(rowlen[j], len, &len)); 5864 k++; 5865 } 5866 rstartsj[i + 1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5867 } 5868 PetscCall(PetscFree(rvalues)); 5869 5870 /* allocate space for j and a arrays of B_oth */ 5871 PetscCall(PetscMalloc1(b_othi[aBn], &b_othj)); 5872 PetscCall(PetscMalloc1(b_othi[aBn], &b_otha)); 5873 5874 /* j-array */ 5875 /* post receives of j-array */ 5876 for (i = 0; i < nrecvs; i++) { 5877 nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */ 5878 PetscCallMPI(MPIU_Irecv(PetscSafePointerPlusOffset(b_othj, rstartsj[i]), nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i)); 5879 } 5880 5881 /* pack the outgoing message j-array */ 5882 if (nsends) k = sstarts[0]; 5883 for (i = 0; i < nsends; i++) { 5884 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5885 bufJ = PetscSafePointerPlusOffset(bufj, sstartsj[i]); 5886 for (j = 0; j < nrows; j++) { 5887 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5888 for (ll = 0; ll < sbs; ll++) { 5889 PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL)); 5890 for (l = 0; l < ncols; l++) *bufJ++ = cols[l]; 5891 PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL)); 5892 } 5893 } 5894 PetscCallMPI(MPIU_Isend(PetscSafePointerPlusOffset(bufj, sstartsj[i]), sstartsj[i + 1] - sstartsj[i], 
MPIU_INT, sprocs[i], tag, comm, swaits + i)); 5895 } 5896 5897 /* recvs and sends of j-array are completed */ 5898 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5899 } else if (scall == MAT_REUSE_MATRIX) { 5900 sstartsj = *startsj_s; 5901 rstartsj = *startsj_r; 5902 bufa = *bufa_ptr; 5903 PetscCall(MatSeqAIJGetArrayWrite(*B_oth, &b_otha)); 5904 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container"); 5905 5906 /* a-array */ 5907 /* post receives of a-array */ 5908 for (i = 0; i < nrecvs; i++) { 5909 nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */ 5910 PetscCallMPI(MPIU_Irecv(PetscSafePointerPlusOffset(b_otha, rstartsj[i]), nrows, MPIU_SCALAR, rprocs[i], tag, comm, rwaits + i)); 5911 } 5912 5913 /* pack the outgoing message a-array */ 5914 if (nsends) k = sstarts[0]; 5915 for (i = 0; i < nsends; i++) { 5916 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5917 bufA = PetscSafePointerPlusOffset(bufa, sstartsj[i]); 5918 for (j = 0; j < nrows; j++) { 5919 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5920 for (ll = 0; ll < sbs; ll++) { 5921 PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals)); 5922 for (l = 0; l < ncols; l++) *bufA++ = vals[l]; 5923 PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals)); 5924 } 5925 } 5926 PetscCallMPI(MPIU_Isend(PetscSafePointerPlusOffset(bufa, sstartsj[i]), sstartsj[i + 1] - sstartsj[i], MPIU_SCALAR, sprocs[i], tag, comm, swaits + i)); 5927 } 5928 /* recvs and sends of a-array are completed */ 5929 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5930 PetscCall(PetscFree(reqs)); 5931 5932 if (scall == MAT_INITIAL_MATRIX) { 5933 Mat_SeqAIJ *b_oth; 5934 5935 /* put together the new matrix */ 5936 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, aBn, B->cmap->N, b_othi, b_othj, b_otha, B_oth)); 5937 5938 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't 
free the user's arrays. */ 5939 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5940 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 5941 b_oth->free_a = PETSC_TRUE; 5942 b_oth->free_ij = PETSC_TRUE; 5943 b_oth->nonew = 0; 5944 5945 PetscCall(PetscFree(bufj)); 5946 if (!startsj_s || !bufa_ptr) { 5947 PetscCall(PetscFree2(sstartsj, rstartsj)); 5948 PetscCall(PetscFree(bufa_ptr)); 5949 } else { 5950 *startsj_s = sstartsj; 5951 *startsj_r = rstartsj; 5952 *bufa_ptr = bufa; 5953 } 5954 } else if (scall == MAT_REUSE_MATRIX) { 5955 PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth, &b_otha)); 5956 } 5957 5958 PetscCall(VecScatterRestoreRemote_Private(ctx, PETSC_TRUE, &nsends, &sstarts, &srow, &sprocs, &sbs)); 5959 PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx, PETSC_FALSE, &nrecvs, &rstarts, NULL, &rprocs, &rbs)); 5960 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, B, 0, 0)); 5961 PetscFunctionReturn(PETSC_SUCCESS); 5962 } 5963 5964 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat, MatType, MatReuse, Mat *); 5965 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat, MatType, MatReuse, Mat *); 5966 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat, MatType, MatReuse, Mat *); 5967 #if defined(PETSC_HAVE_MKL_SPARSE) 5968 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat, MatType, MatReuse, Mat *); 5969 #endif 5970 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat, MatType, MatReuse, Mat *); 5971 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *); 5972 #if defined(PETSC_HAVE_ELEMENTAL) 5973 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat, MatType, MatReuse, Mat *); 5974 #endif 5975 #if defined(PETSC_HAVE_SCALAPACK) && (defined(PETSC_USE_REAL_SINGLE) || defined(PETSC_USE_REAL_DOUBLE)) 5976 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *); 5977 #endif 5978 #if defined(PETSC_HAVE_HYPRE) 5979 PETSC_INTERN PetscErrorCode 
MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
#endif
#if defined(PETSC_HAVE_CUDA)
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat, MatType, MatReuse, Mat *);
#endif
#if defined(PETSC_HAVE_HIP)
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJHIPSPARSE(Mat, MatType, MatReuse, Mat *);
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat, MatType, MatReuse, Mat *);
#endif
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat, MatType, MatReuse, Mat *);
PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);

/*
    Computes (B'*A')' since computing B*A directly is untenable

       n                       p                          p
  [       ]       [       ]         [       ]
m [   A   ]  *  n [   B   ]   =   m [   C   ]
  [       ]       [       ]         [       ]

*/
static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A, Mat B, Mat C)
{
  Mat At, Bt, Ct;

  PetscFunctionBegin;
  /* Form C = (Bt*At)^T via explicit transposes; Ct is transposed back into the
     caller-provided C (MAT_REUSE_MATRIX keeps C's layout) */
  PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &At));
  PetscCall(MatTranspose(B, MAT_INITIAL_MATRIX, &Bt));
  PetscCall(MatMatMult(Bt, At, MAT_INITIAL_MATRIX, PETSC_CURRENT, &Ct));
  PetscCall(MatDestroy(&At));
  PetscCall(MatDestroy(&Bt));
  PetscCall(MatTransposeSetPrecursor(Ct, C));
  PetscCall(MatTranspose(Ct, MAT_REUSE_MATRIX, &C));
  PetscCall(MatDestroy(&Ct));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Symbolic phase: size C, keep (or set) a dense type, and install the numeric kernel */
static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A, Mat B, PetscReal fill, Mat C)
{
  PetscBool cisdense;

  PetscFunctionBegin;
  PetscCheck(A->cmap->n == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT, A->cmap->n, B->rmap->n);
  PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N));
  PetscCall(MatSetBlockSizesFromMats(C, A, B));
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, MATMPIDENSEHIP, ""));
  if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name));
  PetscCall(MatSetUp(C));

  C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Product-type AB driver: validates layouts and installs the symbolic kernels */
static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C)
{
  Mat_Product *product = C->product;
  Mat          A = product->A, B = product->B;

  PetscFunctionBegin;
  PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")",
            A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
  C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
  C->ops->productsymbolic = MatProductSymbolic_AB;
  PetscFunctionReturn(PETSC_SUCCESS);
}

PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C)
{
  Mat_Product *product = C->product;

  PetscFunctionBegin;
  /* Only the AB product is supported for MPIDense*MPIAIJ */
  if (product->type == MATPRODUCT_AB) PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix

  Input Parameters:

    j1,rowBegin1,rowEnd1,jmap1: describe the first set of nonzeros (Set1)
    j2,rowBegin2,rowEnd2,jmap2: describe the second set of nonzeros (Set2)

    mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat

    For Set1, j1[] contains column indices of the nonzeros.
    For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k
    respectively (note rowEnd1[k] is not necessarily equal to rowBegin1[k+1]).
    Indices in this range of j1[] are sorted,
    but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1.

    Similar for Set2.

    This routine merges the two sets of nonzeros row by row and removes repeats.

  Output Parameters: (memory is allocated by the caller)

    i[],j[]: the CSR of the merged matrix, which has m rows.
    imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix.
    imap2[]: similar to imap1[], but for Set2.
    Note we order nonzeros row-by-row and from left to right.
*/
static PetscErrorCode MatMergeEntries_Internal(Mat mat, const PetscInt j1[], const PetscInt j2[], const PetscCount rowBegin1[], const PetscCount rowEnd1[], const PetscCount rowBegin2[], const PetscCount rowEnd2[], const PetscCount jmap1[], const PetscCount jmap2[], PetscCount imap1[], PetscCount imap2[], PetscInt i[], PetscInt j[])
{
  PetscInt   r, m; /* Row index of mat */
  PetscCount t, t1, t2, b1, e1, b2, e2;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(mat, &m, NULL));
  t1 = t2 = t = 0; /* Count unique nonzeros in Set1, Set2 and the merged matrix, respectively */
  i[0] = 0;
  for (r = 0; r < m; r++) { /* Do row by row merging (classic two-pointer merge of two sorted ranges) */
    b1 = rowBegin1[r];
    e1 = rowEnd1[r];
    b2 = rowBegin2[r];
    e2 = rowEnd2[r];
    while (b1 < e1 && b2 < e2) {
      if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */
        j[t]      = j1[b1];
        imap1[t1] = t;
        imap2[t2] = t;
        b1 += jmap1[t1 + 1] - jmap1[t1]; /* Jump to next unique local nonzero */
        b2 += jmap2[t2 + 1] - jmap2[t2]; /* Jump to next unique remote nonzero */
        t1++;
        t2++;
        t++;
      } else if (j1[b1] < j2[b2]) {
        j[t]      = j1[b1];
        imap1[t1] = t;
        b1 += jmap1[t1 + 1] - jmap1[t1];
        t1++;
        t++;
      } else {
        j[t]      = j2[b2];
        imap2[t2] = t;
        b2 += jmap2[t2 + 1] - jmap2[t2];
        t2++;
        t++;
      }
    }
    /* Merge the remaining in either j1[] or j2[] */
    while (b1 < e1) {
      j[t]      = j1[b1];
      imap1[t1] = t;
      b1 += jmap1[t1 + 1] - jmap1[t1];
      t1++;
      t++;
    }
    while (b2 < e2) {
      j[t]      = j2[b2];
      imap2[t2] = t;
      b2 += jmap2[t2 + 1] - jmap2[t2];
      t2++;
      t++;
    }
    /* Row pointer for the merged CSR; checked cast since t is a PetscCount */
    PetscCall(PetscIntCast(t, i + r + 1));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block

  Input Parameters:
    mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m.
    n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[]
      respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n.

      i[] is already sorted, but within a row, j[] is not sorted and might have repeats.
      i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting.

  Output Parameters:
    j[],perm[]: the routine needs to sort j[] within each row along with perm[].
    rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller.
      They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block,
      and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block.

    Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine.
      Atot: number of entries belonging to the diagonal block.
      Annz: number of unique nonzeros belonging to the diagonal block.
      Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block.
      Length of Aperm[] is Atot, though it may also count
      repeats (i.e., same 'i,j' pair).
      Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. More precisely, Ajmap[t+1] - Ajmap[t]
      is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0.

      Atot: number of entries belonging to the diagonal block
      Annz: number of unique nonzeros belonging to the diagonal block.

    Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block.

    Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1().
*/
static PetscErrorCode MatSplitEntries_Internal(Mat mat, PetscCount n, const PetscInt i[], PetscInt j[], PetscCount perm[], PetscCount rowBegin[], PetscCount rowMid[], PetscCount rowEnd[], PetscCount *Atot_, PetscCount **Aperm_, PetscCount *Annz_, PetscCount **Ajmap_, PetscCount *Btot_, PetscCount **Bperm_, PetscCount *Bnnz_, PetscCount **Bjmap_)
{
  PetscInt   cstart, cend, rstart, rend, row, col;
  PetscCount Atot = 0, Btot = 0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */
  PetscCount Annz = 0, Bnnz = 0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */
  PetscCount k, m, p, q, r, s, mid;
  PetscCount *Aperm, *Bperm, *Ajmap, *Bjmap;

  PetscFunctionBegin;
  PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
  PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
  m = rend - rstart;

  /* Skip negative rows */
  for (k = 0; k < n; k++)
    if (i[k] >= 0) break;

  /* Process [k,n): sort and partition each local row into diag and offdiag portions,
     fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz.
  */
  while (k < n) {
    row = i[k];
    /* Entries in [k,s) are in one row. Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */
    for (s = k; s < n; s++)
      if (i[s] != row) break;

    /* Shift diag columns to range of [-PETSC_INT_MAX, -1] */
    for (p = k; p < s; p++) {
      if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_INT_MAX;
    }
    PetscCall(PetscSortIntWithCountArray(s - k, j + k, perm + k));
    PetscCall(PetscSortedIntUpperBound(j, k, s, -1, &mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */
    rowBegin[row - rstart] = k;
    rowMid[row - rstart]   = mid;
    rowEnd[row - rstart]   = s;
    PetscCheck(k == s || j[s - 1] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index %" PetscInt_FMT " is >= matrix column size %" PetscInt_FMT, j[s - 1], mat->cmap->N);

    /* Count nonzeros of this diag/offdiag row, which might have repeats */
    Atot += mid - k;
    Btot += s - mid;

    /* Count unique nonzeros of this diag row */
    for (p = k; p < mid;) {
      col = j[p];
      do {
        j[p] += PETSC_INT_MAX; /* Revert the modified diagonal indices */
        p++;
      } while (p < mid && j[p] == col);
      Annz++;
    }

    /* Count unique nonzeros of this offdiag row */
    for (p = mid; p < s;) {
      col = j[p];
      do {
        p++;
      } while (p < s && j[p] == col);
      Bnnz++;
    }
    k = s;
  }

  /* Allocation according to Atot, Btot, Annz, Bnnz */
  PetscCall(PetscMalloc1(Atot, &Aperm));
  PetscCall(PetscMalloc1(Btot, &Bperm));
  PetscCall(PetscMalloc1(Annz + 1, &Ajmap));
  PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap));

  /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */
  Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0;
  for (r = 0; r < m; r++) {
    k   = rowBegin[r];
    mid = rowMid[r];
    s   = rowEnd[r];
    PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Aperm, Atot), PetscSafePointerPlusOffset(perm, k), mid - k));
    PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Bperm, Btot), PetscSafePointerPlusOffset(perm, mid), s - mid));
    Atot += mid - k;
    Btot += s - mid;

    /* Scan column indices in this row and find out how many repeats each unique nonzero has */
    for (p = k; p < mid;) {
      col = j[p];
      q   = p;
      do {
        p++;
      } while (p < mid && j[p] == col);
      Ajmap[Annz + 1] = Ajmap[Annz] + (p - q);
      Annz++;
    }

    for (p = mid; p < s;) {
      col = j[p];
      q   = p;
      do {
        p++;
      } while (p < s && j[p] == col);
      Bjmap[Bnnz + 1] = Bjmap[Bnnz] + (p - q);
      Bnnz++;
    }
  }
  /* Output */
  *Aperm_ = Aperm;
  *Annz_  = Annz;
  *Atot_  = Atot;
  *Ajmap_ = Ajmap;
  *Bperm_ = Bperm;
  *Bnnz_  = Bnnz;
  *Btot_  = Btot;
  *Bjmap_ = Bjmap;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix

  Input Parameters:
    nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[]
    nnz:  number of unique nonzeros in the merged matrix
    imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix
    jmap[nnz1+1]: i-th nonzero in the set has jmap[i+1] - jmap[i] repeats in the set

  Output Parameter: (memory is allocated by the caller)
    jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set

  Example:
    nnz1 = 4
    nnz  = 6
    imap = [1,3,4,5]
    jmap = [0,3,5,6,7]
  then,
    jmap_new = [0,0,3,3,5,6,7]
*/
static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1, PetscCount nnz, const PetscCount imap[], const PetscCount jmap[], PetscCount jmap_new[])
{
  PetscCount k, p;

  PetscFunctionBegin;
  jmap_new[0] = 0;
  p           = nnz; /* p loops over jmap_new[] backwards */
  for (k = nnz1 - 1; k
>= 0; k--) { /* k loops over imap[] */
    for (; p > imap[k]; p--) jmap_new[p] = jmap[k + 1];
  }
  for (; p >= 0; p--) jmap_new[p] = jmap[0];
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Destructor for the COO assembly context attached to an MPIAIJ matrix */
static PetscErrorCode MatCOOStructDestroy_MPIAIJ(void **data)
{
  MatCOOStruct_MPIAIJ *coo = (MatCOOStruct_MPIAIJ *)*data;

  PetscFunctionBegin;
  PetscCall(PetscSFDestroy(&coo->sf));
  PetscCall(PetscFree(coo->Aperm1));
  PetscCall(PetscFree(coo->Bperm1));
  PetscCall(PetscFree(coo->Ajmap1));
  PetscCall(PetscFree(coo->Bjmap1));
  PetscCall(PetscFree(coo->Aimap2));
  PetscCall(PetscFree(coo->Bimap2));
  PetscCall(PetscFree(coo->Aperm2));
  PetscCall(PetscFree(coo->Bperm2));
  PetscCall(PetscFree(coo->Ajmap2));
  PetscCall(PetscFree(coo->Bjmap2));
  PetscCall(PetscFree(coo->Cperm1));
  PetscCall(PetscFree2(coo->sendbuf, coo->recvbuf));
  PetscCall(PetscFree(coo));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, PetscInt coo_i[], PetscInt coo_j[])
{
  MPI_Comm             comm;
  PetscMPIInt          rank, size;
  PetscInt             m, n, M, N, rstart, rend, cstart, cend; /* Sizes, indices of row/col, therefore with type PetscInt */
  PetscCount           k, p, q, rem; /* Loop variables over coo arrays */
  Mat_MPIAIJ          *mpiaij = (Mat_MPIAIJ *)mat->data;
  PetscContainer       container;
  MatCOOStruct_MPIAIJ *coo;

  PetscFunctionBegin;
  /* Wipe any existing off-diagonal support structures; the matrix is rebuilt from the COO pattern */
  PetscCall(PetscFree(mpiaij->garray));
  PetscCall(VecDestroy(&mpiaij->lvec));
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&mpiaij->colmap));
#else
  PetscCall(PetscFree(mpiaij->colmap));
#endif
  PetscCall(VecScatterDestroy(&mpiaij->Mvctx));
  mat->assembled     = PETSC_FALSE;
  mat->was_assembled = PETSC_FALSE;

  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));
  PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
  PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
  PetscCall(MatGetLocalSize(mat, &m, &n));
  PetscCall(MatGetSize(mat, &M, &N));

  /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */
  /* entries come first, then local rows, then remote rows. */
  PetscCount n1 = coo_n, *perm1;
  PetscInt  *i1 = coo_i, *j1 = coo_j;

  PetscCall(PetscMalloc1(n1, &perm1));
  for (k = 0; k < n1; k++) perm1[k] = k;

  /* Manipulate indices so that entries with negative row or col indices will have smallest
     row indices, local entries will have greater but negative row indices, and remote entries
     will have positive row indices.
  */
  for (k = 0; k < n1; k++) {
    if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_INT_MIN;                /* e.g., -2^31, minimal to move them ahead */
    else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_INT_MAX; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_INT_MAX, -1] */
    else {
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_USER_INPUT, "MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows");
      if (mpiaij->donotstash) i1[k] = PETSC_INT_MIN; /* Ignore offproc entries as if they had negative indices */
    }
  }

  /* Sort by row; after that, [0,k) have ignored entries, [k,rem) have local rows and [rem,n1) have remote rows */
  PetscCall(PetscSortIntWithIntCountArrayPair(n1, i1, j1, perm1));

  /* Advance k to the first entry we need to take care of */
  for (k = 0; k < n1; k++)
    if (i1[k] > PETSC_INT_MIN) break;
  PetscCount i1start = k;

  PetscCall(PetscSortedIntUpperBound(i1, k, n1, rend - 1 - PETSC_INT_MAX, &rem)); /* rem is upper bound of the last local row */
  for (; k < rem; k++) i1[k] += PETSC_INT_MAX;                                    /* Revert row indices of local rows*/

  PetscCheck(n1 == 0 || i1[n1 - 1] < M, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "COO row index %" PetscInt_FMT " is >= the matrix row size %" PetscInt_FMT, i1[n1 - 1], M);

  /* Send remote rows to their owner */
  /* Find which rows should be sent to which remote ranks*/
  PetscInt        nsend = 0; /* Number of MPI ranks to send data to */
  PetscMPIInt    *sendto;    /* [nsend], storing remote ranks */
  PetscInt       *nentries;  /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */
  const PetscInt *ranges;
  PetscInt        maxNsend = size >= 128 ? 128 : size; /* Assume max 128 neighbors; realloc when needed */

  PetscCall(PetscLayoutGetRanges(mat->rmap, &ranges));
  PetscCall(PetscMalloc2(maxNsend, &sendto, maxNsend, &nentries));
  for (k = rem; k < n1;) {
    PetscMPIInt owner;
    PetscInt    firstRow, lastRow;

    /* Locate a row range */
    firstRow = i1[k]; /* first row of this owner */
    PetscCall(PetscLayoutFindOwner(mat->rmap, firstRow, &owner));
    lastRow = ranges[owner + 1] - 1; /* last row of this owner */

    /* Find the first index 'p' in [k,n) with i1[p] belonging to next owner */
    PetscCall(PetscSortedIntUpperBound(i1, k, n1, lastRow, &p));

    /* All entries in [k,p) belong to this remote owner */
    if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */
      PetscMPIInt *sendto2;
      PetscInt    *nentries2;
      PetscInt     maxNsend2 = (maxNsend <= size / 2) ?
maxNsend * 2 : size; 6434 6435 PetscCall(PetscMalloc2(maxNsend2, &sendto2, maxNsend2, &nentries2)); 6436 PetscCall(PetscArraycpy(sendto2, sendto, maxNsend)); 6437 PetscCall(PetscArraycpy(nentries2, nentries2, maxNsend + 1)); 6438 PetscCall(PetscFree2(sendto, nentries2)); 6439 sendto = sendto2; 6440 nentries = nentries2; 6441 maxNsend = maxNsend2; 6442 } 6443 sendto[nsend] = owner; 6444 PetscCall(PetscIntCast(p - k, &nentries[nsend])); 6445 nsend++; 6446 k = p; 6447 } 6448 6449 /* Build 1st SF to know offsets on remote to send data */ 6450 PetscSF sf1; 6451 PetscInt nroots = 1, nroots2 = 0; 6452 PetscInt nleaves = nsend, nleaves2 = 0; 6453 PetscInt *offsets; 6454 PetscSFNode *iremote; 6455 6456 PetscCall(PetscSFCreate(comm, &sf1)); 6457 PetscCall(PetscMalloc1(nsend, &iremote)); 6458 PetscCall(PetscMalloc1(nsend, &offsets)); 6459 for (k = 0; k < nsend; k++) { 6460 iremote[k].rank = sendto[k]; 6461 iremote[k].index = 0; 6462 nleaves2 += nentries[k]; 6463 PetscCheck(nleaves2 >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF leaves is too large for PetscInt"); 6464 } 6465 PetscCall(PetscSFSetGraph(sf1, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 6466 PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1, MPIU_INT, PETSC_MEMTYPE_HOST, &nroots2 /*rootdata*/, PETSC_MEMTYPE_HOST, nentries /*leafdata*/, PETSC_MEMTYPE_HOST, offsets /*leafupdate*/, MPI_SUM)); 6467 PetscCall(PetscSFFetchAndOpEnd(sf1, MPIU_INT, &nroots2, nentries, offsets, MPI_SUM)); /* Would nroots2 overflow, we check offsets[] below */ 6468 PetscCall(PetscSFDestroy(&sf1)); 6469 PetscAssert(nleaves2 == n1 - rem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT, nleaves2, n1 - rem); 6470 6471 /* Build 2nd SF to send remote COOs to their owner */ 6472 PetscSF sf2; 6473 nroots = nroots2; 6474 nleaves = nleaves2; 6475 PetscCall(PetscSFCreate(comm, &sf2)); 6476 PetscCall(PetscSFSetFromOptions(sf2)); 6477 
  PetscCall(PetscMalloc1(nleaves, &iremote));
  p = 0;
  /* Leaf p (the q-th entry destined to rank sendto[k]) maps to root offsets[k]+q on that rank */
  for (k = 0; k < nsend; k++) {
    PetscCheck(offsets[k] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF roots is too large for PetscInt");
    for (q = 0; q < nentries[k]; q++, p++) {
      iremote[p].rank = sendto[k];
      PetscCall(PetscIntCast(offsets[k] + q, &iremote[p].index));
    }
  }
  PetscCall(PetscSFSetGraph(sf2, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));

  /* Send the remote COOs to their owner */
  PetscInt    n2 = nroots, *i2, *j2; /* Buffers for received COOs from other ranks, along with a permutation array */
  PetscCount *perm2;                 /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */
  PetscCall(PetscMalloc3(n2, &i2, n2, &j2, n2, &perm2));
  PetscAssert(rem == 0 || i1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null");
  PetscAssert(rem == 0 || j1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null");
  PetscInt *i1prem = PetscSafePointerPlusOffset(i1, rem);
  PetscInt *j1prem = PetscSafePointerPlusOffset(j1, rem);
  PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, i1prem, PETSC_MEMTYPE_HOST, i2, MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, i1prem, i2, MPI_REPLACE));
  PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, j1prem, PETSC_MEMTYPE_HOST, j2, MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, j1prem, j2, MPI_REPLACE));

  PetscCall(PetscFree(offsets));
  PetscCall(PetscFree2(sendto, nentries));

  /* Sort received COOs by row along with the permutation array */
  for (k = 0; k < n2; k++) perm2[k] = k;
  PetscCall(PetscSortIntWithIntCountArrayPair(n2, i2, j2, perm2));

  /* sf2 only sends contiguous leafdata to contiguous rootdata.
     We record the permutation which will be used to fill leafdata */
  PetscCount *Cperm1;
  PetscAssert(rem == 0 || perm1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null");
  PetscCount *perm1prem = PetscSafePointerPlusOffset(perm1, rem);
  PetscCall(PetscMalloc1(nleaves, &Cperm1));
  PetscCall(PetscArraycpy(Cperm1, perm1prem, nleaves));

  /* Support for HYPRE matrices, kind of a hack.
     Swap min column with diagonal so that diagonal values will go first */
  PetscBool hypre;
  PetscCall(PetscStrcmp("_internal_COO_mat_for_hypre", ((PetscObject)mat)->name, &hypre));
  if (hypre) {
    PetscInt *minj;
    PetscBT   hasdiag;

    PetscCall(PetscBTCreate(m, &hasdiag));
    PetscCall(PetscMalloc1(m, &minj));
    for (k = 0; k < m; k++) minj[k] = PETSC_INT_MAX;
    /* First pass: record which local rows have a diagonal entry and each row's minimum diag-block column */
    for (k = i1start; k < rem; k++) {
      if (j1[k] < cstart || j1[k] >= cend) continue;
      const PetscInt rindex = i1[k] - rstart;
      if ((j1[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex));
      minj[rindex] = PetscMin(minj[rindex], j1[k]);
    }
    for (k = 0; k < n2; k++) {
      if (j2[k] < cstart || j2[k] >= cend) continue;
      const PetscInt rindex = i2[k] - rstart;
      if ((j2[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex));
      minj[rindex] = PetscMin(minj[rindex], j2[k]);
    }
    /* Second pass: swap the minimum column with the diagonal column so the diagonal sorts first */
    for (k = i1start; k < rem; k++) {
      const PetscInt rindex = i1[k] - rstart;
      if (j1[k] < cstart || j1[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue;
      if (j1[k] == minj[rindex]) j1[k] = i1[k] + (cstart - rstart);
      else if ((j1[k] - cstart) == rindex) j1[k] = minj[rindex];
    }
    for (k = 0; k < n2; k++) {
      const PetscInt rindex = i2[k] - rstart;
      if (j2[k] < cstart || j2[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue;
      if (j2[k] == minj[rindex]) j2[k] = i2[k] + (cstart - rstart);
      else if ((j2[k] - cstart) == rindex) j2[k] = minj[rindex];
    }

    PetscCall(PetscBTDestroy(&hasdiag));
    PetscCall(PetscFree(minj));
  }

  /* Split local COOs and received COOs into diag/offdiag portions */
  PetscCount *rowBegin1, *rowMid1, *rowEnd1;
  PetscCount *Ajmap1, *Aperm1, *Bjmap1, *Bperm1;
  PetscCount  Annz1, Bnnz1, Atot1, Btot1;
  PetscCount *rowBegin2, *rowMid2, *rowEnd2;
  PetscCount *Ajmap2, *Aperm2, *Bjmap2, *Bperm2;
  PetscCount  Annz2, Bnnz2, Atot2, Btot2;

  PetscCall(PetscCalloc3(m, &rowBegin1, m, &rowMid1, m, &rowEnd1));
  PetscCall(PetscCalloc3(m, &rowBegin2, m, &rowMid2, m, &rowEnd2));
  PetscCall(MatSplitEntries_Internal(mat, rem, i1, j1, perm1, rowBegin1, rowMid1, rowEnd1, &Atot1, &Aperm1, &Annz1, &Ajmap1, &Btot1, &Bperm1, &Bnnz1, &Bjmap1));
  PetscCall(MatSplitEntries_Internal(mat, n2, i2, j2, perm2, rowBegin2, rowMid2, rowEnd2, &Atot2, &Aperm2, &Annz2, &Ajmap2, &Btot2, &Bperm2, &Bnnz2, &Bjmap2));

  /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */
  PetscInt *Ai, *Bi;
  PetscInt *Aj, *Bj;

  PetscCall(PetscMalloc1(m + 1, &Ai));
  PetscCall(PetscMalloc1(m + 1, &Bi));
  PetscCall(PetscMalloc1(Annz1 + Annz2, &Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */
  PetscCall(PetscMalloc1(Bnnz1 + Bnnz2, &Bj));

  PetscCount *Aimap1, *Bimap1, *Aimap2, *Bimap2;
  PetscCall(PetscMalloc1(Annz1, &Aimap1));
  PetscCall(PetscMalloc1(Bnnz1, &Bimap1));
  PetscCall(PetscMalloc1(Annz2, &Aimap2));
  PetscCall(PetscMalloc1(Bnnz2, &Bimap2));

  PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowBegin1, rowMid1, rowBegin2, rowMid2, Ajmap1, Ajmap2, Aimap1, Aimap2, Ai, Aj));
  PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowMid1, rowEnd1, rowMid2, rowEnd2, Bjmap1, Bjmap2, Bimap1, Bimap2, Bi, Bj));

  /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we */
  /* expect nonzeros in A/B most likely have local contributing entries */
  PetscInt    Annz = Ai[m];
  PetscInt    Bnnz = Bi[m];
  PetscCount *Ajmap1_new, *Bjmap1_new;

  PetscCall(PetscMalloc1(Annz + 1, &Ajmap1_new));
  PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap1_new));

  PetscCall(ExpandJmap_Internal(Annz1, Annz, Aimap1, Ajmap1, Ajmap1_new));
  PetscCall(ExpandJmap_Internal(Bnnz1, Bnnz, Bimap1, Bjmap1, Bjmap1_new));

  PetscCall(PetscFree(Aimap1));
  PetscCall(PetscFree(Ajmap1));
  PetscCall(PetscFree(Bimap1));
  PetscCall(PetscFree(Bjmap1));
  PetscCall(PetscFree3(rowBegin1, rowMid1, rowEnd1));
  PetscCall(PetscFree3(rowBegin2, rowMid2, rowEnd2));
  PetscCall(PetscFree(perm1));
  PetscCall(PetscFree3(i2, j2, perm2));

  Ajmap1 = Ajmap1_new;
  Bjmap1 = Bjmap1_new;

  /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */
  if (Annz < Annz1 + Annz2) {
    PetscInt *Aj_new;
    PetscCall(PetscMalloc1(Annz, &Aj_new));
    PetscCall(PetscArraycpy(Aj_new, Aj, Annz));
    PetscCall(PetscFree(Aj));
    Aj = Aj_new;
  }

  if (Bnnz < Bnnz1 + Bnnz2) {
    PetscInt *Bj_new;
    PetscCall(PetscMalloc1(Bnnz, &Bj_new));
    PetscCall(PetscArraycpy(Bj_new, Bj, Bnnz));
    PetscCall(PetscFree(Bj));
    Bj = Bj_new;
  }

  /* Create new submatrices for on-process and off-process coupling */
  PetscScalar     *Aa, *Ba;
  MatType          rtype;
  Mat_SeqAIJ      *a, *b;
  PetscObjectState state;
  PetscCall(PetscCalloc1(Annz, &Aa)); /* Zero matrix on device */
  PetscCall(PetscCalloc1(Bnnz, &Ba));
  /* make Aj[] local, i.e, based off the start column of the diagonal portion */
  if (cstart) {
    for (k = 0; k < Annz; k++) Aj[k] -= cstart;
  }

  PetscCall(MatGetRootType_Private(mat, &rtype));

  MatSeqXAIJGetOptions_Private(mpiaij->A);
  PetscCall(MatDestroy(&mpiaij->A));
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, Ai, Aj, Aa, &mpiaij->A));
  PetscCall(MatSetBlockSizesFromMats(mpiaij->A, mat, mat));
  MatSeqXAIJRestoreOptions_Private(mpiaij->A);

  MatSeqXAIJGetOptions_Private(mpiaij->B);
  PetscCall(MatDestroy(&mpiaij->B));
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, mat->cmap->N, Bi, Bj, Ba, &mpiaij->B));
  PetscCall(MatSetBlockSizesFromMats(mpiaij->B, mat, mat));
  MatSeqXAIJRestoreOptions_Private(mpiaij->B);

  PetscCall(MatSetUpMultiply_MPIAIJ(mat));
  mat->was_assembled = PETSC_TRUE; // was_assembled in effect means the Mvctx is built; doing so avoids redundant MatSetUpMultiply_MPIAIJ
  state              = mpiaij->A->nonzerostate + mpiaij->B->nonzerostate;
  PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));

  a          = (Mat_SeqAIJ *)mpiaij->A->data;
  b          = (Mat_SeqAIJ *)mpiaij->B->data;
  a->free_a  = PETSC_TRUE; /* the submatrices own Ai/Aj/Aa (resp. Bi/Bj/Ba) and free them on destroy */
  a->free_ij = PETSC_TRUE;
  b->free_a  = PETSC_TRUE;
  b->free_ij = PETSC_TRUE;
  a->maxnz   = a->nz;
  b->maxnz   = b->nz;

  /* conversion must happen AFTER multiply setup */
  PetscCall(MatConvert(mpiaij->A, rtype, MAT_INPLACE_MATRIX, &mpiaij->A));
  PetscCall(MatConvert(mpiaij->B, rtype, MAT_INPLACE_MATRIX, &mpiaij->B));
  PetscCall(VecDestroy(&mpiaij->lvec));
  PetscCall(MatCreateVecs(mpiaij->B, &mpiaij->lvec, NULL));

  // Put the COO struct in a container and then attach that to the matrix
  PetscCall(PetscMalloc1(1, &coo));
  coo->n       = coo_n;
  coo->sf      = sf2;
  coo->sendlen = nleaves;
  coo->recvlen = nroots;
  coo->Annz    = Annz;
  coo->Bnnz    = Bnnz;
  coo->Annz2   = Annz2;
  coo->Bnnz2   = Bnnz2;
  coo->Atot1   = Atot1;
  coo->Atot2   = Atot2;
  coo->Btot1   = Btot1;
  coo->Btot2   = Btot2;
  coo->Ajmap1  = Ajmap1;
  coo->Aperm1  = Aperm1;
  coo->Bjmap1  = Bjmap1;
  coo->Bperm1  = Bperm1;
  coo->Aimap2  = Aimap2;
  coo->Ajmap2  = Ajmap2;
  coo->Aperm2  = Aperm2;
  coo->Bimap2  = Bimap2;
  coo->Bjmap2  = Bjmap2;
  coo->Bperm2  = Bperm2;
  coo->Cperm1  = Cperm1;
  // Allocate in preallocation. If not used, it has zero cost on host
  PetscCall(PetscMalloc2(coo->sendlen, &coo->sendbuf, coo->recvlen, &coo->recvbuf));
  PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container));
  PetscCall(PetscContainerSetPointer(container, coo));
  PetscCall(PetscContainerSetCtxDestroy(container, MatCOOStructDestroy_MPIAIJ));
  PetscCall(PetscObjectCompose((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject)container));
  PetscCall(PetscContainerDestroy(&container));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Insert/add the values v[] (ordered as the coo_i/coo_j passed to MatSetPreallocationCOO_MPIAIJ())
   into the matrix, using the communication pattern and index maps cached in the attached
   MatCOOStruct_MPIAIJ. Remote sends are overlapped with the local accumulation. */
static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat, const PetscScalar v[], InsertMode imode)
{
  Mat_MPIAIJ          *mpiaij = (Mat_MPIAIJ *)mat->data;
  Mat                  A = mpiaij->A, B = mpiaij->B;
  PetscScalar         *Aa, *Ba;
  PetscScalar         *sendbuf, *recvbuf;
  const PetscCount    *Ajmap1, *Ajmap2, *Aimap2;
  const PetscCount    *Bjmap1, *Bjmap2, *Bimap2;
  const PetscCount    *Aperm1, *Aperm2, *Bperm1, *Bperm2;
  const PetscCount    *Cperm1;
  PetscContainer       container;
  MatCOOStruct_MPIAIJ *coo;

  PetscFunctionBegin;
  PetscCall(PetscObjectQuery((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject *)&container));
  PetscCheck(container, PetscObjectComm((PetscObject)mat), PETSC_ERR_PLIB, "Not found MatCOOStruct on this matrix");
  PetscCall(PetscContainerGetPointer(container, (void **)&coo));
  sendbuf = coo->sendbuf;
  recvbuf = coo->recvbuf;
  Ajmap1  = coo->Ajmap1;
  Ajmap2  = coo->Ajmap2;
  Aimap2  = coo->Aimap2;
  Bjmap1  = coo->Bjmap1;
  Bjmap2  = coo->Bjmap2;
  Bimap2  = coo->Bimap2;
  Aperm1  = coo->Aperm1;
  Aperm2  = coo->Aperm2;
  Bperm1  = coo->Bperm1;
  Bperm2  = coo->Bperm2;
  Cperm1  = coo->Cperm1;

  PetscCall(MatSeqAIJGetArray(A, &Aa)); /* Might read and write matrix values */
  PetscCall(MatSeqAIJGetArray(B, &Ba));

  /* Pack entries to be sent to remote */
  for (PetscCount i = 0; i < coo->sendlen; i++) sendbuf[i] = v[Cperm1[i]];

  /* Send remote entries to their owner and overlap the communication with local computation */
  PetscCall(PetscSFReduceWithMemTypeBegin(coo->sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, sendbuf, PETSC_MEMTYPE_HOST, recvbuf, MPI_REPLACE));
  /* Add local entries to A and B */
  for (PetscCount i = 0; i < coo->Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */
    PetscScalar sum = 0.0;                     /* Do partial summation first to improve numerical stability */
    for (PetscCount k = Ajmap1[i]; k < Ajmap1[i + 1]; k++) sum += v[Aperm1[k]];
    Aa[i] = (imode == INSERT_VALUES ? 0.0 : Aa[i]) + sum;
  }
  for (PetscCount i = 0; i < coo->Bnnz; i++) {
    PetscScalar sum = 0.0;
    for (PetscCount k = Bjmap1[i]; k < Bjmap1[i + 1]; k++) sum += v[Bperm1[k]];
    Ba[i] = (imode == INSERT_VALUES ? 0.0 : Ba[i]) + sum;
  }
  PetscCall(PetscSFReduceEnd(coo->sf, MPIU_SCALAR, sendbuf, recvbuf, MPI_REPLACE));

  /* Add received remote entries to A and B */
  for (PetscCount i = 0; i < coo->Annz2; i++) {
    for (PetscCount k = Ajmap2[i]; k < Ajmap2[i + 1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]];
  }
  for (PetscCount i = 0; i < coo->Bnnz2; i++) {
    for (PetscCount k = Bjmap2[i]; k < Bjmap2[i + 1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]];
  }
  PetscCall(MatSeqAIJRestoreArray(A, &Aa));
  PetscCall(MatSeqAIJRestoreArray(B, &Ba));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
   MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.

   Options Database Keys:
. -mat_type mpiaij - sets the matrix type to `MATMPIAIJ` during a call to `MatSetFromOptions()`

   Level: beginner

   Notes:
   `MatSetValues()` may be called for this matrix type with a `NULL` argument for the numerical values,
   in this case the values associated with the rows and columns one passes in are set to zero
   in the matrix

   `MatSetOptions`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this no
   space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored

.seealso: [](ch_matrices), `Mat`, `MATSEQAIJ`, `MATAIJ`, `MatCreateAIJ()`
M*/
/* Constructor registered for the MATMPIAIJ type: allocates the Mat_MPIAIJ context,
   installs the function table, and composes the type-specific methods (preallocation,
   COO insertion, conversions to other formats, ...) on the object */
PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B)
{
  Mat_MPIAIJ *b;
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));

  PetscCall(PetscNew(&b));
  B->data       = (void *)b;
  B->ops[0]     = MatOps_Values;
  B->assembled  = PETSC_FALSE;
  B->insertmode = NOT_SET_VALUES;
  b->size       = size;

  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));

  /* build cache for off array entries formed */
  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL;
  b->garray      = NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = NULL;
  b->rowvalues    = NULL;
  b->getrowactive = PETSC_FALSE;

  /* flexible pointer used in CUSPARSE classes */
  b->spptr = NULL;

  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetUseScalableIncreaseOverlap_C", MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocation_C", MatMPIAIJSetPreallocation_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetPreallocation_C", MatResetPreallocation_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetHash_C", MatResetHash_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocationCSR_C", MatMPIAIJSetPreallocationCSR_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijperm_C", MatConvert_MPIAIJ_MPIAIJPERM));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijsell_C", MatConvert_MPIAIJ_MPIAIJSELL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcusparse_C", MatConvert_MPIAIJ_MPIAIJCUSPARSE));
#endif
#if defined(PETSC_HAVE_HIP)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijhipsparse_C", MatConvert_MPIAIJ_MPIAIJHIPSPARSE));
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijkokkos_C", MatConvert_MPIAIJ_MPIAIJKokkos));
#endif
#if defined(PETSC_HAVE_MKL_SPARSE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijmkl_C", MatConvert_MPIAIJ_MPIAIJMKL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcrl_C", MatConvert_MPIAIJ_MPIAIJCRL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpibaij_C", MatConvert_MPIAIJ_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisbaij_C", MatConvert_MPIAIJ_MPISBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_elemental_C", MatConvert_MPIAIJ_Elemental));
#endif
#if defined(PETSC_HAVE_SCALAPACK) && (defined(PETSC_USE_REAL_SINGLE) || defined(PETSC_USE_REAL_DOUBLE))
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_scalapack_C", MatConvert_AIJ_ScaLAPACK));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_is_C", MatConvert_XAIJ_IS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisell_C", MatConvert_MPIAIJ_MPISELL));
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_hypre_C", MatConvert_AIJ_HYPRE));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", MatProductSetFromOptions_Transpose_AIJ_AIJ));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_is_mpiaij_C", MatProductSetFromOptions_IS_XAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpiaij_C", MatProductSetFromOptions_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetPreallocationCOO_C", MatSetPreallocationCOO_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetValuesCOO_C", MatSetValuesCOO_MPIAIJ));
  PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIAIJ));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
   MatCreateMPIAIJWithSplitArrays - creates a `MATMPIAIJ` matrix using arrays that contain the "diagonal"
   and "off-diagonal" part of the matrix in CSR format.
   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be `PETSC_DECIDE`)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product $y = Ax$. (or `PETSC_DECIDE` to have
       calculated if `N` is given) For square matrices `n` is almost always `m`.
.  M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
.  N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
.  i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
.  j - column indices, which must be local, i.e., based off the start column of the diagonal portion
.  a - matrix values
.  oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
.  oj - column indices, which must be global, representing global columns in the `MATMPIAIJ` matrix
-  oa - matrix values

   Output Parameter:
.  mat - the matrix

   Level: advanced

   Notes:
   The `i`, `j`, and `a` arrays ARE NOT copied by this routine into the internal format used by PETSc (even in Fortran). The user
   must free the arrays once the matrix has been destroyed and not before.

   The `i` and `j` indices are 0 based

   See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix

   This sets local rows and cannot be used to set off-processor values.

   Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
   legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
   not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
   the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
   keep track of the underlying array. Use `MatSetOption`(A,`MAT_NO_OFF_PROC_ENTRIES`,`PETSC_TRUE`) to disable all
   communication if it is known that only local entries will be set.

.seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
          `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()`
@*/
PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt i[], PetscInt j[], PetscScalar a[], PetscInt oi[], PetscInt oj[], PetscScalar oa[], Mat *mat)
{
  Mat_MPIAIJ *maij;

  PetscFunctionBegin;
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
  PetscCheck(oi[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "oi (row indices) must start with 0");
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatSetSizes(*mat, m, n, M, N));
  PetscCall(MatSetType(*mat, MATMPIAIJ));
  maij = (Mat_MPIAIJ *)(*mat)->data;

  /* mark preallocated so assembly below does not try to preallocate again */
  (*mat)->preallocated = PETSC_TRUE;

  PetscCall(PetscLayoutSetUp((*mat)->rmap));
  PetscCall(PetscLayoutSetUp((*mat)->cmap));

  /* wrap the user arrays directly (no copy) as the diag (A) and offdiag (B) sequential blocks */
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, i, j, a, &maij->A));
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, (*mat)->cmap->N, oi, oj, oa, &maij->B));

  /* all entries are local by construction, so assembly needs no communication */
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
  PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Context shared by the MPIAIJ "backend" MatMat product implementations below:
   holds the intermediate sequential products and the COO maps/buffers used to
   scatter their values into the final parallel matrix C */
typedef struct {
  Mat       *mp;    /* intermediate products */
  PetscBool *mptmp; /* is the intermediate product temporary ? */
  PetscInt   cp;    /* number of intermediate products */

  /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */
  PetscInt    *startsj_s, *startsj_r;
  PetscScalar *bufa;
  Mat          P_oth;

  /* may take advantage of merging product->B */
  Mat Bloc; /* B-local by merging diag and off-diag */

  /* cusparse does not have support to split between symbolic and numeric phases.
     When api_user is true, we don't need to update the numerical values
     of the temporary storage */
  PetscBool reusesym;

  /* support for COO values insertion */
  PetscScalar *coo_v, *coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */
  PetscInt   **own;           /* own[i] points to address of on-process COO indices for Mat mp[i] */
  PetscInt   **off;           /* off[i] points to address of off-process COO indices for Mat mp[i] */
  PetscBool    hasoffproc;    /* if true, have off-process values insertion (i.e.
 AtB or PtAP) */
  PetscSF      sf;            /* used for non-local values insertion and memory malloc */
  PetscMemType mtype;

  /* customization */
  PetscBool abmerge;
  PetscBool P_oth_bind;
} MatMatMPIAIJBACKEND;

/* Destructor for the MatMatMPIAIJBACKEND product data: releases the intermediate
   products, communication buffers, SF, and the own/off COO index tables */
static PetscErrorCode MatProductCtxDestroy_MatMatMPIAIJBACKEND(void **data)
{
  MatMatMPIAIJBACKEND *mmdata = *(MatMatMPIAIJBACKEND **)data;
  PetscInt             i;

  PetscFunctionBegin;
  PetscCall(PetscFree2(mmdata->startsj_s, mmdata->startsj_r));
  PetscCall(PetscFree(mmdata->bufa));
  /* coo_v/coo_w were allocated with PetscSFMalloc on mmdata->mtype, hence PetscSFFree */
  PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_v));
  PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_w));
  PetscCall(MatDestroy(&mmdata->P_oth));
  PetscCall(MatDestroy(&mmdata->Bloc));
  PetscCall(PetscSFDestroy(&mmdata->sf));
  for (i = 0; i < mmdata->cp; i++) PetscCall(MatDestroy(&mmdata->mp[i]));
  PetscCall(PetscFree2(mmdata->mp, mmdata->mptmp));
  /* own[0]/off[0] hold the single backing allocation for all per-product index slices */
  PetscCall(PetscFree(mmdata->own[0]));
  PetscCall(PetscFree(mmdata->own));
  PetscCall(PetscFree(mmdata->off[0]));
  PetscCall(PetscFree(mmdata->off));
  PetscCall(PetscFree(mmdata));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Copy selected n entries with indices in idx[] of A to v[].
   If idx is NULL, copy the whole data array of A to v[]
 */
static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[])
{
  PetscErrorCode (*f)(Mat, PetscInt, const PetscInt[], PetscScalar[]);

  PetscFunctionBegin;
  /* prefer a type-specific (e.g. device-aware) implementation when one is composed on A */
  PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJCopySubArray_C", &f));
  if (f) {
    PetscCall((*f)(A, n, idx, v));
  } else {
    const PetscScalar *vv;

    PetscCall(MatSeqAIJGetArrayRead(A, &vv));
    if (n && idx) {
      PetscScalar    *w  = v;
      const PetscInt *oi = idx;
      PetscInt        j;

      for (j = 0; j < n; j++) *w++ = vv[*oi++];
    } else {
      PetscCall(PetscArraycpy(v, vv, n));
    }
    PetscCall(MatSeqAIJRestoreArrayRead(A, &vv));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Numeric phase of the backend MatMat product: recompute the intermediate sequential
   products, gather their values (on-process and off-process portions) into coo_v/coo_w,
   and insert them into C with MatSetValuesCOO() */
static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C)
{
  MatMatMPIAIJBACKEND *mmdata;
  PetscInt             i, n_d, n_o;

  PetscFunctionBegin;
  MatCheckProduct(C, 1);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  mmdata = (MatMatMPIAIJBACKEND *)C->product->data;
  if (!mmdata->reusesym) { /* update temporary matrices */
    if (mmdata->P_oth) PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A, C->product->B, MAT_REUSE_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth));
    if (mmdata->Bloc) PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B, MAT_REUSE_MATRIX, NULL, &mmdata->Bloc));
  }
  mmdata->reusesym = PETSC_FALSE; /* first numeric call after symbolic may reuse; subsequent calls must refresh */

  for (i = 0; i < mmdata->cp; i++) {
    PetscCheck(mmdata->mp[i]->ops->productnumeric, PetscObjectComm((PetscObject)mmdata->mp[i]), PETSC_ERR_PLIB, "Missing numeric op for %s", MatProductTypes[mmdata->mp[i]->product->type]);
    PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]));
  }
  /* n_d/n_o run over the on-process (coo_v) and off-process (coo_w) value buffers */
  for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) {
    PetscInt noff;

    PetscCall(PetscIntCast(mmdata->off[i + 1] - mmdata->off[i], &noff));
    if (mmdata->mptmp[i]) continue; /* temporary products feed later products, not C directly */
    if (noff) {
      PetscInt nown;

      PetscCall(PetscIntCast(mmdata->own[i + 1] - mmdata->own[i], &nown));
      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], noff, mmdata->off[i], mmdata->coo_w + n_o));
      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], nown, mmdata->own[i], mmdata->coo_v + n_d));
      n_o += noff;
      n_d += nown;
    } else {
      Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mmdata->mp[i]->data;

      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], mm->nz, NULL, mmdata->coo_v + n_d));
      n_d += mm->nz;
    }
  }
  if (mmdata->hasoffproc) { /* offprocess insertion */
    PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
    PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
  }
  PetscCall(MatSetValuesCOO(C, mmdata->coo_v, INSERT_VALUES));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Support for Pt * A, A * P, or Pt * A * P */
#define MAX_NUMBER_INTERMEDIATE 4
PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C)
{
  Mat_Product         *product = C->product;
  Mat                  A, P, mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */
  Mat_MPIAIJ          *a, *p;
  MatMatMPIAIJBACKEND *mmdata;
  ISLocalToGlobalMapping P_oth_l2g = NULL;
  IS                   glob = NULL;
  const char          *prefix;
  char                 pprefix[256];
  const PetscInt      *globidx, *P_oth_idx;
  PetscInt             i, j, cp, m, n, M, N, *coo_i, *coo_j;
  PetscCount           ncoo, ncoo_d, ncoo_o, ncoo_oown;
  PetscInt             cmapt[MAX_NUMBER_INTERMEDIATE], rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[].
*/ 7094 /* type-0: consecutive, start from 0; type-1: consecutive with */ 7095 /* a base offset; type-2: sparse with a local to global map table */ 7096 const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE], *rmapa[MAX_NUMBER_INTERMEDIATE]; /* col/row local to global map array (table) for type-2 map type */ 7097 7098 MatProductType ptype; 7099 PetscBool mptmp[MAX_NUMBER_INTERMEDIATE], hasoffproc = PETSC_FALSE, iscuda, iship, iskokk; 7100 PetscMPIInt size; 7101 7102 PetscFunctionBegin; 7103 MatCheckProduct(C, 1); 7104 PetscCheck(!product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 7105 ptype = product->type; 7106 if (product->A->symmetric == PETSC_BOOL3_TRUE && ptype == MATPRODUCT_AtB) { 7107 ptype = MATPRODUCT_AB; 7108 product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE; 7109 } 7110 switch (ptype) { 7111 case MATPRODUCT_AB: 7112 A = product->A; 7113 P = product->B; 7114 m = A->rmap->n; 7115 n = P->cmap->n; 7116 M = A->rmap->N; 7117 N = P->cmap->N; 7118 hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */ 7119 break; 7120 case MATPRODUCT_AtB: 7121 P = product->A; 7122 A = product->B; 7123 m = P->cmap->n; 7124 n = A->cmap->n; 7125 M = P->cmap->N; 7126 N = A->cmap->N; 7127 hasoffproc = PETSC_TRUE; 7128 break; 7129 case MATPRODUCT_PtAP: 7130 A = product->A; 7131 P = product->B; 7132 m = P->cmap->n; 7133 n = P->cmap->n; 7134 M = P->cmap->N; 7135 N = P->cmap->N; 7136 hasoffproc = PETSC_TRUE; 7137 break; 7138 default: 7139 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]); 7140 } 7141 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C), &size)); 7142 if (size == 1) hasoffproc = PETSC_FALSE; 7143 7144 /* defaults */ 7145 for (i = 0; i < MAX_NUMBER_INTERMEDIATE; i++) { 7146 mp[i] = NULL; 7147 mptmp[i] = PETSC_FALSE; 7148 rmapt[i] = -1; 7149 cmapt[i] = -1; 7150 rmapa[i] = NULL; 7151 cmapa[i] = NULL; 7152 } 7153 7154 /* customization */ 
7155 PetscCall(PetscNew(&mmdata)); 7156 mmdata->reusesym = product->api_user; 7157 if (ptype == MATPRODUCT_AB) { 7158 if (product->api_user) { 7159 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatMult", "Mat"); 7160 PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL)); 7161 PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7162 PetscOptionsEnd(); 7163 } else { 7164 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat"); 7165 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL)); 7166 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7167 PetscOptionsEnd(); 7168 } 7169 } else if (ptype == MATPRODUCT_PtAP) { 7170 if (product->api_user) { 7171 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatPtAP", "Mat"); 7172 PetscCall(PetscOptionsBool("-matptap_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7173 PetscOptionsEnd(); 7174 } else { 7175 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat"); 7176 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7177 PetscOptionsEnd(); 7178 } 7179 } 7180 a = (Mat_MPIAIJ *)A->data; 7181 p = (Mat_MPIAIJ *)P->data; 7182 PetscCall(MatSetSizes(C, m, n, M, N)); 7183 PetscCall(PetscLayoutSetUp(C->rmap)); 7184 PetscCall(PetscLayoutSetUp(C->cmap)); 7185 PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 7186 
PetscCall(MatGetOptionsPrefix(C, &prefix)); 7187 7188 cp = 0; 7189 switch (ptype) { 7190 case MATPRODUCT_AB: /* A * P */ 7191 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7192 7193 /* A_diag * P_local (merged or not) */ 7194 if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */ 7195 /* P is product->B */ 7196 PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7197 PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp])); 7198 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7199 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7200 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7201 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7202 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7203 mp[cp]->product->api_user = product->api_user; 7204 PetscCall(MatProductSetFromOptions(mp[cp])); 7205 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7206 PetscCall(ISGetIndices(glob, &globidx)); 7207 rmapt[cp] = 1; 7208 cmapt[cp] = 2; 7209 cmapa[cp] = globidx; 7210 mptmp[cp] = PETSC_FALSE; 7211 cp++; 7212 } else { /* A_diag * P_diag and A_diag * P_off */ 7213 PetscCall(MatProductCreate(a->A, p->A, NULL, &mp[cp])); 7214 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7215 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7216 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7217 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7218 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7219 mp[cp]->product->api_user = product->api_user; 7220 PetscCall(MatProductSetFromOptions(mp[cp])); 7221 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7222 rmapt[cp] = 1; 7223 cmapt[cp] = 1; 7224 mptmp[cp] = PETSC_FALSE; 7225 cp++; 7226 PetscCall(MatProductCreate(a->A, p->B, NULL, &mp[cp])); 7227 
PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7228 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7229 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7230 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7231 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7232 mp[cp]->product->api_user = product->api_user; 7233 PetscCall(MatProductSetFromOptions(mp[cp])); 7234 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7235 rmapt[cp] = 1; 7236 cmapt[cp] = 2; 7237 cmapa[cp] = p->garray; 7238 mptmp[cp] = PETSC_FALSE; 7239 cp++; 7240 } 7241 7242 /* A_off * P_other */ 7243 if (mmdata->P_oth) { 7244 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); /* make P_oth use local col ids */ 7245 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx)); 7246 PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)a->B)->type_name)); 7247 PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind)); 7248 PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp])); 7249 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7250 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7251 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7252 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7253 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7254 mp[cp]->product->api_user = product->api_user; 7255 PetscCall(MatProductSetFromOptions(mp[cp])); 7256 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7257 rmapt[cp] = 1; 7258 cmapt[cp] = 2; 7259 cmapa[cp] = P_oth_idx; 7260 mptmp[cp] = PETSC_FALSE; 7261 cp++; 7262 } 7263 break; 7264 7265 case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */ 7266 /* A is product->B */ 7267 PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7268 if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */ 7269 PetscCall(MatProductCreate(mmdata->Bloc, mmdata->Bloc, 
NULL, &mp[cp])); 7270 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7271 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7272 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7273 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7274 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7275 mp[cp]->product->api_user = product->api_user; 7276 PetscCall(MatProductSetFromOptions(mp[cp])); 7277 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7278 PetscCall(ISGetIndices(glob, &globidx)); 7279 rmapt[cp] = 2; 7280 rmapa[cp] = globidx; 7281 cmapt[cp] = 2; 7282 cmapa[cp] = globidx; 7283 mptmp[cp] = PETSC_FALSE; 7284 cp++; 7285 } else { 7286 PetscCall(MatProductCreate(p->A, mmdata->Bloc, NULL, &mp[cp])); 7287 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7288 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7289 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7290 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7291 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7292 mp[cp]->product->api_user = product->api_user; 7293 PetscCall(MatProductSetFromOptions(mp[cp])); 7294 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7295 PetscCall(ISGetIndices(glob, &globidx)); 7296 rmapt[cp] = 1; 7297 cmapt[cp] = 2; 7298 cmapa[cp] = globidx; 7299 mptmp[cp] = PETSC_FALSE; 7300 cp++; 7301 PetscCall(MatProductCreate(p->B, mmdata->Bloc, NULL, &mp[cp])); 7302 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7303 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7304 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7305 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7306 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7307 mp[cp]->product->api_user = product->api_user; 7308 PetscCall(MatProductSetFromOptions(mp[cp])); 7309 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7310 rmapt[cp] = 2; 7311 rmapa[cp] = p->garray; 7312 cmapt[cp] = 
2; 7313 cmapa[cp] = globidx; 7314 mptmp[cp] = PETSC_FALSE; 7315 cp++; 7316 } 7317 break; 7318 case MATPRODUCT_PtAP: 7319 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7320 /* P is product->B */ 7321 PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7322 PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp])); 7323 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_PtAP)); 7324 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7325 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7326 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7327 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7328 mp[cp]->product->api_user = product->api_user; 7329 PetscCall(MatProductSetFromOptions(mp[cp])); 7330 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7331 PetscCall(ISGetIndices(glob, &globidx)); 7332 rmapt[cp] = 2; 7333 rmapa[cp] = globidx; 7334 cmapt[cp] = 2; 7335 cmapa[cp] = globidx; 7336 mptmp[cp] = PETSC_FALSE; 7337 cp++; 7338 if (mmdata->P_oth) { 7339 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); 7340 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx)); 7341 PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)a->B)->type_name)); 7342 PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind)); 7343 PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp])); 7344 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7345 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7346 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7347 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7348 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7349 mp[cp]->product->api_user = product->api_user; 7350 PetscCall(MatProductSetFromOptions(mp[cp])); 7351 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7352 mptmp[cp] = PETSC_TRUE; 
7353 cp++; 7354 PetscCall(MatProductCreate(mmdata->Bloc, mp[1], NULL, &mp[cp])); 7355 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7356 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7357 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7358 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7359 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7360 mp[cp]->product->api_user = product->api_user; 7361 PetscCall(MatProductSetFromOptions(mp[cp])); 7362 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7363 rmapt[cp] = 2; 7364 rmapa[cp] = globidx; 7365 cmapt[cp] = 2; 7366 cmapa[cp] = P_oth_idx; 7367 mptmp[cp] = PETSC_FALSE; 7368 cp++; 7369 } 7370 break; 7371 default: 7372 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]); 7373 } 7374 /* sanity check */ 7375 if (size > 1) 7376 for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected offproc map type for product %" PetscInt_FMT, i); 7377 7378 PetscCall(PetscMalloc2(cp, &mmdata->mp, cp, &mmdata->mptmp)); 7379 for (i = 0; i < cp; i++) { 7380 mmdata->mp[i] = mp[i]; 7381 mmdata->mptmp[i] = mptmp[i]; 7382 } 7383 mmdata->cp = cp; 7384 C->product->data = mmdata; 7385 C->product->destroy = MatProductCtxDestroy_MatMatMPIAIJBACKEND; 7386 C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND; 7387 7388 /* memory type */ 7389 mmdata->mtype = PETSC_MEMTYPE_HOST; 7390 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iscuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, "")); 7391 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iship, MATSEQAIJHIPSPARSE, MATMPIAIJHIPSPARSE, "")); 7392 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iskokk, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, "")); 7393 if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA; 7394 else if (iship) mmdata->mtype = PETSC_MEMTYPE_HIP; 7395 else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS; 7396 7397 /* 
prepare coo coordinates for values insertion */ 7398 7399 /* count total nonzeros of those intermediate seqaij Mats 7400 ncoo_d: # of nonzeros of matrices that do not have offproc entries 7401 ncoo_o: # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs 7402 ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally 7403 */ 7404 for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) { 7405 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7406 if (mptmp[cp]) continue; 7407 if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scatter to all processes (might include self) */ 7408 const PetscInt *rmap = rmapa[cp]; 7409 const PetscInt mr = mp[cp]->rmap->n; 7410 const PetscInt rs = C->rmap->rstart; 7411 const PetscInt re = C->rmap->rend; 7412 const PetscInt *ii = mm->i; 7413 for (i = 0; i < mr; i++) { 7414 const PetscInt gr = rmap[i]; 7415 const PetscInt nz = ii[i + 1] - ii[i]; 7416 if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */ 7417 else ncoo_oown += nz; /* this row is local */ 7418 } 7419 } else ncoo_d += mm->nz; 7420 } 7421 7422 /* 7423 ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc 7424 7425 ncoo = ncoo_d + ncoo_oown + ncoo2, which ncoo2 is number of nonzeros inserted to me by other procs. 7426 7427 off[0] points to a big index array, which is shared by off[1,2,...]. Similarly, for own[0]. 7428 7429 off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others 7430 own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally 7431 so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others. 7432 7433 coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc. 7434 Ex. 
coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaining part stores i of nonzeros I will receive. 7435 */ 7436 PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->off)); /* +1 to make a csr-like data structure */ 7437 PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->own)); 7438 7439 /* gather (i,j) of nonzeros inserted by remote procs */ 7440 if (hasoffproc) { 7441 PetscSF msf; 7442 PetscInt ncoo2, *coo_i2, *coo_j2; 7443 7444 PetscCall(PetscMalloc1(ncoo_o, &mmdata->off[0])); 7445 PetscCall(PetscMalloc1(ncoo_oown, &mmdata->own[0])); 7446 PetscCall(PetscMalloc2(ncoo_o, &coo_i, ncoo_o, &coo_j)); /* to collect (i,j) of entries to be sent to others */ 7447 7448 for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) { 7449 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7450 PetscInt *idxoff = mmdata->off[cp]; 7451 PetscInt *idxown = mmdata->own[cp]; 7452 if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */ 7453 const PetscInt *rmap = rmapa[cp]; 7454 const PetscInt *cmap = cmapa[cp]; 7455 const PetscInt *ii = mm->i; 7456 PetscInt *coi = coo_i + ncoo_o; 7457 PetscInt *coj = coo_j + ncoo_o; 7458 const PetscInt mr = mp[cp]->rmap->n; 7459 const PetscInt rs = C->rmap->rstart; 7460 const PetscInt re = C->rmap->rend; 7461 const PetscInt cs = C->cmap->rstart; 7462 for (i = 0; i < mr; i++) { 7463 const PetscInt *jj = mm->j + ii[i]; 7464 const PetscInt gr = rmap[i]; 7465 const PetscInt nz = ii[i + 1] - ii[i]; 7466 if (gr < rs || gr >= re) { /* this is an offproc row */ 7467 for (j = ii[i]; j < ii[i + 1]; j++) { 7468 *coi++ = gr; 7469 *idxoff++ = j; 7470 } 7471 if (!cmapt[cp]) { /* already global */ 7472 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7473 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7474 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7475 } else { /* offdiag */ 7476 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7477 } 7478 ncoo_o += nz; 7479 } else { /* this is a local row */ 7480 for (j = ii[i]; j < 
ii[i + 1]; j++) *idxown++ = j; 7481 } 7482 } 7483 } 7484 mmdata->off[cp + 1] = idxoff; 7485 mmdata->own[cp + 1] = idxown; 7486 } 7487 7488 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf)); 7489 PetscInt incoo_o; 7490 PetscCall(PetscIntCast(ncoo_o, &incoo_o)); 7491 PetscCall(PetscSFSetGraphLayout(mmdata->sf, C->rmap, incoo_o /*nleaves*/, NULL /*ilocal*/, PETSC_OWN_POINTER, coo_i)); 7492 PetscCall(PetscSFGetMultiSF(mmdata->sf, &msf)); 7493 PetscCall(PetscSFGetGraph(msf, &ncoo2 /*nroots*/, NULL, NULL, NULL)); 7494 ncoo = ncoo_d + ncoo_oown + ncoo2; 7495 PetscCall(PetscMalloc2(ncoo, &coo_i2, ncoo, &coo_j2)); 7496 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */ 7497 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); 7498 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown)); 7499 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown)); 7500 PetscCall(PetscFree2(coo_i, coo_j)); 7501 /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */ 7502 PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo_o * sizeof(PetscScalar), (void **)&mmdata->coo_w)); 7503 coo_i = coo_i2; 7504 coo_j = coo_j2; 7505 } else { /* no offproc values insertion */ 7506 ncoo = ncoo_d; 7507 PetscCall(PetscMalloc2(ncoo, &coo_i, ncoo, &coo_j)); 7508 7509 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf)); 7510 PetscCall(PetscSFSetGraph(mmdata->sf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER)); 7511 PetscCall(PetscSFSetUp(mmdata->sf)); 7512 } 7513 mmdata->hasoffproc = hasoffproc; 7514 7515 /* gather (i,j) of nonzeros inserted locally */ 7516 for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) { 7517 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7518 PetscInt *coi = coo_i + ncoo_d; 7519 PetscInt *coj = coo_j + ncoo_d; 7520 const PetscInt *jj = 
mm->j; 7521 const PetscInt *ii = mm->i; 7522 const PetscInt *cmap = cmapa[cp]; 7523 const PetscInt *rmap = rmapa[cp]; 7524 const PetscInt mr = mp[cp]->rmap->n; 7525 const PetscInt rs = C->rmap->rstart; 7526 const PetscInt re = C->rmap->rend; 7527 const PetscInt cs = C->cmap->rstart; 7528 7529 if (mptmp[cp]) continue; 7530 if (rmapt[cp] == 1) { /* consecutive rows */ 7531 /* fill coo_i */ 7532 for (i = 0; i < mr; i++) { 7533 const PetscInt gr = i + rs; 7534 for (j = ii[i]; j < ii[i + 1]; j++) coi[j] = gr; 7535 } 7536 /* fill coo_j */ 7537 if (!cmapt[cp]) { /* type-0, already global */ 7538 PetscCall(PetscArraycpy(coj, jj, mm->nz)); 7539 } else if (cmapt[cp] == 1) { /* type-1, local to global for consecutive columns of C */ 7540 for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */ 7541 } else { /* type-2, local to global for sparse columns */ 7542 for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]]; 7543 } 7544 ncoo_d += mm->nz; 7545 } else if (rmapt[cp] == 2) { /* sparse rows */ 7546 for (i = 0; i < mr; i++) { 7547 const PetscInt *jj = mm->j + ii[i]; 7548 const PetscInt gr = rmap[i]; 7549 const PetscInt nz = ii[i + 1] - ii[i]; 7550 if (gr >= rs && gr < re) { /* local rows */ 7551 for (j = ii[i]; j < ii[i + 1]; j++) *coi++ = gr; 7552 if (!cmapt[cp]) { /* type-0, already global */ 7553 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7554 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7555 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7556 } else { /* type-2, local to global for sparse columns */ 7557 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7558 } 7559 ncoo_d += nz; 7560 } 7561 } 7562 } 7563 } 7564 if (glob) PetscCall(ISRestoreIndices(glob, &globidx)); 7565 PetscCall(ISDestroy(&glob)); 7566 if (P_oth_l2g) PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g, &P_oth_idx)); 7567 PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g)); 7568 /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to 
this proc */ 7569 PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo * sizeof(PetscScalar), (void **)&mmdata->coo_v)); 7570 7571 /* set block sizes */ 7572 A = product->A; 7573 P = product->B; 7574 switch (ptype) { 7575 case MATPRODUCT_PtAP: 7576 PetscCall(MatSetBlockSizes(C, P->cmap->bs, P->cmap->bs)); 7577 break; 7578 case MATPRODUCT_RARt: 7579 PetscCall(MatSetBlockSizes(C, P->rmap->bs, P->rmap->bs)); 7580 break; 7581 case MATPRODUCT_ABC: 7582 PetscCall(MatSetBlockSizesFromMats(C, A, product->C)); 7583 break; 7584 case MATPRODUCT_AB: 7585 PetscCall(MatSetBlockSizesFromMats(C, A, P)); 7586 break; 7587 case MATPRODUCT_AtB: 7588 PetscCall(MatSetBlockSizes(C, A->cmap->bs, P->cmap->bs)); 7589 break; 7590 case MATPRODUCT_ABt: 7591 PetscCall(MatSetBlockSizes(C, A->rmap->bs, P->rmap->bs)); 7592 break; 7593 default: 7594 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for ProductType %s", MatProductTypes[ptype]); 7595 } 7596 7597 /* preallocate with COO data */ 7598 PetscCall(MatSetPreallocationCOO(C, ncoo, coo_i, coo_j)); 7599 PetscCall(PetscFree2(coo_i, coo_j)); 7600 PetscFunctionReturn(PETSC_SUCCESS); 7601 } 7602 7603 PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat) 7604 { 7605 Mat_Product *product = mat->product; 7606 #if defined(PETSC_HAVE_DEVICE) 7607 PetscBool match = PETSC_FALSE; 7608 PetscBool usecpu = PETSC_FALSE; 7609 #else 7610 PetscBool match = PETSC_TRUE; 7611 #endif 7612 7613 PetscFunctionBegin; 7614 MatCheckProduct(mat, 1); 7615 #if defined(PETSC_HAVE_DEVICE) 7616 if (!product->A->boundtocpu && !product->B->boundtocpu) PetscCall(PetscObjectTypeCompare((PetscObject)product->B, ((PetscObject)product->A)->type_name, &match)); 7617 if (match) { /* we can always fallback to the CPU if requested */ 7618 switch (product->type) { 7619 case MATPRODUCT_AB: 7620 if (product->api_user) { 7621 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatMatMult", "Mat"); 7622 
PetscCall(PetscOptionsBool("-matmatmult_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL)); 7623 PetscOptionsEnd(); 7624 } else { 7625 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AB", "Mat"); 7626 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL)); 7627 PetscOptionsEnd(); 7628 } 7629 break; 7630 case MATPRODUCT_AtB: 7631 if (product->api_user) { 7632 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatTransposeMatMult", "Mat"); 7633 PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL)); 7634 PetscOptionsEnd(); 7635 } else { 7636 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AtB", "Mat"); 7637 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL)); 7638 PetscOptionsEnd(); 7639 } 7640 break; 7641 case MATPRODUCT_PtAP: 7642 if (product->api_user) { 7643 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatPtAP", "Mat"); 7644 PetscCall(PetscOptionsBool("-matptap_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL)); 7645 PetscOptionsEnd(); 7646 } else { 7647 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_PtAP", "Mat"); 7648 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL)); 7649 PetscOptionsEnd(); 7650 } 7651 break; 7652 default: 7653 break; 7654 } 7655 match = (PetscBool)!usecpu; 7656 } 7657 #endif 7658 if (match) { 7659 switch (product->type) { 7660 case MATPRODUCT_AB: 7661 case MATPRODUCT_AtB: 7662 case MATPRODUCT_PtAP: 7663 mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND; 7664 break; 7665 default: 7666 break; 7667 } 7668 } 7669 /* fallback to 
MPIAIJ ops */ 7670 if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat)); 7671 PetscFunctionReturn(PETSC_SUCCESS); 7672 } 7673 7674 /* 7675 Produces a set of block column indices of the matrix row, one for each block represented in the original row 7676 7677 n - the number of block indices in cc[] 7678 cc - the block indices (must be large enough to contain the indices) 7679 */ 7680 static inline PetscErrorCode MatCollapseRow(Mat Amat, PetscInt row, PetscInt bs, PetscInt *n, PetscInt *cc) 7681 { 7682 PetscInt cnt = -1, nidx, j; 7683 const PetscInt *idx; 7684 7685 PetscFunctionBegin; 7686 PetscCall(MatGetRow(Amat, row, &nidx, &idx, NULL)); 7687 if (nidx) { 7688 cnt = 0; 7689 cc[cnt] = idx[0] / bs; 7690 for (j = 1; j < nidx; j++) { 7691 if (cc[cnt] < idx[j] / bs) cc[++cnt] = idx[j] / bs; 7692 } 7693 } 7694 PetscCall(MatRestoreRow(Amat, row, &nidx, &idx, NULL)); 7695 *n = cnt + 1; 7696 PetscFunctionReturn(PETSC_SUCCESS); 7697 } 7698 7699 /* 7700 Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows 7701 7702 ncollapsed - the number of block indices 7703 collapsed - the block indices (must be large enough to contain the indices) 7704 */ 7705 static inline PetscErrorCode MatCollapseRows(Mat Amat, PetscInt start, PetscInt bs, PetscInt *w0, PetscInt *w1, PetscInt *w2, PetscInt *ncollapsed, PetscInt **collapsed) 7706 { 7707 PetscInt i, nprev, *cprev = w0, ncur = 0, *ccur = w1, *merged = w2, *cprevtmp; 7708 7709 PetscFunctionBegin; 7710 PetscCall(MatCollapseRow(Amat, start, bs, &nprev, cprev)); 7711 for (i = start + 1; i < start + bs; i++) { 7712 PetscCall(MatCollapseRow(Amat, i, bs, &ncur, ccur)); 7713 PetscCall(PetscMergeIntArray(nprev, cprev, ncur, ccur, &nprev, &merged)); 7714 cprevtmp = cprev; 7715 cprev = merged; 7716 merged = cprevtmp; 7717 } 7718 *ncollapsed = nprev; 7719 if (collapsed) *collapsed = cprev; 7720 PetscFunctionReturn(PETSC_SUCCESS); 7721 } 7722 7723 /* 7724 
MatCreateGraph_Simple_AIJ - create simple scalar matrix (graph) from potentially blocked matrix 7725 7726 Input Parameter: 7727 . Amat - matrix 7728 - symmetrize - make the result symmetric 7729 + scale - scale with diagonal 7730 7731 Output Parameter: 7732 . a_Gmat - output scalar graph >= 0 7733 7734 */ 7735 PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, PetscReal filter, PetscInt index_size, PetscInt index[], Mat *a_Gmat) 7736 { 7737 PetscInt Istart, Iend, Ii, jj, kk, ncols, nloc, NN, MM, bs; 7738 MPI_Comm comm; 7739 Mat Gmat; 7740 PetscBool ismpiaij, isseqaij; 7741 Mat a, b, c; 7742 MatType jtype; 7743 7744 PetscFunctionBegin; 7745 PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm)); 7746 PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend)); 7747 PetscCall(MatGetSize(Amat, &MM, &NN)); 7748 PetscCall(MatGetBlockSize(Amat, &bs)); 7749 nloc = (Iend - Istart) / bs; 7750 7751 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATSEQAIJ, &isseqaij)); 7752 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATMPIAIJ, &ismpiaij)); 7753 PetscCheck(isseqaij || ismpiaij, comm, PETSC_ERR_USER, "Require (MPI)AIJ matrix type"); 7754 7755 /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */ 7756 /* A solution consists in providing a new API, MatAIJGetCollapsedAIJ, and each class can provide a fast 7757 implementation */ 7758 if (bs > 1) { 7759 PetscCall(MatGetType(Amat, &jtype)); 7760 PetscCall(MatCreate(comm, &Gmat)); 7761 PetscCall(MatSetType(Gmat, jtype)); 7762 PetscCall(MatSetSizes(Gmat, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE)); 7763 PetscCall(MatSetBlockSizes(Gmat, 1, 1)); 7764 if (isseqaij || ((Mat_MPIAIJ *)Amat->data)->garray) { 7765 PetscInt *d_nnz, *o_nnz; 7766 MatScalar *aa, val, *AA; 7767 PetscInt *aj, *ai, *AJ, nc, nmax = 0; 7768 7769 if (isseqaij) { 7770 a = Amat; 7771 b = NULL; 7772 } else { 7773 Mat_MPIAIJ *d = (Mat_MPIAIJ *)Amat->data; 
7774 a = d->A; 7775 b = d->B; 7776 } 7777 PetscCall(PetscInfo(Amat, "New bs>1 Graph. nloc=%" PetscInt_FMT "\n", nloc)); 7778 PetscCall(PetscMalloc2(nloc, &d_nnz, (isseqaij ? 0 : nloc), &o_nnz)); 7779 for (c = a, kk = 0; c && kk < 2; c = b, kk++) { 7780 PetscInt *nnz = (c == a) ? d_nnz : o_nnz; 7781 const PetscInt *cols1, *cols2; 7782 7783 for (PetscInt brow = 0, nc1, nc2, ok = 1; brow < nloc * bs; brow += bs) { // block rows 7784 PetscCall(MatGetRow(c, brow, &nc2, &cols2, NULL)); 7785 nnz[brow / bs] = nc2 / bs; 7786 if (nc2 % bs) ok = 0; 7787 if (nnz[brow / bs] > nmax) nmax = nnz[brow / bs]; 7788 for (PetscInt ii = 1; ii < bs; ii++) { // check for non-dense blocks 7789 PetscCall(MatGetRow(c, brow + ii, &nc1, &cols1, NULL)); 7790 if (nc1 != nc2) ok = 0; 7791 else { 7792 for (PetscInt jj = 0; jj < nc1 && ok == 1; jj++) { 7793 if (cols1[jj] != cols2[jj]) ok = 0; 7794 if (cols1[jj] % bs != jj % bs) ok = 0; 7795 } 7796 } 7797 PetscCall(MatRestoreRow(c, brow + ii, &nc1, &cols1, NULL)); 7798 } 7799 PetscCall(MatRestoreRow(c, brow, &nc2, &cols2, NULL)); 7800 if (!ok) { 7801 PetscCall(PetscFree2(d_nnz, o_nnz)); 7802 PetscCall(PetscInfo(Amat, "Found sparse blocks - revert to slow method\n")); 7803 goto old_bs; 7804 } 7805 } 7806 } 7807 PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz)); 7808 PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz)); 7809 PetscCall(PetscFree2(d_nnz, o_nnz)); 7810 PetscCall(PetscMalloc2(nmax, &AA, nmax, &AJ)); 7811 // diag 7812 for (PetscInt brow = 0, n, grow; brow < nloc * bs; brow += bs) { // block rows 7813 Mat_SeqAIJ *aseq = (Mat_SeqAIJ *)a->data; 7814 7815 ai = aseq->i; 7816 n = ai[brow + 1] - ai[brow]; 7817 aj = aseq->j + ai[brow]; 7818 for (PetscInt k = 0; k < n; k += bs) { // block columns 7819 AJ[k / bs] = aj[k] / bs + Istart / bs; // diag starts at (Istart,Istart) 7820 val = 0; 7821 if (index_size == 0) { 7822 for (PetscInt ii = 0; ii < bs; ii++) { // rows in block 7823 aa = aseq->a + ai[brow + ii] + k; 7824 for (PetscInt jj = 
0; jj < bs; jj++) { // columns in block 7825 val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm 7826 } 7827 } 7828 } else { // use (index,index) value if provided 7829 for (PetscInt iii = 0; iii < index_size; iii++) { // rows in block 7830 PetscInt ii = index[iii]; 7831 aa = aseq->a + ai[brow + ii] + k; 7832 for (PetscInt jjj = 0; jjj < index_size; jjj++) { // columns in block 7833 PetscInt jj = index[jjj]; 7834 val += PetscAbs(PetscRealPart(aa[jj])); 7835 } 7836 } 7837 } 7838 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%" PetscInt_FMT ") >= nmax (%" PetscInt_FMT ")", k / bs, nmax); 7839 AA[k / bs] = val; 7840 } 7841 grow = Istart / bs + brow / bs; 7842 PetscCall(MatSetValues(Gmat, 1, &grow, n / bs, AJ, AA, ADD_VALUES)); 7843 } 7844 // off-diag 7845 if (ismpiaij) { 7846 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Amat->data; 7847 const PetscScalar *vals; 7848 const PetscInt *cols, *garray = aij->garray; 7849 7850 PetscCheck(garray, PETSC_COMM_SELF, PETSC_ERR_USER, "No garray ?"); 7851 for (PetscInt brow = 0, grow; brow < nloc * bs; brow += bs) { // block rows 7852 PetscCall(MatGetRow(b, brow, &ncols, &cols, NULL)); 7853 for (PetscInt k = 0, cidx = 0; k < ncols; k += bs, cidx++) { 7854 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs >= nmax"); 7855 AA[k / bs] = 0; 7856 AJ[cidx] = garray[cols[k]] / bs; 7857 } 7858 nc = ncols / bs; 7859 PetscCall(MatRestoreRow(b, brow, &ncols, &cols, NULL)); 7860 if (index_size == 0) { 7861 for (PetscInt ii = 0; ii < bs; ii++) { // rows in block 7862 PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals)); 7863 for (PetscInt k = 0; k < ncols; k += bs) { 7864 for (PetscInt jj = 0; jj < bs; jj++) { // cols in block 7865 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%" PetscInt_FMT ") >= nmax (%" PetscInt_FMT ")", k / bs, nmax); 7866 AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj])); 7867 } 7868 } 7869 PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals)); 7870 } 7871 } else { // use (index,index) 
value if provided 7872 for (PetscInt iii = 0; iii < index_size; iii++) { // rows in block 7873 PetscInt ii = index[iii]; 7874 PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals)); 7875 for (PetscInt k = 0; k < ncols; k += bs) { 7876 for (PetscInt jjj = 0; jjj < index_size; jjj++) { // cols in block 7877 PetscInt jj = index[jjj]; 7878 AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj])); 7879 } 7880 } 7881 PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals)); 7882 } 7883 } 7884 grow = Istart / bs + brow / bs; 7885 PetscCall(MatSetValues(Gmat, 1, &grow, nc, AJ, AA, ADD_VALUES)); 7886 } 7887 } 7888 PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY)); 7889 PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY)); 7890 PetscCall(PetscFree2(AA, AJ)); 7891 } else { 7892 const PetscScalar *vals; 7893 const PetscInt *idx; 7894 PetscInt *d_nnz, *o_nnz, *w0, *w1, *w2; 7895 old_bs: 7896 /* 7897 Determine the preallocation needed for the scalar matrix derived from the vector matrix. 7898 */ 7899 PetscCall(PetscInfo(Amat, "OLD bs>1 CreateGraph\n")); 7900 PetscCall(PetscMalloc2(nloc, &d_nnz, (isseqaij ? 
0 : nloc), &o_nnz)); 7901 if (isseqaij) { 7902 PetscInt max_d_nnz; 7903 7904 /* 7905 Determine exact preallocation count for (sequential) scalar matrix 7906 */ 7907 PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat, &max_d_nnz)); 7908 max_d_nnz = PetscMin(nloc, bs * max_d_nnz); 7909 PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2)); 7910 for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) PetscCall(MatCollapseRows(Amat, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL)); 7911 PetscCall(PetscFree3(w0, w1, w2)); 7912 } else if (ismpiaij) { 7913 Mat Daij, Oaij; 7914 const PetscInt *garray; 7915 PetscInt max_d_nnz; 7916 7917 PetscCall(MatMPIAIJGetSeqAIJ(Amat, &Daij, &Oaij, &garray)); 7918 /* 7919 Determine exact preallocation count for diagonal block portion of scalar matrix 7920 */ 7921 PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij, &max_d_nnz)); 7922 max_d_nnz = PetscMin(nloc, bs * max_d_nnz); 7923 PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2)); 7924 for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) PetscCall(MatCollapseRows(Daij, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL)); 7925 PetscCall(PetscFree3(w0, w1, w2)); 7926 /* 7927 Over estimate (usually grossly over), preallocation count for off-diagonal portion of scalar matrix 7928 */ 7929 for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) { 7930 o_nnz[jj] = 0; 7931 for (kk = 0; kk < bs; kk++) { /* rows that get collapsed to a single row */ 7932 PetscCall(MatGetRow(Oaij, Ii + kk, &ncols, NULL, NULL)); 7933 o_nnz[jj] += ncols; 7934 PetscCall(MatRestoreRow(Oaij, Ii + kk, &ncols, NULL, NULL)); 7935 } 7936 if (o_nnz[jj] > (NN / bs - nloc)) o_nnz[jj] = NN / bs - nloc; 7937 } 7938 } else SETERRQ(comm, PETSC_ERR_USER, "Require AIJ matrix type"); 7939 /* get scalar copy (norms) of matrix */ 7940 PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz)); 7941 PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz)); 7942 PetscCall(PetscFree2(d_nnz, o_nnz)); 7943 for (Ii = Istart; Ii < Iend; Ii++) { 
7944 PetscInt dest_row = Ii / bs; 7945 7946 PetscCall(MatGetRow(Amat, Ii, &ncols, &idx, &vals)); 7947 for (jj = 0; jj < ncols; jj++) { 7948 PetscInt dest_col = idx[jj] / bs; 7949 PetscScalar sv = PetscAbs(PetscRealPart(vals[jj])); 7950 7951 PetscCall(MatSetValues(Gmat, 1, &dest_row, 1, &dest_col, &sv, ADD_VALUES)); 7952 } 7953 PetscCall(MatRestoreRow(Amat, Ii, &ncols, &idx, &vals)); 7954 } 7955 PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY)); 7956 PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY)); 7957 } 7958 } else { 7959 if (symmetrize || filter >= 0 || scale) PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat)); 7960 else { 7961 Gmat = Amat; 7962 PetscCall(PetscObjectReference((PetscObject)Gmat)); 7963 } 7964 if (isseqaij) { 7965 a = Gmat; 7966 b = NULL; 7967 } else { 7968 Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data; 7969 a = d->A; 7970 b = d->B; 7971 } 7972 if (filter >= 0 || scale) { 7973 /* take absolute value of each entry */ 7974 for (c = a, kk = 0; c && kk < 2; c = b, kk++) { 7975 MatInfo info; 7976 PetscScalar *avals; 7977 7978 PetscCall(MatGetInfo(c, MAT_LOCAL, &info)); 7979 PetscCall(MatSeqAIJGetArray(c, &avals)); 7980 for (int jj = 0; jj < info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]); 7981 PetscCall(MatSeqAIJRestoreArray(c, &avals)); 7982 } 7983 } 7984 } 7985 if (symmetrize) { 7986 PetscBool isset, issym; 7987 7988 PetscCall(MatIsSymmetricKnown(Amat, &isset, &issym)); 7989 if (!isset || !issym) { 7990 Mat matTrans; 7991 7992 PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans)); 7993 PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric == PETSC_BOOL3_TRUE ? 
SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN));
      PetscCall(MatDestroy(&matTrans));
    }
    PetscCall(MatSetOption(Gmat, MAT_SYMMETRIC, PETSC_TRUE));
  } else if (Amat != Gmat) PetscCall(MatPropagateSymmetryOptions(Amat, Gmat));
  if (scale) {
    /* scale c for all diagonal values = 1 or -1 */
    Vec diag;

    PetscCall(MatCreateVecs(Gmat, &diag, NULL));
    PetscCall(MatGetDiagonal(Gmat, diag));
    PetscCall(VecReciprocal(diag));
    PetscCall(VecSqrtAbs(diag));
    /* symmetric scaling G <- D^{-1/2} G D^{-1/2} using the vector built above */
    PetscCall(MatDiagonalScale(Gmat, diag, diag));
    PetscCall(VecDestroy(&diag));
  }
  PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view"));
  if (filter >= 0) {
    /* drop small entries from the graph; NOTE(review): confirm the two PETSC_TRUE flags against the MatFilter() signature */
    PetscCall(MatFilter(Gmat, filter, PETSC_TRUE, PETSC_TRUE));
    PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_filter_graph_view"));
  }
  *a_Gmat = Gmat;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatGetCurrentMemType_MPIAIJ - Report the memory type of an MPIAIJ matrix as the
  common memtype of its diagonal (A) and off-diagonal (B) sequential blocks.

  If the two blocks report different memtypes, or a block is absent, the result
  falls back to PETSC_MEMTYPE_HOST (the initial value of mD/mO).
*/
PETSC_INTERN PetscErrorCode MatGetCurrentMemType_MPIAIJ(Mat A, PetscMemType *memtype)
{
  Mat_MPIAIJ  *mpiaij = (Mat_MPIAIJ *)A->data;
  PetscMemType mD = PETSC_MEMTYPE_HOST, mO = PETSC_MEMTYPE_HOST;

  PetscFunctionBegin;
  if (mpiaij->A) PetscCall(MatGetCurrentMemType(mpiaij->A, &mD));
  if (mpiaij->B) PetscCall(MatGetCurrentMemType(mpiaij->B, &mO));
  /* disagreement between the blocks is resolved conservatively to host memory */
  *memtype = (mD == mO) ? mD : PETSC_MEMTYPE_HOST;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
    Special version for direct calls from Fortran
*/

/* Change these macros so can be used in void function */
/* Identical to PetscCallVoid, except it assigns to *_ierr */
#undef PetscCall
#define PetscCall(...) \
  do { \
    PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \
    if (PetscUnlikely(ierr_msv_mpiaij)) { \
      *_ierr = PetscError(PETSC_COMM_SELF, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr_msv_mpiaij, PETSC_ERROR_REPEAT, " "); \
      return; \
    } \
  } while (0)

#undef SETERRQ
#define SETERRQ(comm, ierr, ...)
\
  do { \
    *_ierr = PetscError(comm, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr, PETSC_ERROR_INITIAL, __VA_ARGS__); \
    return; \
  } while (0)

/* Fortran name mangling for the direct-call entry point below */
#if defined(PETSC_HAVE_FORTRAN_CAPS)
  #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
  #define matsetvaluesmpiaij_ matsetvaluesmpiaij
#else
#endif

/*
  matsetvaluesmpiaij_ - Fortran-callable fast path for MatSetValues() on MPIAIJ
  matrices. All scalar arguments arrive by reference (Fortran convention); errors
  are reported through *_ierr via the PetscCall/SETERRQ macros redefined above,
  which record the error and return from this void function.

  Values for locally owned rows are inserted directly into the diagonal (A) or
  off-diagonal (B) sequential block via the MatSetValues_SeqAIJ_{A,B}_Private()
  macros; values for off-process rows are stashed for communication at assembly.
*/
PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat, PetscInt *mm, const PetscInt im[], PetscInt *mn, const PetscInt in[], const PetscScalar v[], InsertMode *maddv, PetscErrorCode *_ierr)
{
  Mat         mat  = *mmat;
  PetscInt    m = *mm, n = *mn;
  InsertMode  addv = *maddv;
  Mat_MPIAIJ *aij  = (Mat_MPIAIJ *)mat->data;
  PetscScalar value;

  MatCheckPreallocated(mat, 1);
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
  else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values");
  {
    PetscInt  i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
    PetscInt  cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
    PetscBool roworiented = aij->roworiented;

    /* Some Variables required in the macro */
    Mat         A = aij->A;
    Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data;
    PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
    MatScalar  *aa;
    PetscBool   ignorezeroentries = ((a->ignorezeroentries && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
    Mat         B = aij->B;
    Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data;
    PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
    MatScalar  *ba;
    /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
     * cannot use "#if defined" inside a macro. */
    PETSC_UNUSED PetscBool inserted = PETSC_FALSE;

    /* scratch state read/written by the MatSetValues_SeqAIJ_{A,B}_Private() macros;
       the exact names (rp1/ap1/low1/... and rp2/ap2/low2/...) are required by those macros */
    PetscInt  *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
    PetscInt   nonew = a->nonew;
    MatScalar *ap1, *ap2;

    PetscFunctionBegin;
    PetscCall(MatSeqAIJGetArray(A, &aa));
    PetscCall(MatSeqAIJGetArray(B, &ba));
    for (i = 0; i < m; i++) {
      if (im[i] < 0) continue; /* negative rows are silently skipped (standard MatSetValues semantics) */
      PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
      if (im[i] >= rstart && im[i] < rend) {
        /* locally owned row: set up binary-search state for both sequential blocks */
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j = 0; j < n; j++) {
          /* v is read row-major or column-major depending on the matrix's roworiented option */
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
          if (in[j] >= cstart && in[j] < cend) {
            /* column lies in the locally owned range: goes into the diagonal block A */
            col = in[j] - cstart;
            MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
          } else if (in[j] < 0) continue;
          else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
            SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          } else {
            /* off-diagonal column: translate global column to local B column via colmap */
            if (mat->was_assembled) {
              if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
              PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col));
              col--; /* colmap stores index+1 so that 0 means "not present" */
#else
              col = aij->colmap[in[j]] - 1;
#endif
              if (col < 0 && !((Mat_SeqAIJ *)aij->A->data)->nonew) {
                /* column not in B's current pattern and new nonzeros are allowed:
                   disassemble so B uses global column indices again */
                PetscCall(MatDisAssemble_MPIAIJ(mat, PETSC_FALSE));
                col = in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
                B        = aij->B;
                b        = (Mat_SeqAIJ *)B->data;
                bimax    = b->imax;
                bi       = b->i;
                bilen    = b->ilen;
                bj       = b->j;
                rp2      = bj + bi[row];
                ap2      = ba + bi[row];
                rmax2    = bimax[row];
                nrow2    = bilen[row];
                low2     = 0;
                high2    = nrow2;
                bm       = aij->B->rmap->n;
                ba       = b->a;
                inserted = PETSC_FALSE;
              }
            } else col = in[j];
            MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
          }
        }
      } else if (!aij->donotstash) {
        /* off-process row: stash the values for exchange during MatAssemblyBegin/End */
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
    PetscCall(MatSeqAIJRestoreArray(A, &aa));
    PetscCall(MatSeqAIJRestoreArray(B, &ba));
  }
  PetscFunctionReturnVoid();
}

/* Undefining these here since they were redefined from their original definition above! No
 * other PETSc functions should be defined past this point, as it is impossible to recover the
 * original definitions */
#undef PetscCall
#undef SETERRQ