1 #include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/ 2 #include <petsc/private/vecimpl.h> 3 #include <petsc/private/sfimpl.h> 4 #include <petsc/private/isimpl.h> 5 #include <petscblaslapack.h> 6 #include <petscsf.h> 7 #include <petsc/private/hashmapi.h> 8 9 /* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and MatAssemblyEnd_MPI_Hash() */ 10 #define TYPE AIJ 11 #define TYPE_AIJ 12 #include "../src/mat/impls/aij/mpi/mpihashmat.h" 13 #undef TYPE 14 #undef TYPE_AIJ 15 16 static PetscErrorCode MatReset_MPIAIJ(Mat mat) 17 { 18 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 19 20 PetscFunctionBegin; 21 PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N)); 22 PetscCall(MatStashDestroy_Private(&mat->stash)); 23 PetscCall(VecDestroy(&aij->diag)); 24 PetscCall(MatDestroy(&aij->A)); 25 PetscCall(MatDestroy(&aij->B)); 26 #if defined(PETSC_USE_CTABLE) 27 PetscCall(PetscHMapIDestroy(&aij->colmap)); 28 #else 29 PetscCall(PetscFree(aij->colmap)); 30 #endif 31 PetscCall(PetscFree(aij->garray)); 32 PetscCall(VecDestroy(&aij->lvec)); 33 PetscCall(VecScatterDestroy(&aij->Mvctx)); 34 PetscCall(PetscFree2(aij->rowvalues, aij->rowindices)); 35 PetscCall(PetscFree(aij->ld)); 36 PetscFunctionReturn(PETSC_SUCCESS); 37 } 38 39 static PetscErrorCode MatResetHash_MPIAIJ(Mat mat) 40 { 41 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 42 /* Save the nonzero states of the component matrices because those are what are used to determine 43 the nonzero state of mat */ 44 PetscObjectState Astate = aij->A->nonzerostate, Bstate = aij->B->nonzerostate; 45 46 PetscFunctionBegin; 47 PetscCall(MatReset_MPIAIJ(mat)); 48 PetscCall(MatSetUp_MPI_Hash(mat)); 49 aij->A->nonzerostate = ++Astate, aij->B->nonzerostate = ++Bstate; 50 PetscFunctionReturn(PETSC_SUCCESS); 51 } 52 53 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) 54 { 55 PetscFunctionBegin; 56 PetscCall(MatReset_MPIAIJ(mat)); 57 58 PetscCall(PetscFree(mat->data)); 59 
60 /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */ 61 PetscCall(PetscObjectCompose((PetscObject)mat, "MatMergeSeqsToMPI", NULL)); 62 63 PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL)); 64 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL)); 65 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL)); 66 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatIsTranspose_C", NULL)); 67 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocation_C", NULL)); 68 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetPreallocation_C", NULL)); 69 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetHash_C", NULL)); 70 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocationCSR_C", NULL)); 71 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL)); 72 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpibaij_C", NULL)); 73 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisbaij_C", NULL)); 74 #if defined(PETSC_HAVE_CUDA) 75 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcusparse_C", NULL)); 76 #endif 77 #if defined(PETSC_HAVE_HIP) 78 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijhipsparse_C", NULL)); 79 #endif 80 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 81 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijkokkos_C", NULL)); 82 #endif 83 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL)); 84 #if defined(PETSC_HAVE_ELEMENTAL) 85 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_elemental_C", NULL)); 86 #endif 87 #if defined(PETSC_HAVE_SCALAPACK) 88 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_scalapack_C", NULL)); 89 #endif 90 #if defined(PETSC_HAVE_HYPRE) 
91 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_hypre_C", NULL)); 92 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", NULL)); 93 #endif 94 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL)); 95 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_is_mpiaij_C", NULL)); 96 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpiaij_C", NULL)); 97 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetUseScalableIncreaseOverlap_C", NULL)); 98 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijperm_C", NULL)); 99 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijsell_C", NULL)); 100 #if defined(PETSC_HAVE_MKL_SPARSE) 101 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijmkl_C", NULL)); 102 #endif 103 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcrl_C", NULL)); 104 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL)); 105 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisell_C", NULL)); 106 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetPreallocationCOO_C", NULL)); 107 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetValuesCOO_C", NULL)); 108 PetscFunctionReturn(PETSC_SUCCESS); 109 } 110 111 static PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) 112 { 113 Mat B; 114 115 PetscFunctionBegin; 116 PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &B)); 117 PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject)B)); 118 PetscCall(MatGetRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done)); 119 
PetscCall(MatDestroy(&B)); 120 PetscFunctionReturn(PETSC_SUCCESS); 121 } 122 123 static PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) 124 { 125 Mat B; 126 127 PetscFunctionBegin; 128 PetscCall(PetscObjectQuery((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject *)&B)); 129 PetscCall(MatRestoreRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done)); 130 PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", NULL)); 131 PetscFunctionReturn(PETSC_SUCCESS); 132 } 133 134 /*MC 135 MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices. 136 137 This matrix type is identical to` MATSEQAIJ` when constructed with a single process communicator, 138 and `MATMPIAIJ` otherwise. As a result, for single process communicators, 139 `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported 140 for communicators controlling multiple processes. It is recommended that you call both of 141 the above preallocation routines for simplicity. 142 143 Options Database Key: 144 . -mat_type aij - sets the matrix type to `MATAIJ` during a call to `MatSetFromOptions()` 145 146 Developer Note: 147 Level: beginner 148 149 Subclasses include `MATAIJCUSPARSE`, `MATAIJPERM`, `MATAIJSELL`, `MATAIJMKL`, `MATAIJCRL`, `MATAIJKOKKOS`,and also automatically switches over to use inodes when 150 enough exist. 151 152 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateAIJ()`, `MatCreateSeqAIJ()`, `MATSEQAIJ`, `MATMPIAIJ` 153 M*/ 154 155 /*MC 156 MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices. 157 158 This matrix type is identical to `MATSEQAIJCRL` when constructed with a single process communicator, 159 and `MATMPIAIJCRL` otherwise. 
As a result, for single process communicators, 160 `MatSeqAIJSetPreallocation()` is supported, and similarly `MatMPIAIJSetPreallocation()` is supported 161 for communicators controlling multiple processes. It is recommended that you call both of 162 the above preallocation routines for simplicity. 163 164 Options Database Key: 165 . -mat_type aijcrl - sets the matrix type to `MATMPIAIJCRL` during a call to `MatSetFromOptions()` 166 167 Level: beginner 168 169 .seealso: [](ch_matrices), `Mat`, `MatCreateMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL` 170 M*/ 171 172 static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A, PetscBool flg) 173 { 174 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 175 176 PetscFunctionBegin; 177 #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP) || defined(PETSC_HAVE_VIENNACL) 178 A->boundtocpu = flg; 179 #endif 180 if (a->A) PetscCall(MatBindToCPU(a->A, flg)); 181 if (a->B) PetscCall(MatBindToCPU(a->B, flg)); 182 183 /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products. 184 * This maybe seems a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors 185 * to differ from the parent matrix. 
*/ 186 if (a->lvec) PetscCall(VecBindToCPU(a->lvec, flg)); 187 if (a->diag) PetscCall(VecBindToCPU(a->diag, flg)); 188 PetscFunctionReturn(PETSC_SUCCESS); 189 } 190 191 static PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs) 192 { 193 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data; 194 195 PetscFunctionBegin; 196 if (mat->A) { 197 PetscCall(MatSetBlockSizes(mat->A, rbs, cbs)); 198 PetscCall(MatSetBlockSizes(mat->B, rbs, 1)); 199 } 200 PetscFunctionReturn(PETSC_SUCCESS); 201 } 202 203 static PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M, IS *keptrows) 204 { 205 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data; 206 Mat_SeqAIJ *a = (Mat_SeqAIJ *)mat->A->data; 207 Mat_SeqAIJ *b = (Mat_SeqAIJ *)mat->B->data; 208 const PetscInt *ia, *ib; 209 const MatScalar *aa, *bb, *aav, *bav; 210 PetscInt na, nb, i, j, *rows, cnt = 0, n0rows; 211 PetscInt m = M->rmap->n, rstart = M->rmap->rstart; 212 213 PetscFunctionBegin; 214 *keptrows = NULL; 215 216 ia = a->i; 217 ib = b->i; 218 PetscCall(MatSeqAIJGetArrayRead(mat->A, &aav)); 219 PetscCall(MatSeqAIJGetArrayRead(mat->B, &bav)); 220 for (i = 0; i < m; i++) { 221 na = ia[i + 1] - ia[i]; 222 nb = ib[i + 1] - ib[i]; 223 if (!na && !nb) { 224 cnt++; 225 goto ok1; 226 } 227 aa = aav + ia[i]; 228 for (j = 0; j < na; j++) { 229 if (aa[j] != 0.0) goto ok1; 230 } 231 bb = PetscSafePointerPlusOffset(bav, ib[i]); 232 for (j = 0; j < nb; j++) { 233 if (bb[j] != 0.0) goto ok1; 234 } 235 cnt++; 236 ok1:; 237 } 238 PetscCallMPI(MPIU_Allreduce(&cnt, &n0rows, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)M))); 239 if (!n0rows) { 240 PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav)); 241 PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav)); 242 PetscFunctionReturn(PETSC_SUCCESS); 243 } 244 PetscCall(PetscMalloc1(M->rmap->n - cnt, &rows)); 245 cnt = 0; 246 for (i = 0; i < m; i++) { 247 na = ia[i + 1] - ia[i]; 248 nb = ib[i + 1] - ib[i]; 249 if (!na && !nb) continue; 250 aa = aav + ia[i]; 251 for (j = 0; j < na; j++) { 252 if (aa[j] != 
0.0) { 253 rows[cnt++] = rstart + i; 254 goto ok2; 255 } 256 } 257 bb = PetscSafePointerPlusOffset(bav, ib[i]); 258 for (j = 0; j < nb; j++) { 259 if (bb[j] != 0.0) { 260 rows[cnt++] = rstart + i; 261 goto ok2; 262 } 263 } 264 ok2:; 265 } 266 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), cnt, rows, PETSC_OWN_POINTER, keptrows)); 267 PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav)); 268 PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav)); 269 PetscFunctionReturn(PETSC_SUCCESS); 270 } 271 272 static PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y, Vec D, InsertMode is) 273 { 274 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Y->data; 275 PetscBool cong; 276 277 PetscFunctionBegin; 278 PetscCall(MatHasCongruentLayouts(Y, &cong)); 279 if (Y->assembled && cong) { 280 PetscCall(MatDiagonalSet(aij->A, D, is)); 281 } else { 282 PetscCall(MatDiagonalSet_Default(Y, D, is)); 283 } 284 PetscFunctionReturn(PETSC_SUCCESS); 285 } 286 287 static PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M, IS *zrows) 288 { 289 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)M->data; 290 PetscInt i, rstart, nrows, *rows; 291 292 PetscFunctionBegin; 293 *zrows = NULL; 294 PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A, &nrows, &rows)); 295 PetscCall(MatGetOwnershipRange(M, &rstart, NULL)); 296 for (i = 0; i < nrows; i++) rows[i] += rstart; 297 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), nrows, rows, PETSC_OWN_POINTER, zrows)); 298 PetscFunctionReturn(PETSC_SUCCESS); 299 } 300 301 static PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A, PetscInt type, PetscReal *reductions) 302 { 303 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)A->data; 304 PetscInt i, m, n, *garray = aij->garray; 305 Mat_SeqAIJ *a_aij = (Mat_SeqAIJ *)aij->A->data; 306 Mat_SeqAIJ *b_aij = (Mat_SeqAIJ *)aij->B->data; 307 PetscReal *work; 308 const PetscScalar *dummy; 309 310 PetscFunctionBegin; 311 PetscCall(MatGetSize(A, &m, &n)); 312 PetscCall(PetscCalloc1(n, &work)); 313 PetscCall(MatSeqAIJGetArrayRead(aij->A, &dummy)); 314 
PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &dummy)); 315 PetscCall(MatSeqAIJGetArrayRead(aij->B, &dummy)); 316 PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &dummy)); 317 if (type == NORM_2) { 318 for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i] * a_aij->a[i]); 319 for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i] * b_aij->a[i]); 320 } else if (type == NORM_1) { 321 for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); 322 for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); 323 } else if (type == NORM_INFINITY) { 324 for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); 325 for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]), work[garray[b_aij->j[i]]]); 326 } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) { 327 for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]); 328 for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]); 329 } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) { 330 for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]); 331 for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]); 332 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type"); 333 if (type == NORM_INFINITY) { 334 PetscCallMPI(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A))); 335 } else { 336 
PetscCallMPI(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A))); 337 } 338 PetscCall(PetscFree(work)); 339 if (type == NORM_2) { 340 for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]); 341 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 342 for (i = 0; i < n; i++) reductions[i] /= m; 343 } 344 PetscFunctionReturn(PETSC_SUCCESS); 345 } 346 347 static PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A, IS *is) 348 { 349 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 350 IS sis, gis; 351 const PetscInt *isis, *igis; 352 PetscInt n, *iis, nsis, ngis, rstart, i; 353 354 PetscFunctionBegin; 355 PetscCall(MatFindOffBlockDiagonalEntries(a->A, &sis)); 356 PetscCall(MatFindNonzeroRows(a->B, &gis)); 357 PetscCall(ISGetSize(gis, &ngis)); 358 PetscCall(ISGetSize(sis, &nsis)); 359 PetscCall(ISGetIndices(sis, &isis)); 360 PetscCall(ISGetIndices(gis, &igis)); 361 362 PetscCall(PetscMalloc1(ngis + nsis, &iis)); 363 PetscCall(PetscArraycpy(iis, igis, ngis)); 364 PetscCall(PetscArraycpy(iis + ngis, isis, nsis)); 365 n = ngis + nsis; 366 PetscCall(PetscSortRemoveDupsInt(&n, iis)); 367 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 368 for (i = 0; i < n; i++) iis[i] += rstart; 369 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A), n, iis, PETSC_OWN_POINTER, is)); 370 371 PetscCall(ISRestoreIndices(sis, &isis)); 372 PetscCall(ISRestoreIndices(gis, &igis)); 373 PetscCall(ISDestroy(&sis)); 374 PetscCall(ISDestroy(&gis)); 375 PetscFunctionReturn(PETSC_SUCCESS); 376 } 377 378 /* 379 Local utility routine that creates a mapping from the global column 380 number to the local number in the off-diagonal part of the local 381 storage of the matrix. When PETSC_USE_CTABLE is used this is scalable at 382 a slightly higher hash table cost; without it it is not scalable (each processor 383 has an order N integer array but is fast to access. 
384 */ 385 PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) 386 { 387 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 388 PetscInt n = aij->B->cmap->n, i; 389 390 PetscFunctionBegin; 391 PetscCheck(!n || aij->garray, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MPIAIJ Matrix was assembled but is missing garray"); 392 #if defined(PETSC_USE_CTABLE) 393 PetscCall(PetscHMapICreateWithSize(n, &aij->colmap)); 394 for (i = 0; i < n; i++) PetscCall(PetscHMapISet(aij->colmap, aij->garray[i] + 1, i + 1)); 395 #else 396 PetscCall(PetscCalloc1(mat->cmap->N + 1, &aij->colmap)); 397 for (i = 0; i < n; i++) aij->colmap[aij->garray[i]] = i + 1; 398 #endif 399 PetscFunctionReturn(PETSC_SUCCESS); 400 } 401 402 #define MatSetValues_SeqAIJ_A_Private(row, col, value, addv, orow, ocol) \ 403 do { \ 404 if (col <= lastcol1) low1 = 0; \ 405 else high1 = nrow1; \ 406 lastcol1 = col; \ 407 while (high1 - low1 > 5) { \ 408 t = (low1 + high1) / 2; \ 409 if (rp1[t] > col) high1 = t; \ 410 else low1 = t; \ 411 } \ 412 for (_i = low1; _i < high1; _i++) { \ 413 if (rp1[_i] > col) break; \ 414 if (rp1[_i] == col) { \ 415 if (addv == ADD_VALUES) { \ 416 ap1[_i] += value; \ 417 /* Not sure LogFlops will slow dow the code or not */ \ 418 (void)PetscLogFlops(1.0); \ 419 } else ap1[_i] = value; \ 420 goto a_noinsert; \ 421 } \ 422 } \ 423 if (value == 0.0 && ignorezeroentries && row != col) { \ 424 low1 = 0; \ 425 high1 = nrow1; \ 426 goto a_noinsert; \ 427 } \ 428 if (nonew == 1) { \ 429 low1 = 0; \ 430 high1 = nrow1; \ 431 goto a_noinsert; \ 432 } \ 433 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 434 MatSeqXAIJReallocateAIJ(A, am, 1, nrow1, row, col, rmax1, aa, ai, aj, rp1, ap1, aimax, nonew, MatScalar); \ 435 N = nrow1++ - 1; \ 436 a->nz++; \ 437 high1++; \ 438 /* shift up all the later entries in this row */ \ 439 PetscCall(PetscArraymove(rp1 + _i + 1, rp1 + _i, N - _i + 
1)); \ 440 PetscCall(PetscArraymove(ap1 + _i + 1, ap1 + _i, N - _i + 1)); \ 441 rp1[_i] = col; \ 442 ap1[_i] = value; \ 443 a_noinsert:; \ 444 ailen[row] = nrow1; \ 445 } while (0) 446 447 #define MatSetValues_SeqAIJ_B_Private(row, col, value, addv, orow, ocol) \ 448 do { \ 449 if (col <= lastcol2) low2 = 0; \ 450 else high2 = nrow2; \ 451 lastcol2 = col; \ 452 while (high2 - low2 > 5) { \ 453 t = (low2 + high2) / 2; \ 454 if (rp2[t] > col) high2 = t; \ 455 else low2 = t; \ 456 } \ 457 for (_i = low2; _i < high2; _i++) { \ 458 if (rp2[_i] > col) break; \ 459 if (rp2[_i] == col) { \ 460 if (addv == ADD_VALUES) { \ 461 ap2[_i] += value; \ 462 (void)PetscLogFlops(1.0); \ 463 } else ap2[_i] = value; \ 464 goto b_noinsert; \ 465 } \ 466 } \ 467 if (value == 0.0 && ignorezeroentries) { \ 468 low2 = 0; \ 469 high2 = nrow2; \ 470 goto b_noinsert; \ 471 } \ 472 if (nonew == 1) { \ 473 low2 = 0; \ 474 high2 = nrow2; \ 475 goto b_noinsert; \ 476 } \ 477 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 478 MatSeqXAIJReallocateAIJ(B, bm, 1, nrow2, row, col, rmax2, ba, bi, bj, rp2, ap2, bimax, nonew, MatScalar); \ 479 N = nrow2++ - 1; \ 480 b->nz++; \ 481 high2++; \ 482 /* shift up all the later entries in this row */ \ 483 PetscCall(PetscArraymove(rp2 + _i + 1, rp2 + _i, N - _i + 1)); \ 484 PetscCall(PetscArraymove(ap2 + _i + 1, ap2 + _i, N - _i + 1)); \ 485 rp2[_i] = col; \ 486 ap2[_i] = value; \ 487 b_noinsert:; \ 488 bilen[row] = nrow2; \ 489 } while (0) 490 491 static PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A, PetscInt row, const PetscScalar v[]) 492 { 493 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 494 Mat_SeqAIJ *a = (Mat_SeqAIJ *)mat->A->data, *b = (Mat_SeqAIJ *)mat->B->data; 495 PetscInt l, *garray = mat->garray, diag; 496 PetscScalar *aa, *ba; 497 498 PetscFunctionBegin; 499 /* code only works for square matrices A */ 500 501 /* find size 
of row to the left of the diagonal part */ 502 PetscCall(MatGetOwnershipRange(A, &diag, NULL)); 503 row = row - diag; 504 for (l = 0; l < b->i[row + 1] - b->i[row]; l++) { 505 if (garray[b->j[b->i[row] + l]] > diag) break; 506 } 507 if (l) { 508 PetscCall(MatSeqAIJGetArray(mat->B, &ba)); 509 PetscCall(PetscArraycpy(ba + b->i[row], v, l)); 510 PetscCall(MatSeqAIJRestoreArray(mat->B, &ba)); 511 } 512 513 /* diagonal part */ 514 if (a->i[row + 1] - a->i[row]) { 515 PetscCall(MatSeqAIJGetArray(mat->A, &aa)); 516 PetscCall(PetscArraycpy(aa + a->i[row], v + l, a->i[row + 1] - a->i[row])); 517 PetscCall(MatSeqAIJRestoreArray(mat->A, &aa)); 518 } 519 520 /* right of diagonal part */ 521 if (b->i[row + 1] - b->i[row] - l) { 522 PetscCall(MatSeqAIJGetArray(mat->B, &ba)); 523 PetscCall(PetscArraycpy(ba + b->i[row] + l, v + l + a->i[row + 1] - a->i[row], b->i[row + 1] - b->i[row] - l)); 524 PetscCall(MatSeqAIJRestoreArray(mat->B, &ba)); 525 } 526 PetscFunctionReturn(PETSC_SUCCESS); 527 } 528 529 PetscErrorCode MatSetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) 530 { 531 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 532 PetscScalar value = 0.0; 533 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend; 534 PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col; 535 PetscBool roworiented = aij->roworiented; 536 537 /* Some Variables required in the macro */ 538 Mat A = aij->A; 539 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 540 PetscInt *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j; 541 PetscBool ignorezeroentries = a->ignorezeroentries; 542 Mat B = aij->B; 543 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 544 PetscInt *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n; 545 MatScalar *aa, *ba; 546 PetscInt *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2; 547 PetscInt 
nonew; 548 MatScalar *ap1, *ap2; 549 550 PetscFunctionBegin; 551 PetscCall(MatSeqAIJGetArray(A, &aa)); 552 PetscCall(MatSeqAIJGetArray(B, &ba)); 553 for (i = 0; i < m; i++) { 554 if (im[i] < 0) continue; 555 PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1); 556 if (im[i] >= rstart && im[i] < rend) { 557 row = im[i] - rstart; 558 lastcol1 = -1; 559 rp1 = PetscSafePointerPlusOffset(aj, ai[row]); 560 ap1 = PetscSafePointerPlusOffset(aa, ai[row]); 561 rmax1 = aimax[row]; 562 nrow1 = ailen[row]; 563 low1 = 0; 564 high1 = nrow1; 565 lastcol2 = -1; 566 rp2 = PetscSafePointerPlusOffset(bj, bi[row]); 567 ap2 = PetscSafePointerPlusOffset(ba, bi[row]); 568 rmax2 = bimax[row]; 569 nrow2 = bilen[row]; 570 low2 = 0; 571 high2 = nrow2; 572 573 for (j = 0; j < n; j++) { 574 if (v) value = roworiented ? v[i * n + j] : v[i + j * m]; 575 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 576 if (in[j] >= cstart && in[j] < cend) { 577 col = in[j] - cstart; 578 nonew = a->nonew; 579 MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]); 580 } else if (in[j] < 0) { 581 continue; 582 } else { 583 PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1); 584 if (mat->was_assembled) { 585 if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 586 #if defined(PETSC_USE_CTABLE) 587 PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); /* map global col ids to local ones */ 588 col--; 589 #else 590 col = aij->colmap[in[j]] - 1; 591 #endif 592 if (col < 0 && !((Mat_SeqAIJ *)aij->B->data)->nonew) { /* col < 0 means in[j] is a new col for B */ 593 PetscCall(MatDisAssemble_MPIAIJ(mat, PETSC_FALSE)); /* Change aij->B from reduced/local format to expanded/global format */ 594 col = in[j]; 595 /* 
Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 596 B = aij->B; 597 b = (Mat_SeqAIJ *)B->data; 598 bimax = b->imax; 599 bi = b->i; 600 bilen = b->ilen; 601 bj = b->j; 602 ba = b->a; 603 rp2 = PetscSafePointerPlusOffset(bj, bi[row]); 604 ap2 = PetscSafePointerPlusOffset(ba, bi[row]); 605 rmax2 = bimax[row]; 606 nrow2 = bilen[row]; 607 low2 = 0; 608 high2 = nrow2; 609 bm = aij->B->rmap->n; 610 ba = b->a; 611 } else if (col < 0 && !(ignorezeroentries && value == 0.0)) { 612 if (1 == ((Mat_SeqAIJ *)aij->B->data)->nonew) { 613 PetscCall(PetscInfo(mat, "Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n", (double)PetscRealPart(value), im[i], in[j])); 614 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]); 615 } 616 } else col = in[j]; 617 nonew = b->nonew; 618 MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]); 619 } 620 } 621 } else { 622 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]); 623 if (!aij->donotstash) { 624 mat->assembled = PETSC_FALSE; 625 if (roworiented) { 626 PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i * n), (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 627 } else { 628 PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, PetscSafePointerPlusOffset(v, i), m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 629 } 630 } 631 } 632 } 633 PetscCall(MatSeqAIJRestoreArray(A, &aa)); /* aa, bb might have been free'd due to reallocation above. 
But we don't access them here */ 634 PetscCall(MatSeqAIJRestoreArray(B, &ba)); 635 PetscFunctionReturn(PETSC_SUCCESS); 636 } 637 638 /* 639 This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix. 640 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 641 No off-processor parts off the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE. 642 */ 643 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[]) 644 { 645 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 646 Mat A = aij->A; /* diagonal part of the matrix */ 647 Mat B = aij->B; /* off-diagonal part of the matrix */ 648 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 649 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 650 PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, col; 651 PetscInt *ailen = a->ilen, *aj = a->j; 652 PetscInt *bilen = b->ilen, *bj = b->j; 653 PetscInt am = aij->A->rmap->n, j; 654 PetscInt diag_so_far = 0, dnz; 655 PetscInt offd_so_far = 0, onz; 656 657 PetscFunctionBegin; 658 /* Iterate over all rows of the matrix */ 659 for (j = 0; j < am; j++) { 660 dnz = onz = 0; 661 /* Iterate over all non-zero columns of the current row */ 662 for (col = mat_i[j]; col < mat_i[j + 1]; col++) { 663 /* If column is in the diagonal */ 664 if (mat_j[col] >= cstart && mat_j[col] < cend) { 665 aj[diag_so_far++] = mat_j[col] - cstart; 666 dnz++; 667 } else { /* off-diagonal entries */ 668 bj[offd_so_far++] = mat_j[col]; 669 onz++; 670 } 671 } 672 ailen[j] = dnz; 673 bilen[j] = onz; 674 } 675 PetscFunctionReturn(PETSC_SUCCESS); 676 } 677 678 /* 679 This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix. 680 The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like). 
681 No off-processor parts off the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ. 682 Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart; 683 would not be true and the more complex MatSetValues_MPIAIJ has to be used. 684 */ 685 PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[], const PetscScalar mat_a[]) 686 { 687 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 688 Mat A = aij->A; /* diagonal part of the matrix */ 689 Mat B = aij->B; /* off-diagonal part of the matrix */ 690 Mat_SeqAIJ *aijd = (Mat_SeqAIJ *)aij->A->data, *aijo = (Mat_SeqAIJ *)aij->B->data; 691 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 692 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 693 PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend; 694 PetscInt *ailen = a->ilen, *aj = a->j; 695 PetscInt *bilen = b->ilen, *bj = b->j; 696 PetscInt am = aij->A->rmap->n, j; 697 PetscInt *full_diag_i = aijd->i, *full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point. 
*/ 698 PetscInt col, dnz_row, onz_row, rowstart_diag, rowstart_offd; 699 PetscScalar *aa = a->a, *ba = b->a; 700 701 PetscFunctionBegin; 702 /* Iterate over all rows of the matrix */ 703 for (j = 0; j < am; j++) { 704 dnz_row = onz_row = 0; 705 rowstart_offd = full_offd_i[j]; 706 rowstart_diag = full_diag_i[j]; 707 /* Iterate over all non-zero columns of the current row */ 708 for (col = mat_i[j]; col < mat_i[j + 1]; col++) { 709 /* If column is in the diagonal */ 710 if (mat_j[col] >= cstart && mat_j[col] < cend) { 711 aj[rowstart_diag + dnz_row] = mat_j[col] - cstart; 712 aa[rowstart_diag + dnz_row] = mat_a[col]; 713 dnz_row++; 714 } else { /* off-diagonal entries */ 715 bj[rowstart_offd + onz_row] = mat_j[col]; 716 ba[rowstart_offd + onz_row] = mat_a[col]; 717 onz_row++; 718 } 719 } 720 ailen[j] = dnz_row; 721 bilen[j] = onz_row; 722 } 723 PetscFunctionReturn(PETSC_SUCCESS); 724 } 725 726 static PetscErrorCode MatGetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[]) 727 { 728 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 729 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend; 730 PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col; 731 732 PetscFunctionBegin; 733 for (i = 0; i < m; i++) { 734 if (idxm[i] < 0) continue; /* negative row */ 735 PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1); 736 PetscCheck(idxm[i] >= rstart && idxm[i] < rend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported, row requested %" PetscInt_FMT " range [%" PetscInt_FMT " %" PetscInt_FMT ")", idxm[i], rstart, rend); 737 row = idxm[i] - rstart; 738 for (j = 0; j < n; j++) { 739 if (idxn[j] < 0) continue; /* negative column */ 740 PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, 
idxn[j], mat->cmap->N - 1); 741 if (idxn[j] >= cstart && idxn[j] < cend) { 742 col = idxn[j] - cstart; 743 PetscCall(MatGetValues(aij->A, 1, &row, 1, &col, v + i * n + j)); 744 } else { 745 if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 746 #if defined(PETSC_USE_CTABLE) 747 PetscCall(PetscHMapIGetWithDefault(aij->colmap, idxn[j] + 1, 0, &col)); 748 col--; 749 #else 750 col = aij->colmap[idxn[j]] - 1; 751 #endif 752 if ((col < 0) || (aij->garray[col] != idxn[j])) *(v + i * n + j) = 0.0; 753 else PetscCall(MatGetValues(aij->B, 1, &row, 1, &col, v + i * n + j)); 754 } 755 } 756 } 757 PetscFunctionReturn(PETSC_SUCCESS); 758 } 759 760 static PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat, MatAssemblyType mode) 761 { 762 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 763 PetscInt nstash, reallocs; 764 765 PetscFunctionBegin; 766 if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS); 767 768 PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range)); 769 PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs)); 770 PetscCall(PetscInfo(aij->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs)); 771 PetscFunctionReturn(PETSC_SUCCESS); 772 } 773 774 PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat, MatAssemblyType mode) 775 { 776 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 777 PetscMPIInt n; 778 PetscInt i, j, rstart, ncols, flg; 779 PetscInt *row, *col; 780 PetscBool other_disassembled; 781 PetscScalar *val; 782 783 /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */ 784 785 PetscFunctionBegin; 786 if (!aij->donotstash && !mat->nooffprocentries) { 787 while (1) { 788 PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg)); 789 if (!flg) break; 790 791 for (i = 0; i < n;) { 792 /* Now identify the consecutive vals belonging to the same row */ 793 for (j = i, rstart = row[j]; j < n; j++) { 794 if (row[j] != rstart) 
break; 795 } 796 if (j < n) ncols = j - i; 797 else ncols = n - i; 798 /* Now assemble all these values with a single function call */ 799 PetscCall(MatSetValues_MPIAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode)); 800 i = j; 801 } 802 } 803 PetscCall(MatStashScatterEnd_Private(&mat->stash)); 804 } 805 #if defined(PETSC_HAVE_DEVICE) 806 if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU; 807 /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */ 808 if (mat->boundtocpu) { 809 PetscCall(MatBindToCPU(aij->A, PETSC_TRUE)); 810 PetscCall(MatBindToCPU(aij->B, PETSC_TRUE)); 811 } 812 #endif 813 PetscCall(MatAssemblyBegin(aij->A, mode)); 814 PetscCall(MatAssemblyEnd(aij->A, mode)); 815 816 /* determine if any processor has disassembled, if so we must 817 also disassemble ourself, in order that we may reassemble. */ 818 /* 819 if nonzero structure of submatrix B cannot change then we know that 820 no processor disassembled thus we can skip this stuff 821 */ 822 if (!((Mat_SeqAIJ *)aij->B->data)->nonew) { 823 PetscCallMPI(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 824 if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globally it does not */ 825 PetscCall(MatDisAssemble_MPIAIJ(mat, PETSC_FALSE)); 826 } 827 } 828 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIAIJ(mat)); 829 PetscCall(MatSetOption(aij->B, MAT_USE_INODES, PETSC_FALSE)); 830 #if defined(PETSC_HAVE_DEVICE) 831 if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU; 832 #endif 833 PetscCall(MatAssemblyBegin(aij->B, mode)); 834 PetscCall(MatAssemblyEnd(aij->B, mode)); 835 836 PetscCall(PetscFree2(aij->rowvalues, aij->rowindices)); 
837 838 aij->rowvalues = NULL; 839 840 PetscCall(VecDestroy(&aij->diag)); 841 842 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 843 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ *)aij->A->data)->nonew) { 844 PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate; 845 PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat))); 846 } 847 #if defined(PETSC_HAVE_DEVICE) 848 mat->offloadmask = PETSC_OFFLOAD_BOTH; 849 #endif 850 PetscFunctionReturn(PETSC_SUCCESS); 851 } 852 853 static PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) 854 { 855 Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data; 856 857 PetscFunctionBegin; 858 PetscCall(MatZeroEntries(l->A)); 859 PetscCall(MatZeroEntries(l->B)); 860 PetscFunctionReturn(PETSC_SUCCESS); 861 } 862 863 static PetscErrorCode MatZeroRows_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 864 { 865 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 866 PetscInt *lrows; 867 PetscInt r, len; 868 PetscBool cong; 869 870 PetscFunctionBegin; 871 /* get locally owned rows */ 872 PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows)); 873 PetscCall(MatHasCongruentLayouts(A, &cong)); 874 /* fix right-hand side if needed */ 875 if (x && b) { 876 const PetscScalar *xx; 877 PetscScalar *bb; 878 879 PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout"); 880 PetscCall(VecGetArrayRead(x, &xx)); 881 PetscCall(VecGetArray(b, &bb)); 882 for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]]; 883 PetscCall(VecRestoreArrayRead(x, &xx)); 884 PetscCall(VecRestoreArray(b, &bb)); 885 } 886 887 if (diag != 0.0 && cong) { 888 PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL)); 889 PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 890 } else if (diag != 0.0) { /* non-square or non congruent layouts -> if 
keepnonzeropattern is false, we allow for new insertion */ 891 Mat_SeqAIJ *aijA = (Mat_SeqAIJ *)mat->A->data; 892 Mat_SeqAIJ *aijB = (Mat_SeqAIJ *)mat->B->data; 893 PetscInt nnwA, nnwB; 894 PetscBool nnzA, nnzB; 895 896 nnwA = aijA->nonew; 897 nnwB = aijB->nonew; 898 nnzA = aijA->keepnonzeropattern; 899 nnzB = aijB->keepnonzeropattern; 900 if (!nnzA) { 901 PetscCall(PetscInfo(mat->A, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n")); 902 aijA->nonew = 0; 903 } 904 if (!nnzB) { 905 PetscCall(PetscInfo(mat->B, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n")); 906 aijB->nonew = 0; 907 } 908 /* Must zero here before the next loop */ 909 PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL)); 910 PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 911 for (r = 0; r < len; ++r) { 912 const PetscInt row = lrows[r] + A->rmap->rstart; 913 if (row >= A->cmap->N) continue; 914 PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES)); 915 } 916 aijA->nonew = nnwA; 917 aijB->nonew = nnwB; 918 } else { 919 PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL)); 920 PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL)); 921 } 922 PetscCall(PetscFree(lrows)); 923 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 924 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 925 926 /* only change matrix nonzero state if pattern was allowed to be changed */ 927 if (!((Mat_SeqAIJ *)mat->A->data)->keepnonzeropattern || !((Mat_SeqAIJ *)mat->A->data)->nonew) { 928 PetscObjectState state = mat->A->nonzerostate + mat->B->nonzerostate; 929 PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A))); 930 } 931 PetscFunctionReturn(PETSC_SUCCESS); 932 } 933 934 static PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 
935 { 936 Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data; 937 PetscInt n = A->rmap->n; 938 PetscInt i, j, r, m, len = 0; 939 PetscInt *lrows, *owners = A->rmap->range; 940 PetscMPIInt p = 0; 941 PetscSFNode *rrows; 942 PetscSF sf; 943 const PetscScalar *xx; 944 PetscScalar *bb, *mask, *aij_a; 945 Vec xmask, lmask; 946 Mat_SeqAIJ *aij = (Mat_SeqAIJ *)l->B->data; 947 const PetscInt *aj, *ii, *ridx; 948 PetscScalar *aa; 949 950 PetscFunctionBegin; 951 /* Create SF where leaves are input rows and roots are owned rows */ 952 PetscCall(PetscMalloc1(n, &lrows)); 953 for (r = 0; r < n; ++r) lrows[r] = -1; 954 PetscCall(PetscMalloc1(N, &rrows)); 955 for (r = 0; r < N; ++r) { 956 const PetscInt idx = rows[r]; 957 PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N); 958 if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */ 959 PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p)); 960 } 961 rrows[r].rank = p; 962 rrows[r].index = rows[r] - owners[p]; 963 } 964 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 965 PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER)); 966 /* Collect flags for rows to be zeroed */ 967 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR)); 968 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR)); 969 PetscCall(PetscSFDestroy(&sf)); 970 /* Compress and put in row numbers */ 971 for (r = 0; r < n; ++r) 972 if (lrows[r] >= 0) lrows[len++] = r; 973 /* zero diagonal part of matrix */ 974 PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b)); 975 /* handle off-diagonal part of matrix */ 976 PetscCall(MatCreateVecs(A, &xmask, NULL)); 977 PetscCall(VecDuplicate(l->lvec, &lmask)); 978 PetscCall(VecGetArray(xmask, &bb)); 979 for (i = 0; i < len; i++) bb[lrows[i]] = 1; 980 PetscCall(VecRestoreArray(xmask, 
&bb)); 981 PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD)); 982 PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD)); 983 PetscCall(VecDestroy(&xmask)); 984 if (x && b) { /* this code is buggy when the row and column layout don't match */ 985 PetscBool cong; 986 987 PetscCall(MatHasCongruentLayouts(A, &cong)); 988 PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout"); 989 PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD)); 990 PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD)); 991 PetscCall(VecGetArrayRead(l->lvec, &xx)); 992 PetscCall(VecGetArray(b, &bb)); 993 } 994 PetscCall(VecGetArray(lmask, &mask)); 995 /* remove zeroed rows of off-diagonal matrix */ 996 PetscCall(MatSeqAIJGetArray(l->B, &aij_a)); 997 ii = aij->i; 998 for (i = 0; i < len; i++) PetscCall(PetscArrayzero(PetscSafePointerPlusOffset(aij_a, ii[lrows[i]]), ii[lrows[i] + 1] - ii[lrows[i]])); 999 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1000 if (aij->compressedrow.use) { 1001 m = aij->compressedrow.nrows; 1002 ii = aij->compressedrow.i; 1003 ridx = aij->compressedrow.rindex; 1004 for (i = 0; i < m; i++) { 1005 n = ii[i + 1] - ii[i]; 1006 aj = aij->j + ii[i]; 1007 aa = aij_a + ii[i]; 1008 1009 for (j = 0; j < n; j++) { 1010 if (PetscAbsScalar(mask[*aj])) { 1011 if (b) bb[*ridx] -= *aa * xx[*aj]; 1012 *aa = 0.0; 1013 } 1014 aa++; 1015 aj++; 1016 } 1017 ridx++; 1018 } 1019 } else { /* do not use compressed row format */ 1020 m = l->B->rmap->n; 1021 for (i = 0; i < m; i++) { 1022 n = ii[i + 1] - ii[i]; 1023 aj = aij->j + ii[i]; 1024 aa = aij_a + ii[i]; 1025 for (j = 0; j < n; j++) { 1026 if (PetscAbsScalar(mask[*aj])) { 1027 if (b) bb[i] -= *aa * xx[*aj]; 1028 *aa = 0.0; 1029 } 1030 aa++; 1031 aj++; 1032 } 1033 } 1034 } 1035 if (x && b) { 1036 PetscCall(VecRestoreArray(b, &bb)); 1037 
PetscCall(VecRestoreArrayRead(l->lvec, &xx)); 1038 } 1039 PetscCall(MatSeqAIJRestoreArray(l->B, &aij_a)); 1040 PetscCall(VecRestoreArray(lmask, &mask)); 1041 PetscCall(VecDestroy(&lmask)); 1042 PetscCall(PetscFree(lrows)); 1043 1044 /* only change matrix nonzero state if pattern was allowed to be changed */ 1045 if (!((Mat_SeqAIJ *)l->A->data)->nonew) { 1046 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1047 PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A))); 1048 } 1049 PetscFunctionReturn(PETSC_SUCCESS); 1050 } 1051 1052 static PetscErrorCode MatMult_MPIAIJ(Mat A, Vec xx, Vec yy) 1053 { 1054 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1055 PetscInt nt; 1056 VecScatter Mvctx = a->Mvctx; 1057 1058 PetscFunctionBegin; 1059 PetscCall(VecGetLocalSize(xx, &nt)); 1060 PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")", A->cmap->n, nt); 1061 PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1062 PetscUseTypeMethod(a->A, mult, xx, yy); 1063 PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1064 PetscUseTypeMethod(a->B, multadd, a->lvec, yy, yy); 1065 PetscFunctionReturn(PETSC_SUCCESS); 1066 } 1067 1068 static PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A, Vec bb, Vec xx) 1069 { 1070 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1071 1072 PetscFunctionBegin; 1073 PetscCall(MatMultDiagonalBlock(a->A, bb, xx)); 1074 PetscFunctionReturn(PETSC_SUCCESS); 1075 } 1076 1077 static PetscErrorCode MatMultAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz) 1078 { 1079 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1080 VecScatter Mvctx = a->Mvctx; 1081 1082 PetscFunctionBegin; 1083 PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1084 PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz)); 1085 PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, 
SCATTER_FORWARD)); 1086 PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz)); 1087 PetscFunctionReturn(PETSC_SUCCESS); 1088 } 1089 1090 static PetscErrorCode MatMultTranspose_MPIAIJ(Mat A, Vec xx, Vec yy) 1091 { 1092 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1093 1094 PetscFunctionBegin; 1095 /* do nondiagonal part */ 1096 PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec)); 1097 /* do local part */ 1098 PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy)); 1099 /* add partial results together */ 1100 PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE)); 1101 PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE)); 1102 PetscFunctionReturn(PETSC_SUCCESS); 1103 } 1104 1105 static PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat, Mat Bmat, PetscReal tol, PetscBool *f) 1106 { 1107 MPI_Comm comm; 1108 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)Amat->data, *Bij = (Mat_MPIAIJ *)Bmat->data; 1109 Mat Adia = Aij->A, Bdia = Bij->A, Aoff, Boff, *Aoffs, *Boffs; 1110 IS Me, Notme; 1111 PetscInt M, N, first, last, *notme, i; 1112 PetscBool lf; 1113 PetscMPIInt size; 1114 1115 PetscFunctionBegin; 1116 /* Easy test: symmetric diagonal block */ 1117 PetscCall(MatIsTranspose(Adia, Bdia, tol, &lf)); 1118 PetscCallMPI(MPIU_Allreduce(&lf, f, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)Amat))); 1119 if (!*f) PetscFunctionReturn(PETSC_SUCCESS); 1120 PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm)); 1121 PetscCallMPI(MPI_Comm_size(comm, &size)); 1122 if (size == 1) PetscFunctionReturn(PETSC_SUCCESS); 1123 1124 /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. 
*/ 1125 PetscCall(MatGetSize(Amat, &M, &N)); 1126 PetscCall(MatGetOwnershipRange(Amat, &first, &last)); 1127 PetscCall(PetscMalloc1(N - last + first, ¬me)); 1128 for (i = 0; i < first; i++) notme[i] = i; 1129 for (i = last; i < M; i++) notme[i - last + first] = i; 1130 PetscCall(ISCreateGeneral(MPI_COMM_SELF, N - last + first, notme, PETSC_COPY_VALUES, &Notme)); 1131 PetscCall(ISCreateStride(MPI_COMM_SELF, last - first, first, 1, &Me)); 1132 PetscCall(MatCreateSubMatrices(Amat, 1, &Me, &Notme, MAT_INITIAL_MATRIX, &Aoffs)); 1133 Aoff = Aoffs[0]; 1134 PetscCall(MatCreateSubMatrices(Bmat, 1, &Notme, &Me, MAT_INITIAL_MATRIX, &Boffs)); 1135 Boff = Boffs[0]; 1136 PetscCall(MatIsTranspose(Aoff, Boff, tol, f)); 1137 PetscCall(MatDestroyMatrices(1, &Aoffs)); 1138 PetscCall(MatDestroyMatrices(1, &Boffs)); 1139 PetscCall(ISDestroy(&Me)); 1140 PetscCall(ISDestroy(&Notme)); 1141 PetscCall(PetscFree(notme)); 1142 PetscFunctionReturn(PETSC_SUCCESS); 1143 } 1144 1145 static PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz) 1146 { 1147 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1148 1149 PetscFunctionBegin; 1150 /* do nondiagonal part */ 1151 PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec)); 1152 /* do local part */ 1153 PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz)); 1154 /* add partial results together */ 1155 PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE)); 1156 PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE)); 1157 PetscFunctionReturn(PETSC_SUCCESS); 1158 } 1159 1160 /* 1161 This only works correctly for square matrices where the subblock A->A is the 1162 diagonal block 1163 */ 1164 static PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A, Vec v) 1165 { 1166 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1167 1168 PetscFunctionBegin; 1169 PetscCheck(A->rmap->N == A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block"); 1170 
PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "row partition must equal col partition"); 1171 PetscCall(MatGetDiagonal(a->A, v)); 1172 PetscFunctionReturn(PETSC_SUCCESS); 1173 } 1174 1175 static PetscErrorCode MatScale_MPIAIJ(Mat A, PetscScalar aa) 1176 { 1177 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1178 1179 PetscFunctionBegin; 1180 PetscCall(MatScale(a->A, aa)); 1181 PetscCall(MatScale(a->B, aa)); 1182 PetscFunctionReturn(PETSC_SUCCESS); 1183 } 1184 1185 static PetscErrorCode MatView_MPIAIJ_Binary(Mat mat, PetscViewer viewer) 1186 { 1187 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1188 Mat_SeqAIJ *A = (Mat_SeqAIJ *)aij->A->data; 1189 Mat_SeqAIJ *B = (Mat_SeqAIJ *)aij->B->data; 1190 const PetscInt *garray = aij->garray; 1191 const PetscScalar *aa, *ba; 1192 PetscInt header[4], M, N, m, rs, cs, cnt, i, ja, jb; 1193 PetscInt64 nz, hnz; 1194 PetscInt *rowlens; 1195 PetscInt *colidxs; 1196 PetscScalar *matvals; 1197 PetscMPIInt rank; 1198 1199 PetscFunctionBegin; 1200 PetscCall(PetscViewerSetUp(viewer)); 1201 1202 M = mat->rmap->N; 1203 N = mat->cmap->N; 1204 m = mat->rmap->n; 1205 rs = mat->rmap->rstart; 1206 cs = mat->cmap->rstart; 1207 nz = A->nz + B->nz; 1208 1209 /* write matrix header */ 1210 header[0] = MAT_FILE_CLASSID; 1211 header[1] = M; 1212 header[2] = N; 1213 PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_INT64, MPI_SUM, 0, PetscObjectComm((PetscObject)mat))); 1214 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 1215 if (rank == 0) PetscCall(PetscIntCast(hnz, &header[3])); 1216 PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT)); 1217 1218 /* fill in and store row lengths */ 1219 PetscCall(PetscMalloc1(m, &rowlens)); 1220 for (i = 0; i < m; i++) rowlens[i] = A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i]; 1221 PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT)); 1222 PetscCall(PetscFree(rowlens)); 1223 1224 /* fill in and store column 
indices */ 1225 PetscCall(PetscMalloc1(nz, &colidxs)); 1226 for (cnt = 0, i = 0; i < m; i++) { 1227 for (jb = B->i[i]; jb < B->i[i + 1]; jb++) { 1228 if (garray[B->j[jb]] > cs) break; 1229 colidxs[cnt++] = garray[B->j[jb]]; 1230 } 1231 for (ja = A->i[i]; ja < A->i[i + 1]; ja++) colidxs[cnt++] = A->j[ja] + cs; 1232 for (; jb < B->i[i + 1]; jb++) colidxs[cnt++] = garray[B->j[jb]]; 1233 } 1234 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz); 1235 PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 1236 PetscCall(PetscFree(colidxs)); 1237 1238 /* fill in and store nonzero values */ 1239 PetscCall(MatSeqAIJGetArrayRead(aij->A, &aa)); 1240 PetscCall(MatSeqAIJGetArrayRead(aij->B, &ba)); 1241 PetscCall(PetscMalloc1(nz, &matvals)); 1242 for (cnt = 0, i = 0; i < m; i++) { 1243 for (jb = B->i[i]; jb < B->i[i + 1]; jb++) { 1244 if (garray[B->j[jb]] > cs) break; 1245 matvals[cnt++] = ba[jb]; 1246 } 1247 for (ja = A->i[i]; ja < A->i[i + 1]; ja++) matvals[cnt++] = aa[ja]; 1248 for (; jb < B->i[i + 1]; jb++) matvals[cnt++] = ba[jb]; 1249 } 1250 PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &aa)); 1251 PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &ba)); 1252 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz); 1253 PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 1254 PetscCall(PetscFree(matvals)); 1255 1256 /* write block size option to the viewer's .info file */ 1257 PetscCall(MatView_Binary_BlockSizes(mat, viewer)); 1258 PetscFunctionReturn(PETSC_SUCCESS); 1259 } 1260 1261 #include <petscdraw.h> 1262 static PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer) 1263 { 1264 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1265 PetscMPIInt rank = aij->rank, size = aij->size; 1266 PetscBool 
isdraw, iascii, isbinary; 1267 PetscViewer sviewer; 1268 PetscViewerFormat format; 1269 1270 PetscFunctionBegin; 1271 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1272 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 1273 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1274 if (iascii) { 1275 PetscCall(PetscViewerGetFormat(viewer, &format)); 1276 if (format == PETSC_VIEWER_LOAD_BALANCE) { 1277 PetscInt i, nmax = 0, nmin = PETSC_INT_MAX, navg = 0, *nz, nzlocal = ((Mat_SeqAIJ *)aij->A->data)->nz + ((Mat_SeqAIJ *)aij->B->data)->nz; 1278 PetscCall(PetscMalloc1(size, &nz)); 1279 PetscCallMPI(MPI_Allgather(&nzlocal, 1, MPIU_INT, nz, 1, MPIU_INT, PetscObjectComm((PetscObject)mat))); 1280 for (i = 0; i < size; i++) { 1281 nmax = PetscMax(nmax, nz[i]); 1282 nmin = PetscMin(nmin, nz[i]); 1283 navg += nz[i]; 1284 } 1285 PetscCall(PetscFree(nz)); 1286 navg = navg / size; 1287 PetscCall(PetscViewerASCIIPrintf(viewer, "Load Balance - Nonzeros: Min %" PetscInt_FMT " avg %" PetscInt_FMT " max %" PetscInt_FMT "\n", nmin, navg, nmax)); 1288 PetscFunctionReturn(PETSC_SUCCESS); 1289 } 1290 PetscCall(PetscViewerGetFormat(viewer, &format)); 1291 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1292 MatInfo info; 1293 PetscInt *inodes = NULL; 1294 1295 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 1296 PetscCall(MatGetInfo(mat, MAT_LOCAL, &info)); 1297 PetscCall(MatInodeGetInodeSizes(aij->A, NULL, &inodes, NULL)); 1298 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 1299 if (!inodes) { 1300 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, 1301 info.memory)); 1302 } else { 1303 PetscCall( 1304 PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" 
PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, info.memory)); 1305 } 1306 PetscCall(MatGetInfo(aij->A, MAT_LOCAL, &info)); 1307 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used)); 1308 PetscCall(MatGetInfo(aij->B, MAT_LOCAL, &info)); 1309 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used)); 1310 PetscCall(PetscViewerFlush(viewer)); 1311 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 1312 PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n")); 1313 PetscCall(VecScatterView(aij->Mvctx, viewer)); 1314 PetscFunctionReturn(PETSC_SUCCESS); 1315 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1316 PetscInt inodecount, inodelimit, *inodes; 1317 PetscCall(MatInodeGetInodeSizes(aij->A, &inodecount, &inodes, &inodelimit)); 1318 if (inodes) { 1319 PetscCall(PetscViewerASCIIPrintf(viewer, "using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n", inodecount, inodelimit)); 1320 } else { 1321 PetscCall(PetscViewerASCIIPrintf(viewer, "not using I-node (on process 0) routines\n")); 1322 } 1323 PetscFunctionReturn(PETSC_SUCCESS); 1324 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1325 PetscFunctionReturn(PETSC_SUCCESS); 1326 } 1327 } else if (isbinary) { 1328 if (size == 1) { 1329 PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name)); 1330 PetscCall(MatView(aij->A, viewer)); 1331 } else { 1332 PetscCall(MatView_MPIAIJ_Binary(mat, viewer)); 1333 } 1334 PetscFunctionReturn(PETSC_SUCCESS); 1335 } else if (iascii && size == 1) { 1336 PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name)); 1337 PetscCall(MatView(aij->A, viewer)); 1338 
PetscFunctionReturn(PETSC_SUCCESS); 1339 } else if (isdraw) { 1340 PetscDraw draw; 1341 PetscBool isnull; 1342 PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); 1343 PetscCall(PetscDrawIsNull(draw, &isnull)); 1344 if (isnull) PetscFunctionReturn(PETSC_SUCCESS); 1345 } 1346 1347 { /* assemble the entire matrix onto first processor */ 1348 Mat A = NULL, Av; 1349 IS isrow, iscol; 1350 1351 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->rmap->N : 0, 0, 1, &isrow)); 1352 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->cmap->N : 0, 0, 1, &iscol)); 1353 PetscCall(MatCreateSubMatrix(mat, isrow, iscol, MAT_INITIAL_MATRIX, &A)); 1354 PetscCall(MatMPIAIJGetSeqAIJ(A, &Av, NULL, NULL)); 1355 /* The commented code uses MatCreateSubMatrices instead */ 1356 /* 1357 Mat *AA, A = NULL, Av; 1358 IS isrow,iscol; 1359 1360 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow)); 1361 PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? 
mat->cmap->N : 0,0,1,&iscol)); 1362 PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA)); 1363 if (rank == 0) { 1364 PetscCall(PetscObjectReference((PetscObject)AA[0])); 1365 A = AA[0]; 1366 Av = AA[0]; 1367 } 1368 PetscCall(MatDestroySubMatrices(1,&AA)); 1369 */ 1370 PetscCall(ISDestroy(&iscol)); 1371 PetscCall(ISDestroy(&isrow)); 1372 /* 1373 Everyone has to call to draw the matrix since the graphics waits are 1374 synchronized across all processors that share the PetscDraw object 1375 */ 1376 PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 1377 if (rank == 0) { 1378 if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)Av, ((PetscObject)mat)->name)); 1379 PetscCall(MatView_SeqAIJ(Av, sviewer)); 1380 } 1381 PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 1382 PetscCall(MatDestroy(&A)); 1383 } 1384 PetscFunctionReturn(PETSC_SUCCESS); 1385 } 1386 1387 PetscErrorCode MatView_MPIAIJ(Mat mat, PetscViewer viewer) 1388 { 1389 PetscBool iascii, isdraw, issocket, isbinary; 1390 1391 PetscFunctionBegin; 1392 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 1393 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1394 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1395 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket)); 1396 if (iascii || isdraw || isbinary || issocket) PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat, viewer)); 1397 PetscFunctionReturn(PETSC_SUCCESS); 1398 } 1399 1400 static PetscErrorCode MatSOR_MPIAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) 1401 { 1402 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data; 1403 Vec bb1 = NULL; 1404 PetscBool hasop; 1405 1406 PetscFunctionBegin; 1407 if (flag == SOR_APPLY_UPPER) { 1408 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, 
fshift, lits, 1, xx)); 1409 PetscFunctionReturn(PETSC_SUCCESS); 1410 } 1411 1412 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) PetscCall(VecDuplicate(bb, &bb1)); 1413 1414 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 1415 if (flag & SOR_ZERO_INITIAL_GUESS) { 1416 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1417 its--; 1418 } 1419 1420 while (its--) { 1421 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1422 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1423 1424 /* update rhs: bb1 = bb - B*x */ 1425 PetscCall(VecScale(mat->lvec, -1.0)); 1426 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 1427 1428 /* local sweep */ 1429 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx)); 1430 } 1431 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 1432 if (flag & SOR_ZERO_INITIAL_GUESS) { 1433 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1434 its--; 1435 } 1436 while (its--) { 1437 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1438 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1439 1440 /* update rhs: bb1 = bb - B*x */ 1441 PetscCall(VecScale(mat->lvec, -1.0)); 1442 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 1443 1444 /* local sweep */ 1445 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx)); 1446 } 1447 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 1448 if (flag & SOR_ZERO_INITIAL_GUESS) { 1449 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 1450 its--; 1451 } 1452 while (its--) { 1453 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1454 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1455 
1456 /* update rhs: bb1 = bb - B*x */ 1457 PetscCall(VecScale(mat->lvec, -1.0)); 1458 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 1459 1460 /* local sweep */ 1461 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx)); 1462 } 1463 } else if (flag & SOR_EISENSTAT) { 1464 Vec xx1; 1465 1466 PetscCall(VecDuplicate(bb, &xx1)); 1467 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx)); 1468 1469 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1470 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1471 if (!mat->diag) { 1472 PetscCall(MatCreateVecs(matin, &mat->diag, NULL)); 1473 PetscCall(MatGetDiagonal(matin, mat->diag)); 1474 } 1475 PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop)); 1476 if (hasop) { 1477 PetscCall(MatMultDiagonalBlock(matin, xx, bb1)); 1478 } else { 1479 PetscCall(VecPointwiseMult(bb1, mat->diag, xx)); 1480 } 1481 PetscCall(VecAYPX(bb1, (omega - 2.0) / omega, bb)); 1482 1483 PetscCall(MatMultAdd(mat->B, mat->lvec, bb1, bb1)); 1484 1485 /* local sweep */ 1486 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1)); 1487 PetscCall(VecAXPY(xx, 1.0, xx1)); 1488 PetscCall(VecDestroy(&xx1)); 1489 } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel SOR not supported"); 1490 1491 PetscCall(VecDestroy(&bb1)); 1492 1493 matin->factorerrortype = mat->A->factorerrortype; 1494 PetscFunctionReturn(PETSC_SUCCESS); 1495 } 1496 1497 static PetscErrorCode MatPermute_MPIAIJ(Mat A, IS rowp, IS colp, Mat *B) 1498 { 1499 Mat aA, aB, Aperm; 1500 const PetscInt *rwant, *cwant, *gcols, *ai, *bi, *aj, *bj; 1501 PetscScalar *aa, *ba; 1502 PetscInt i, j, m, n, ng, anz, bnz, *dnnz, *onnz, *tdnnz, *tonnz, *rdest, *cdest, *work, *gcdest; 1503 
PetscSF rowsf, sf; 1504 IS parcolp = NULL; 1505 PetscBool done; 1506 1507 PetscFunctionBegin; 1508 PetscCall(MatGetLocalSize(A, &m, &n)); 1509 PetscCall(ISGetIndices(rowp, &rwant)); 1510 PetscCall(ISGetIndices(colp, &cwant)); 1511 PetscCall(PetscMalloc3(PetscMax(m, n), &work, m, &rdest, n, &cdest)); 1512 1513 /* Invert row permutation to find out where my rows should go */ 1514 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &rowsf)); 1515 PetscCall(PetscSFSetGraphLayout(rowsf, A->rmap, A->rmap->n, NULL, PETSC_OWN_POINTER, rwant)); 1516 PetscCall(PetscSFSetFromOptions(rowsf)); 1517 for (i = 0; i < m; i++) work[i] = A->rmap->rstart + i; 1518 PetscCall(PetscSFReduceBegin(rowsf, MPIU_INT, work, rdest, MPI_REPLACE)); 1519 PetscCall(PetscSFReduceEnd(rowsf, MPIU_INT, work, rdest, MPI_REPLACE)); 1520 1521 /* Invert column permutation to find out where my columns should go */ 1522 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1523 PetscCall(PetscSFSetGraphLayout(sf, A->cmap, A->cmap->n, NULL, PETSC_OWN_POINTER, cwant)); 1524 PetscCall(PetscSFSetFromOptions(sf)); 1525 for (i = 0; i < n; i++) work[i] = A->cmap->rstart + i; 1526 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, work, cdest, MPI_REPLACE)); 1527 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, work, cdest, MPI_REPLACE)); 1528 PetscCall(PetscSFDestroy(&sf)); 1529 1530 PetscCall(ISRestoreIndices(rowp, &rwant)); 1531 PetscCall(ISRestoreIndices(colp, &cwant)); 1532 PetscCall(MatMPIAIJGetSeqAIJ(A, &aA, &aB, &gcols)); 1533 1534 /* Find out where my gcols should go */ 1535 PetscCall(MatGetSize(aB, NULL, &ng)); 1536 PetscCall(PetscMalloc1(ng, &gcdest)); 1537 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1538 PetscCall(PetscSFSetGraphLayout(sf, A->cmap, ng, NULL, PETSC_OWN_POINTER, gcols)); 1539 PetscCall(PetscSFSetFromOptions(sf)); 1540 PetscCall(PetscSFBcastBegin(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE)); 1541 PetscCall(PetscSFBcastEnd(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE)); 1542 
PetscCall(PetscSFDestroy(&sf)); 1543 1544 PetscCall(PetscCalloc4(m, &dnnz, m, &onnz, m, &tdnnz, m, &tonnz)); 1545 PetscCall(MatGetRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done)); 1546 PetscCall(MatGetRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done)); 1547 for (i = 0; i < m; i++) { 1548 PetscInt row = rdest[i]; 1549 PetscMPIInt rowner; 1550 PetscCall(PetscLayoutFindOwner(A->rmap, row, &rowner)); 1551 for (j = ai[i]; j < ai[i + 1]; j++) { 1552 PetscInt col = cdest[aj[j]]; 1553 PetscMPIInt cowner; 1554 PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); /* Could build an index for the columns to eliminate this search */ 1555 if (rowner == cowner) dnnz[i]++; 1556 else onnz[i]++; 1557 } 1558 for (j = bi[i]; j < bi[i + 1]; j++) { 1559 PetscInt col = gcdest[bj[j]]; 1560 PetscMPIInt cowner; 1561 PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); 1562 if (rowner == cowner) dnnz[i]++; 1563 else onnz[i]++; 1564 } 1565 } 1566 PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE)); 1567 PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE)); 1568 PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE)); 1569 PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE)); 1570 PetscCall(PetscSFDestroy(&rowsf)); 1571 1572 PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A), A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N, 0, tdnnz, 0, tonnz, &Aperm)); 1573 PetscCall(MatSeqAIJGetArray(aA, &aa)); 1574 PetscCall(MatSeqAIJGetArray(aB, &ba)); 1575 for (i = 0; i < m; i++) { 1576 PetscInt *acols = dnnz, *bcols = onnz; /* Repurpose now-unneeded arrays */ 1577 PetscInt j0, rowlen; 1578 rowlen = ai[i + 1] - ai[i]; 1579 for (j0 = j = 0; j < rowlen; j0 = j) { /* rowlen could be larger than number of rows m, so sum in batches */ 1580 for (; j < PetscMin(rowlen, j0 + m); j++) acols[j - j0] = cdest[aj[ai[i] + j]]; 1581 PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, acols, aa + ai[i] + j0, 
INSERT_VALUES)); 1582 } 1583 rowlen = bi[i + 1] - bi[i]; 1584 for (j0 = j = 0; j < rowlen; j0 = j) { 1585 for (; j < PetscMin(rowlen, j0 + m); j++) bcols[j - j0] = gcdest[bj[bi[i] + j]]; 1586 PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, bcols, ba + bi[i] + j0, INSERT_VALUES)); 1587 } 1588 } 1589 PetscCall(MatAssemblyBegin(Aperm, MAT_FINAL_ASSEMBLY)); 1590 PetscCall(MatAssemblyEnd(Aperm, MAT_FINAL_ASSEMBLY)); 1591 PetscCall(MatRestoreRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done)); 1592 PetscCall(MatRestoreRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done)); 1593 PetscCall(MatSeqAIJRestoreArray(aA, &aa)); 1594 PetscCall(MatSeqAIJRestoreArray(aB, &ba)); 1595 PetscCall(PetscFree4(dnnz, onnz, tdnnz, tonnz)); 1596 PetscCall(PetscFree3(work, rdest, cdest)); 1597 PetscCall(PetscFree(gcdest)); 1598 if (parcolp) PetscCall(ISDestroy(&colp)); 1599 *B = Aperm; 1600 PetscFunctionReturn(PETSC_SUCCESS); 1601 } 1602 1603 static PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[]) 1604 { 1605 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1606 1607 PetscFunctionBegin; 1608 PetscCall(MatGetSize(aij->B, NULL, nghosts)); 1609 if (ghosts) *ghosts = aij->garray; 1610 PetscFunctionReturn(PETSC_SUCCESS); 1611 } 1612 1613 static PetscErrorCode MatGetInfo_MPIAIJ(Mat matin, MatInfoType flag, MatInfo *info) 1614 { 1615 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data; 1616 Mat A = mat->A, B = mat->B; 1617 PetscLogDouble isend[5], irecv[5]; 1618 1619 PetscFunctionBegin; 1620 info->block_size = 1.0; 1621 PetscCall(MatGetInfo(A, MAT_LOCAL, info)); 1622 1623 isend[0] = info->nz_used; 1624 isend[1] = info->nz_allocated; 1625 isend[2] = info->nz_unneeded; 1626 isend[3] = info->memory; 1627 isend[4] = info->mallocs; 1628 1629 PetscCall(MatGetInfo(B, MAT_LOCAL, info)); 1630 1631 isend[0] += info->nz_used; 1632 isend[1] += info->nz_allocated; 1633 isend[2] += info->nz_unneeded; 1634 isend[3] += info->memory; 1635 isend[4] += info->mallocs; 
1636 if (flag == MAT_LOCAL) { 1637 info->nz_used = isend[0]; 1638 info->nz_allocated = isend[1]; 1639 info->nz_unneeded = isend[2]; 1640 info->memory = isend[3]; 1641 info->mallocs = isend[4]; 1642 } else if (flag == MAT_GLOBAL_MAX) { 1643 PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin))); 1644 1645 info->nz_used = irecv[0]; 1646 info->nz_allocated = irecv[1]; 1647 info->nz_unneeded = irecv[2]; 1648 info->memory = irecv[3]; 1649 info->mallocs = irecv[4]; 1650 } else if (flag == MAT_GLOBAL_SUM) { 1651 PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin))); 1652 1653 info->nz_used = irecv[0]; 1654 info->nz_allocated = irecv[1]; 1655 info->nz_unneeded = irecv[2]; 1656 info->memory = irecv[3]; 1657 info->mallocs = irecv[4]; 1658 } 1659 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1660 info->fill_ratio_needed = 0; 1661 info->factor_mallocs = 0; 1662 PetscFunctionReturn(PETSC_SUCCESS); 1663 } 1664 1665 PetscErrorCode MatSetOption_MPIAIJ(Mat A, MatOption op, PetscBool flg) 1666 { 1667 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1668 1669 PetscFunctionBegin; 1670 switch (op) { 1671 case MAT_NEW_NONZERO_LOCATIONS: 1672 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1673 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1674 case MAT_KEEP_NONZERO_PATTERN: 1675 case MAT_NEW_NONZERO_LOCATION_ERR: 1676 case MAT_USE_INODES: 1677 case MAT_IGNORE_ZERO_ENTRIES: 1678 case MAT_FORM_EXPLICIT_TRANSPOSE: 1679 MatCheckPreallocated(A, 1); 1680 PetscCall(MatSetOption(a->A, op, flg)); 1681 PetscCall(MatSetOption(a->B, op, flg)); 1682 break; 1683 case MAT_ROW_ORIENTED: 1684 MatCheckPreallocated(A, 1); 1685 a->roworiented = flg; 1686 1687 PetscCall(MatSetOption(a->A, op, flg)); 1688 PetscCall(MatSetOption(a->B, op, flg)); 1689 break; 1690 case MAT_IGNORE_OFF_PROC_ENTRIES: 1691 a->donotstash = flg; 1692 break; 1693 /* Symmetry flags are handled directly by MatSetOption() and they 
don't affect preallocation */ 1694 case MAT_SPD: 1695 case MAT_SYMMETRIC: 1696 case MAT_STRUCTURALLY_SYMMETRIC: 1697 case MAT_HERMITIAN: 1698 case MAT_SYMMETRY_ETERNAL: 1699 case MAT_STRUCTURAL_SYMMETRY_ETERNAL: 1700 case MAT_SPD_ETERNAL: 1701 /* if the diagonal matrix is square it inherits some of the properties above */ 1702 if (a->A && A->rmap->n == A->cmap->n) PetscCall(MatSetOption(a->A, op, flg)); 1703 break; 1704 case MAT_SUBMAT_SINGLEIS: 1705 A->submat_singleis = flg; 1706 break; 1707 default: 1708 break; 1709 } 1710 PetscFunctionReturn(PETSC_SUCCESS); 1711 } 1712 1713 PetscErrorCode MatGetRow_MPIAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1714 { 1715 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data; 1716 PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p; 1717 PetscInt i, *cworkA, *cworkB, **pcA, **pcB, cstart = matin->cmap->rstart; 1718 PetscInt nztot, nzA, nzB, lrow, rstart = matin->rmap->rstart, rend = matin->rmap->rend; 1719 PetscInt *cmap, *idx_p; 1720 1721 PetscFunctionBegin; 1722 PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active"); 1723 mat->getrowactive = PETSC_TRUE; 1724 1725 if (!mat->rowvalues && (idx || v)) { 1726 /* 1727 allocate enough space to hold information from the longest row. 
1728 */ 1729 Mat_SeqAIJ *Aa = (Mat_SeqAIJ *)mat->A->data, *Ba = (Mat_SeqAIJ *)mat->B->data; 1730 PetscInt max = 1, tmp; 1731 for (i = 0; i < matin->rmap->n; i++) { 1732 tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i]; 1733 if (max < tmp) max = tmp; 1734 } 1735 PetscCall(PetscMalloc2(max, &mat->rowvalues, max, &mat->rowindices)); 1736 } 1737 1738 PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows"); 1739 lrow = row - rstart; 1740 1741 pvA = &vworkA; 1742 pcA = &cworkA; 1743 pvB = &vworkB; 1744 pcB = &cworkB; 1745 if (!v) { 1746 pvA = NULL; 1747 pvB = NULL; 1748 } 1749 if (!idx) { 1750 pcA = NULL; 1751 if (!v) pcB = NULL; 1752 } 1753 PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA)); 1754 PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB)); 1755 nztot = nzA + nzB; 1756 1757 cmap = mat->garray; 1758 if (v || idx) { 1759 if (nztot) { 1760 /* Sort by increasing column numbers, assuming A and B already sorted */ 1761 PetscInt imark = -1; 1762 if (v) { 1763 *v = v_p = mat->rowvalues; 1764 for (i = 0; i < nzB; i++) { 1765 if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i]; 1766 else break; 1767 } 1768 imark = i; 1769 for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i]; 1770 for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i]; 1771 } 1772 if (idx) { 1773 *idx = idx_p = mat->rowindices; 1774 if (imark > -1) { 1775 for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i]]; 1776 } else { 1777 for (i = 0; i < nzB; i++) { 1778 if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]]; 1779 else break; 1780 } 1781 imark = i; 1782 } 1783 for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart + cworkA[i]; 1784 for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i]]; 1785 } 1786 } else { 1787 if (idx) *idx = NULL; 1788 if (v) *v = NULL; 1789 } 1790 } 1791 *nz = nztot; 1792 PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA)); 1793 PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, 
&nzB, pcB, pvB)); 1794 PetscFunctionReturn(PETSC_SUCCESS); 1795 } 1796 1797 PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1798 { 1799 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1800 1801 PetscFunctionBegin; 1802 PetscCheck(aij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first"); 1803 aij->getrowactive = PETSC_FALSE; 1804 PetscFunctionReturn(PETSC_SUCCESS); 1805 } 1806 1807 static PetscErrorCode MatNorm_MPIAIJ(Mat mat, NormType type, PetscReal *norm) 1808 { 1809 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1810 Mat_SeqAIJ *amat = (Mat_SeqAIJ *)aij->A->data, *bmat = (Mat_SeqAIJ *)aij->B->data; 1811 PetscInt i, j, cstart = mat->cmap->rstart; 1812 PetscReal sum = 0.0; 1813 const MatScalar *v, *amata, *bmata; 1814 1815 PetscFunctionBegin; 1816 if (aij->size == 1) { 1817 PetscCall(MatNorm(aij->A, type, norm)); 1818 } else { 1819 PetscCall(MatSeqAIJGetArrayRead(aij->A, &amata)); 1820 PetscCall(MatSeqAIJGetArrayRead(aij->B, &bmata)); 1821 if (type == NORM_FROBENIUS) { 1822 v = amata; 1823 for (i = 0; i < amat->nz; i++) { 1824 sum += PetscRealPart(PetscConj(*v) * (*v)); 1825 v++; 1826 } 1827 v = bmata; 1828 for (i = 0; i < bmat->nz; i++) { 1829 sum += PetscRealPart(PetscConj(*v) * (*v)); 1830 v++; 1831 } 1832 PetscCallMPI(MPIU_Allreduce(&sum, norm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 1833 *norm = PetscSqrtReal(*norm); 1834 PetscCall(PetscLogFlops(2.0 * amat->nz + 2.0 * bmat->nz)); 1835 } else if (type == NORM_1) { /* max column norm */ 1836 PetscReal *tmp; 1837 PetscInt *jj, *garray = aij->garray; 1838 PetscCall(PetscCalloc1(mat->cmap->N + 1, &tmp)); 1839 *norm = 0.0; 1840 v = amata; 1841 jj = amat->j; 1842 for (j = 0; j < amat->nz; j++) { 1843 tmp[cstart + *jj++] += PetscAbsScalar(*v); 1844 v++; 1845 } 1846 v = bmata; 1847 jj = bmat->j; 1848 for (j = 0; j < bmat->nz; j++) { 1849 tmp[garray[*jj++]] += PetscAbsScalar(*v); 1850 v++; 1851 } 1852 
PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, tmp, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 1853 for (j = 0; j < mat->cmap->N; j++) { 1854 if (tmp[j] > *norm) *norm = tmp[j]; 1855 } 1856 PetscCall(PetscFree(tmp)); 1857 PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0))); 1858 } else if (type == NORM_INFINITY) { /* max row norm */ 1859 PetscReal ntemp = 0.0; 1860 for (j = 0; j < aij->A->rmap->n; j++) { 1861 v = PetscSafePointerPlusOffset(amata, amat->i[j]); 1862 sum = 0.0; 1863 for (i = 0; i < amat->i[j + 1] - amat->i[j]; i++) { 1864 sum += PetscAbsScalar(*v); 1865 v++; 1866 } 1867 v = PetscSafePointerPlusOffset(bmata, bmat->i[j]); 1868 for (i = 0; i < bmat->i[j + 1] - bmat->i[j]; i++) { 1869 sum += PetscAbsScalar(*v); 1870 v++; 1871 } 1872 if (sum > ntemp) ntemp = sum; 1873 } 1874 PetscCallMPI(MPIU_Allreduce(&ntemp, norm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat))); 1875 PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0))); 1876 } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for two norm"); 1877 PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &amata)); 1878 PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &bmata)); 1879 } 1880 PetscFunctionReturn(PETSC_SUCCESS); 1881 } 1882 1883 static PetscErrorCode MatTranspose_MPIAIJ(Mat A, MatReuse reuse, Mat *matout) 1884 { 1885 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *b; 1886 Mat_SeqAIJ *Aloc = (Mat_SeqAIJ *)a->A->data, *Bloc = (Mat_SeqAIJ *)a->B->data, *sub_B_diag; 1887 PetscInt M = A->rmap->N, N = A->cmap->N, ma, na, mb, nb, row, *cols, *cols_tmp, *B_diag_ilen, i, ncol, A_diag_ncol; 1888 const PetscInt *ai, *aj, *bi, *bj, *B_diag_i; 1889 Mat B, A_diag, *B_diag; 1890 const MatScalar *pbv, *bv; 1891 1892 PetscFunctionBegin; 1893 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout)); 1894 ma = A->rmap->n; 1895 na = A->cmap->n; 1896 mb = a->B->rmap->n; 1897 nb = a->B->cmap->n; 1898 ai = Aloc->i; 1899 aj = 
Aloc->j; 1900 bi = Bloc->i; 1901 bj = Bloc->j; 1902 if (reuse == MAT_INITIAL_MATRIX || *matout == A) { 1903 PetscInt *d_nnz, *g_nnz, *o_nnz; 1904 PetscSFNode *oloc; 1905 PETSC_UNUSED PetscSF sf; 1906 1907 PetscCall(PetscMalloc4(na, &d_nnz, na, &o_nnz, nb, &g_nnz, nb, &oloc)); 1908 /* compute d_nnz for preallocation */ 1909 PetscCall(PetscArrayzero(d_nnz, na)); 1910 for (i = 0; i < ai[ma]; i++) d_nnz[aj[i]]++; 1911 /* compute local off-diagonal contributions */ 1912 PetscCall(PetscArrayzero(g_nnz, nb)); 1913 for (i = 0; i < bi[ma]; i++) g_nnz[bj[i]]++; 1914 /* map those to global */ 1915 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1916 PetscCall(PetscSFSetGraphLayout(sf, A->cmap, nb, NULL, PETSC_USE_POINTER, a->garray)); 1917 PetscCall(PetscSFSetFromOptions(sf)); 1918 PetscCall(PetscArrayzero(o_nnz, na)); 1919 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM)); 1920 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM)); 1921 PetscCall(PetscSFDestroy(&sf)); 1922 1923 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 1924 PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M)); 1925 PetscCall(MatSetBlockSizes(B, A->cmap->bs, A->rmap->bs)); 1926 PetscCall(MatSetType(B, ((PetscObject)A)->type_name)); 1927 PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz)); 1928 PetscCall(PetscFree4(d_nnz, o_nnz, g_nnz, oloc)); 1929 } else { 1930 B = *matout; 1931 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE)); 1932 } 1933 1934 b = (Mat_MPIAIJ *)B->data; 1935 A_diag = a->A; 1936 B_diag = &b->A; 1937 sub_B_diag = (Mat_SeqAIJ *)(*B_diag)->data; 1938 A_diag_ncol = A_diag->cmap->N; 1939 B_diag_ilen = sub_B_diag->ilen; 1940 B_diag_i = sub_B_diag->i; 1941 1942 /* Set ilen for diagonal of B */ 1943 for (i = 0; i < A_diag_ncol; i++) B_diag_ilen[i] = B_diag_i[i + 1] - B_diag_i[i]; 1944 1945 /* Transpose the diagonal part of the matrix. 
In contrast to the off-diagonal part, this can be done 1946 very quickly (=without using MatSetValues), because all writes are local. */ 1947 PetscCall(MatTransposeSetPrecursor(A_diag, *B_diag)); 1948 PetscCall(MatTranspose(A_diag, MAT_REUSE_MATRIX, B_diag)); 1949 1950 /* copy over the B part */ 1951 PetscCall(PetscMalloc1(bi[mb], &cols)); 1952 PetscCall(MatSeqAIJGetArrayRead(a->B, &bv)); 1953 pbv = bv; 1954 row = A->rmap->rstart; 1955 for (i = 0; i < bi[mb]; i++) cols[i] = a->garray[bj[i]]; 1956 cols_tmp = cols; 1957 for (i = 0; i < mb; i++) { 1958 ncol = bi[i + 1] - bi[i]; 1959 PetscCall(MatSetValues(B, ncol, cols_tmp, 1, &row, pbv, INSERT_VALUES)); 1960 row++; 1961 if (pbv) pbv += ncol; 1962 if (cols_tmp) cols_tmp += ncol; 1963 } 1964 PetscCall(PetscFree(cols)); 1965 PetscCall(MatSeqAIJRestoreArrayRead(a->B, &bv)); 1966 1967 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 1968 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 1969 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) { 1970 *matout = B; 1971 } else { 1972 PetscCall(MatHeaderMerge(A, &B)); 1973 } 1974 PetscFunctionReturn(PETSC_SUCCESS); 1975 } 1976 1977 static PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat, Vec ll, Vec rr) 1978 { 1979 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1980 Mat a = aij->A, b = aij->B; 1981 PetscInt s1, s2, s3; 1982 1983 PetscFunctionBegin; 1984 PetscCall(MatGetLocalSize(mat, &s2, &s3)); 1985 if (rr) { 1986 PetscCall(VecGetLocalSize(rr, &s1)); 1987 PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size"); 1988 /* Overlap communication with computation. 
*/ 1989 PetscCall(VecScatterBegin(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1990 } 1991 if (ll) { 1992 PetscCall(VecGetLocalSize(ll, &s1)); 1993 PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size"); 1994 PetscUseTypeMethod(b, diagonalscale, ll, NULL); 1995 } 1996 /* scale the diagonal block */ 1997 PetscUseTypeMethod(a, diagonalscale, ll, rr); 1998 1999 if (rr) { 2000 /* Do a scatter end and then right scale the off-diagonal block */ 2001 PetscCall(VecScatterEnd(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2002 PetscUseTypeMethod(b, diagonalscale, NULL, aij->lvec); 2003 } 2004 PetscFunctionReturn(PETSC_SUCCESS); 2005 } 2006 2007 static PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) 2008 { 2009 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2010 2011 PetscFunctionBegin; 2012 PetscCall(MatSetUnfactored(a->A)); 2013 PetscFunctionReturn(PETSC_SUCCESS); 2014 } 2015 2016 static PetscErrorCode MatEqual_MPIAIJ(Mat A, Mat B, PetscBool *flag) 2017 { 2018 Mat_MPIAIJ *matB = (Mat_MPIAIJ *)B->data, *matA = (Mat_MPIAIJ *)A->data; 2019 Mat a, b, c, d; 2020 PetscBool flg; 2021 2022 PetscFunctionBegin; 2023 a = matA->A; 2024 b = matA->B; 2025 c = matB->A; 2026 d = matB->B; 2027 2028 PetscCall(MatEqual(a, c, &flg)); 2029 if (flg) PetscCall(MatEqual(b, d, &flg)); 2030 PetscCallMPI(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A))); 2031 PetscFunctionReturn(PETSC_SUCCESS); 2032 } 2033 2034 static PetscErrorCode MatCopy_MPIAIJ(Mat A, Mat B, MatStructure str) 2035 { 2036 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2037 Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data; 2038 2039 PetscFunctionBegin; 2040 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
*/ 2041 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 2042 /* because of the column compression in the off-processor part of the matrix a->B, 2043 the number of columns in a->B and b->B may be different, hence we cannot call 2044 the MatCopy() directly on the two parts. If need be, we can provide a more 2045 efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices 2046 then copying the submatrices */ 2047 PetscCall(MatCopy_Basic(A, B, str)); 2048 } else { 2049 PetscCall(MatCopy(a->A, b->A, str)); 2050 PetscCall(MatCopy(a->B, b->B, str)); 2051 } 2052 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 2053 PetscFunctionReturn(PETSC_SUCCESS); 2054 } 2055 2056 /* 2057 Computes the number of nonzeros per row needed for preallocation when X and Y 2058 have different nonzero structure. 2059 */ 2060 PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m, const PetscInt *xi, const PetscInt *xj, const PetscInt *xltog, const PetscInt *yi, const PetscInt *yj, const PetscInt *yltog, PetscInt *nnz) 2061 { 2062 PetscInt i, j, k, nzx, nzy; 2063 2064 PetscFunctionBegin; 2065 /* Set the number of nonzeros in the new matrix */ 2066 for (i = 0; i < m; i++) { 2067 const PetscInt *xjj = PetscSafePointerPlusOffset(xj, xi[i]), *yjj = PetscSafePointerPlusOffset(yj, yi[i]); 2068 nzx = xi[i + 1] - xi[i]; 2069 nzy = yi[i + 1] - yi[i]; 2070 nnz[i] = 0; 2071 for (j = 0, k = 0; j < nzx; j++) { /* Point in X */ 2072 for (; k < nzy && yltog[yjj[k]] < xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */ 2073 if (k < nzy && yltog[yjj[k]] == xltog[xjj[j]]) k++; /* Skip duplicate */ 2074 nnz[i]++; 2075 } 2076 for (; k < nzy; k++) nnz[i]++; 2077 } 2078 PetscFunctionReturn(PETSC_SUCCESS); 2079 } 2080 2081 /* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */ 2082 static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz) 2083 { 
2084 PetscInt m = Y->rmap->N; 2085 Mat_SeqAIJ *x = (Mat_SeqAIJ *)X->data; 2086 Mat_SeqAIJ *y = (Mat_SeqAIJ *)Y->data; 2087 2088 PetscFunctionBegin; 2089 PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz)); 2090 PetscFunctionReturn(PETSC_SUCCESS); 2091 } 2092 2093 static PetscErrorCode MatAXPY_MPIAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str) 2094 { 2095 Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data, *yy = (Mat_MPIAIJ *)Y->data; 2096 2097 PetscFunctionBegin; 2098 if (str == SAME_NONZERO_PATTERN) { 2099 PetscCall(MatAXPY(yy->A, a, xx->A, str)); 2100 PetscCall(MatAXPY(yy->B, a, xx->B, str)); 2101 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 2102 PetscCall(MatAXPY_Basic(Y, a, X, str)); 2103 } else { 2104 Mat B; 2105 PetscInt *nnz_d, *nnz_o; 2106 2107 PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d)); 2108 PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o)); 2109 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B)); 2110 PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name)); 2111 PetscCall(MatSetLayouts(B, Y->rmap, Y->cmap)); 2112 PetscCall(MatSetType(B, ((PetscObject)Y)->type_name)); 2113 PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A, xx->A, nnz_d)); 2114 PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o)); 2115 PetscCall(MatMPIAIJSetPreallocation(B, 0, nnz_d, 0, nnz_o)); 2116 PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str)); 2117 PetscCall(MatHeaderMerge(Y, &B)); 2118 PetscCall(PetscFree(nnz_d)); 2119 PetscCall(PetscFree(nnz_o)); 2120 } 2121 PetscFunctionReturn(PETSC_SUCCESS); 2122 } 2123 2124 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat); 2125 2126 static PetscErrorCode MatConjugate_MPIAIJ(Mat mat) 2127 { 2128 PetscFunctionBegin; 2129 if (PetscDefined(USE_COMPLEX)) { 2130 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2131 2132 PetscCall(MatConjugate_SeqAIJ(aij->A)); 2133 PetscCall(MatConjugate_SeqAIJ(aij->B)); 2134 } 2135 
PetscFunctionReturn(PETSC_SUCCESS); 2136 } 2137 2138 static PetscErrorCode MatRealPart_MPIAIJ(Mat A) 2139 { 2140 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2141 2142 PetscFunctionBegin; 2143 PetscCall(MatRealPart(a->A)); 2144 PetscCall(MatRealPart(a->B)); 2145 PetscFunctionReturn(PETSC_SUCCESS); 2146 } 2147 2148 static PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) 2149 { 2150 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2151 2152 PetscFunctionBegin; 2153 PetscCall(MatImaginaryPart(a->A)); 2154 PetscCall(MatImaginaryPart(a->B)); 2155 PetscFunctionReturn(PETSC_SUCCESS); 2156 } 2157 2158 static PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2159 { 2160 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2161 PetscInt i, *idxb = NULL, m = A->rmap->n; 2162 PetscScalar *vv; 2163 Vec vB, vA; 2164 const PetscScalar *va, *vb; 2165 2166 PetscFunctionBegin; 2167 PetscCall(MatCreateVecs(a->A, NULL, &vA)); 2168 PetscCall(MatGetRowMaxAbs(a->A, vA, idx)); 2169 2170 PetscCall(VecGetArrayRead(vA, &va)); 2171 if (idx) { 2172 for (i = 0; i < m; i++) { 2173 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2174 } 2175 } 2176 2177 PetscCall(MatCreateVecs(a->B, NULL, &vB)); 2178 PetscCall(PetscMalloc1(m, &idxb)); 2179 PetscCall(MatGetRowMaxAbs(a->B, vB, idxb)); 2180 2181 PetscCall(VecGetArrayWrite(v, &vv)); 2182 PetscCall(VecGetArrayRead(vB, &vb)); 2183 for (i = 0; i < m; i++) { 2184 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2185 vv[i] = vb[i]; 2186 if (idx) idx[i] = a->garray[idxb[i]]; 2187 } else { 2188 vv[i] = va[i]; 2189 if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) idx[i] = a->garray[idxb[i]]; 2190 } 2191 } 2192 PetscCall(VecRestoreArrayWrite(v, &vv)); 2193 PetscCall(VecRestoreArrayRead(vA, &va)); 2194 PetscCall(VecRestoreArrayRead(vB, &vb)); 2195 PetscCall(PetscFree(idxb)); 2196 PetscCall(VecDestroy(&vA)); 2197 PetscCall(VecDestroy(&vB)); 2198 PetscFunctionReturn(PETSC_SUCCESS); 2199 } 2200 2201 static 
PetscErrorCode MatGetRowSumAbs_MPIAIJ(Mat A, Vec v) 2202 { 2203 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2204 Vec vB, vA; 2205 2206 PetscFunctionBegin; 2207 PetscCall(MatCreateVecs(a->A, NULL, &vA)); 2208 PetscCall(MatGetRowSumAbs(a->A, vA)); 2209 PetscCall(MatCreateVecs(a->B, NULL, &vB)); 2210 PetscCall(MatGetRowSumAbs(a->B, vB)); 2211 PetscCall(VecAXPY(vA, 1.0, vB)); 2212 PetscCall(VecDestroy(&vB)); 2213 PetscCall(VecCopy(vA, v)); 2214 PetscCall(VecDestroy(&vA)); 2215 PetscFunctionReturn(PETSC_SUCCESS); 2216 } 2217 2218 static PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) 2219 { 2220 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 2221 PetscInt m = A->rmap->n, n = A->cmap->n; 2222 PetscInt cstart = A->cmap->rstart, cend = A->cmap->rend; 2223 PetscInt *cmap = mat->garray; 2224 PetscInt *diagIdx, *offdiagIdx; 2225 Vec diagV, offdiagV; 2226 PetscScalar *a, *diagA, *offdiagA; 2227 const PetscScalar *ba, *bav; 2228 PetscInt r, j, col, ncols, *bi, *bj; 2229 Mat B = mat->B; 2230 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 2231 2232 PetscFunctionBegin; 2233 /* When a process holds entire A and other processes have no entry */ 2234 if (A->cmap->N == n) { 2235 PetscCall(VecGetArrayWrite(v, &diagA)); 2236 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV)); 2237 PetscCall(MatGetRowMinAbs(mat->A, diagV, idx)); 2238 PetscCall(VecDestroy(&diagV)); 2239 PetscCall(VecRestoreArrayWrite(v, &diagA)); 2240 PetscFunctionReturn(PETSC_SUCCESS); 2241 } else if (n == 0) { 2242 if (m) { 2243 PetscCall(VecGetArrayWrite(v, &a)); 2244 for (r = 0; r < m; r++) { 2245 a[r] = 0.0; 2246 if (idx) idx[r] = -1; 2247 } 2248 PetscCall(VecRestoreArrayWrite(v, &a)); 2249 } 2250 PetscFunctionReturn(PETSC_SUCCESS); 2251 } 2252 2253 PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx)); 2254 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2255 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2256 PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx)); 2257 2258 /* Get 
offdiagIdx[] for implicit 0.0 */ 2259 PetscCall(MatSeqAIJGetArrayRead(B, &bav)); 2260 ba = bav; 2261 bi = b->i; 2262 bj = b->j; 2263 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2264 for (r = 0; r < m; r++) { 2265 ncols = bi[r + 1] - bi[r]; 2266 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2267 offdiagA[r] = *ba; 2268 offdiagIdx[r] = cmap[0]; 2269 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2270 offdiagA[r] = 0.0; 2271 2272 /* Find first hole in the cmap */ 2273 for (j = 0; j < ncols; j++) { 2274 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2275 if (col > j && j < cstart) { 2276 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2277 break; 2278 } else if (col > j + n && j >= cstart) { 2279 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2280 break; 2281 } 2282 } 2283 if (j == ncols && ncols < A->cmap->N - n) { 2284 /* a hole is outside compressed Bcols */ 2285 if (ncols == 0) { 2286 if (cstart) { 2287 offdiagIdx[r] = 0; 2288 } else offdiagIdx[r] = cend; 2289 } else { /* ncols > 0 */ 2290 offdiagIdx[r] = cmap[ncols - 1] + 1; 2291 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2292 } 2293 } 2294 } 2295 2296 for (j = 0; j < ncols; j++) { 2297 if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) { 2298 offdiagA[r] = *ba; 2299 offdiagIdx[r] = cmap[*bj]; 2300 } 2301 ba++; 2302 bj++; 2303 } 2304 } 2305 2306 PetscCall(VecGetArrayWrite(v, &a)); 2307 PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA)); 2308 for (r = 0; r < m; ++r) { 2309 if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) { 2310 a[r] = diagA[r]; 2311 if (idx) idx[r] = cstart + diagIdx[r]; 2312 } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) { 2313 a[r] = diagA[r]; 2314 if (idx) { 2315 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2316 idx[r] = cstart + diagIdx[r]; 2317 } else idx[r] = offdiagIdx[r]; 2318 } 2319 } else { 2320 a[r] = offdiagA[r]; 2321 if (idx) 
/*
  MatGetRowMin_MPIAIJ - For each locally owned row, find the entry with the smallest real part
  and, optionally, the global column where it occurs.

  Input Parameter:
. A - the MPIAIJ matrix

  Output Parameters:
+ v   - receives one value per local row
- idx - if non-NULL in the general path, receives the global column index of the chosen entry

  Notes:
  Three cases are handled:
  - this rank's local column count equals the global column count: the work is delegated to
    MatGetRowMin() on the diagonal block, writing directly into the storage of v;
  - this rank has no local columns: every row gets PETSC_MAX_REAL and index -1;
  - otherwise the diagonal-block minimum (computed by MatGetRowMin() on mat->A) is combined with
    an off-diagonal minimum computed here from the stored values of mat->B. A row of B that stores
    fewer than N - n entries starts from the candidate value 0.0.

  On a tie between the diagonal and off-diagonal candidates the smaller global column index wins.
*/
static PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
  PetscInt           m = A->rmap->n, n = A->cmap->n;
  PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray;
  PetscInt          *diagIdx, *offdiagIdx;
  Vec                diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt           r, j, col, ncols, *bi, *bj;
  Mat                B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    /* Wrap the storage of v in a sequential vector so the diagonal block writes straight into it */
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMin(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(PETSC_SUCCESS);
  } else if (n == 0) {
    /* No local columns: fill with the sentinel value and an invalid column index */
    if (m) {
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = PETSC_MAX_REAL;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  /* Work arrays: per-row column index of the diagonal-block and off-diagonal-block candidates */
  PetscCall(PetscCalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMin(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r]   = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse, so an implicit 0.0 exists and the minimum is 0.0 or lower */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; /* skip over the locally owned column range */
        }
      }
    }

    /* Scan the stored entries of this row; ba and bj advance across rows and are never reset */
    for (j = 0; j < ncols; j++) {
      if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {
        offdiagA[r]   = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  /* Merge the diagonal and off-diagonal candidates row by row */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r]; /* diagonal-block index is local; shift to global */
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      /* tie: report the smaller global column index */
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(PETSC_SUCCESS);
}
PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2496 for (r = 0; r < m; r++) { 2497 ncols = bi[r + 1] - bi[r]; 2498 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2499 offdiagA[r] = *ba; 2500 offdiagIdx[r] = cmap[0]; 2501 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2502 offdiagA[r] = 0.0; 2503 2504 /* Find first hole in the cmap */ 2505 for (j = 0; j < ncols; j++) { 2506 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2507 if (col > j && j < cstart) { 2508 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2509 break; 2510 } else if (col > j + n && j >= cstart) { 2511 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2512 break; 2513 } 2514 } 2515 if (j == ncols && ncols < A->cmap->N - n) { 2516 /* a hole is outside compressed Bcols */ 2517 if (ncols == 0) { 2518 if (cstart) { 2519 offdiagIdx[r] = 0; 2520 } else offdiagIdx[r] = cend; 2521 } else { /* ncols > 0 */ 2522 offdiagIdx[r] = cmap[ncols - 1] + 1; 2523 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2524 } 2525 } 2526 } 2527 2528 for (j = 0; j < ncols; j++) { 2529 if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) { 2530 offdiagA[r] = *ba; 2531 offdiagIdx[r] = cmap[*bj]; 2532 } 2533 ba++; 2534 bj++; 2535 } 2536 } 2537 2538 PetscCall(VecGetArrayWrite(v, &a)); 2539 PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA)); 2540 for (r = 0; r < m; ++r) { 2541 if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) { 2542 a[r] = diagA[r]; 2543 if (idx) idx[r] = cstart + diagIdx[r]; 2544 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2545 a[r] = diagA[r]; 2546 if (idx) { 2547 if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2548 idx[r] = cstart + diagIdx[r]; 2549 } else idx[r] = offdiagIdx[r]; 2550 } 2551 } else { 2552 a[r] = offdiagA[r]; 2553 if (idx) idx[r] = offdiagIdx[r]; 2554 } 2555 } 2556 PetscCall(MatSeqAIJRestoreArrayRead(B, &bav)); 2557 PetscCall(VecRestoreArrayWrite(v, &a)); 2558 
PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA)); 2559 PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 2560 PetscCall(VecDestroy(&diagV)); 2561 PetscCall(VecDestroy(&offdiagV)); 2562 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2563 PetscFunctionReturn(PETSC_SUCCESS); 2564 } 2565 2566 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat, Mat *newmat) 2567 { 2568 Mat *dummy; 2569 2570 PetscFunctionBegin; 2571 PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat, MAT_DO_NOT_GET_VALUES, MAT_INITIAL_MATRIX, &dummy)); 2572 *newmat = *dummy; 2573 PetscCall(PetscFree(dummy)); 2574 PetscFunctionReturn(PETSC_SUCCESS); 2575 } 2576 2577 static PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A, const PetscScalar **values) 2578 { 2579 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2580 2581 PetscFunctionBegin; 2582 PetscCall(MatInvertBlockDiagonal(a->A, values)); 2583 A->factorerrortype = a->A->factorerrortype; 2584 PetscFunctionReturn(PETSC_SUCCESS); 2585 } 2586 2587 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x, PetscRandom rctx) 2588 { 2589 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)x->data; 2590 2591 PetscFunctionBegin; 2592 PetscCheck(x->assembled || x->preallocated, PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed"); 2593 PetscCall(MatSetRandom(aij->A, rctx)); 2594 if (x->assembled) { 2595 PetscCall(MatSetRandom(aij->B, rctx)); 2596 } else { 2597 PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B, x->cmap->rstart, x->cmap->rend, rctx)); 2598 } 2599 PetscCall(MatAssemblyBegin(x, MAT_FINAL_ASSEMBLY)); 2600 PetscCall(MatAssemblyEnd(x, MAT_FINAL_ASSEMBLY)); 2601 PetscFunctionReturn(PETSC_SUCCESS); 2602 } 2603 2604 static PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A, PetscBool sc) 2605 { 2606 PetscFunctionBegin; 2607 if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable; 2608 else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ; 2609 
PetscFunctionReturn(PETSC_SUCCESS); 2610 } 2611 2612 /*@ 2613 MatMPIAIJGetNumberNonzeros - gets the number of nonzeros in the matrix on this MPI rank 2614 2615 Not Collective 2616 2617 Input Parameter: 2618 . A - the matrix 2619 2620 Output Parameter: 2621 . nz - the number of nonzeros 2622 2623 Level: advanced 2624 2625 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ` 2626 @*/ 2627 PetscErrorCode MatMPIAIJGetNumberNonzeros(Mat A, PetscCount *nz) 2628 { 2629 Mat_MPIAIJ *maij = (Mat_MPIAIJ *)A->data; 2630 Mat_SeqAIJ *aaij = (Mat_SeqAIJ *)maij->A->data, *baij = (Mat_SeqAIJ *)maij->B->data; 2631 PetscBool isaij; 2632 2633 PetscFunctionBegin; 2634 PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATMPIAIJ, &isaij)); 2635 PetscCheck(isaij, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Not for type %s", ((PetscObject)A)->type_name); 2636 *nz = aaij->i[A->rmap->n] + baij->i[A->rmap->n]; 2637 PetscFunctionReturn(PETSC_SUCCESS); 2638 } 2639 2640 /*@ 2641 MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap 2642 2643 Collective 2644 2645 Input Parameters: 2646 + A - the matrix 2647 - sc - `PETSC_TRUE` indicates use the scalable algorithm (default is not to use the scalable algorithm) 2648 2649 Level: advanced 2650 2651 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ` 2652 @*/ 2653 PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A, PetscBool sc) 2654 { 2655 PetscFunctionBegin; 2656 PetscTryMethod(A, "MatMPIAIJSetUseScalableIncreaseOverlap_C", (Mat, PetscBool), (A, sc)); 2657 PetscFunctionReturn(PETSC_SUCCESS); 2658 } 2659 2660 PetscErrorCode MatSetFromOptions_MPIAIJ(Mat A, PetscOptionItems PetscOptionsObject) 2661 { 2662 PetscBool sc = PETSC_FALSE, flg; 2663 2664 PetscFunctionBegin; 2665 PetscOptionsHeadBegin(PetscOptionsObject, "MPIAIJ options"); 2666 if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE; 2667 PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable", "Use 
a scalable algorithm to compute the overlap", "MatIncreaseOverlap", sc, &sc, &flg)); 2668 if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, sc)); 2669 PetscOptionsHeadEnd(); 2670 PetscFunctionReturn(PETSC_SUCCESS); 2671 } 2672 2673 static PetscErrorCode MatShift_MPIAIJ(Mat Y, PetscScalar a) 2674 { 2675 Mat_MPIAIJ *maij = (Mat_MPIAIJ *)Y->data; 2676 Mat_SeqAIJ *aij = (Mat_SeqAIJ *)maij->A->data; 2677 2678 PetscFunctionBegin; 2679 if (!Y->preallocated) { 2680 PetscCall(MatMPIAIJSetPreallocation(Y, 1, NULL, 0, NULL)); 2681 } else if (!aij->nz) { /* It does not matter if diagonals of Y only partially lie in maij->A. We just need an estimated preallocation. */ 2682 PetscInt nonew = aij->nonew; 2683 PetscCall(MatSeqAIJSetPreallocation(maij->A, 1, NULL)); 2684 aij->nonew = nonew; 2685 } 2686 PetscCall(MatShift_Basic(Y, a)); 2687 PetscFunctionReturn(PETSC_SUCCESS); 2688 } 2689 2690 static PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A, PetscBool *missing, PetscInt *d) 2691 { 2692 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2693 2694 PetscFunctionBegin; 2695 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices"); 2696 PetscCall(MatMissingDiagonal(a->A, missing, d)); 2697 if (d) { 2698 PetscInt rstart; 2699 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 2700 *d += rstart; 2701 } 2702 PetscFunctionReturn(PETSC_SUCCESS); 2703 } 2704 2705 static PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A, PetscInt nblocks, const PetscInt *bsizes, PetscScalar *diag) 2706 { 2707 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2708 2709 PetscFunctionBegin; 2710 PetscCall(MatInvertVariableBlockDiagonal(a->A, nblocks, bsizes, diag)); 2711 PetscFunctionReturn(PETSC_SUCCESS); 2712 } 2713 2714 static PetscErrorCode MatEliminateZeros_MPIAIJ(Mat A, PetscBool keep) 2715 { 2716 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2717 2718 PetscFunctionBegin; 2719 PetscCall(MatEliminateZeros_SeqAIJ(a->A, keep)); // possibly keep zero diagonal coefficients 
2720 PetscCall(MatEliminateZeros_SeqAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients 2721 PetscFunctionReturn(PETSC_SUCCESS); 2722 } 2723 2724 static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ, 2725 MatGetRow_MPIAIJ, 2726 MatRestoreRow_MPIAIJ, 2727 MatMult_MPIAIJ, 2728 /* 4*/ MatMultAdd_MPIAIJ, 2729 MatMultTranspose_MPIAIJ, 2730 MatMultTransposeAdd_MPIAIJ, 2731 NULL, 2732 NULL, 2733 NULL, 2734 /*10*/ NULL, 2735 NULL, 2736 NULL, 2737 MatSOR_MPIAIJ, 2738 MatTranspose_MPIAIJ, 2739 /*15*/ MatGetInfo_MPIAIJ, 2740 MatEqual_MPIAIJ, 2741 MatGetDiagonal_MPIAIJ, 2742 MatDiagonalScale_MPIAIJ, 2743 MatNorm_MPIAIJ, 2744 /*20*/ MatAssemblyBegin_MPIAIJ, 2745 MatAssemblyEnd_MPIAIJ, 2746 MatSetOption_MPIAIJ, 2747 MatZeroEntries_MPIAIJ, 2748 /*24*/ MatZeroRows_MPIAIJ, 2749 NULL, 2750 NULL, 2751 NULL, 2752 NULL, 2753 /*29*/ MatSetUp_MPI_Hash, 2754 NULL, 2755 NULL, 2756 MatGetDiagonalBlock_MPIAIJ, 2757 NULL, 2758 /*34*/ MatDuplicate_MPIAIJ, 2759 NULL, 2760 NULL, 2761 NULL, 2762 NULL, 2763 /*39*/ MatAXPY_MPIAIJ, 2764 MatCreateSubMatrices_MPIAIJ, 2765 MatIncreaseOverlap_MPIAIJ, 2766 MatGetValues_MPIAIJ, 2767 MatCopy_MPIAIJ, 2768 /*44*/ MatGetRowMax_MPIAIJ, 2769 MatScale_MPIAIJ, 2770 MatShift_MPIAIJ, 2771 MatDiagonalSet_MPIAIJ, 2772 MatZeroRowsColumns_MPIAIJ, 2773 /*49*/ MatSetRandom_MPIAIJ, 2774 MatGetRowIJ_MPIAIJ, 2775 MatRestoreRowIJ_MPIAIJ, 2776 NULL, 2777 NULL, 2778 /*54*/ MatFDColoringCreate_MPIXAIJ, 2779 NULL, 2780 MatSetUnfactored_MPIAIJ, 2781 MatPermute_MPIAIJ, 2782 NULL, 2783 /*59*/ MatCreateSubMatrix_MPIAIJ, 2784 MatDestroy_MPIAIJ, 2785 MatView_MPIAIJ, 2786 NULL, 2787 NULL, 2788 /*64*/ NULL, 2789 MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ, 2790 NULL, 2791 NULL, 2792 NULL, 2793 /*69*/ MatGetRowMaxAbs_MPIAIJ, 2794 MatGetRowMinAbs_MPIAIJ, 2795 NULL, 2796 NULL, 2797 NULL, 2798 NULL, 2799 /*75*/ MatFDColoringApply_AIJ, 2800 MatSetFromOptions_MPIAIJ, 2801 NULL, 2802 NULL, 2803 MatFindZeroDiagonals_MPIAIJ, 2804 /*80*/ NULL, 2805 NULL, 2806 NULL, 2807 /*83*/ 
MatLoad_MPIAIJ, 2808 NULL, 2809 NULL, 2810 NULL, 2811 NULL, 2812 NULL, 2813 /*89*/ NULL, 2814 NULL, 2815 MatMatMultNumeric_MPIAIJ_MPIAIJ, 2816 NULL, 2817 NULL, 2818 /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ, 2819 NULL, 2820 NULL, 2821 NULL, 2822 MatBindToCPU_MPIAIJ, 2823 /*99*/ MatProductSetFromOptions_MPIAIJ, 2824 NULL, 2825 NULL, 2826 MatConjugate_MPIAIJ, 2827 NULL, 2828 /*104*/ MatSetValuesRow_MPIAIJ, 2829 MatRealPart_MPIAIJ, 2830 MatImaginaryPart_MPIAIJ, 2831 NULL, 2832 NULL, 2833 /*109*/ NULL, 2834 NULL, 2835 MatGetRowMin_MPIAIJ, 2836 NULL, 2837 MatMissingDiagonal_MPIAIJ, 2838 /*114*/ MatGetSeqNonzeroStructure_MPIAIJ, 2839 NULL, 2840 MatGetGhosts_MPIAIJ, 2841 NULL, 2842 NULL, 2843 /*119*/ MatMultDiagonalBlock_MPIAIJ, 2844 NULL, 2845 NULL, 2846 NULL, 2847 MatGetMultiProcBlock_MPIAIJ, 2848 /*124*/ MatFindNonzeroRows_MPIAIJ, 2849 MatGetColumnReductions_MPIAIJ, 2850 MatInvertBlockDiagonal_MPIAIJ, 2851 MatInvertVariableBlockDiagonal_MPIAIJ, 2852 MatCreateSubMatricesMPI_MPIAIJ, 2853 /*129*/ NULL, 2854 NULL, 2855 NULL, 2856 MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ, 2857 NULL, 2858 /*134*/ NULL, 2859 NULL, 2860 NULL, 2861 NULL, 2862 NULL, 2863 /*139*/ MatSetBlockSizes_MPIAIJ, 2864 NULL, 2865 NULL, 2866 MatFDColoringSetUp_MPIXAIJ, 2867 MatFindOffBlockDiagonalEntries_MPIAIJ, 2868 MatCreateMPIMatConcatenateSeqMat_MPIAIJ, 2869 /*145*/ NULL, 2870 NULL, 2871 NULL, 2872 MatCreateGraph_Simple_AIJ, 2873 NULL, 2874 /*150*/ NULL, 2875 MatEliminateZeros_MPIAIJ, 2876 MatGetRowSumAbs_MPIAIJ, 2877 NULL, 2878 NULL, 2879 /*155*/ NULL, 2880 MatCopyHashToXAIJ_MPI_Hash}; 2881 2882 static PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) 2883 { 2884 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2885 2886 PetscFunctionBegin; 2887 PetscCall(MatStoreValues(aij->A)); 2888 PetscCall(MatStoreValues(aij->B)); 2889 PetscFunctionReturn(PETSC_SUCCESS); 2890 } 2891 2892 static PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) 2893 { 2894 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2895 2896 PetscFunctionBegin; 
/*
  MatMPIAIJSetPreallocation_MPIAIJ - (Re)create the two sequential blocks of an MPIAIJ matrix and
  preallocate them.

  Input Parameters:
+ B     - the MPIAIJ matrix
. d_nz  - forwarded to MatSeqAIJSetPreallocation() for the diagonal block
. d_nnz - forwarded to MatSeqAIJSetPreallocation() for the diagonal block
. o_nz  - forwarded to MatSeqAIJSetPreallocation() for the off-diagonal block
- o_nnz - forwarded to MatSeqAIJSetPreallocation() for the off-diagonal block

  Notes:
  Any existing column map, garray, local work vector and scatter are destroyed, and both blocks are
  destroyed and rebuilt as MATSEQAIJ. On return B is marked preallocated and not assembled.
*/
PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;
  PetscMPIInt size;

  PetscFunctionBegin;
  /* Leaving hash-based insertion: put back the operations table saved in b->cops */
  if (B->hash_active) {
    B->ops[0]      = b->cops;
    B->hash_active = PETSC_FALSE;
  }
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));

  /* Drop everything derived from the previous off-diagonal column layout */
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&b->colmap));
#else
  PetscCall(PetscFree(b->colmap));
#endif
  PetscCall(PetscFree(b->garray));
  PetscCall(VecDestroy(&b->lvec));
  PetscCall(VecScatterDestroy(&b->Mvctx));

  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));

  /* NOTE(review): the Get/Restore macro pair presumably carries options of the old block over to the new one - confirm against the macro definitions */
  MatSeqXAIJGetOptions_Private(b->B);
  PetscCall(MatDestroy(&b->B));
  PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
  /* Off-diagonal block spans all global columns, or has zero columns on a single-rank communicator */
  PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
  PetscCall(MatSetBlockSizesFromMats(b->B, B, B));
  PetscCall(MatSetType(b->B, MATSEQAIJ));
  MatSeqXAIJRestoreOptions_Private(b->B);

  /* Same rebuild for the diagonal block, sized by the local row and column counts */
  MatSeqXAIJGetOptions_Private(b->A);
  PetscCall(MatDestroy(&b->A));
  PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
  PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
  PetscCall(MatSetBlockSizesFromMats(b->A, B, B));
  PetscCall(MatSetType(b->A, MATSEQAIJ));
  MatSeqXAIJRestoreOptions_Private(b->A);

  PetscCall(MatSeqAIJSetPreallocation(b->A, d_nz, d_nnz));
  PetscCall(MatSeqAIJSetPreallocation(b->B, o_nz, o_nnz));
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}
PetscCall(MatDisAssemble_MPIAIJ(B, PETSC_TRUE)); 2970 PetscCall(VecScatterDestroy(&b->Mvctx)); 2971 2972 B->preallocated = PETSC_TRUE; 2973 B->was_assembled = PETSC_FALSE; 2974 B->assembled = PETSC_FALSE; 2975 /* Log that the state of this object has changed; this will help guarantee that preconditioners get re-setup */ 2976 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 2977 PetscFunctionReturn(PETSC_SUCCESS); 2978 } 2979 2980 PetscErrorCode MatDuplicate_MPIAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat) 2981 { 2982 Mat mat; 2983 Mat_MPIAIJ *a, *oldmat = (Mat_MPIAIJ *)matin->data; 2984 2985 PetscFunctionBegin; 2986 *newmat = NULL; 2987 PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat)); 2988 PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N)); 2989 PetscCall(MatSetBlockSizesFromMats(mat, matin, matin)); 2990 PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name)); 2991 a = (Mat_MPIAIJ *)mat->data; 2992 2993 mat->factortype = matin->factortype; 2994 mat->assembled = matin->assembled; 2995 mat->insertmode = NOT_SET_VALUES; 2996 2997 a->size = oldmat->size; 2998 a->rank = oldmat->rank; 2999 a->donotstash = oldmat->donotstash; 3000 a->roworiented = oldmat->roworiented; 3001 a->rowindices = NULL; 3002 a->rowvalues = NULL; 3003 a->getrowactive = PETSC_FALSE; 3004 3005 PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap)); 3006 PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap)); 3007 if (matin->hash_active) { 3008 PetscCall(MatSetUp(mat)); 3009 } else { 3010 mat->preallocated = matin->preallocated; 3011 if (oldmat->colmap) { 3012 #if defined(PETSC_USE_CTABLE) 3013 PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap)); 3014 #else 3015 PetscCall(PetscMalloc1(mat->cmap->N, &a->colmap)); 3016 PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, mat->cmap->N)); 3017 #endif 3018 } else a->colmap = NULL; 3019 if (oldmat->garray) { 3020 PetscInt len; 3021 len = oldmat->B->cmap->n; 3022 
PetscCall(PetscMalloc1(len + 1, &a->garray)); 3023 if (len) PetscCall(PetscArraycpy(a->garray, oldmat->garray, len)); 3024 } else a->garray = NULL; 3025 3026 /* It may happen MatDuplicate is called with a non-assembled matrix 3027 In fact, MatDuplicate only requires the matrix to be preallocated 3028 This may happen inside a DMCreateMatrix_Shell */ 3029 if (oldmat->lvec) PetscCall(VecDuplicate(oldmat->lvec, &a->lvec)); 3030 if (oldmat->Mvctx) { 3031 a->Mvctx = oldmat->Mvctx; 3032 PetscCall(PetscObjectReference((PetscObject)oldmat->Mvctx)); 3033 } 3034 PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A)); 3035 PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B)); 3036 } 3037 PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist)); 3038 *newmat = mat; 3039 PetscFunctionReturn(PETSC_SUCCESS); 3040 } 3041 3042 PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) 3043 { 3044 PetscBool isbinary, ishdf5; 3045 3046 PetscFunctionBegin; 3047 PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1); 3048 PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2); 3049 /* force binary viewer to load .info file if it has not yet done so */ 3050 PetscCall(PetscViewerSetUp(viewer)); 3051 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 3052 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5)); 3053 if (isbinary) { 3054 PetscCall(MatLoad_MPIAIJ_Binary(newMat, viewer)); 3055 } else if (ishdf5) { 3056 #if defined(PETSC_HAVE_HDF5) 3057 PetscCall(MatLoad_AIJ_HDF5(newMat, viewer)); 3058 #else 3059 SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5"); 3060 #endif 3061 } else { 3062 SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name); 3063 } 3064 
PetscFunctionReturn(PETSC_SUCCESS); 3065 } 3066 3067 PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer) 3068 { 3069 PetscInt header[4], M, N, m, nz, rows, cols, sum, i; 3070 PetscInt *rowidxs, *colidxs; 3071 PetscScalar *matvals; 3072 3073 PetscFunctionBegin; 3074 PetscCall(PetscViewerSetUp(viewer)); 3075 3076 /* read in matrix header */ 3077 PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT)); 3078 PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file"); 3079 M = header[1]; 3080 N = header[2]; 3081 nz = header[3]; 3082 PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M); 3083 PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N); 3084 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIAIJ"); 3085 3086 /* set block sizes from the viewer's .info file */ 3087 PetscCall(MatLoad_Binary_BlockSizes(mat, viewer)); 3088 /* set global sizes if not set already */ 3089 if (mat->rmap->N < 0) mat->rmap->N = M; 3090 if (mat->cmap->N < 0) mat->cmap->N = N; 3091 PetscCall(PetscLayoutSetUp(mat->rmap)); 3092 PetscCall(PetscLayoutSetUp(mat->cmap)); 3093 3094 /* check if the matrix sizes are correct */ 3095 PetscCall(MatGetSize(mat, &rows, &cols)); 3096 PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols); 3097 3098 /* read in row lengths and build row indices */ 3099 PetscCall(MatGetLocalSize(mat, &m, NULL)); 3100 PetscCall(PetscMalloc1(m + 1, &rowidxs)); 3101 PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, 
PETSC_INT)); 3102 rowidxs[0] = 0; 3103 for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i]; 3104 if (nz != PETSC_INT_MAX) { 3105 PetscCallMPI(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer))); 3106 PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum); 3107 } 3108 3109 /* read in column indices and matrix values */ 3110 PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals)); 3111 PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 3112 PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 3113 /* store matrix indices and values */ 3114 PetscCall(MatMPIAIJSetPreallocationCSR(mat, rowidxs, colidxs, matvals)); 3115 PetscCall(PetscFree(rowidxs)); 3116 PetscCall(PetscFree2(colidxs, matvals)); 3117 PetscFunctionReturn(PETSC_SUCCESS); 3118 } 3119 3120 /* Not scalable because of ISAllGather() unless getting all columns. 
*/ 3121 static PetscErrorCode ISGetSeqIS_Private(Mat mat, IS iscol, IS *isseq) 3122 { 3123 IS iscol_local; 3124 PetscBool isstride; 3125 PetscMPIInt gisstride = 0; 3126 3127 PetscFunctionBegin; 3128 /* check if we are grabbing all columns*/ 3129 PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &isstride)); 3130 3131 if (isstride) { 3132 PetscInt start, len, mstart, mlen; 3133 PetscCall(ISStrideGetInfo(iscol, &start, NULL)); 3134 PetscCall(ISGetLocalSize(iscol, &len)); 3135 PetscCall(MatGetOwnershipRangeColumn(mat, &mstart, &mlen)); 3136 if (mstart == start && mlen - mstart == len) gisstride = 1; 3137 } 3138 3139 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &gisstride, 1, MPI_INT, MPI_MIN, PetscObjectComm((PetscObject)mat))); 3140 if (gisstride) { 3141 PetscInt N; 3142 PetscCall(MatGetSize(mat, NULL, &N)); 3143 PetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &iscol_local)); 3144 PetscCall(ISSetIdentity(iscol_local)); 3145 PetscCall(PetscInfo(mat, "Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n")); 3146 } else { 3147 PetscInt cbs; 3148 PetscCall(ISGetBlockSize(iscol, &cbs)); 3149 PetscCall(ISAllGather(iscol, &iscol_local)); 3150 PetscCall(ISSetBlockSize(iscol_local, cbs)); 3151 } 3152 3153 *isseq = iscol_local; 3154 PetscFunctionReturn(PETSC_SUCCESS); 3155 } 3156 3157 /* 3158 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local 3159 (see MatCreateSubMatrix_MPIAIJ_nonscalable) 3160 3161 Input Parameters: 3162 + mat - matrix 3163 . isrow - parallel row index set; its local indices are a subset of local columns of `mat`, 3164 i.e., mat->rstart <= isrow[i] < mat->rend 3165 - iscol - parallel column index set; its local indices are a subset of local columns of `mat`, 3166 i.e., mat->cstart <= iscol[i] < mat->cend 3167 3168 Output Parameters: 3169 + isrow_d - sequential row index set for retrieving mat->A 3170 . iscol_d - sequential column index set for retrieving mat->A 3171 . 
iscol_o - sequential column index set for retrieving mat->B 3172 - garray - column map; garray[i] indicates global location of iscol_o[i] in `iscol` 3173 */ 3174 static PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat, IS isrow, IS iscol, IS *isrow_d, IS *iscol_d, IS *iscol_o, PetscInt *garray[]) 3175 { 3176 Vec x, cmap; 3177 const PetscInt *is_idx; 3178 PetscScalar *xarray, *cmaparray; 3179 PetscInt ncols, isstart, *idx, m, rstart, *cmap1, count; 3180 Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data; 3181 Mat B = a->B; 3182 Vec lvec = a->lvec, lcmap; 3183 PetscInt i, cstart, cend, Bn = B->cmap->N; 3184 MPI_Comm comm; 3185 VecScatter Mvctx = a->Mvctx; 3186 3187 PetscFunctionBegin; 3188 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3189 PetscCall(ISGetLocalSize(iscol, &ncols)); 3190 3191 /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */ 3192 PetscCall(MatCreateVecs(mat, &x, NULL)); 3193 PetscCall(VecSet(x, -1.0)); 3194 PetscCall(VecDuplicate(x, &cmap)); 3195 PetscCall(VecSet(cmap, -1.0)); 3196 3197 /* Get start indices */ 3198 PetscCallMPI(MPI_Scan(&ncols, &isstart, 1, MPIU_INT, MPI_SUM, comm)); 3199 isstart -= ncols; 3200 PetscCall(MatGetOwnershipRangeColumn(mat, &cstart, &cend)); 3201 3202 PetscCall(ISGetIndices(iscol, &is_idx)); 3203 PetscCall(VecGetArray(x, &xarray)); 3204 PetscCall(VecGetArray(cmap, &cmaparray)); 3205 PetscCall(PetscMalloc1(ncols, &idx)); 3206 for (i = 0; i < ncols; i++) { 3207 xarray[is_idx[i] - cstart] = (PetscScalar)is_idx[i]; 3208 cmaparray[is_idx[i] - cstart] = i + isstart; /* global index of iscol[i] */ 3209 idx[i] = is_idx[i] - cstart; /* local index of iscol[i] */ 3210 } 3211 PetscCall(VecRestoreArray(x, &xarray)); 3212 PetscCall(VecRestoreArray(cmap, &cmaparray)); 3213 PetscCall(ISRestoreIndices(iscol, &is_idx)); 3214 3215 /* Get iscol_d */ 3216 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, iscol_d)); 3217 PetscCall(ISGetBlockSize(iscol, &i)); 3218 
PetscCall(ISSetBlockSize(*iscol_d, i)); 3219 3220 /* Get isrow_d */ 3221 PetscCall(ISGetLocalSize(isrow, &m)); 3222 rstart = mat->rmap->rstart; 3223 PetscCall(PetscMalloc1(m, &idx)); 3224 PetscCall(ISGetIndices(isrow, &is_idx)); 3225 for (i = 0; i < m; i++) idx[i] = is_idx[i] - rstart; 3226 PetscCall(ISRestoreIndices(isrow, &is_idx)); 3227 3228 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idx, PETSC_OWN_POINTER, isrow_d)); 3229 PetscCall(ISGetBlockSize(isrow, &i)); 3230 PetscCall(ISSetBlockSize(*isrow_d, i)); 3231 3232 /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */ 3233 PetscCall(VecScatterBegin(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD)); 3234 PetscCall(VecScatterEnd(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD)); 3235 3236 PetscCall(VecDuplicate(lvec, &lcmap)); 3237 3238 PetscCall(VecScatterBegin(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD)); 3239 PetscCall(VecScatterEnd(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD)); 3240 3241 /* (3) create sequential iscol_o (a subset of iscol) and isgarray */ 3242 /* off-process column indices */ 3243 count = 0; 3244 PetscCall(PetscMalloc1(Bn, &idx)); 3245 PetscCall(PetscMalloc1(Bn, &cmap1)); 3246 3247 PetscCall(VecGetArray(lvec, &xarray)); 3248 PetscCall(VecGetArray(lcmap, &cmaparray)); 3249 for (i = 0; i < Bn; i++) { 3250 if (PetscRealPart(xarray[i]) > -1.0) { 3251 idx[count] = i; /* local column index in off-diagonal part B */ 3252 cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */ 3253 count++; 3254 } 3255 } 3256 PetscCall(VecRestoreArray(lvec, &xarray)); 3257 PetscCall(VecRestoreArray(lcmap, &cmaparray)); 3258 3259 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_COPY_VALUES, iscol_o)); 3260 /* cannot ensure iscol_o has same blocksize as iscol! 
*/ 3261 3262 PetscCall(PetscFree(idx)); 3263 *garray = cmap1; 3264 3265 PetscCall(VecDestroy(&x)); 3266 PetscCall(VecDestroy(&cmap)); 3267 PetscCall(VecDestroy(&lcmap)); 3268 PetscFunctionReturn(PETSC_SUCCESS); 3269 } 3270 3271 /* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */ 3272 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *submat) 3273 { 3274 Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data, *asub; 3275 Mat M = NULL; 3276 MPI_Comm comm; 3277 IS iscol_d, isrow_d, iscol_o; 3278 Mat Asub = NULL, Bsub = NULL; 3279 PetscInt n, count, M_size, N_size; 3280 3281 PetscFunctionBegin; 3282 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3283 3284 if (call == MAT_REUSE_MATRIX) { 3285 /* Retrieve isrow_d, iscol_d and iscol_o from submat */ 3286 PetscCall(PetscObjectQuery((PetscObject)*submat, "isrow_d", (PetscObject *)&isrow_d)); 3287 PetscCheck(isrow_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "isrow_d passed in was not used before, cannot reuse"); 3288 3289 PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_d", (PetscObject *)&iscol_d)); 3290 PetscCheck(iscol_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_d passed in was not used before, cannot reuse"); 3291 3292 PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_o", (PetscObject *)&iscol_o)); 3293 PetscCheck(iscol_o, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_o passed in was not used before, cannot reuse"); 3294 3295 /* Update diagonal and off-diagonal portions of submat */ 3296 asub = (Mat_MPIAIJ *)(*submat)->data; 3297 PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->A)); 3298 PetscCall(ISGetLocalSize(iscol_o, &n)); 3299 if (n) PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->B)); 3300 PetscCall(MatAssemblyBegin(*submat, MAT_FINAL_ASSEMBLY)); 3301 PetscCall(MatAssemblyEnd(*submat, 
MAT_FINAL_ASSEMBLY)); 3302 3303 } else { /* call == MAT_INITIAL_MATRIX) */ 3304 PetscInt *garray, *garray_compact; 3305 PetscInt BsubN; 3306 3307 /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */ 3308 PetscCall(ISGetSeqIS_SameColDist_Private(mat, isrow, iscol, &isrow_d, &iscol_d, &iscol_o, &garray)); 3309 3310 /* Create local submatrices Asub and Bsub */ 3311 PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Asub)); 3312 PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Bsub)); 3313 3314 // Compact garray so its not of size Bn 3315 PetscCall(ISGetSize(iscol_o, &count)); 3316 PetscCall(PetscMalloc1(count, &garray_compact)); 3317 PetscCall(PetscArraycpy(garray_compact, garray, count)); 3318 3319 /* Create submatrix M */ 3320 PetscCall(ISGetSize(isrow, &M_size)); 3321 PetscCall(ISGetSize(iscol, &N_size)); 3322 PetscCall(MatCreateMPIAIJWithSeqAIJ(comm, M_size, N_size, Asub, Bsub, garray_compact, &M)); 3323 3324 /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */ 3325 asub = (Mat_MPIAIJ *)M->data; 3326 3327 PetscCall(ISGetLocalSize(iscol_o, &BsubN)); 3328 n = asub->B->cmap->N; 3329 if (BsubN > n) { 3330 /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */ 3331 const PetscInt *idx; 3332 PetscInt i, j, *idx_new, *subgarray = asub->garray; 3333 PetscCall(PetscInfo(M, "submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n", n, BsubN)); 3334 3335 PetscCall(PetscMalloc1(n, &idx_new)); 3336 j = 0; 3337 PetscCall(ISGetIndices(iscol_o, &idx)); 3338 for (i = 0; i < n; i++) { 3339 if (j >= BsubN) break; 3340 while (subgarray[i] > garray[j]) j++; 3341 3342 if (subgarray[i] == garray[j]) { 3343 idx_new[i] = idx[j++]; 3344 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" 
PetscInt_FMT "]=%" PetscInt_FMT, i, subgarray[i], j, garray[j]); 3345 } 3346 PetscCall(ISRestoreIndices(iscol_o, &idx)); 3347 3348 PetscCall(ISDestroy(&iscol_o)); 3349 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx_new, PETSC_OWN_POINTER, &iscol_o)); 3350 3351 } else if (BsubN < n) { 3352 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")", BsubN, asub->B->cmap->N); 3353 } 3354 3355 PetscCall(PetscFree(garray)); 3356 *submat = M; 3357 3358 /* Save isrow_d, iscol_d and iscol_o used in processor for next request */ 3359 PetscCall(PetscObjectCompose((PetscObject)M, "isrow_d", (PetscObject)isrow_d)); 3360 PetscCall(ISDestroy(&isrow_d)); 3361 3362 PetscCall(PetscObjectCompose((PetscObject)M, "iscol_d", (PetscObject)iscol_d)); 3363 PetscCall(ISDestroy(&iscol_d)); 3364 3365 PetscCall(PetscObjectCompose((PetscObject)M, "iscol_o", (PetscObject)iscol_o)); 3366 PetscCall(ISDestroy(&iscol_o)); 3367 } 3368 PetscFunctionReturn(PETSC_SUCCESS); 3369 } 3370 3371 PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat) 3372 { 3373 IS iscol_local = NULL, isrow_d; 3374 PetscInt csize; 3375 PetscInt n, i, j, start, end; 3376 PetscBool sameRowDist = PETSC_FALSE, sameDist[2], tsameDist[2]; 3377 MPI_Comm comm; 3378 3379 PetscFunctionBegin; 3380 /* If isrow has same processor distribution as mat, 3381 call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */ 3382 if (call == MAT_REUSE_MATRIX) { 3383 PetscCall(PetscObjectQuery((PetscObject)*newmat, "isrow_d", (PetscObject *)&isrow_d)); 3384 if (isrow_d) { 3385 sameRowDist = PETSC_TRUE; 3386 tsameDist[1] = PETSC_TRUE; /* sameColDist */ 3387 } else { 3388 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_local)); 3389 if (iscol_local) { 3390 sameRowDist = PETSC_TRUE; 3391 tsameDist[1] = PETSC_FALSE; /* !sameColDist */ 3392 } 3393 } 3394 } 
else { 3395 /* Check if isrow has same processor distribution as mat */ 3396 sameDist[0] = PETSC_FALSE; 3397 PetscCall(ISGetLocalSize(isrow, &n)); 3398 if (!n) { 3399 sameDist[0] = PETSC_TRUE; 3400 } else { 3401 PetscCall(ISGetMinMax(isrow, &i, &j)); 3402 PetscCall(MatGetOwnershipRange(mat, &start, &end)); 3403 if (i >= start && j < end) sameDist[0] = PETSC_TRUE; 3404 } 3405 3406 /* Check if iscol has same processor distribution as mat */ 3407 sameDist[1] = PETSC_FALSE; 3408 PetscCall(ISGetLocalSize(iscol, &n)); 3409 if (!n) { 3410 sameDist[1] = PETSC_TRUE; 3411 } else { 3412 PetscCall(ISGetMinMax(iscol, &i, &j)); 3413 PetscCall(MatGetOwnershipRangeColumn(mat, &start, &end)); 3414 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3415 } 3416 3417 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3418 PetscCallMPI(MPIU_Allreduce(&sameDist, &tsameDist, 2, MPIU_BOOL, MPI_LAND, comm)); 3419 sameRowDist = tsameDist[0]; 3420 } 3421 3422 if (sameRowDist) { 3423 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3424 /* isrow and iscol have same processor distribution as mat */ 3425 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat, isrow, iscol, call, newmat)); 3426 PetscFunctionReturn(PETSC_SUCCESS); 3427 } else { /* sameRowDist */ 3428 /* isrow has same processor distribution as mat */ 3429 if (call == MAT_INITIAL_MATRIX) { 3430 PetscBool sorted; 3431 PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local)); 3432 PetscCall(ISGetLocalSize(iscol_local, &n)); /* local size of iscol_local = global columns of newmat */ 3433 PetscCall(ISGetSize(iscol, &i)); 3434 PetscCheck(n == i, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT, n, i); 3435 3436 PetscCall(ISSorted(iscol_local, &sorted)); 3437 if (sorted) { 3438 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3439 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, iscol_local, MAT_INITIAL_MATRIX, 
newmat)); 3440 PetscFunctionReturn(PETSC_SUCCESS); 3441 } 3442 } else { /* call == MAT_REUSE_MATRIX */ 3443 IS iscol_sub; 3444 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub)); 3445 if (iscol_sub) { 3446 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, NULL, call, newmat)); 3447 PetscFunctionReturn(PETSC_SUCCESS); 3448 } 3449 } 3450 } 3451 } 3452 3453 /* General case: iscol -> iscol_local which has global size of iscol */ 3454 if (call == MAT_REUSE_MATRIX) { 3455 PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local)); 3456 PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 3457 } else { 3458 if (!iscol_local) PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local)); 3459 } 3460 3461 PetscCall(ISGetLocalSize(iscol, &csize)); 3462 PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat, isrow, iscol_local, csize, call, newmat)); 3463 3464 if (call == MAT_INITIAL_MATRIX) { 3465 PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local)); 3466 PetscCall(ISDestroy(&iscol_local)); 3467 } 3468 PetscFunctionReturn(PETSC_SUCCESS); 3469 } 3470 3471 /*@C 3472 MatCreateMPIAIJWithSeqAIJ - creates a `MATMPIAIJ` matrix using `MATSEQAIJ` matrices that contain the "diagonal" 3473 and "off-diagonal" part of the matrix in CSR format. 3474 3475 Collective 3476 3477 Input Parameters: 3478 + comm - MPI communicator 3479 . M - the global row size 3480 . N - the global column size 3481 . A - "diagonal" portion of matrix 3482 . B - if garray is `NULL`, B should be the offdiag matrix using global col ids and of size N - if garray is not `NULL`, B should be the offdiag matrix using local col ids and of size garray 3483 - garray - either `NULL` or the global index of `B` columns 3484 3485 Output Parameter: 3486 . 
mat - the matrix, with input `A` as its local diagonal matrix 3487 3488 Level: advanced 3489 3490 Notes: 3491 See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3492 3493 `A` and `B` becomes part of output mat. The user cannot use `A` and `B` anymore. 3494 3495 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MATSEQAIJ`, `MatCreateMPIAIJWithSplitArrays()` 3496 @*/ 3497 PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm, PetscInt M, PetscInt N, Mat A, Mat B, PetscInt *garray, Mat *mat) 3498 { 3499 PetscInt m, n; 3500 MatType mpi_mat_type; 3501 Mat_MPIAIJ *mpiaij; 3502 Mat C; 3503 3504 PetscFunctionBegin; 3505 PetscCall(MatCreate(comm, &C)); 3506 PetscCall(MatGetSize(A, &m, &n)); 3507 PetscCheck(m == B->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Am %" PetscInt_FMT " != Bm %" PetscInt_FMT, m, B->rmap->N); 3508 PetscCheck(A->rmap->bs == B->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT, A->rmap->bs, B->rmap->bs); 3509 3510 PetscCall(MatSetSizes(C, m, n, M, N)); 3511 /* Determine the type of MPI matrix that should be created from the type of matrix A, which holds the "diagonal" portion. */ 3512 PetscCall(MatGetMPIMatType_Private(A, &mpi_mat_type)); 3513 PetscCall(MatSetType(C, mpi_mat_type)); 3514 3515 PetscCall(MatSetBlockSizes(C, A->rmap->bs, A->cmap->bs)); 3516 PetscCall(PetscLayoutSetUp(C->rmap)); 3517 PetscCall(PetscLayoutSetUp(C->cmap)); 3518 3519 mpiaij = (Mat_MPIAIJ *)C->data; 3520 mpiaij->A = A; 3521 mpiaij->B = B; 3522 mpiaij->garray = garray; 3523 C->preallocated = PETSC_TRUE; 3524 C->nooffprocentries = PETSC_TRUE; /* See MatAssemblyBegin_MPIAIJ. In effect, making MatAssemblyBegin a nop */ 3525 3526 PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 3527 PetscCall(MatAssemblyBegin(C, MAT_FINAL_ASSEMBLY)); 3528 /* MatAssemblyEnd is critical here. 
It sets mat->offloadmask according to A and B's, and 3529 also gets mpiaij->B compacted (if garray is NULL), with its col ids and size reduced 3530 */ 3531 PetscCall(MatAssemblyEnd(C, MAT_FINAL_ASSEMBLY)); 3532 PetscCall(MatSetOption(C, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE)); 3533 PetscCall(MatSetOption(C, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 3534 *mat = C; 3535 PetscFunctionReturn(PETSC_SUCCESS); 3536 } 3537 3538 extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat, PetscInt, const IS[], const IS[], MatReuse, PetscBool, Mat *); 3539 3540 PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat, IS isrow, IS iscol, IS iscol_local, MatReuse call, Mat *newmat) 3541 { 3542 PetscInt i, m, n, rstart, row, rend, nz, j, bs, cbs; 3543 PetscInt *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal; 3544 Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data; 3545 Mat M, Msub, B = a->B; 3546 MatScalar *aa; 3547 Mat_SeqAIJ *aij; 3548 PetscInt *garray = a->garray, *colsub, Ncols; 3549 PetscInt count, Bn = B->cmap->N, cstart = mat->cmap->rstart, cend = mat->cmap->rend; 3550 IS iscol_sub, iscmap; 3551 const PetscInt *is_idx, *cmap; 3552 PetscBool allcolumns = PETSC_FALSE; 3553 MPI_Comm comm; 3554 3555 PetscFunctionBegin; 3556 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3557 if (call == MAT_REUSE_MATRIX) { 3558 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub)); 3559 PetscCheck(iscol_sub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "SubIScol passed in was not used before, cannot reuse"); 3560 PetscCall(ISGetLocalSize(iscol_sub, &count)); 3561 3562 PetscCall(PetscObjectQuery((PetscObject)*newmat, "Subcmap", (PetscObject *)&iscmap)); 3563 PetscCheck(iscmap, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Subcmap passed in was not used before, cannot reuse"); 3564 3565 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Msub)); 3566 PetscCheck(Msub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, 
"Submatrix passed in was not used before, cannot reuse"); 3567 3568 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_REUSE_MATRIX, PETSC_FALSE, &Msub)); 3569 3570 } else { /* call == MAT_INITIAL_MATRIX) */ 3571 PetscBool flg; 3572 3573 PetscCall(ISGetLocalSize(iscol, &n)); 3574 PetscCall(ISGetSize(iscol, &Ncols)); 3575 3576 /* (1) iscol -> nonscalable iscol_local */ 3577 /* Check for special case: each processor gets entire matrix columns */ 3578 PetscCall(ISIdentity(iscol_local, &flg)); 3579 if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3580 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 3581 if (allcolumns) { 3582 iscol_sub = iscol_local; 3583 PetscCall(PetscObjectReference((PetscObject)iscol_local)); 3584 PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &iscmap)); 3585 3586 } else { 3587 /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */ 3588 PetscInt *idx, *cmap1, k; 3589 PetscCall(PetscMalloc1(Ncols, &idx)); 3590 PetscCall(PetscMalloc1(Ncols, &cmap1)); 3591 PetscCall(ISGetIndices(iscol_local, &is_idx)); 3592 count = 0; 3593 k = 0; 3594 for (i = 0; i < Ncols; i++) { 3595 j = is_idx[i]; 3596 if (j >= cstart && j < cend) { 3597 /* diagonal part of mat */ 3598 idx[count] = j; 3599 cmap1[count++] = i; /* column index in submat */ 3600 } else if (Bn) { 3601 /* off-diagonal part of mat */ 3602 if (j == garray[k]) { 3603 idx[count] = j; 3604 cmap1[count++] = i; /* column index in submat */ 3605 } else if (j > garray[k]) { 3606 while (j > garray[k] && k < Bn - 1) k++; 3607 if (j == garray[k]) { 3608 idx[count] = j; 3609 cmap1[count++] = i; /* column index in submat */ 3610 } 3611 } 3612 } 3613 } 3614 PetscCall(ISRestoreIndices(iscol_local, &is_idx)); 3615 3616 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_OWN_POINTER, &iscol_sub)); 3617 
PetscCall(ISGetBlockSize(iscol, &cbs)); 3618 PetscCall(ISSetBlockSize(iscol_sub, cbs)); 3619 3620 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local), count, cmap1, PETSC_OWN_POINTER, &iscmap)); 3621 } 3622 3623 /* (3) Create sequential Msub */ 3624 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_INITIAL_MATRIX, allcolumns, &Msub)); 3625 } 3626 3627 PetscCall(ISGetLocalSize(iscol_sub, &count)); 3628 aij = (Mat_SeqAIJ *)Msub->data; 3629 ii = aij->i; 3630 PetscCall(ISGetIndices(iscmap, &cmap)); 3631 3632 /* 3633 m - number of local rows 3634 Ncols - number of columns (same on all processors) 3635 rstart - first row in new global matrix generated 3636 */ 3637 PetscCall(MatGetSize(Msub, &m, NULL)); 3638 3639 if (call == MAT_INITIAL_MATRIX) { 3640 /* (4) Create parallel newmat */ 3641 PetscMPIInt rank, size; 3642 PetscInt csize; 3643 3644 PetscCallMPI(MPI_Comm_size(comm, &size)); 3645 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 3646 3647 /* 3648 Determine the number of non-zeros in the diagonal and off-diagonal 3649 portions of the matrix in order to do correct preallocation 3650 */ 3651 3652 /* first get start and end of "diagonal" columns */ 3653 PetscCall(ISGetLocalSize(iscol, &csize)); 3654 if (csize == PETSC_DECIDE) { 3655 PetscCall(ISGetSize(isrow, &mglobal)); 3656 if (mglobal == Ncols) { /* square matrix */ 3657 nlocal = m; 3658 } else { 3659 nlocal = Ncols / size + ((Ncols % size) > rank); 3660 } 3661 } else { 3662 nlocal = csize; 3663 } 3664 PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm)); 3665 rstart = rend - nlocal; 3666 PetscCheck(rank != size - 1 || rend == Ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, Ncols); 3667 3668 /* next, compute all the lengths */ 3669 jj = aij->j; 3670 PetscCall(PetscMalloc1(2 * m + 1, &dlens)); 3671 olens = dlens + m; 3672 for (i = 0; i < m; i++) { 3673 jend = ii[i 
+ 1] - ii[i]; 3674 olen = 0; 3675 dlen = 0; 3676 for (j = 0; j < jend; j++) { 3677 if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++; 3678 else dlen++; 3679 jj++; 3680 } 3681 olens[i] = olen; 3682 dlens[i] = dlen; 3683 } 3684 3685 PetscCall(ISGetBlockSize(isrow, &bs)); 3686 PetscCall(ISGetBlockSize(iscol, &cbs)); 3687 3688 PetscCall(MatCreate(comm, &M)); 3689 PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, Ncols)); 3690 PetscCall(MatSetBlockSizes(M, bs, cbs)); 3691 PetscCall(MatSetType(M, ((PetscObject)mat)->type_name)); 3692 PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens)); 3693 PetscCall(PetscFree(dlens)); 3694 3695 } else { /* call == MAT_REUSE_MATRIX */ 3696 M = *newmat; 3697 PetscCall(MatGetLocalSize(M, &i, NULL)); 3698 PetscCheck(i == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request"); 3699 PetscCall(MatZeroEntries(M)); 3700 /* 3701 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3702 rather than the slower MatSetValues(). 
3703 */ 3704 M->was_assembled = PETSC_TRUE; 3705 M->assembled = PETSC_FALSE; 3706 } 3707 3708 /* (5) Set values of Msub to *newmat */ 3709 PetscCall(PetscMalloc1(count, &colsub)); 3710 PetscCall(MatGetOwnershipRange(M, &rstart, NULL)); 3711 3712 jj = aij->j; 3713 PetscCall(MatSeqAIJGetArrayRead(Msub, (const PetscScalar **)&aa)); 3714 for (i = 0; i < m; i++) { 3715 row = rstart + i; 3716 nz = ii[i + 1] - ii[i]; 3717 for (j = 0; j < nz; j++) colsub[j] = cmap[jj[j]]; 3718 PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, colsub, aa, INSERT_VALUES)); 3719 jj += nz; 3720 aa += nz; 3721 } 3722 PetscCall(MatSeqAIJRestoreArrayRead(Msub, (const PetscScalar **)&aa)); 3723 PetscCall(ISRestoreIndices(iscmap, &cmap)); 3724 3725 PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY)); 3726 PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY)); 3727 3728 PetscCall(PetscFree(colsub)); 3729 3730 /* save Msub, iscol_sub and iscmap used in processor for next request */ 3731 if (call == MAT_INITIAL_MATRIX) { 3732 *newmat = M; 3733 PetscCall(PetscObjectCompose((PetscObject)*newmat, "SubMatrix", (PetscObject)Msub)); 3734 PetscCall(MatDestroy(&Msub)); 3735 3736 PetscCall(PetscObjectCompose((PetscObject)*newmat, "SubIScol", (PetscObject)iscol_sub)); 3737 PetscCall(ISDestroy(&iscol_sub)); 3738 3739 PetscCall(PetscObjectCompose((PetscObject)*newmat, "Subcmap", (PetscObject)iscmap)); 3740 PetscCall(ISDestroy(&iscmap)); 3741 3742 if (iscol_local) { 3743 PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local)); 3744 PetscCall(ISDestroy(&iscol_local)); 3745 } 3746 } 3747 PetscFunctionReturn(PETSC_SUCCESS); 3748 } 3749 3750 /* 3751 Not great since it makes two copies of the submatrix, first an SeqAIJ 3752 in local and then by concatenating the local matrices the end result. 3753 Writing it directly would be much like MatCreateSubMatrices_MPIAIJ() 3754 3755 This requires a sequential iscol with all indices. 
3756 */ 3757 PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat) 3758 { 3759 PetscMPIInt rank, size; 3760 PetscInt i, m, n, rstart, row, rend, nz, *cwork, j, bs, cbs; 3761 PetscInt *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal; 3762 Mat M, Mreuse; 3763 MatScalar *aa, *vwork; 3764 MPI_Comm comm; 3765 Mat_SeqAIJ *aij; 3766 PetscBool colflag, allcolumns = PETSC_FALSE; 3767 3768 PetscFunctionBegin; 3769 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3770 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 3771 PetscCallMPI(MPI_Comm_size(comm, &size)); 3772 3773 /* Check for special case: each processor gets entire matrix columns */ 3774 PetscCall(ISIdentity(iscol, &colflag)); 3775 PetscCall(ISGetLocalSize(iscol, &n)); 3776 if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE; 3777 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 3778 3779 if (call == MAT_REUSE_MATRIX) { 3780 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse)); 3781 PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 3782 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_REUSE_MATRIX, allcolumns, &Mreuse)); 3783 } else { 3784 PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_INITIAL_MATRIX, allcolumns, &Mreuse)); 3785 } 3786 3787 /* 3788 m - number of local rows 3789 n - number of columns (same on all processors) 3790 rstart - first row in new global matrix generated 3791 */ 3792 PetscCall(MatGetSize(Mreuse, &m, &n)); 3793 PetscCall(MatGetBlockSizes(Mreuse, &bs, &cbs)); 3794 if (call == MAT_INITIAL_MATRIX) { 3795 aij = (Mat_SeqAIJ *)Mreuse->data; 3796 ii = aij->i; 3797 jj = aij->j; 3798 3799 /* 3800 Determine the number of non-zeros in the diagonal and off-diagonal 3801 portions of the 
matrix in order to do correct preallocation 3802 */ 3803 3804 /* first get start and end of "diagonal" columns */ 3805 if (csize == PETSC_DECIDE) { 3806 PetscCall(ISGetSize(isrow, &mglobal)); 3807 if (mglobal == n) { /* square matrix */ 3808 nlocal = m; 3809 } else { 3810 nlocal = n / size + ((n % size) > rank); 3811 } 3812 } else { 3813 nlocal = csize; 3814 } 3815 PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm)); 3816 rstart = rend - nlocal; 3817 PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n); 3818 3819 /* next, compute all the lengths */ 3820 PetscCall(PetscMalloc1(2 * m + 1, &dlens)); 3821 olens = dlens + m; 3822 for (i = 0; i < m; i++) { 3823 jend = ii[i + 1] - ii[i]; 3824 olen = 0; 3825 dlen = 0; 3826 for (j = 0; j < jend; j++) { 3827 if (*jj < rstart || *jj >= rend) olen++; 3828 else dlen++; 3829 jj++; 3830 } 3831 olens[i] = olen; 3832 dlens[i] = dlen; 3833 } 3834 PetscCall(MatCreate(comm, &M)); 3835 PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, n)); 3836 PetscCall(MatSetBlockSizes(M, bs, cbs)); 3837 PetscCall(MatSetType(M, ((PetscObject)mat)->type_name)); 3838 PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens)); 3839 PetscCall(PetscFree(dlens)); 3840 } else { 3841 PetscInt ml, nl; 3842 3843 M = *newmat; 3844 PetscCall(MatGetLocalSize(M, &ml, &nl)); 3845 PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request"); 3846 PetscCall(MatZeroEntries(M)); 3847 /* 3848 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 3849 rather than the slower MatSetValues(). 
3850 */ 3851 M->was_assembled = PETSC_TRUE; 3852 M->assembled = PETSC_FALSE; 3853 } 3854 PetscCall(MatGetOwnershipRange(M, &rstart, &rend)); 3855 aij = (Mat_SeqAIJ *)Mreuse->data; 3856 ii = aij->i; 3857 jj = aij->j; 3858 3859 /* trigger copy to CPU if needed */ 3860 PetscCall(MatSeqAIJGetArrayRead(Mreuse, (const PetscScalar **)&aa)); 3861 for (i = 0; i < m; i++) { 3862 row = rstart + i; 3863 nz = ii[i + 1] - ii[i]; 3864 cwork = jj; 3865 jj = PetscSafePointerPlusOffset(jj, nz); 3866 vwork = aa; 3867 aa = PetscSafePointerPlusOffset(aa, nz); 3868 PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES)); 3869 } 3870 PetscCall(MatSeqAIJRestoreArrayRead(Mreuse, (const PetscScalar **)&aa)); 3871 3872 PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY)); 3873 PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY)); 3874 *newmat = M; 3875 3876 /* save submatrix used in processor for next request */ 3877 if (call == MAT_INITIAL_MATRIX) { 3878 PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse)); 3879 PetscCall(MatDestroy(&Mreuse)); 3880 } 3881 PetscFunctionReturn(PETSC_SUCCESS); 3882 } 3883 3884 static PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[]) 3885 { 3886 PetscInt m, cstart, cend, j, nnz, i, d, *ld; 3887 PetscInt *d_nnz, *o_nnz, nnz_max = 0, rstart, ii, irstart; 3888 const PetscInt *JJ; 3889 PetscBool nooffprocentries; 3890 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)B->data; 3891 3892 PetscFunctionBegin; 3893 PetscCall(PetscLayoutSetUp(B->rmap)); 3894 PetscCall(PetscLayoutSetUp(B->cmap)); 3895 m = B->rmap->n; 3896 cstart = B->cmap->rstart; 3897 cend = B->cmap->rend; 3898 rstart = B->rmap->rstart; 3899 irstart = Ii[0]; 3900 3901 PetscCall(PetscCalloc2(m, &d_nnz, m, &o_nnz)); 3902 3903 if (PetscDefined(USE_DEBUG)) { 3904 for (i = 0; i < m; i++) { 3905 nnz = Ii[i + 1] - Ii[i]; 3906 JJ = PetscSafePointerPlusOffset(J, Ii[i] - irstart); 3907 PetscCheck(nnz >= 0, 
PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns", i, nnz); 3908 PetscCheck(!nnz || !(JJ[0] < 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT, i, JJ[0]); 3909 PetscCheck(!nnz || !(JJ[nnz - 1] >= B->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")", i, JJ[nnz - 1], B->cmap->N); 3910 } 3911 } 3912 3913 for (i = 0; i < m; i++) { 3914 nnz = Ii[i + 1] - Ii[i]; 3915 JJ = PetscSafePointerPlusOffset(J, Ii[i] - irstart); 3916 nnz_max = PetscMax(nnz_max, nnz); 3917 d = 0; 3918 for (j = 0; j < nnz; j++) { 3919 if (cstart <= JJ[j] && JJ[j] < cend) d++; 3920 } 3921 d_nnz[i] = d; 3922 o_nnz[i] = nnz - d; 3923 } 3924 PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz)); 3925 PetscCall(PetscFree2(d_nnz, o_nnz)); 3926 3927 for (i = 0; i < m; i++) { 3928 ii = i + rstart; 3929 PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, Ii[i + 1] - Ii[i], PetscSafePointerPlusOffset(J, Ii[i] - irstart), PetscSafePointerPlusOffset(v, Ii[i] - irstart), INSERT_VALUES)); 3930 } 3931 nooffprocentries = B->nooffprocentries; 3932 B->nooffprocentries = PETSC_TRUE; 3933 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 3934 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 3935 B->nooffprocentries = nooffprocentries; 3936 3937 /* count number of entries below block diagonal */ 3938 PetscCall(PetscFree(Aij->ld)); 3939 PetscCall(PetscCalloc1(m, &ld)); 3940 Aij->ld = ld; 3941 for (i = 0; i < m; i++) { 3942 nnz = Ii[i + 1] - Ii[i]; 3943 j = 0; 3944 while (j < nnz && J[j] < cstart) j++; 3945 ld[i] = j; 3946 if (J) J += nnz; 3947 } 3948 3949 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 3950 PetscFunctionReturn(PETSC_SUCCESS); 3951 } 3952 3953 /*@ 3954 MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in 
`MATAIJ` format
  (the default parallel PETSc format).

  Collective

  Input Parameters:
+ B - the matrix
. i - the indices into `j` for the start of each local row (indices start with zero)
. j - the column indices for each local row (indices start with zero)
- v - optional values in the matrix

  Level: developer

  Notes:
  The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc;
  thus you CANNOT change the matrix entries by changing the values of `v` after you have
  called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.

  The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array.

  A convenience routine for this functionality is `MatCreateMPIAIJWithArrays()`.

  You can update the matrix with new numerical values using `MatUpdateMPIAIJWithArrays()` after this call if the column indices in `j` are sorted.

  If you do **not** use `MatUpdateMPIAIJWithArrays()`, the column indices in `j` do not need to be sorted. If you will use
  `MatUpdateMPIAIJWithArrays()`, the column indices **must** be sorted.

  The format used for the sparse matrix input is equivalent to a
  row-major ordering,
i.e for the following matrix, the input data expected is 3983 as shown 3984 .vb 3985 1 0 0 3986 2 0 3 P0 3987 ------- 3988 4 5 6 P1 3989 3990 Process0 [P0] rows_owned=[0,1] 3991 i = {0,1,3} [size = nrow+1 = 2+1] 3992 j = {0,0,2} [size = 3] 3993 v = {1,2,3} [size = 3] 3994 3995 Process1 [P1] rows_owned=[2] 3996 i = {0,3} [size = nrow+1 = 1+1] 3997 j = {0,1,2} [size = 3] 3998 v = {4,5,6} [size = 3] 3999 .ve 4000 4001 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`, 4002 `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()`, `MatCreateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4003 @*/ 4004 PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B, const PetscInt i[], const PetscInt j[], const PetscScalar v[]) 4005 { 4006 PetscFunctionBegin; 4007 PetscTryMethod(B, "MatMPIAIJSetPreallocationCSR_C", (Mat, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, i, j, v)); 4008 PetscFunctionReturn(PETSC_SUCCESS); 4009 } 4010 4011 /*@ 4012 MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in `MATMPIAIJ` format 4013 (the default parallel PETSc format). For good matrix assembly performance 4014 the user should preallocate the matrix storage by setting the parameters 4015 `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). 4016 4017 Collective 4018 4019 Input Parameters: 4020 + B - the matrix 4021 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4022 (same value is used for all local rows) 4023 . d_nnz - array containing the number of nonzeros in the various rows of the 4024 DIAGONAL portion of the local submatrix (possibly different for each row) 4025 or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `d_nz` is used to specify the nonzero structure. 4026 The size of this array is equal to the number of local rows, i.e 'm'. 
For matrices that will be factored, you must leave room for (and set)
          the diagonal entry even if it is zero.
. o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
          submatrix (same value is used for all local rows).
- o_nnz - array containing the number of nonzeros in the various rows of the
          OFF-DIAGONAL portion of the local submatrix (possibly different for
          each row) or `NULL` (`PETSC_NULL_INTEGER` in Fortran), if `o_nz` is used to specify the nonzero
          structure. The size of this array is equal to the number
          of local rows, i.e 'm'.

  Example Usage:
  Consider the following 8x8 matrix with 34 non-zero values, that is
  assembled across 3 processors. Let's assume that proc0 owns 3 rows,
  proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
  as follows

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

  This can be represented as a collection of submatrices as
.vb
      A B C
      D E F
      G H I
.ve

  Where the submatrices A,B,C are owned by proc0, D,E,F are
  owned by proc1, G,H,I are owned by proc2.

  The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
  The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
  The 'M','N' parameters are 8,8, and have the same values on all procs.

  The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
  submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
  corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
  Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
  part as `MATSEQAIJ` matrices.
For example, proc1 will store [E] as a `MATSEQAIJ`
  matrix, and [DF] as another `MATSEQAIJ` matrix.

  When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
  allocated for every row of the local DIAGONAL submatrix, and `o_nz`
  storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
  One way to choose `d_nz` and `o_nz` is to use the maximum number of nonzeros over
  the local rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
  In this case, the values of `d_nz`, `o_nz` are
.vb
     proc0  d_nz = 2, o_nz = 2
     proc1  d_nz = 3, o_nz = 2
     proc2  d_nz = 1, o_nz = 4
.ve
  We are allocating `m`*(`d_nz`+`o_nz`) storage locations for every proc. This
  translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
  for proc2, i.e., we are using 12+15+10=37 storage locations to store
  34 values.

  When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
  for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
  In the above case the values for `d_nnz`, `o_nnz` are
.vb
     proc0 d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1 d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2 d_nnz = [1,1]   and o_nnz = [4,4]
.ve
  Here the space allocated is the sum of all the above values, i.e., 34, and
  hence pre-allocation is perfect.

  Level: intermediate

  Notes:
  If the *_nnz parameter is given then the *_nz parameter is ignored

  The `MATAIJ` format, also called compressed row storage (CSR), is compatible with standard Fortran
  storage.  The stored row and column indices begin with zero.
  See [Sparse Matrices](sec_matsparse) for details.

  The parallel matrix is partitioned such that the first m0 rows belong to
  process 0, the next m1 rows belong to process 1, the next m2 rows belong
  to process 2, etc., where m0,m1,m2... are the input parameter 'm'.
4116 4117 The DIAGONAL portion of the local submatrix of a processor can be defined 4118 as the submatrix which is obtained by extraction the part corresponding to 4119 the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the 4120 first row that belongs to the processor, r2 is the last row belonging to 4121 the this processor, and c1-c2 is range of indices of the local part of a 4122 vector suitable for applying the matrix to. This is an mxn matrix. In the 4123 common case of a square matrix, the row and column ranges are the same and 4124 the DIAGONAL part is also square. The remaining portion of the local 4125 submatrix (mxN) constitute the OFF-DIAGONAL portion. 4126 4127 If `o_nnz` and `d_nnz` are specified, then `o_nz` and `d_nz` are ignored. 4128 4129 You can call `MatGetInfo()` to get information on how effective the preallocation was; 4130 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 4131 You can also run with the option `-info` and look for messages with the string 4132 malloc in them to see if additional memory allocation was needed. 4133 4134 .seealso: [](ch_matrices), `Mat`, [Sparse Matrices](sec_matsparse), `MATMPIAIJ`, `MATAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`, 4135 `MatGetInfo()`, `PetscSplitOwnership()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4136 @*/ 4137 PetscErrorCode MatMPIAIJSetPreallocation(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[]) 4138 { 4139 PetscFunctionBegin; 4140 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 4141 PetscValidType(B, 1); 4142 PetscTryMethod(B, "MatMPIAIJSetPreallocation_C", (Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, d_nz, d_nnz, o_nz, o_nnz)); 4143 PetscFunctionReturn(PETSC_SUCCESS); 4144 } 4145 4146 /*@ 4147 MatCreateMPIAIJWithArrays - creates a `MATMPIAIJ` matrix using arrays that contain in standard 4148 CSR format for the local rows. 
4149 4150 Collective 4151 4152 Input Parameters: 4153 + comm - MPI communicator 4154 . m - number of local rows (Cannot be `PETSC_DECIDE`) 4155 . n - This value should be the same as the local size used in creating the 4156 x vector for the matrix-vector product $ y = Ax$. (or `PETSC_DECIDE` to have 4157 calculated if `N` is given) For square matrices n is almost always `m`. 4158 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given) 4159 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given) 4160 . i - row indices (of length m+1); that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 4161 . j - global column indices 4162 - a - optional matrix values 4163 4164 Output Parameter: 4165 . mat - the matrix 4166 4167 Level: intermediate 4168 4169 Notes: 4170 The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc; 4171 thus you CANNOT change the matrix entries by changing the values of `a[]` after you have 4172 called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays. 4173 4174 The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array. 4175 4176 Once you have created the matrix you can update it with new numerical values using `MatUpdateMPIAIJWithArray()` 4177 4178 If you do **not** use `MatUpdateMPIAIJWithArray()`, the column indices in `j` do not need to be sorted. If you will use 4179 `MatUpdateMPIAIJWithArrays()`, the column indices **must** be sorted. 
4180 4181 The format which is used for the sparse matrix input, is equivalent to a 4182 row-major ordering, i.e., for the following matrix, the input data expected is 4183 as shown 4184 .vb 4185 1 0 0 4186 2 0 3 P0 4187 ------- 4188 4 5 6 P1 4189 4190 Process0 [P0] rows_owned=[0,1] 4191 i = {0,1,3} [size = nrow+1 = 2+1] 4192 j = {0,0,2} [size = 3] 4193 v = {1,2,3} [size = 3] 4194 4195 Process1 [P1] rows_owned=[2] 4196 i = {0,3} [size = nrow+1 = 1+1] 4197 j = {0,1,2} [size = 3] 4198 v = {4,5,6} [size = 3] 4199 .ve 4200 4201 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4202 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4203 @*/ 4204 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat) 4205 { 4206 PetscFunctionBegin; 4207 PetscCheck(!i || !i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 4208 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4209 PetscCall(MatCreate(comm, mat)); 4210 PetscCall(MatSetSizes(*mat, m, n, M, N)); 4211 /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */ 4212 PetscCall(MatSetType(*mat, MATMPIAIJ)); 4213 PetscCall(MatMPIAIJSetPreallocationCSR(*mat, i, j, a)); 4214 PetscFunctionReturn(PETSC_SUCCESS); 4215 } 4216 4217 /*@ 4218 MatUpdateMPIAIJWithArrays - updates a `MATMPIAIJ` matrix using arrays that contain in standard 4219 CSR format for the local rows. 
Only the numerical values are updated the other arrays must be identical to what was passed 4220 from `MatCreateMPIAIJWithArrays()` 4221 4222 Deprecated: Use `MatUpdateMPIAIJWithArray()` 4223 4224 Collective 4225 4226 Input Parameters: 4227 + mat - the matrix 4228 . m - number of local rows (Cannot be `PETSC_DECIDE`) 4229 . n - This value should be the same as the local size used in creating the 4230 x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have 4231 calculated if N is given) For square matrices n is almost always m. 4232 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 4233 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 4234 . Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix 4235 . J - column indices 4236 - v - matrix values 4237 4238 Level: deprecated 4239 4240 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4241 `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArray()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4242 @*/ 4243 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[]) 4244 { 4245 PetscInt nnz, i; 4246 PetscBool nooffprocentries; 4247 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)mat->data; 4248 Mat_SeqAIJ *Ad = (Mat_SeqAIJ *)Aij->A->data; 4249 PetscScalar *ad, *ao; 4250 PetscInt ldi, Iii, md; 4251 const PetscInt *Adi = Ad->i; 4252 PetscInt *ld = Aij->ld; 4253 4254 PetscFunctionBegin; 4255 PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 4256 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4257 PetscCheck(m == mat->rmap->n, 
PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of rows cannot change from call to MatUpdateMPIAIJWithArrays()"); 4258 PetscCheck(n == mat->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()"); 4259 4260 PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad)); 4261 PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao)); 4262 4263 for (i = 0; i < m; i++) { 4264 if (PetscDefined(USE_DEBUG)) { 4265 for (PetscInt j = Ii[i] + 1; j < Ii[i + 1]; ++j) { 4266 PetscCheck(J[j] >= J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is not sorted", j - Ii[i], J[j], i); 4267 PetscCheck(J[j] != J[j - 1], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column entry number %" PetscInt_FMT " (actual column %" PetscInt_FMT ") in row %" PetscInt_FMT " is identical to previous entry", j - Ii[i], J[j], i); 4268 } 4269 } 4270 nnz = Ii[i + 1] - Ii[i]; 4271 Iii = Ii[i]; 4272 ldi = ld[i]; 4273 md = Adi[i + 1] - Adi[i]; 4274 PetscCall(PetscArraycpy(ao, v + Iii, ldi)); 4275 PetscCall(PetscArraycpy(ad, v + Iii + ldi, md)); 4276 PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md)); 4277 ad += md; 4278 ao += nnz - md; 4279 } 4280 nooffprocentries = mat->nooffprocentries; 4281 mat->nooffprocentries = PETSC_TRUE; 4282 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad)); 4283 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao)); 4284 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A)); 4285 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B)); 4286 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 4287 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 4288 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 4289 mat->nooffprocentries = nooffprocentries; 4290 PetscFunctionReturn(PETSC_SUCCESS); 4291 } 4292 4293 /*@ 4294 MatUpdateMPIAIJWithArray - updates an `MATMPIAIJ` matrix using an array that contains the 
nonzero values 4295 4296 Collective 4297 4298 Input Parameters: 4299 + mat - the matrix 4300 - v - matrix values, stored by row 4301 4302 Level: intermediate 4303 4304 Notes: 4305 The matrix must have been obtained with `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` 4306 4307 The column indices in the call to `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` must have been sorted for this call to work correctly 4308 4309 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4310 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatSetPreallocationCOO()`, `MatSetValuesCOO()` 4311 @*/ 4312 PetscErrorCode MatUpdateMPIAIJWithArray(Mat mat, const PetscScalar v[]) 4313 { 4314 PetscInt nnz, i, m; 4315 PetscBool nooffprocentries; 4316 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)mat->data; 4317 Mat_SeqAIJ *Ad = (Mat_SeqAIJ *)Aij->A->data; 4318 Mat_SeqAIJ *Ao = (Mat_SeqAIJ *)Aij->B->data; 4319 PetscScalar *ad, *ao; 4320 const PetscInt *Adi = Ad->i, *Adj = Ao->i; 4321 PetscInt ldi, Iii, md; 4322 PetscInt *ld = Aij->ld; 4323 4324 PetscFunctionBegin; 4325 m = mat->rmap->n; 4326 4327 PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad)); 4328 PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao)); 4329 Iii = 0; 4330 for (i = 0; i < m; i++) { 4331 nnz = Adi[i + 1] - Adi[i] + Adj[i + 1] - Adj[i]; 4332 ldi = ld[i]; 4333 md = Adi[i + 1] - Adi[i]; 4334 PetscCall(PetscArraycpy(ad, v + Iii + ldi, md)); 4335 ad += md; 4336 if (ao) { 4337 PetscCall(PetscArraycpy(ao, v + Iii, ldi)); 4338 PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md)); 4339 ao += nnz - md; 4340 } 4341 Iii += nnz; 4342 } 4343 nooffprocentries = mat->nooffprocentries; 4344 mat->nooffprocentries = PETSC_TRUE; 4345 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad)); 4346 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao)); 4347 
PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A)); 4348 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B)); 4349 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 4350 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 4351 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 4352 mat->nooffprocentries = nooffprocentries; 4353 PetscFunctionReturn(PETSC_SUCCESS); 4354 } 4355 4356 /*@ 4357 MatCreateAIJ - Creates a sparse parallel matrix in `MATAIJ` format 4358 (the default parallel PETSc format). For good matrix assembly performance 4359 the user should preallocate the matrix storage by setting the parameters 4360 `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). 4361 4362 Collective 4363 4364 Input Parameters: 4365 + comm - MPI communicator 4366 . m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given) 4367 This value should be the same as the local size used in creating the 4368 y vector for the matrix-vector product y = Ax. 4369 . n - This value should be the same as the local size used in creating the 4370 x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have 4371 calculated if N is given) For square matrices n is almost always m. 4372 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 4373 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 4374 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4375 (same value is used for all local rows) 4376 . d_nnz - array containing the number of nonzeros in the various rows of the 4377 DIAGONAL portion of the local submatrix (possibly different for each row) 4378 or `NULL`, if `d_nz` is used to specify the nonzero structure. 4379 The size of this array is equal to the number of local rows, i.e 'm'. 4380 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4381 submatrix (same value is used for all local rows). 
4382 - o_nnz - array containing the number of nonzeros in the various rows of the 4383 OFF-DIAGONAL portion of the local submatrix (possibly different for 4384 each row) or `NULL`, if `o_nz` is used to specify the nonzero 4385 structure. The size of this array is equal to the number 4386 of local rows, i.e 'm'. 4387 4388 Output Parameter: 4389 . A - the matrix 4390 4391 Options Database Keys: 4392 + -mat_no_inode - Do not use inodes 4393 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4394 - -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in `MatMult()` of sparse parallel matrices. 4395 See viewer types in manual of `MatView()`. Of them, ascii_matlab, draw or binary cause the `VecScatter` 4396 to be viewed as a matrix. Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one `MatMult()` call. 4397 4398 Level: intermediate 4399 4400 Notes: 4401 It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, 4402 MatXXXXSetPreallocation() paradigm instead of this routine directly. 4403 [MatXXXXSetPreallocation() is, for example, `MatSeqAIJSetPreallocation()`] 4404 4405 If the *_nnz parameter is given then the *_nz parameter is ignored 4406 4407 The `m`,`n`,`M`,`N` parameters specify the size of the matrix, and its partitioning across 4408 processors, while `d_nz`,`d_nnz`,`o_nz`,`o_nnz` parameters specify the approximate 4409 storage requirements for this matrix. 4410 4411 If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one 4412 processor than it must be used on all processors that share the object for 4413 that argument. 4414 4415 If `m` and `n` are not `PETSC_DECIDE`, then the values determine the `PetscLayout` of the matrix and the ranges returned by 4416 `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, and `MatGetOwnershipRangesColumn()`. 
4417 4418 The user MUST specify either the local or global matrix dimensions 4419 (possibly both). 4420 4421 The parallel matrix is partitioned across processors such that the 4422 first `m0` rows belong to process 0, the next `m1` rows belong to 4423 process 1, the next `m2` rows belong to process 2, etc., where 4424 `m0`, `m1`, `m2`... are the input parameter `m` on each MPI process. I.e., each MPI process stores 4425 values corresponding to [m x N] submatrix. 4426 4427 The columns are logically partitioned with the n0 columns belonging 4428 to 0th partition, the next n1 columns belonging to the next 4429 partition etc.. where n0,n1,n2... are the input parameter 'n'. 4430 4431 The DIAGONAL portion of the local submatrix on any given processor 4432 is the submatrix corresponding to the rows and columns m,n 4433 corresponding to the given processor. i.e diagonal matrix on 4434 process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1] 4435 etc. The remaining portion of the local submatrix [m x (N-n)] 4436 constitute the OFF-DIAGONAL portion. The example below better 4437 illustrates this concept. The two matrices, the DIAGONAL portion and 4438 the OFF-DIAGONAL portion are each stored as `MATSEQAIJ` matrices. 4439 4440 For a square global matrix we define each processor's diagonal portion 4441 to be its local rows and the corresponding columns (a square submatrix); 4442 each processor's off-diagonal portion encompasses the remainder of the 4443 local matrix (a rectangular submatrix). 4444 4445 If `o_nnz`, `d_nnz` are specified, then `o_nz`, and `d_nz` are ignored. 4446 4447 When calling this routine with a single process communicator, a matrix of 4448 type `MATSEQAIJ` is returned. 
If a matrix of type `MATMPIAIJ` is desired for this 4449 type of communicator, use the construction mechanism 4450 .vb 4451 MatCreate(..., &A); 4452 MatSetType(A, MATMPIAIJ); 4453 MatSetSizes(A, m, n, M, N); 4454 MatMPIAIJSetPreallocation(A, ...); 4455 .ve 4456 4457 By default, this format uses inodes (identical nodes) when possible. 4458 We search for consecutive rows with the same nonzero structure, thereby 4459 reusing matrix information to achieve increased efficiency. 4460 4461 Example Usage: 4462 Consider the following 8x8 matrix with 34 non-zero values, that is 4463 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4464 proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown 4465 as follows 4466 4467 .vb 4468 1 2 0 | 0 3 0 | 0 4 4469 Proc0 0 5 6 | 7 0 0 | 8 0 4470 9 0 10 | 11 0 0 | 12 0 4471 ------------------------------------- 4472 13 0 14 | 15 16 17 | 0 0 4473 Proc1 0 18 0 | 19 20 21 | 0 0 4474 0 0 0 | 22 23 0 | 24 0 4475 ------------------------------------- 4476 Proc2 25 26 27 | 0 0 28 | 29 0 4477 30 0 0 | 31 32 33 | 0 34 4478 .ve 4479 4480 This can be represented as a collection of submatrices as 4481 4482 .vb 4483 A B C 4484 D E F 4485 G H I 4486 .ve 4487 4488 Where the submatrices A,B,C are owned by proc0, D,E,F are 4489 owned by proc1, G,H,I are owned by proc2. 4490 4491 The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4492 The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively. 4493 The 'M','N' parameters are 8,8, and have the same values on all procs. 4494 4495 The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are 4496 submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices 4497 corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively. 4498 Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL 4499 part as `MATSEQAIJ` matrices. For example, proc1 will store [E] as a `MATSEQAIJ` 4500 matrix, and [DF] as another SeqAIJ matrix. 

  When `d_nz`, `o_nz` parameters are specified, `d_nz` storage elements are
  allocated for every row of the local DIAGONAL submatrix, and `o_nz`
  storage locations are allocated for every row of the OFF-DIAGONAL submatrix.
  One way to choose `d_nz` and `o_nz` is to use the maximum number of nonzeros over
  the local rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
  In this case, the values of `d_nz`, `o_nz` are
.vb
     proc0  d_nz = 2, o_nz = 2
     proc1  d_nz = 3, o_nz = 2
     proc2  d_nz = 1, o_nz = 4
.ve
  We are allocating `m`*(`d_nz`+`o_nz`) storage locations for every proc. This
  translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
  for proc2, i.e., we are using 12+15+10=37 storage locations to store
  34 values.

  When `d_nnz`, `o_nnz` parameters are specified, the storage is specified
  for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
  In the above case the values for `d_nnz`, `o_nnz` are
.vb
     proc0  d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1  d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2  d_nnz = [1,1]   and o_nnz = [4,4]
.ve
  Here the space allocated is the sum of all the above values, i.e., 34, and
  hence pre-allocation is perfect.
4528 4529 .seealso: [](ch_matrices), `Mat`, [Sparse Matrix Creation](sec_matsparse), `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4530 `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()`, `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, 4531 `MatGetOwnershipRangesColumn()`, `PetscLayout` 4532 @*/ 4533 PetscErrorCode MatCreateAIJ(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A) 4534 { 4535 PetscMPIInt size; 4536 4537 PetscFunctionBegin; 4538 PetscCall(MatCreate(comm, A)); 4539 PetscCall(MatSetSizes(*A, m, n, M, N)); 4540 PetscCallMPI(MPI_Comm_size(comm, &size)); 4541 if (size > 1) { 4542 PetscCall(MatSetType(*A, MATMPIAIJ)); 4543 PetscCall(MatMPIAIJSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz)); 4544 } else { 4545 PetscCall(MatSetType(*A, MATSEQAIJ)); 4546 PetscCall(MatSeqAIJSetPreallocation(*A, d_nz, d_nnz)); 4547 } 4548 PetscFunctionReturn(PETSC_SUCCESS); 4549 } 4550 4551 /*@C 4552 MatMPIAIJGetSeqAIJ - Returns the local pieces of this distributed matrix 4553 4554 Not Collective 4555 4556 Input Parameter: 4557 . A - The `MATMPIAIJ` matrix 4558 4559 Output Parameters: 4560 + Ad - The local diagonal block as a `MATSEQAIJ` matrix 4561 . Ao - The local off-diagonal block as a `MATSEQAIJ` matrix 4562 - colmap - An array mapping local column numbers of `Ao` to global column numbers of the parallel matrix 4563 4564 Level: intermediate 4565 4566 Note: 4567 The rows in `Ad` and `Ao` are in [0, Nr), where Nr is the number of local rows on this process. The columns 4568 in `Ad` are in [0, Nc) where Nc is the number of local columns. The columns are `Ao` are in [0, Nco), where Nco is 4569 the number of nonzero columns in the local off-diagonal piece of the matrix `A`. The array colmap maps these 4570 local column numbers to global column numbers in the original matrix. 
4571 4572 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATSEQAIJ` 4573 @*/ 4574 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[]) 4575 { 4576 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 4577 PetscBool flg; 4578 4579 PetscFunctionBegin; 4580 PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &flg)); 4581 PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIAIJ matrix as input"); 4582 if (Ad) *Ad = a->A; 4583 if (Ao) *Ao = a->B; 4584 if (colmap) *colmap = a->garray; 4585 PetscFunctionReturn(PETSC_SUCCESS); 4586 } 4587 4588 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 4589 { 4590 PetscInt m, N, i, rstart, nnz, Ii; 4591 PetscInt *indx; 4592 PetscScalar *values; 4593 MatType rootType; 4594 4595 PetscFunctionBegin; 4596 PetscCall(MatGetSize(inmat, &m, &N)); 4597 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4598 PetscInt *dnz, *onz, sum, bs, cbs; 4599 4600 if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnership(comm, &n, &N)); 4601 /* Check sum(n) = N */ 4602 PetscCallMPI(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm)); 4603 PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N); 4604 4605 PetscCallMPI(MPI_Scan(&m, &rstart, 1, MPIU_INT, MPI_SUM, comm)); 4606 rstart -= m; 4607 4608 MatPreallocateBegin(comm, m, n, dnz, onz); 4609 for (i = 0; i < m; i++) { 4610 PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, NULL)); 4611 PetscCall(MatPreallocateSet(i + rstart, nnz, indx, dnz, onz)); 4612 PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, NULL)); 4613 } 4614 4615 PetscCall(MatCreate(comm, outmat)); 4616 PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 4617 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 
4618 PetscCall(MatSetBlockSizes(*outmat, bs, cbs)); 4619 PetscCall(MatGetRootType_Private(inmat, &rootType)); 4620 PetscCall(MatSetType(*outmat, rootType)); 4621 PetscCall(MatSeqAIJSetPreallocation(*outmat, 0, dnz)); 4622 PetscCall(MatMPIAIJSetPreallocation(*outmat, 0, dnz, 0, onz)); 4623 MatPreallocateEnd(dnz, onz); 4624 PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 4625 } 4626 4627 /* numeric phase */ 4628 PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL)); 4629 for (i = 0; i < m; i++) { 4630 PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, &values)); 4631 Ii = i + rstart; 4632 PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES)); 4633 PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, &values)); 4634 } 4635 PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY)); 4636 PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY)); 4637 PetscFunctionReturn(PETSC_SUCCESS); 4638 } 4639 4640 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void **data) 4641 { 4642 Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)*data; 4643 4644 PetscFunctionBegin; 4645 if (!merge) PetscFunctionReturn(PETSC_SUCCESS); 4646 PetscCall(PetscFree(merge->id_r)); 4647 PetscCall(PetscFree(merge->len_s)); 4648 PetscCall(PetscFree(merge->len_r)); 4649 PetscCall(PetscFree(merge->bi)); 4650 PetscCall(PetscFree(merge->bj)); 4651 PetscCall(PetscFree(merge->buf_ri[0])); 4652 PetscCall(PetscFree(merge->buf_ri)); 4653 PetscCall(PetscFree(merge->buf_rj[0])); 4654 PetscCall(PetscFree(merge->buf_rj)); 4655 PetscCall(PetscFree(merge->coi)); 4656 PetscCall(PetscFree(merge->coj)); 4657 PetscCall(PetscFree(merge->owners_co)); 4658 PetscCall(PetscLayoutDestroy(&merge->rowmap)); 4659 PetscCall(PetscFree(merge)); 4660 PetscFunctionReturn(PETSC_SUCCESS); 4661 } 4662 4663 #include <../src/mat/utils/freespace.h> 4664 #include <petscbt.h> 4665 4666 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat, Mat mpimat) 4667 { 4668 MPI_Comm comm; 4669 Mat_SeqAIJ 
*a = (Mat_SeqAIJ *)seqmat->data; 4670 PetscMPIInt size, rank, taga, *len_s; 4671 PetscInt N = mpimat->cmap->N, i, j, *owners, *ai = a->i, *aj, m; 4672 PetscMPIInt proc, k; 4673 PetscInt **buf_ri, **buf_rj; 4674 PetscInt anzi, *bj_i, *bi, *bj, arow, bnzi, nextaj; 4675 PetscInt nrows, **buf_ri_k, **nextrow, **nextai; 4676 MPI_Request *s_waits, *r_waits; 4677 MPI_Status *status; 4678 const MatScalar *aa, *a_a; 4679 MatScalar **abuf_r, *ba_i; 4680 Mat_Merge_SeqsToMPI *merge; 4681 PetscContainer container; 4682 4683 PetscFunctionBegin; 4684 PetscCall(PetscObjectGetComm((PetscObject)mpimat, &comm)); 4685 PetscCall(PetscLogEventBegin(MAT_Seqstompinum, seqmat, 0, 0, 0)); 4686 4687 PetscCallMPI(MPI_Comm_size(comm, &size)); 4688 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 4689 4690 PetscCall(PetscObjectQuery((PetscObject)mpimat, "MatMergeSeqsToMPI", (PetscObject *)&container)); 4691 PetscCheck(container, PetscObjectComm((PetscObject)mpimat), PETSC_ERR_PLIB, "Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic"); 4692 PetscCall(PetscContainerGetPointer(container, (void **)&merge)); 4693 PetscCall(MatSeqAIJGetArrayRead(seqmat, &a_a)); 4694 aa = a_a; 4695 4696 bi = merge->bi; 4697 bj = merge->bj; 4698 buf_ri = merge->buf_ri; 4699 buf_rj = merge->buf_rj; 4700 4701 PetscCall(PetscMalloc1(size, &status)); 4702 owners = merge->rowmap->range; 4703 len_s = merge->len_s; 4704 4705 /* send and recv matrix values */ 4706 PetscCall(PetscObjectGetNewTag((PetscObject)mpimat, &taga)); 4707 PetscCall(PetscPostIrecvScalar(comm, taga, merge->nrecv, merge->id_r, merge->len_r, &abuf_r, &r_waits)); 4708 4709 PetscCall(PetscMalloc1(merge->nsend + 1, &s_waits)); 4710 for (proc = 0, k = 0; proc < size; proc++) { 4711 if (!len_s[proc]) continue; 4712 i = owners[proc]; 4713 PetscCallMPI(MPIU_Isend(aa + ai[i], len_s[proc], MPIU_MATSCALAR, proc, taga, comm, s_waits + k)); 4714 k++; 4715 } 4716 4717 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, r_waits, status)); 4718 if (merge->nsend) 
PetscCallMPI(MPI_Waitall(merge->nsend, s_waits, status)); 4719 PetscCall(PetscFree(status)); 4720 4721 PetscCall(PetscFree(s_waits)); 4722 PetscCall(PetscFree(r_waits)); 4723 4724 /* insert mat values of mpimat */ 4725 PetscCall(PetscMalloc1(N, &ba_i)); 4726 PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai)); 4727 4728 for (k = 0; k < merge->nrecv; k++) { 4729 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4730 nrows = *buf_ri_k[k]; 4731 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4732 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4733 } 4734 4735 /* set values of ba */ 4736 m = merge->rowmap->n; 4737 for (i = 0; i < m; i++) { 4738 arow = owners[rank] + i; 4739 bj_i = bj + bi[i]; /* col indices of the i-th row of mpimat */ 4740 bnzi = bi[i + 1] - bi[i]; 4741 PetscCall(PetscArrayzero(ba_i, bnzi)); 4742 4743 /* add local non-zero vals of this proc's seqmat into ba */ 4744 anzi = ai[arow + 1] - ai[arow]; 4745 aj = a->j + ai[arow]; 4746 aa = a_a + ai[arow]; 4747 nextaj = 0; 4748 for (j = 0; nextaj < anzi; j++) { 4749 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4750 ba_i[j] += aa[nextaj++]; 4751 } 4752 } 4753 4754 /* add received vals into ba */ 4755 for (k = 0; k < merge->nrecv; k++) { /* k-th received message */ 4756 /* i-th row */ 4757 if (i == *nextrow[k]) { 4758 anzi = *(nextai[k] + 1) - *nextai[k]; 4759 aj = buf_rj[k] + *nextai[k]; 4760 aa = abuf_r[k] + *nextai[k]; 4761 nextaj = 0; 4762 for (j = 0; nextaj < anzi; j++) { 4763 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4764 ba_i[j] += aa[nextaj++]; 4765 } 4766 } 4767 nextrow[k]++; 4768 nextai[k]++; 4769 } 4770 } 4771 PetscCall(MatSetValues(mpimat, 1, &arow, bnzi, bj_i, ba_i, INSERT_VALUES)); 4772 } 4773 PetscCall(MatSeqAIJRestoreArrayRead(seqmat, &a_a)); 4774 PetscCall(MatAssemblyBegin(mpimat, MAT_FINAL_ASSEMBLY)); 4775 
PetscCall(MatAssemblyEnd(mpimat, MAT_FINAL_ASSEMBLY)); 4776 4777 PetscCall(PetscFree(abuf_r[0])); 4778 PetscCall(PetscFree(abuf_r)); 4779 PetscCall(PetscFree(ba_i)); 4780 PetscCall(PetscFree3(buf_ri_k, nextrow, nextai)); 4781 PetscCall(PetscLogEventEnd(MAT_Seqstompinum, seqmat, 0, 0, 0)); 4782 PetscFunctionReturn(PETSC_SUCCESS); 4783 } 4784 4785 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, Mat *mpimat) 4786 { 4787 Mat B_mpi; 4788 Mat_SeqAIJ *a = (Mat_SeqAIJ *)seqmat->data; 4789 PetscMPIInt size, rank, tagi, tagj, *len_s, *len_si, *len_ri; 4790 PetscInt **buf_rj, **buf_ri, **buf_ri_k; 4791 PetscInt M = seqmat->rmap->n, N = seqmat->cmap->n, i, *owners, *ai = a->i, *aj = a->j; 4792 PetscInt len, *dnz, *onz, bs, cbs; 4793 PetscInt k, anzi, *bi, *bj, *lnk, nlnk, arow, bnzi; 4794 PetscInt nrows, *buf_s, *buf_si, *buf_si_i, **nextrow, **nextai; 4795 MPI_Request *si_waits, *sj_waits, *ri_waits, *rj_waits; 4796 MPI_Status *status; 4797 PetscFreeSpaceList free_space = NULL, current_space = NULL; 4798 PetscBT lnkbt; 4799 Mat_Merge_SeqsToMPI *merge; 4800 PetscContainer container; 4801 4802 PetscFunctionBegin; 4803 PetscCall(PetscLogEventBegin(MAT_Seqstompisym, seqmat, 0, 0, 0)); 4804 4805 /* make sure it is a PETSc comm */ 4806 PetscCall(PetscCommDuplicate(comm, &comm, NULL)); 4807 PetscCallMPI(MPI_Comm_size(comm, &size)); 4808 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 4809 4810 PetscCall(PetscNew(&merge)); 4811 PetscCall(PetscMalloc1(size, &status)); 4812 4813 /* determine row ownership */ 4814 PetscCall(PetscLayoutCreate(comm, &merge->rowmap)); 4815 PetscCall(PetscLayoutSetLocalSize(merge->rowmap, m)); 4816 PetscCall(PetscLayoutSetSize(merge->rowmap, M)); 4817 PetscCall(PetscLayoutSetBlockSize(merge->rowmap, 1)); 4818 PetscCall(PetscLayoutSetUp(merge->rowmap)); 4819 PetscCall(PetscMalloc1(size, &len_si)); 4820 PetscCall(PetscMalloc1(size, &merge->len_s)); 4821 4822 m = merge->rowmap->n; 4823 owners = merge->rowmap->range; 
4824 4825 /* determine the number of messages to send, their lengths */ 4826 len_s = merge->len_s; 4827 4828 len = 0; /* length of buf_si[] */ 4829 merge->nsend = 0; 4830 for (PetscMPIInt proc = 0; proc < size; proc++) { 4831 len_si[proc] = 0; 4832 if (proc == rank) { 4833 len_s[proc] = 0; 4834 } else { 4835 PetscCall(PetscMPIIntCast(owners[proc + 1] - owners[proc] + 1, &len_si[proc])); 4836 PetscCall(PetscMPIIntCast(ai[owners[proc + 1]] - ai[owners[proc]], &len_s[proc])); /* num of rows to be sent to [proc] */ 4837 } 4838 if (len_s[proc]) { 4839 merge->nsend++; 4840 nrows = 0; 4841 for (i = owners[proc]; i < owners[proc + 1]; i++) { 4842 if (ai[i + 1] > ai[i]) nrows++; 4843 } 4844 PetscCall(PetscMPIIntCast(2 * (nrows + 1), &len_si[proc])); 4845 len += len_si[proc]; 4846 } 4847 } 4848 4849 /* determine the number and length of messages to receive for ij-structure */ 4850 PetscCall(PetscGatherNumberOfMessages(comm, NULL, len_s, &merge->nrecv)); 4851 PetscCall(PetscGatherMessageLengths2(comm, merge->nsend, merge->nrecv, len_s, len_si, &merge->id_r, &merge->len_r, &len_ri)); 4852 4853 /* post the Irecv of j-structure */ 4854 PetscCall(PetscCommGetNewTag(comm, &tagj)); 4855 PetscCall(PetscPostIrecvInt(comm, tagj, merge->nrecv, merge->id_r, merge->len_r, &buf_rj, &rj_waits)); 4856 4857 /* post the Isend of j-structure */ 4858 PetscCall(PetscMalloc2(merge->nsend, &si_waits, merge->nsend, &sj_waits)); 4859 4860 for (PetscMPIInt proc = 0, k = 0; proc < size; proc++) { 4861 if (!len_s[proc]) continue; 4862 i = owners[proc]; 4863 PetscCallMPI(MPIU_Isend(aj + ai[i], len_s[proc], MPIU_INT, proc, tagj, comm, sj_waits + k)); 4864 k++; 4865 } 4866 4867 /* receives and sends of j-structure are complete */ 4868 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, rj_waits, status)); 4869 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, sj_waits, status)); 4870 4871 /* send and recv i-structure */ 4872 PetscCall(PetscCommGetNewTag(comm, &tagi)); 4873 
PetscCall(PetscPostIrecvInt(comm, tagi, merge->nrecv, merge->id_r, len_ri, &buf_ri, &ri_waits)); 4874 4875 PetscCall(PetscMalloc1(len + 1, &buf_s)); 4876 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4877 for (PetscMPIInt proc = 0, k = 0; proc < size; proc++) { 4878 if (!len_s[proc]) continue; 4879 /* form outgoing message for i-structure: 4880 buf_si[0]: nrows to be sent 4881 [1:nrows]: row index (global) 4882 [nrows+1:2*nrows+1]: i-structure index 4883 */ 4884 nrows = len_si[proc] / 2 - 1; 4885 buf_si_i = buf_si + nrows + 1; 4886 buf_si[0] = nrows; 4887 buf_si_i[0] = 0; 4888 nrows = 0; 4889 for (i = owners[proc]; i < owners[proc + 1]; i++) { 4890 anzi = ai[i + 1] - ai[i]; 4891 if (anzi) { 4892 buf_si_i[nrows + 1] = buf_si_i[nrows] + anzi; /* i-structure */ 4893 buf_si[nrows + 1] = i - owners[proc]; /* local row index */ 4894 nrows++; 4895 } 4896 } 4897 PetscCallMPI(MPIU_Isend(buf_si, len_si[proc], MPIU_INT, proc, tagi, comm, si_waits + k)); 4898 k++; 4899 buf_si += len_si[proc]; 4900 } 4901 4902 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, ri_waits, status)); 4903 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, si_waits, status)); 4904 4905 PetscCall(PetscInfo(seqmat, "nsend: %d, nrecv: %d\n", merge->nsend, merge->nrecv)); 4906 for (i = 0; i < merge->nrecv; i++) PetscCall(PetscInfo(seqmat, "recv len_ri=%d, len_rj=%d from [%d]\n", len_ri[i], merge->len_r[i], merge->id_r[i])); 4907 4908 PetscCall(PetscFree(len_si)); 4909 PetscCall(PetscFree(len_ri)); 4910 PetscCall(PetscFree(rj_waits)); 4911 PetscCall(PetscFree2(si_waits, sj_waits)); 4912 PetscCall(PetscFree(ri_waits)); 4913 PetscCall(PetscFree(buf_s)); 4914 PetscCall(PetscFree(status)); 4915 4916 /* compute a local seq matrix in each processor */ 4917 /* allocate bi array and free space for accumulating nonzero column info */ 4918 PetscCall(PetscMalloc1(m + 1, &bi)); 4919 bi[0] = 0; 4920 4921 /* create and initialize a linked list */ 4922 nlnk = N + 1; 4923 
PetscCall(PetscLLCreate(N, N, nlnk, lnk, lnkbt)); 4924 4925 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4926 len = ai[owners[rank + 1]] - ai[owners[rank]]; 4927 PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2, len) + 1, &free_space)); 4928 4929 current_space = free_space; 4930 4931 /* determine symbolic info for each local row */ 4932 PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai)); 4933 4934 for (k = 0; k < merge->nrecv; k++) { 4935 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4936 nrows = *buf_ri_k[k]; 4937 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4938 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4939 } 4940 4941 MatPreallocateBegin(comm, m, n, dnz, onz); 4942 len = 0; 4943 for (i = 0; i < m; i++) { 4944 bnzi = 0; 4945 /* add local non-zero cols of this proc's seqmat into lnk */ 4946 arow = owners[rank] + i; 4947 anzi = ai[arow + 1] - ai[arow]; 4948 aj = a->j + ai[arow]; 4949 PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt)); 4950 bnzi += nlnk; 4951 /* add received col data into lnk */ 4952 for (k = 0; k < merge->nrecv; k++) { /* k-th received message */ 4953 if (i == *nextrow[k]) { /* i-th row */ 4954 anzi = *(nextai[k] + 1) - *nextai[k]; 4955 aj = buf_rj[k] + *nextai[k]; 4956 PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt)); 4957 bnzi += nlnk; 4958 nextrow[k]++; 4959 nextai[k]++; 4960 } 4961 } 4962 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4963 4964 /* if free space is not available, make more free space */ 4965 if (current_space->local_remaining < bnzi) PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi, current_space->total_array_size), ¤t_space)); 4966 /* copy data into free space, then initialize lnk */ 4967 PetscCall(PetscLLClean(N, N, bnzi, lnk, current_space->array, lnkbt)); 4968 PetscCall(MatPreallocateSet(i + owners[rank], bnzi, 
current_space->array, dnz, onz)); 4969 4970 current_space->array += bnzi; 4971 current_space->local_used += bnzi; 4972 current_space->local_remaining -= bnzi; 4973 4974 bi[i + 1] = bi[i] + bnzi; 4975 } 4976 4977 PetscCall(PetscFree3(buf_ri_k, nextrow, nextai)); 4978 4979 PetscCall(PetscMalloc1(bi[m] + 1, &bj)); 4980 PetscCall(PetscFreeSpaceContiguous(&free_space, bj)); 4981 PetscCall(PetscLLDestroy(lnk, lnkbt)); 4982 4983 /* create symbolic parallel matrix B_mpi */ 4984 PetscCall(MatGetBlockSizes(seqmat, &bs, &cbs)); 4985 PetscCall(MatCreate(comm, &B_mpi)); 4986 if (n == PETSC_DECIDE) { 4987 PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, N)); 4988 } else { 4989 PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 4990 } 4991 PetscCall(MatSetBlockSizes(B_mpi, bs, cbs)); 4992 PetscCall(MatSetType(B_mpi, MATMPIAIJ)); 4993 PetscCall(MatMPIAIJSetPreallocation(B_mpi, 0, dnz, 0, onz)); 4994 MatPreallocateEnd(dnz, onz); 4995 PetscCall(MatSetOption(B_mpi, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE)); 4996 4997 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4998 B_mpi->assembled = PETSC_FALSE; 4999 merge->bi = bi; 5000 merge->bj = bj; 5001 merge->buf_ri = buf_ri; 5002 merge->buf_rj = buf_rj; 5003 merge->coi = NULL; 5004 merge->coj = NULL; 5005 merge->owners_co = NULL; 5006 5007 PetscCall(PetscCommDestroy(&comm)); 5008 5009 /* attach the supporting struct to B_mpi for reuse */ 5010 PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container)); 5011 PetscCall(PetscContainerSetPointer(container, merge)); 5012 PetscCall(PetscContainerSetCtxDestroy(container, MatDestroy_MPIAIJ_SeqsToMPI)); 5013 PetscCall(PetscObjectCompose((PetscObject)B_mpi, "MatMergeSeqsToMPI", (PetscObject)container)); 5014 PetscCall(PetscContainerDestroy(&container)); 5015 *mpimat = B_mpi; 5016 5017 PetscCall(PetscLogEventEnd(MAT_Seqstompisym, seqmat, 0, 0, 0)); 5018 PetscFunctionReturn(PETSC_SUCCESS); 5019 } 5020 5021 /*@ 5022 
MatCreateMPIAIJSumSeqAIJ - Creates a `MATMPIAIJ` matrix by adding sequential 5023 matrices from each processor 5024 5025 Collective 5026 5027 Input Parameters: 5028 + comm - the communicators the parallel matrix will live on 5029 . seqmat - the input sequential matrices 5030 . m - number of local rows (or `PETSC_DECIDE`) 5031 . n - number of local columns (or `PETSC_DECIDE`) 5032 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5033 5034 Output Parameter: 5035 . mpimat - the parallel matrix generated 5036 5037 Level: advanced 5038 5039 Note: 5040 The dimensions of the sequential matrix in each processor MUST be the same. 5041 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 5042 destroyed when `mpimat` is destroyed. Call `PetscObjectQuery()` to access `seqmat`. 5043 5044 .seealso: [](ch_matrices), `Mat`, `MatCreateAIJ()` 5045 @*/ 5046 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, MatReuse scall, Mat *mpimat) 5047 { 5048 PetscMPIInt size; 5049 5050 PetscFunctionBegin; 5051 PetscCallMPI(MPI_Comm_size(comm, &size)); 5052 if (size == 1) { 5053 PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0)); 5054 if (scall == MAT_INITIAL_MATRIX) { 5055 PetscCall(MatDuplicate(seqmat, MAT_COPY_VALUES, mpimat)); 5056 } else { 5057 PetscCall(MatCopy(seqmat, *mpimat, SAME_NONZERO_PATTERN)); 5058 } 5059 PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0)); 5060 PetscFunctionReturn(PETSC_SUCCESS); 5061 } 5062 PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0)); 5063 if (scall == MAT_INITIAL_MATRIX) PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm, seqmat, m, n, mpimat)); 5064 PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat, *mpimat)); 5065 PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0)); 5066 PetscFunctionReturn(PETSC_SUCCESS); 5067 } 5068 5069 /*@ 5070 MatAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATAIJ` matrix. 
5071 5072 Not Collective 5073 5074 Input Parameter: 5075 . A - the matrix 5076 5077 Output Parameter: 5078 . A_loc - the local sequential matrix generated 5079 5080 Level: developer 5081 5082 Notes: 5083 The matrix is created by taking `A`'s local rows and putting them into a sequential matrix 5084 with `mlocal` rows and `n` columns. Where `mlocal` is obtained with `MatGetLocalSize()` and 5085 `n` is the global column count obtained with `MatGetSize()` 5086 5087 In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix. 5088 5089 For parallel matrices this creates an entirely new matrix. If the matrix is sequential it merely increases the reference count. 5090 5091 Destroy the matrix with `MatDestroy()` 5092 5093 .seealso: [](ch_matrices), `Mat`, `MatMPIAIJGetLocalMat()` 5094 @*/ 5095 PetscErrorCode MatAIJGetLocalMat(Mat A, Mat *A_loc) 5096 { 5097 PetscBool mpi; 5098 5099 PetscFunctionBegin; 5100 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &mpi)); 5101 if (mpi) { 5102 PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, A_loc)); 5103 } else { 5104 *A_loc = A; 5105 PetscCall(PetscObjectReference((PetscObject)*A_loc)); 5106 } 5107 PetscFunctionReturn(PETSC_SUCCESS); 5108 } 5109 5110 /*@ 5111 MatMPIAIJGetLocalMat - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix. 5112 5113 Not Collective 5114 5115 Input Parameters: 5116 + A - the matrix 5117 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5118 5119 Output Parameter: 5120 . A_loc - the local sequential matrix generated 5121 5122 Level: developer 5123 5124 Notes: 5125 The matrix is created by taking all `A`'s local rows and putting them into a sequential 5126 matrix with `mlocal` rows and `n` columns.`mlocal` is the row count obtained with 5127 `MatGetLocalSize()` and `n` is the global column count obtained with `MatGetSize()`. 5128 5129 In other words combines the two parts of a parallel `MATMPIAIJ` matrix on each process to a single matrix. 
5130 5131 When `A` is sequential and `MAT_INITIAL_MATRIX` is requested, the matrix returned is the diagonal part of `A` (which contains the entire matrix), 5132 with its reference count increased by one. Hence changing values of `A_loc` changes `A`. If `MAT_REUSE_MATRIX` is requested on a sequential matrix 5133 then `MatCopy`(Adiag,*`A_loc`,`SAME_NONZERO_PATTERN`) is called to fill `A_loc`. Thus one can preallocate the appropriate sequential matrix `A_loc` 5134 and then call this routine with `MAT_REUSE_MATRIX`. In this case, one can modify the values of `A_loc` without affecting the original sequential matrix. 5135 5136 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()` 5137 @*/ 5138 PetscErrorCode MatMPIAIJGetLocalMat(Mat A, MatReuse scall, Mat *A_loc) 5139 { 5140 Mat_MPIAIJ *mpimat = (Mat_MPIAIJ *)A->data; 5141 Mat_SeqAIJ *mat, *a, *b; 5142 PetscInt *ai, *aj, *bi, *bj, *cmap = mpimat->garray; 5143 const PetscScalar *aa, *ba, *aav, *bav; 5144 PetscScalar *ca, *cam; 5145 PetscMPIInt size; 5146 PetscInt am = A->rmap->n, i, j, k, cstart = A->cmap->rstart; 5147 PetscInt *ci, *cj, col, ncols_d, ncols_o, jo; 5148 PetscBool match; 5149 5150 PetscFunctionBegin; 5151 PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &match)); 5152 PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input"); 5153 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 5154 if (size == 1) { 5155 if (scall == MAT_INITIAL_MATRIX) { 5156 PetscCall(PetscObjectReference((PetscObject)mpimat->A)); 5157 *A_loc = mpimat->A; 5158 } else if (scall == MAT_REUSE_MATRIX) { 5159 PetscCall(MatCopy(mpimat->A, *A_loc, SAME_NONZERO_PATTERN)); 5160 } 5161 PetscFunctionReturn(PETSC_SUCCESS); 5162 } 5163 5164 PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0)); 5165 a = (Mat_SeqAIJ *)mpimat->A->data; 5166 b = (Mat_SeqAIJ *)mpimat->B->data; 5167 
ai = a->i; 5168 aj = a->j; 5169 bi = b->i; 5170 bj = b->j; 5171 PetscCall(MatSeqAIJGetArrayRead(mpimat->A, &aav)); 5172 PetscCall(MatSeqAIJGetArrayRead(mpimat->B, &bav)); 5173 aa = aav; 5174 ba = bav; 5175 if (scall == MAT_INITIAL_MATRIX) { 5176 PetscCall(PetscMalloc1(1 + am, &ci)); 5177 ci[0] = 0; 5178 for (i = 0; i < am; i++) ci[i + 1] = ci[i] + (ai[i + 1] - ai[i]) + (bi[i + 1] - bi[i]); 5179 PetscCall(PetscMalloc1(1 + ci[am], &cj)); 5180 PetscCall(PetscMalloc1(1 + ci[am], &ca)); 5181 k = 0; 5182 for (i = 0; i < am; i++) { 5183 ncols_o = bi[i + 1] - bi[i]; 5184 ncols_d = ai[i + 1] - ai[i]; 5185 /* off-diagonal portion of A */ 5186 for (jo = 0; jo < ncols_o; jo++) { 5187 col = cmap[*bj]; 5188 if (col >= cstart) break; 5189 cj[k] = col; 5190 bj++; 5191 ca[k++] = *ba++; 5192 } 5193 /* diagonal portion of A */ 5194 for (j = 0; j < ncols_d; j++) { 5195 cj[k] = cstart + *aj++; 5196 ca[k++] = *aa++; 5197 } 5198 /* off-diagonal portion of A */ 5199 for (j = jo; j < ncols_o; j++) { 5200 cj[k] = cmap[*bj++]; 5201 ca[k++] = *ba++; 5202 } 5203 } 5204 /* put together the new matrix */ 5205 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, A->cmap->N, ci, cj, ca, A_loc)); 5206 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5207 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5208 mat = (Mat_SeqAIJ *)(*A_loc)->data; 5209 mat->free_a = PETSC_TRUE; 5210 mat->free_ij = PETSC_TRUE; 5211 mat->nonew = 0; 5212 } else if (scall == MAT_REUSE_MATRIX) { 5213 mat = (Mat_SeqAIJ *)(*A_loc)->data; 5214 ci = mat->i; 5215 cj = mat->j; 5216 PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &cam)); 5217 for (i = 0; i < am; i++) { 5218 /* off-diagonal portion of A */ 5219 ncols_o = bi[i + 1] - bi[i]; 5220 for (jo = 0; jo < ncols_o; jo++) { 5221 col = cmap[*bj]; 5222 if (col >= cstart) break; 5223 *cam++ = *ba++; 5224 bj++; 5225 } 5226 /* diagonal portion of A */ 5227 ncols_d = ai[i + 1] - ai[i]; 5228 for (j = 0; j < ncols_d; j++) *cam++ = *aa++; 5229 /* off-diagonal portion of A */ 5230 for (j = jo; j < ncols_o; j++) { 5231 *cam++ = *ba++; 5232 bj++; 5233 } 5234 } 5235 PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &cam)); 5236 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall); 5237 PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A, &aav)); 5238 PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B, &bav)); 5239 PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0)); 5240 PetscFunctionReturn(PETSC_SUCCESS); 5241 } 5242 5243 /*@ 5244 MatMPIAIJGetLocalMatMerge - Creates a `MATSEQAIJ` from a `MATMPIAIJ` matrix by taking all its local rows and putting them into a sequential matrix with 5245 mlocal rows and n columns. 
Where n is the sum of the number of columns of the diagonal and off-diagonal part 5246 5247 Not Collective 5248 5249 Input Parameters: 5250 + A - the matrix 5251 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5252 5253 Output Parameters: 5254 + glob - sequential `IS` with global indices associated with the columns of the local sequential matrix generated (can be `NULL`) 5255 - A_loc - the local sequential matrix generated 5256 5257 Level: developer 5258 5259 Note: 5260 This is different from `MatMPIAIJGetLocalMat()` since the first columns in the returning matrix are those associated with the diagonal 5261 part, then those associated with the off-diagonal part (in its local ordering) 5262 5263 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()` 5264 @*/ 5265 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A, MatReuse scall, IS *glob, Mat *A_loc) 5266 { 5267 Mat Ao, Ad; 5268 const PetscInt *cmap; 5269 PetscMPIInt size; 5270 PetscErrorCode (*f)(Mat, MatReuse, IS *, Mat *); 5271 5272 PetscFunctionBegin; 5273 PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &cmap)); 5274 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 5275 if (size == 1) { 5276 if (scall == MAT_INITIAL_MATRIX) { 5277 PetscCall(PetscObjectReference((PetscObject)Ad)); 5278 *A_loc = Ad; 5279 } else if (scall == MAT_REUSE_MATRIX) { 5280 PetscCall(MatCopy(Ad, *A_loc, SAME_NONZERO_PATTERN)); 5281 } 5282 if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad), Ad->cmap->n, Ad->cmap->rstart, 1, glob)); 5283 PetscFunctionReturn(PETSC_SUCCESS); 5284 } 5285 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJGetLocalMatMerge_C", &f)); 5286 PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0)); 5287 if (f) { 5288 PetscCall((*f)(A, scall, glob, A_loc)); 5289 } else { 5290 Mat_SeqAIJ *a = (Mat_SeqAIJ *)Ad->data; 5291 Mat_SeqAIJ *b = (Mat_SeqAIJ *)Ao->data; 5292 Mat_SeqAIJ *c; 5293 
PetscInt *ai = a->i, *aj = a->j; 5294 PetscInt *bi = b->i, *bj = b->j; 5295 PetscInt *ci, *cj; 5296 const PetscScalar *aa, *ba; 5297 PetscScalar *ca; 5298 PetscInt i, j, am, dn, on; 5299 5300 PetscCall(MatGetLocalSize(Ad, &am, &dn)); 5301 PetscCall(MatGetLocalSize(Ao, NULL, &on)); 5302 PetscCall(MatSeqAIJGetArrayRead(Ad, &aa)); 5303 PetscCall(MatSeqAIJGetArrayRead(Ao, &ba)); 5304 if (scall == MAT_INITIAL_MATRIX) { 5305 PetscInt k; 5306 PetscCall(PetscMalloc1(1 + am, &ci)); 5307 PetscCall(PetscMalloc1(ai[am] + bi[am], &cj)); 5308 PetscCall(PetscMalloc1(ai[am] + bi[am], &ca)); 5309 ci[0] = 0; 5310 for (i = 0, k = 0; i < am; i++) { 5311 const PetscInt ncols_o = bi[i + 1] - bi[i]; 5312 const PetscInt ncols_d = ai[i + 1] - ai[i]; 5313 ci[i + 1] = ci[i] + ncols_o + ncols_d; 5314 /* diagonal portion of A */ 5315 for (j = 0; j < ncols_d; j++, k++) { 5316 cj[k] = *aj++; 5317 ca[k] = *aa++; 5318 } 5319 /* off-diagonal portion of A */ 5320 for (j = 0; j < ncols_o; j++, k++) { 5321 cj[k] = dn + *bj++; 5322 ca[k] = *ba++; 5323 } 5324 } 5325 /* put together the new matrix */ 5326 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, dn + on, ci, cj, ca, A_loc)); 5327 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5328 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5329 c = (Mat_SeqAIJ *)(*A_loc)->data; 5330 c->free_a = PETSC_TRUE; 5331 c->free_ij = PETSC_TRUE; 5332 c->nonew = 0; 5333 PetscCall(MatSetType(*A_loc, ((PetscObject)Ad)->type_name)); 5334 } else if (scall == MAT_REUSE_MATRIX) { 5335 PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &ca)); 5336 for (i = 0; i < am; i++) { 5337 const PetscInt ncols_d = ai[i + 1] - ai[i]; 5338 const PetscInt ncols_o = bi[i + 1] - bi[i]; 5339 /* diagonal portion of A */ 5340 for (j = 0; j < ncols_d; j++) *ca++ = *aa++; 5341 /* off-diagonal portion of A */ 5342 for (j = 0; j < ncols_o; j++) *ca++ = *ba++; 5343 } 5344 PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &ca)); 5345 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall); 5346 PetscCall(MatSeqAIJRestoreArrayRead(Ad, &aa)); 5347 PetscCall(MatSeqAIJRestoreArrayRead(Ao, &aa)); 5348 if (glob) { 5349 PetscInt cst, *gidx; 5350 5351 PetscCall(MatGetOwnershipRangeColumn(A, &cst, NULL)); 5352 PetscCall(PetscMalloc1(dn + on, &gidx)); 5353 for (i = 0; i < dn; i++) gidx[i] = cst + i; 5354 for (i = 0; i < on; i++) gidx[i + dn] = cmap[i]; 5355 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad), dn + on, gidx, PETSC_OWN_POINTER, glob)); 5356 } 5357 } 5358 PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0)); 5359 PetscFunctionReturn(PETSC_SUCCESS); 5360 } 5361 5362 /*@C 5363 MatMPIAIJGetLocalMatCondensed - Creates a `MATSEQAIJ` matrix from an `MATMPIAIJ` matrix by taking all its local rows and NON-ZERO columns 5364 5365 Not Collective 5366 5367 Input Parameters: 5368 + A - the matrix 5369 . scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5370 . row - index set of rows to extract (or `NULL`) 5371 - col - index set of columns to extract (or `NULL`) 5372 5373 Output Parameter: 5374 . 
A_loc - the local sequential matrix generated 5375 5376 Level: developer 5377 5378 .seealso: [](ch_matrices), `Mat`, `MATMPIAIJ`, `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()` 5379 @*/ 5380 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A, MatReuse scall, IS *row, IS *col, Mat *A_loc) 5381 { 5382 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5383 PetscInt i, start, end, ncols, nzA, nzB, *cmap, imark, *idx; 5384 IS isrowa, iscola; 5385 Mat *aloc; 5386 PetscBool match; 5387 5388 PetscFunctionBegin; 5389 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &match)); 5390 PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input"); 5391 PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed, A, 0, 0, 0)); 5392 if (!row) { 5393 start = A->rmap->rstart; 5394 end = A->rmap->rend; 5395 PetscCall(ISCreateStride(PETSC_COMM_SELF, end - start, start, 1, &isrowa)); 5396 } else { 5397 isrowa = *row; 5398 } 5399 if (!col) { 5400 start = A->cmap->rstart; 5401 cmap = a->garray; 5402 nzA = a->A->cmap->n; 5403 nzB = a->B->cmap->n; 5404 PetscCall(PetscMalloc1(nzA + nzB, &idx)); 5405 ncols = 0; 5406 for (i = 0; i < nzB; i++) { 5407 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5408 else break; 5409 } 5410 imark = i; 5411 for (i = 0; i < nzA; i++) idx[ncols++] = start + i; 5412 for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; 5413 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &iscola)); 5414 } else { 5415 iscola = *col; 5416 } 5417 if (scall != MAT_INITIAL_MATRIX) { 5418 PetscCall(PetscMalloc1(1, &aloc)); 5419 aloc[0] = *A_loc; 5420 } 5421 PetscCall(MatCreateSubMatrices(A, 1, &isrowa, &iscola, scall, &aloc)); 5422 if (!col) { /* attach global id of condensed columns */ 5423 PetscCall(PetscObjectCompose((PetscObject)aloc[0], "_petsc_GetLocalMatCondensed_iscol", (PetscObject)iscola)); 5424 } 5425 *A_loc = aloc[0]; 5426 PetscCall(PetscFree(aloc)); 5427 if (!row) PetscCall(ISDestroy(&isrowa)); 5428 if (!col) 
PetscCall(ISDestroy(&iscola)); 5429 PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed, A, 0, 0, 0)); 5430 PetscFunctionReturn(PETSC_SUCCESS); 5431 } 5432 5433 /* 5434 * Create a sequential AIJ matrix based on row indices. a whole column is extracted once a row is matched. 5435 * Row could be local or remote.The routine is designed to be scalable in memory so that nothing is based 5436 * on a global size. 5437 * */ 5438 static PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P, IS rows, Mat *P_oth) 5439 { 5440 Mat_MPIAIJ *p = (Mat_MPIAIJ *)P->data; 5441 Mat_SeqAIJ *pd = (Mat_SeqAIJ *)p->A->data, *po = (Mat_SeqAIJ *)p->B->data, *p_oth; 5442 PetscInt plocalsize, nrows, *ilocal, *oilocal, i, lidx, *nrcols, *nlcols, ncol; 5443 PetscMPIInt owner; 5444 PetscSFNode *iremote, *oiremote; 5445 const PetscInt *lrowindices; 5446 PetscSF sf, osf; 5447 PetscInt pcstart, *roffsets, *loffsets, *pnnz, j; 5448 PetscInt ontotalcols, dntotalcols, ntotalcols, nout; 5449 MPI_Comm comm; 5450 ISLocalToGlobalMapping mapping; 5451 const PetscScalar *pd_a, *po_a; 5452 5453 PetscFunctionBegin; 5454 PetscCall(PetscObjectGetComm((PetscObject)P, &comm)); 5455 /* plocalsize is the number of roots 5456 * nrows is the number of leaves 5457 * */ 5458 PetscCall(MatGetLocalSize(P, &plocalsize, NULL)); 5459 PetscCall(ISGetLocalSize(rows, &nrows)); 5460 PetscCall(PetscCalloc1(nrows, &iremote)); 5461 PetscCall(ISGetIndices(rows, &lrowindices)); 5462 for (i = 0; i < nrows; i++) { 5463 /* Find a remote index and an owner for a row 5464 * The row could be local or remote 5465 * */ 5466 owner = 0; 5467 lidx = 0; 5468 PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, &lidx)); 5469 iremote[i].index = lidx; 5470 iremote[i].rank = owner; 5471 } 5472 /* Create SF to communicate how many nonzero columns for each row */ 5473 PetscCall(PetscSFCreate(comm, &sf)); 5474 /* SF will figure out the number of nonzero columns for each row, and their 5475 * offsets 5476 * */ 5477 
PetscCall(PetscSFSetGraph(sf, plocalsize, nrows, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 5478 PetscCall(PetscSFSetFromOptions(sf)); 5479 PetscCall(PetscSFSetUp(sf)); 5480 5481 PetscCall(PetscCalloc1(2 * (plocalsize + 1), &roffsets)); 5482 PetscCall(PetscCalloc1(2 * plocalsize, &nrcols)); 5483 PetscCall(PetscCalloc1(nrows, &pnnz)); 5484 roffsets[0] = 0; 5485 roffsets[1] = 0; 5486 for (i = 0; i < plocalsize; i++) { 5487 /* diagonal */ 5488 nrcols[i * 2 + 0] = pd->i[i + 1] - pd->i[i]; 5489 /* off-diagonal */ 5490 nrcols[i * 2 + 1] = po->i[i + 1] - po->i[i]; 5491 /* compute offsets so that we relative location for each row */ 5492 roffsets[(i + 1) * 2 + 0] = roffsets[i * 2 + 0] + nrcols[i * 2 + 0]; 5493 roffsets[(i + 1) * 2 + 1] = roffsets[i * 2 + 1] + nrcols[i * 2 + 1]; 5494 } 5495 PetscCall(PetscCalloc1(2 * nrows, &nlcols)); 5496 PetscCall(PetscCalloc1(2 * nrows, &loffsets)); 5497 /* 'r' means root, and 'l' means leaf */ 5498 PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE)); 5499 PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE)); 5500 PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE)); 5501 PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE)); 5502 PetscCall(PetscSFDestroy(&sf)); 5503 PetscCall(PetscFree(roffsets)); 5504 PetscCall(PetscFree(nrcols)); 5505 dntotalcols = 0; 5506 ontotalcols = 0; 5507 ncol = 0; 5508 for (i = 0; i < nrows; i++) { 5509 pnnz[i] = nlcols[i * 2 + 0] + nlcols[i * 2 + 1]; 5510 ncol = PetscMax(pnnz[i], ncol); 5511 /* diagonal */ 5512 dntotalcols += nlcols[i * 2 + 0]; 5513 /* off-diagonal */ 5514 ontotalcols += nlcols[i * 2 + 1]; 5515 } 5516 /* We do not need to figure the right number of columns 5517 * since all the calculations will be done by going through the raw data 5518 * */ 5519 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, nrows, ncol, 0, pnnz, P_oth)); 5520 PetscCall(MatSetUp(*P_oth)); 5521 PetscCall(PetscFree(pnnz)); 5522 p_oth = 
(Mat_SeqAIJ *)(*P_oth)->data; 5523 /* diagonal */ 5524 PetscCall(PetscCalloc1(dntotalcols, &iremote)); 5525 /* off-diagonal */ 5526 PetscCall(PetscCalloc1(ontotalcols, &oiremote)); 5527 /* diagonal */ 5528 PetscCall(PetscCalloc1(dntotalcols, &ilocal)); 5529 /* off-diagonal */ 5530 PetscCall(PetscCalloc1(ontotalcols, &oilocal)); 5531 dntotalcols = 0; 5532 ontotalcols = 0; 5533 ntotalcols = 0; 5534 for (i = 0; i < nrows; i++) { 5535 owner = 0; 5536 PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, NULL)); 5537 /* Set iremote for diag matrix */ 5538 for (j = 0; j < nlcols[i * 2 + 0]; j++) { 5539 iremote[dntotalcols].index = loffsets[i * 2 + 0] + j; 5540 iremote[dntotalcols].rank = owner; 5541 /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */ 5542 ilocal[dntotalcols++] = ntotalcols++; 5543 } 5544 /* off-diagonal */ 5545 for (j = 0; j < nlcols[i * 2 + 1]; j++) { 5546 oiremote[ontotalcols].index = loffsets[i * 2 + 1] + j; 5547 oiremote[ontotalcols].rank = owner; 5548 oilocal[ontotalcols++] = ntotalcols++; 5549 } 5550 } 5551 PetscCall(ISRestoreIndices(rows, &lrowindices)); 5552 PetscCall(PetscFree(loffsets)); 5553 PetscCall(PetscFree(nlcols)); 5554 PetscCall(PetscSFCreate(comm, &sf)); 5555 /* P serves as roots and P_oth is leaves 5556 * Diag matrix 5557 * */ 5558 PetscCall(PetscSFSetGraph(sf, pd->i[plocalsize], dntotalcols, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 5559 PetscCall(PetscSFSetFromOptions(sf)); 5560 PetscCall(PetscSFSetUp(sf)); 5561 5562 PetscCall(PetscSFCreate(comm, &osf)); 5563 /* off-diagonal */ 5564 PetscCall(PetscSFSetGraph(osf, po->i[plocalsize], ontotalcols, oilocal, PETSC_OWN_POINTER, oiremote, PETSC_OWN_POINTER)); 5565 PetscCall(PetscSFSetFromOptions(osf)); 5566 PetscCall(PetscSFSetUp(osf)); 5567 PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a)); 5568 PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a)); 5569 /* operate on the matrix internal data to save memory */ 5570 
PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5571 PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5572 PetscCall(MatGetOwnershipRangeColumn(P, &pcstart, NULL)); 5573 /* Convert to global indices for diag matrix */ 5574 for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] += pcstart; 5575 PetscCall(PetscSFBcastBegin(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE)); 5576 /* We want P_oth store global indices */ 5577 PetscCall(ISLocalToGlobalMappingCreate(comm, 1, p->B->cmap->n, p->garray, PETSC_COPY_VALUES, &mapping)); 5578 /* Use memory scalable approach */ 5579 PetscCall(ISLocalToGlobalMappingSetType(mapping, ISLOCALTOGLOBALMAPPINGHASH)); 5580 PetscCall(ISLocalToGlobalMappingApply(mapping, po->i[plocalsize], po->j, po->j)); 5581 PetscCall(PetscSFBcastBegin(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE)); 5582 PetscCall(PetscSFBcastEnd(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE)); 5583 /* Convert back to local indices */ 5584 for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] -= pcstart; 5585 PetscCall(PetscSFBcastEnd(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE)); 5586 nout = 0; 5587 PetscCall(ISGlobalToLocalMappingApply(mapping, IS_GTOLM_DROP, po->i[plocalsize], po->j, &nout, po->j)); 5588 PetscCheck(nout == po->i[plocalsize], comm, PETSC_ERR_ARG_INCOMP, "n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ", po->i[plocalsize], nout); 5589 PetscCall(ISLocalToGlobalMappingDestroy(&mapping)); 5590 /* Exchange values */ 5591 PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5592 PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5593 PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a)); 5594 PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a)); 5595 /* Stop PETSc from shrinking memory */ 5596 for (i = 0; i < nrows; i++) p_oth->ilen[i] = p_oth->imax[i]; 5597 PetscCall(MatAssemblyBegin(*P_oth, MAT_FINAL_ASSEMBLY)); 5598 PetscCall(MatAssemblyEnd(*P_oth, 
MAT_FINAL_ASSEMBLY)); 5599 /* Attach PetscSF objects to P_oth so that we can reuse it later */ 5600 PetscCall(PetscObjectCompose((PetscObject)*P_oth, "diagsf", (PetscObject)sf)); 5601 PetscCall(PetscObjectCompose((PetscObject)*P_oth, "offdiagsf", (PetscObject)osf)); 5602 PetscCall(PetscSFDestroy(&sf)); 5603 PetscCall(PetscSFDestroy(&osf)); 5604 PetscFunctionReturn(PETSC_SUCCESS); 5605 } 5606 5607 /* 5608 * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5609 * This supports MPIAIJ and MAIJ 5610 * */ 5611 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A, Mat P, PetscInt dof, MatReuse reuse, Mat *P_oth) 5612 { 5613 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *p = (Mat_MPIAIJ *)P->data; 5614 Mat_SeqAIJ *p_oth; 5615 IS rows, map; 5616 PetscHMapI hamp; 5617 PetscInt i, htsize, *rowindices, off, *mapping, key, count; 5618 MPI_Comm comm; 5619 PetscSF sf, osf; 5620 PetscBool has; 5621 5622 PetscFunctionBegin; 5623 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 5624 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, P, 0, 0)); 5625 /* If it is the first time, create an index set of off-diag nonzero columns of A, 5626 * and then create a submatrix (that often is an overlapping matrix) 5627 * */ 5628 if (reuse == MAT_INITIAL_MATRIX) { 5629 /* Use a hash table to figure out unique keys */ 5630 PetscCall(PetscHMapICreateWithSize(a->B->cmap->n, &hamp)); 5631 PetscCall(PetscCalloc1(a->B->cmap->n, &mapping)); 5632 count = 0; 5633 /* Assume that a->g is sorted, otherwise the following does not make sense */ 5634 for (i = 0; i < a->B->cmap->n; i++) { 5635 key = a->garray[i] / dof; 5636 PetscCall(PetscHMapIHas(hamp, key, &has)); 5637 if (!has) { 5638 mapping[i] = count; 5639 PetscCall(PetscHMapISet(hamp, key, count++)); 5640 } else { 5641 /* Current 'i' has the same value the previous step */ 5642 mapping[i] = count - 1; 5643 } 5644 } 5645 PetscCall(ISCreateGeneral(comm, a->B->cmap->n, mapping, PETSC_OWN_POINTER, &map)); 5646 
PetscCall(PetscHMapIGetSize(hamp, &htsize)); 5647 PetscCheck(htsize == count, comm, PETSC_ERR_ARG_INCOMP, " Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT, htsize, count); 5648 PetscCall(PetscCalloc1(htsize, &rowindices)); 5649 off = 0; 5650 PetscCall(PetscHMapIGetKeys(hamp, &off, rowindices)); 5651 PetscCall(PetscHMapIDestroy(&hamp)); 5652 PetscCall(PetscSortInt(htsize, rowindices)); 5653 PetscCall(ISCreateGeneral(comm, htsize, rowindices, PETSC_OWN_POINTER, &rows)); 5654 /* In case, the matrix was already created but users want to recreate the matrix */ 5655 PetscCall(MatDestroy(P_oth)); 5656 PetscCall(MatCreateSeqSubMatrixWithRows_Private(P, rows, P_oth)); 5657 PetscCall(PetscObjectCompose((PetscObject)*P_oth, "aoffdiagtopothmapping", (PetscObject)map)); 5658 PetscCall(ISDestroy(&map)); 5659 PetscCall(ISDestroy(&rows)); 5660 } else if (reuse == MAT_REUSE_MATRIX) { 5661 /* If matrix was already created, we simply update values using SF objects 5662 * that as attached to the matrix earlier. 
5663 */ 5664 const PetscScalar *pd_a, *po_a; 5665 5666 PetscCall(PetscObjectQuery((PetscObject)*P_oth, "diagsf", (PetscObject *)&sf)); 5667 PetscCall(PetscObjectQuery((PetscObject)*P_oth, "offdiagsf", (PetscObject *)&osf)); 5668 PetscCheck(sf && osf, comm, PETSC_ERR_ARG_NULL, "Matrix is not initialized yet"); 5669 p_oth = (Mat_SeqAIJ *)(*P_oth)->data; 5670 /* Update values in place */ 5671 PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a)); 5672 PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a)); 5673 PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5674 PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5675 PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5676 PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5677 PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a)); 5678 PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a)); 5679 } else SETERRQ(comm, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown reuse type"); 5680 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, P, 0, 0)); 5681 PetscFunctionReturn(PETSC_SUCCESS); 5682 } 5683 5684 /*@C 5685 MatGetBrowsOfAcols - Returns `IS` that contain rows of `B` that equal to nonzero columns of local `A` 5686 5687 Collective 5688 5689 Input Parameters: 5690 + A - the first matrix in `MATMPIAIJ` format 5691 . B - the second matrix in `MATMPIAIJ` format 5692 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5693 5694 Output Parameters: 5695 + rowb - On input index sets of rows of B to extract (or `NULL`), modified on output 5696 . 
colb - On input index sets of columns of B to extract (or `NULL`), modified on output 5697 - B_seq - the sequential matrix generated 5698 5699 Level: developer 5700 5701 .seealso: `Mat`, `MATMPIAIJ`, `IS`, `MatReuse` 5702 @*/ 5703 PetscErrorCode MatGetBrowsOfAcols(Mat A, Mat B, MatReuse scall, IS *rowb, IS *colb, Mat *B_seq) 5704 { 5705 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5706 PetscInt *idx, i, start, ncols, nzA, nzB, *cmap, imark; 5707 IS isrowb, iscolb; 5708 Mat *bseq = NULL; 5709 5710 PetscFunctionBegin; 5711 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 5712 A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 5713 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols, A, B, 0, 0)); 5714 5715 if (scall == MAT_INITIAL_MATRIX) { 5716 start = A->cmap->rstart; 5717 cmap = a->garray; 5718 nzA = a->A->cmap->n; 5719 nzB = a->B->cmap->n; 5720 PetscCall(PetscMalloc1(nzA + nzB, &idx)); 5721 ncols = 0; 5722 for (i = 0; i < nzB; i++) { /* row < local row index */ 5723 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5724 else break; 5725 } 5726 imark = i; 5727 for (i = 0; i < nzA; i++) idx[ncols++] = start + i; /* local rows */ 5728 for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */ 5729 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &isrowb)); 5730 PetscCall(ISCreateStride(PETSC_COMM_SELF, B->cmap->N, 0, 1, &iscolb)); 5731 } else { 5732 PetscCheck(rowb && colb, PETSC_COMM_SELF, PETSC_ERR_SUP, "IS rowb and colb must be provided for MAT_REUSE_MATRIX"); 5733 isrowb = *rowb; 5734 iscolb = *colb; 5735 PetscCall(PetscMalloc1(1, &bseq)); 5736 bseq[0] = *B_seq; 5737 } 5738 PetscCall(MatCreateSubMatrices(B, 1, &isrowb, &iscolb, scall, &bseq)); 5739 *B_seq = bseq[0]; 5740 PetscCall(PetscFree(bseq)); 5741 if (!rowb) { 5742 
PetscCall(ISDestroy(&isrowb)); 5743 } else { 5744 *rowb = isrowb; 5745 } 5746 if (!colb) { 5747 PetscCall(ISDestroy(&iscolb)); 5748 } else { 5749 *colb = iscolb; 5750 } 5751 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols, A, B, 0, 0)); 5752 PetscFunctionReturn(PETSC_SUCCESS); 5753 } 5754 5755 /* 5756 MatGetBrowsOfAoCols_MPIAIJ - Creates a `MATSEQAIJ` matrix by taking rows of B that equal to nonzero columns 5757 of the OFF-DIAGONAL portion of local A 5758 5759 Collective 5760 5761 Input Parameters: 5762 + A,B - the matrices in `MATMPIAIJ` format 5763 - scall - either `MAT_INITIAL_MATRIX` or `MAT_REUSE_MATRIX` 5764 5765 Output Parameter: 5766 + startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL) 5767 . startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL) 5768 . bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL) 5769 - B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N 5770 5771 Developer Note: 5772 This directly accesses information inside the VecScatter associated with the matrix-vector product 5773 for this matrix. This is not desirable.. 
5774 5775 Level: developer 5776 5777 */ 5778 5779 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A, Mat B, MatReuse scall, PetscInt **startsj_s, PetscInt **startsj_r, MatScalar **bufa_ptr, Mat *B_oth) 5780 { 5781 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5782 VecScatter ctx; 5783 MPI_Comm comm; 5784 const PetscMPIInt *rprocs, *sprocs; 5785 PetscMPIInt nrecvs, nsends; 5786 const PetscInt *srow, *rstarts, *sstarts; 5787 PetscInt *rowlen, *bufj, *bufJ, ncols = 0, aBn = a->B->cmap->n, row, *b_othi, *b_othj, *rvalues = NULL, *svalues = NULL, *cols, sbs, rbs; 5788 PetscInt i, j, k = 0, l, ll, nrows, *rstartsj = NULL, *sstartsj, len; 5789 PetscScalar *b_otha, *bufa, *bufA, *vals = NULL; 5790 MPI_Request *reqs = NULL, *rwaits = NULL, *swaits = NULL; 5791 PetscMPIInt size, tag, rank, nreqs; 5792 5793 PetscFunctionBegin; 5794 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 5795 PetscCallMPI(MPI_Comm_size(comm, &size)); 5796 5797 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 5798 A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 5799 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, B, 0, 0)); 5800 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 5801 5802 if (size == 1) { 5803 startsj_s = NULL; 5804 bufa_ptr = NULL; 5805 *B_oth = NULL; 5806 PetscFunctionReturn(PETSC_SUCCESS); 5807 } 5808 5809 ctx = a->Mvctx; 5810 tag = ((PetscObject)ctx)->tag; 5811 5812 PetscCall(VecScatterGetRemote_Private(ctx, PETSC_TRUE /*send*/, &nsends, &sstarts, &srow, &sprocs, &sbs)); 5813 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5814 PetscCall(VecScatterGetRemoteOrdered_Private(ctx, PETSC_FALSE /*recv*/, &nrecvs, &rstarts, NULL /*indices not needed*/, &rprocs, &rbs)); 5815 
PetscCall(PetscMPIIntCast(nsends + nrecvs, &nreqs)); 5816 PetscCall(PetscMalloc1(nreqs, &reqs)); 5817 rwaits = reqs; 5818 swaits = PetscSafePointerPlusOffset(reqs, nrecvs); 5819 5820 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5821 if (scall == MAT_INITIAL_MATRIX) { 5822 /* i-array */ 5823 /* post receives */ 5824 if (nrecvs) PetscCall(PetscMalloc1(rbs * (rstarts[nrecvs] - rstarts[0]), &rvalues)); /* rstarts can be NULL when nrecvs=0 */ 5825 for (i = 0; i < nrecvs; i++) { 5826 rowlen = rvalues + rstarts[i] * rbs; 5827 nrows = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of indices to be received */ 5828 PetscCallMPI(MPIU_Irecv(rowlen, nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i)); 5829 } 5830 5831 /* pack the outgoing message */ 5832 PetscCall(PetscMalloc2(nsends + 1, &sstartsj, nrecvs + 1, &rstartsj)); 5833 5834 sstartsj[0] = 0; 5835 rstartsj[0] = 0; 5836 len = 0; /* total length of j or a array to be sent */ 5837 if (nsends) { 5838 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5839 PetscCall(PetscMalloc1(sbs * (sstarts[nsends] - sstarts[0]), &svalues)); 5840 } 5841 for (i = 0; i < nsends; i++) { 5842 rowlen = svalues + (sstarts[i] - sstarts[0]) * sbs; 5843 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5844 for (j = 0; j < nrows; j++) { 5845 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5846 for (l = 0; l < sbs; l++) { 5847 PetscCall(MatGetRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); /* rowlength */ 5848 5849 rowlen[j * sbs + l] = ncols; 5850 5851 len += ncols; 5852 PetscCall(MatRestoreRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); 5853 } 5854 k++; 5855 } 5856 PetscCallMPI(MPIU_Isend(rowlen, nrows * sbs, MPIU_INT, sprocs[i], tag, comm, swaits + i)); 5857 5858 sstartsj[i + 1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5859 } 5860 /* recvs and sends of i-array are completed */ 5861 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5862 
PetscCall(PetscFree(svalues)); 5863 5864 /* allocate buffers for sending j and a arrays */ 5865 PetscCall(PetscMalloc1(len + 1, &bufj)); 5866 PetscCall(PetscMalloc1(len + 1, &bufa)); 5867 5868 /* create i-array of B_oth */ 5869 PetscCall(PetscMalloc1(aBn + 2, &b_othi)); 5870 5871 b_othi[0] = 0; 5872 len = 0; /* total length of j or a array to be received */ 5873 k = 0; 5874 for (i = 0; i < nrecvs; i++) { 5875 rowlen = rvalues + (rstarts[i] - rstarts[0]) * rbs; 5876 nrows = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of rows to be received */ 5877 for (j = 0; j < nrows; j++) { 5878 b_othi[k + 1] = b_othi[k] + rowlen[j]; 5879 PetscCall(PetscIntSumError(rowlen[j], len, &len)); 5880 k++; 5881 } 5882 rstartsj[i + 1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5883 } 5884 PetscCall(PetscFree(rvalues)); 5885 5886 /* allocate space for j and a arrays of B_oth */ 5887 PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_othj)); 5888 PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_otha)); 5889 5890 /* j-array */ 5891 /* post receives of j-array */ 5892 for (i = 0; i < nrecvs; i++) { 5893 nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */ 5894 PetscCallMPI(MPIU_Irecv(b_othj + rstartsj[i], nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i)); 5895 } 5896 5897 /* pack the outgoing message j-array */ 5898 if (nsends) k = sstarts[0]; 5899 for (i = 0; i < nsends; i++) { 5900 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5901 bufJ = bufj + sstartsj[i]; 5902 for (j = 0; j < nrows; j++) { 5903 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5904 for (ll = 0; ll < sbs; ll++) { 5905 PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL)); 5906 for (l = 0; l < ncols; l++) *bufJ++ = cols[l]; 5907 PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL)); 5908 } 5909 } 5910 PetscCallMPI(MPIU_Isend(bufj + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_INT, sprocs[i], tag, comm, swaits + i)); 5911 } 5912 5913 /* 
recvs and sends of j-array are completed */ 5914 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5915 } else if (scall == MAT_REUSE_MATRIX) { 5916 sstartsj = *startsj_s; 5917 rstartsj = *startsj_r; 5918 bufa = *bufa_ptr; 5919 PetscCall(MatSeqAIJGetArrayWrite(*B_oth, &b_otha)); 5920 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container"); 5921 5922 /* a-array */ 5923 /* post receives of a-array */ 5924 for (i = 0; i < nrecvs; i++) { 5925 nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */ 5926 PetscCallMPI(MPIU_Irecv(b_otha + rstartsj[i], nrows, MPIU_SCALAR, rprocs[i], tag, comm, rwaits + i)); 5927 } 5928 5929 /* pack the outgoing message a-array */ 5930 if (nsends) k = sstarts[0]; 5931 for (i = 0; i < nsends; i++) { 5932 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5933 bufA = bufa + sstartsj[i]; 5934 for (j = 0; j < nrows; j++) { 5935 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5936 for (ll = 0; ll < sbs; ll++) { 5937 PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals)); 5938 for (l = 0; l < ncols; l++) *bufA++ = vals[l]; 5939 PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals)); 5940 } 5941 } 5942 PetscCallMPI(MPIU_Isend(bufa + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_SCALAR, sprocs[i], tag, comm, swaits + i)); 5943 } 5944 /* recvs and sends of a-array are completed */ 5945 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5946 PetscCall(PetscFree(reqs)); 5947 5948 if (scall == MAT_INITIAL_MATRIX) { 5949 Mat_SeqAIJ *b_oth; 5950 5951 /* put together the new matrix */ 5952 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, aBn, B->cmap->N, b_othi, b_othj, b_otha, B_oth)); 5953 5954 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5955 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5956 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 5957 b_oth->free_a = PETSC_TRUE; 5958 b_oth->free_ij = PETSC_TRUE; 5959 b_oth->nonew = 0; 5960 5961 PetscCall(PetscFree(bufj)); 5962 if (!startsj_s || !bufa_ptr) { 5963 PetscCall(PetscFree2(sstartsj, rstartsj)); 5964 PetscCall(PetscFree(bufa_ptr)); 5965 } else { 5966 *startsj_s = sstartsj; 5967 *startsj_r = rstartsj; 5968 *bufa_ptr = bufa; 5969 } 5970 } else if (scall == MAT_REUSE_MATRIX) { 5971 PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth, &b_otha)); 5972 } 5973 5974 PetscCall(VecScatterRestoreRemote_Private(ctx, PETSC_TRUE, &nsends, &sstarts, &srow, &sprocs, &sbs)); 5975 PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx, PETSC_FALSE, &nrecvs, &rstarts, NULL, &rprocs, &rbs)); 5976 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, B, 0, 0)); 5977 PetscFunctionReturn(PETSC_SUCCESS); 5978 } 5979 5980 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat, MatType, MatReuse, Mat *); 5981 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat, MatType, MatReuse, Mat *); 5982 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat, MatType, MatReuse, Mat *); 5983 #if defined(PETSC_HAVE_MKL_SPARSE) 5984 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat, MatType, MatReuse, Mat *); 5985 #endif 5986 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat, MatType, MatReuse, Mat *); 5987 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *); 5988 #if defined(PETSC_HAVE_ELEMENTAL) 5989 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat, MatType, MatReuse, Mat *); 5990 #endif 5991 #if defined(PETSC_HAVE_SCALAPACK) 5992 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *); 5993 #endif 5994 #if defined(PETSC_HAVE_HYPRE) 5995 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *); 5996 #endif 5997 #if defined(PETSC_HAVE_CUDA) 5998 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat, MatType, 
MatReuse, Mat *); 5999 #endif 6000 #if defined(PETSC_HAVE_HIP) 6001 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJHIPSPARSE(Mat, MatType, MatReuse, Mat *); 6002 #endif 6003 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 6004 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat, MatType, MatReuse, Mat *); 6005 #endif 6006 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat, MatType, MatReuse, Mat *); 6007 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *); 6008 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat); 6009 6010 /* 6011 Computes (B'*A')' since computing B*A directly is untenable 6012 6013 n p p 6014 [ ] [ ] [ ] 6015 m [ A ] * n [ B ] = m [ C ] 6016 [ ] [ ] [ ] 6017 6018 */ 6019 static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A, Mat B, Mat C) 6020 { 6021 Mat At, Bt, Ct; 6022 6023 PetscFunctionBegin; 6024 PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &At)); 6025 PetscCall(MatTranspose(B, MAT_INITIAL_MATRIX, &Bt)); 6026 PetscCall(MatMatMult(Bt, At, MAT_INITIAL_MATRIX, PETSC_CURRENT, &Ct)); 6027 PetscCall(MatDestroy(&At)); 6028 PetscCall(MatDestroy(&Bt)); 6029 PetscCall(MatTransposeSetPrecursor(Ct, C)); 6030 PetscCall(MatTranspose(Ct, MAT_REUSE_MATRIX, &C)); 6031 PetscCall(MatDestroy(&Ct)); 6032 PetscFunctionReturn(PETSC_SUCCESS); 6033 } 6034 6035 static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A, Mat B, PetscReal fill, Mat C) 6036 { 6037 PetscBool cisdense; 6038 6039 PetscFunctionBegin; 6040 PetscCheck(A->cmap->n == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT, A->cmap->n, B->rmap->n); 6041 PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N)); 6042 PetscCall(MatSetBlockSizesFromMats(C, A, B)); 6043 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, MATMPIDENSEHIP, "")); 6044 if (!cisdense) PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 6045 
PetscCall(MatSetUp(C)); 6046 6047 C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ; 6048 PetscFunctionReturn(PETSC_SUCCESS); 6049 } 6050 6051 static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C) 6052 { 6053 Mat_Product *product = C->product; 6054 Mat A = product->A, B = product->B; 6055 6056 PetscFunctionBegin; 6057 PetscCheck(A->cmap->rstart == B->rmap->rstart && A->cmap->rend == B->rmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", 6058 A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 6059 C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ; 6060 C->ops->productsymbolic = MatProductSymbolic_AB; 6061 PetscFunctionReturn(PETSC_SUCCESS); 6062 } 6063 6064 PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C) 6065 { 6066 Mat_Product *product = C->product; 6067 6068 PetscFunctionBegin; 6069 if (product->type == MATPRODUCT_AB) PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C)); 6070 PetscFunctionReturn(PETSC_SUCCESS); 6071 } 6072 6073 /* 6074 Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix 6075 6076 Input Parameters: 6077 6078 j1,rowBegin1,rowEnd1,jmap1: describe the first set of nonzeros (Set1) 6079 j2,rowBegin2,rowEnd2,jmap2: describe the second set of nonzeros (Set2) 6080 6081 mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat 6082 6083 For Set1, j1[] contains column indices of the nonzeros. 6084 For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k 6085 respectively (note rowEnd1[k] is not necessarily equal to rwoBegin1[k+1]). Indices in this range of j1[] are sorted, 6086 but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1. 6087 6088 Similar for Set2. 
6089 6090 This routine merges the two sets of nonzeros row by row and removes repeats. 6091 6092 Output Parameters: (memory is allocated by the caller) 6093 6094 i[],j[]: the CSR of the merged matrix, which has m rows. 6095 imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix. 6096 imap2[]: similar to imap1[], but for Set2. 6097 Note we order nonzeros row-by-row and from left to right. 6098 */ 6099 static PetscErrorCode MatMergeEntries_Internal(Mat mat, const PetscInt j1[], const PetscInt j2[], const PetscCount rowBegin1[], const PetscCount rowEnd1[], const PetscCount rowBegin2[], const PetscCount rowEnd2[], const PetscCount jmap1[], const PetscCount jmap2[], PetscCount imap1[], PetscCount imap2[], PetscInt i[], PetscInt j[]) 6100 { 6101 PetscInt r, m; /* Row index of mat */ 6102 PetscCount t, t1, t2, b1, e1, b2, e2; 6103 6104 PetscFunctionBegin; 6105 PetscCall(MatGetLocalSize(mat, &m, NULL)); 6106 t1 = t2 = t = 0; /* Count unique nonzeros of in Set1, Set1 and the merged respectively */ 6107 i[0] = 0; 6108 for (r = 0; r < m; r++) { /* Do row by row merging */ 6109 b1 = rowBegin1[r]; 6110 e1 = rowEnd1[r]; 6111 b2 = rowBegin2[r]; 6112 e2 = rowEnd2[r]; 6113 while (b1 < e1 && b2 < e2) { 6114 if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */ 6115 j[t] = j1[b1]; 6116 imap1[t1] = t; 6117 imap2[t2] = t; 6118 b1 += jmap1[t1 + 1] - jmap1[t1]; /* Jump to next unique local nonzero */ 6119 b2 += jmap2[t2 + 1] - jmap2[t2]; /* Jump to next unique remote nonzero */ 6120 t1++; 6121 t2++; 6122 t++; 6123 } else if (j1[b1] < j2[b2]) { 6124 j[t] = j1[b1]; 6125 imap1[t1] = t; 6126 b1 += jmap1[t1 + 1] - jmap1[t1]; 6127 t1++; 6128 t++; 6129 } else { 6130 j[t] = j2[b2]; 6131 imap2[t2] = t; 6132 b2 += jmap2[t2 + 1] - jmap2[t2]; 6133 t2++; 6134 t++; 6135 } 6136 } 6137 /* Merge the remaining in either j1[] or j2[] */ 6138 while (b1 < e1) { 6139 j[t] = j1[b1]; 6140 imap1[t1] = t; 6141 b1 += jmap1[t1 + 1] - 
jmap1[t1]; 6142 t1++; 6143 t++; 6144 } 6145 while (b2 < e2) { 6146 j[t] = j2[b2]; 6147 imap2[t2] = t; 6148 b2 += jmap2[t2 + 1] - jmap2[t2]; 6149 t2++; 6150 t++; 6151 } 6152 PetscCall(PetscIntCast(t, i + r + 1)); 6153 } 6154 PetscFunctionReturn(PETSC_SUCCESS); 6155 } 6156 6157 /* 6158 Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block 6159 6160 Input Parameters: 6161 mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m. 6162 n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[] 6163 respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n. 6164 6165 i[] is already sorted, but within a row, j[] is not sorted and might have repeats. 6166 i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting. 6167 6168 Output Parameters: 6169 j[],perm[]: the routine needs to sort j[] within each row along with perm[]. 6170 rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller. 6171 They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block, 6172 and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block. 6173 6174 Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine. 6175 Atot: number of entries belonging to the diagonal block. 6176 Annz: number of unique nonzeros belonging to the diagonal block. 6177 Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count 6178 repeats (i.e., same 'i,j' pair). 6179 Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block. 
More precisely, Ajmap[t+1] - Ajmap[t] 6180 is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0. 6181 6182 Atot: number of entries belonging to the diagonal block 6183 Annz: number of unique nonzeros belonging to the diagonal block. 6184 6185 Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block. 6186 6187 Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1(). 6188 */ 6189 static PetscErrorCode MatSplitEntries_Internal(Mat mat, PetscCount n, const PetscInt i[], PetscInt j[], PetscCount perm[], PetscCount rowBegin[], PetscCount rowMid[], PetscCount rowEnd[], PetscCount *Atot_, PetscCount **Aperm_, PetscCount *Annz_, PetscCount **Ajmap_, PetscCount *Btot_, PetscCount **Bperm_, PetscCount *Bnnz_, PetscCount **Bjmap_) 6190 { 6191 PetscInt cstart, cend, rstart, rend, row, col; 6192 PetscCount Atot = 0, Btot = 0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */ 6193 PetscCount Annz = 0, Bnnz = 0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */ 6194 PetscCount k, m, p, q, r, s, mid; 6195 PetscCount *Aperm, *Bperm, *Ajmap, *Bjmap; 6196 6197 PetscFunctionBegin; 6198 PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend)); 6199 PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend)); 6200 m = rend - rstart; 6201 6202 /* Skip negative rows */ 6203 for (k = 0; k < n; k++) 6204 if (i[k] >= 0) break; 6205 6206 /* Process [k,n): sort and partition each local row into diag and offdiag portions, 6207 fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz. 6208 */ 6209 while (k < n) { 6210 row = i[k]; 6211 /* Entries in [k,s) are in one row. 
Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */ 6212 for (s = k; s < n; s++) 6213 if (i[s] != row) break; 6214 6215 /* Shift diag columns to range of [-PETSC_INT_MAX, -1] */ 6216 for (p = k; p < s; p++) { 6217 if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_INT_MAX; 6218 else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index %" PetscInt_FMT " is out of range", j[p]); 6219 } 6220 PetscCall(PetscSortIntWithCountArray(s - k, j + k, perm + k)); 6221 PetscCall(PetscSortedIntUpperBound(j, k, s, -1, &mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */ 6222 rowBegin[row - rstart] = k; 6223 rowMid[row - rstart] = mid; 6224 rowEnd[row - rstart] = s; 6225 6226 /* Count nonzeros of this diag/offdiag row, which might have repeats */ 6227 Atot += mid - k; 6228 Btot += s - mid; 6229 6230 /* Count unique nonzeros of this diag row */ 6231 for (p = k; p < mid;) { 6232 col = j[p]; 6233 do { 6234 j[p] += PETSC_INT_MAX; /* Revert the modified diagonal indices */ 6235 p++; 6236 } while (p < mid && j[p] == col); 6237 Annz++; 6238 } 6239 6240 /* Count unique nonzeros of this offdiag row */ 6241 for (p = mid; p < s;) { 6242 col = j[p]; 6243 do { 6244 p++; 6245 } while (p < s && j[p] == col); 6246 Bnnz++; 6247 } 6248 k = s; 6249 } 6250 6251 /* Allocation according to Atot, Btot, Annz, Bnnz */ 6252 PetscCall(PetscMalloc1(Atot, &Aperm)); 6253 PetscCall(PetscMalloc1(Btot, &Bperm)); 6254 PetscCall(PetscMalloc1(Annz + 1, &Ajmap)); 6255 PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap)); 6256 6257 /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */ 6258 Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0; 6259 for (r = 0; r < m; r++) { 6260 k = rowBegin[r]; 6261 mid = rowMid[r]; 6262 s = rowEnd[r]; 6263 PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Aperm, Atot), PetscSafePointerPlusOffset(perm, k), mid - k)); 6264 
PetscCall(PetscArraycpy(PetscSafePointerPlusOffset(Bperm, Btot), PetscSafePointerPlusOffset(perm, mid), s - mid)); 6265 Atot += mid - k; 6266 Btot += s - mid; 6267 6268 /* Scan column indices in this row and find out how many repeats each unique nonzero has */ 6269 for (p = k; p < mid;) { 6270 col = j[p]; 6271 q = p; 6272 do { 6273 p++; 6274 } while (p < mid && j[p] == col); 6275 Ajmap[Annz + 1] = Ajmap[Annz] + (p - q); 6276 Annz++; 6277 } 6278 6279 for (p = mid; p < s;) { 6280 col = j[p]; 6281 q = p; 6282 do { 6283 p++; 6284 } while (p < s && j[p] == col); 6285 Bjmap[Bnnz + 1] = Bjmap[Bnnz] + (p - q); 6286 Bnnz++; 6287 } 6288 } 6289 /* Output */ 6290 *Aperm_ = Aperm; 6291 *Annz_ = Annz; 6292 *Atot_ = Atot; 6293 *Ajmap_ = Ajmap; 6294 *Bperm_ = Bperm; 6295 *Bnnz_ = Bnnz; 6296 *Btot_ = Btot; 6297 *Bjmap_ = Bjmap; 6298 PetscFunctionReturn(PETSC_SUCCESS); 6299 } 6300 6301 /* 6302 Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix 6303 6304 Input Parameters: 6305 nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[] 6306 nnz: number of unique nonzeros in the merged matrix 6307 imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix 6308 jmap[nnz1+1]: i-th nonzero in the set has jmap[i+1] - jmap[i] repeats in the set 6309 6310 Output Parameter: (memory is allocated by the caller) 6311 jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set 6312 6313 Example: 6314 nnz1 = 4 6315 nnz = 6 6316 imap = [1,3,4,5] 6317 jmap = [0,3,5,6,7] 6318 then, 6319 jmap_new = [0,0,3,3,5,6,7] 6320 */ 6321 static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1, PetscCount nnz, const PetscCount imap[], const PetscCount jmap[], PetscCount jmap_new[]) 6322 { 6323 PetscCount k, p; 6324 6325 PetscFunctionBegin; 6326 jmap_new[0] = 0; 6327 p = nnz; /* p loops over jmap_new[] backwards */ 6328 for (k = nnz1 - 1; k >= 0; k--) { /* k loops over imap[] */ 6329 for (; p > 
imap[k]; p--) jmap_new[p] = jmap[k + 1]; 6330 } 6331 for (; p >= 0; p--) jmap_new[p] = jmap[0]; 6332 PetscFunctionReturn(PETSC_SUCCESS); 6333 } 6334 6335 static PetscErrorCode MatCOOStructDestroy_MPIAIJ(void **data) 6336 { 6337 MatCOOStruct_MPIAIJ *coo = (MatCOOStruct_MPIAIJ *)*data; 6338 6339 PetscFunctionBegin; 6340 PetscCall(PetscSFDestroy(&coo->sf)); 6341 PetscCall(PetscFree(coo->Aperm1)); 6342 PetscCall(PetscFree(coo->Bperm1)); 6343 PetscCall(PetscFree(coo->Ajmap1)); 6344 PetscCall(PetscFree(coo->Bjmap1)); 6345 PetscCall(PetscFree(coo->Aimap2)); 6346 PetscCall(PetscFree(coo->Bimap2)); 6347 PetscCall(PetscFree(coo->Aperm2)); 6348 PetscCall(PetscFree(coo->Bperm2)); 6349 PetscCall(PetscFree(coo->Ajmap2)); 6350 PetscCall(PetscFree(coo->Bjmap2)); 6351 PetscCall(PetscFree(coo->Cperm1)); 6352 PetscCall(PetscFree2(coo->sendbuf, coo->recvbuf)); 6353 PetscCall(PetscFree(coo)); 6354 PetscFunctionReturn(PETSC_SUCCESS); 6355 } 6356 6357 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, PetscInt coo_i[], PetscInt coo_j[]) 6358 { 6359 MPI_Comm comm; 6360 PetscMPIInt rank, size; 6361 PetscInt m, n, M, N, rstart, rend, cstart, cend; /* Sizes, indices of row/col, therefore with type PetscInt */ 6362 PetscCount k, p, q, rem; /* Loop variables over coo arrays */ 6363 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)mat->data; 6364 PetscContainer container; 6365 MatCOOStruct_MPIAIJ *coo; 6366 6367 PetscFunctionBegin; 6368 PetscCall(PetscFree(mpiaij->garray)); 6369 PetscCall(VecDestroy(&mpiaij->lvec)); 6370 #if defined(PETSC_USE_CTABLE) 6371 PetscCall(PetscHMapIDestroy(&mpiaij->colmap)); 6372 #else 6373 PetscCall(PetscFree(mpiaij->colmap)); 6374 #endif 6375 PetscCall(VecScatterDestroy(&mpiaij->Mvctx)); 6376 mat->assembled = PETSC_FALSE; 6377 mat->was_assembled = PETSC_FALSE; 6378 6379 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 6380 PetscCallMPI(MPI_Comm_size(comm, &size)); 6381 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 6382 
PetscCall(PetscLayoutSetUp(mat->rmap)); 6383 PetscCall(PetscLayoutSetUp(mat->cmap)); 6384 PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend)); 6385 PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend)); 6386 PetscCall(MatGetLocalSize(mat, &m, &n)); 6387 PetscCall(MatGetSize(mat, &M, &N)); 6388 6389 /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */ 6390 /* entries come first, then local rows, then remote rows. */ 6391 PetscCount n1 = coo_n, *perm1; 6392 PetscInt *i1 = coo_i, *j1 = coo_j; 6393 6394 PetscCall(PetscMalloc1(n1, &perm1)); 6395 for (k = 0; k < n1; k++) perm1[k] = k; 6396 6397 /* Manipulate indices so that entries with negative row or col indices will have smallest 6398 row indices, local entries will have greater but negative row indices, and remote entries 6399 will have positive row indices. 6400 */ 6401 for (k = 0; k < n1; k++) { 6402 if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_INT_MIN; /* e.g., -2^31, minimal to move them ahead */ 6403 else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_INT_MAX; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_INT_MAX, -1] */ 6404 else { 6405 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_USER_INPUT, "MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows"); 6406 if (mpiaij->donotstash) i1[k] = PETSC_INT_MIN; /* Ignore offproc entries as if they had negative indices */ 6407 } 6408 } 6409 6410 /* Sort by row; after that, [0,k) have ignored entries, [k,rem) have local rows and [rem,n1) have remote rows */ 6411 PetscCall(PetscSortIntWithIntCountArrayPair(n1, i1, j1, perm1)); 6412 6413 /* Advance k to the first entry we need to take care of */ 6414 for (k = 0; k < n1; k++) 6415 if (i1[k] > PETSC_INT_MIN) break; 6416 PetscCount i1start = k; 6417 6418 PetscCall(PetscSortedIntUpperBound(i1, k, n1, rend - 1 - PETSC_INT_MAX, &rem)); /* rem is upper bound of the last local row */ 6419 for (; k < rem; k++) i1[k] += PETSC_INT_MAX; /* Revert row indices of 
local rows*/ 6420 6421 /* Send remote rows to their owner */ 6422 /* Find which rows should be sent to which remote ranks*/ 6423 PetscInt nsend = 0; /* Number of MPI ranks to send data to */ 6424 PetscMPIInt *sendto; /* [nsend], storing remote ranks */ 6425 PetscInt *nentries; /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */ 6426 const PetscInt *ranges; 6427 PetscInt maxNsend = size >= 128 ? 128 : size; /* Assume max 128 neighbors; realloc when needed */ 6428 6429 PetscCall(PetscLayoutGetRanges(mat->rmap, &ranges)); 6430 PetscCall(PetscMalloc2(maxNsend, &sendto, maxNsend, &nentries)); 6431 for (k = rem; k < n1;) { 6432 PetscMPIInt owner; 6433 PetscInt firstRow, lastRow; 6434 6435 /* Locate a row range */ 6436 firstRow = i1[k]; /* first row of this owner */ 6437 PetscCall(PetscLayoutFindOwner(mat->rmap, firstRow, &owner)); 6438 lastRow = ranges[owner + 1] - 1; /* last row of this owner */ 6439 6440 /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */ 6441 PetscCall(PetscSortedIntUpperBound(i1, k, n1, lastRow, &p)); 6442 6443 /* All entries in [k,p) belong to this remote owner */ 6444 if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */ 6445 PetscMPIInt *sendto2; 6446 PetscInt *nentries2; 6447 PetscInt maxNsend2 = (maxNsend <= size / 2) ? 
maxNsend * 2 : size; 6448 6449 PetscCall(PetscMalloc2(maxNsend2, &sendto2, maxNsend2, &nentries2)); 6450 PetscCall(PetscArraycpy(sendto2, sendto, maxNsend)); 6451 PetscCall(PetscArraycpy(nentries2, nentries2, maxNsend + 1)); 6452 PetscCall(PetscFree2(sendto, nentries2)); 6453 sendto = sendto2; 6454 nentries = nentries2; 6455 maxNsend = maxNsend2; 6456 } 6457 sendto[nsend] = owner; 6458 PetscCall(PetscIntCast(p - k, &nentries[nsend])); 6459 nsend++; 6460 k = p; 6461 } 6462 6463 /* Build 1st SF to know offsets on remote to send data */ 6464 PetscSF sf1; 6465 PetscInt nroots = 1, nroots2 = 0; 6466 PetscInt nleaves = nsend, nleaves2 = 0; 6467 PetscInt *offsets; 6468 PetscSFNode *iremote; 6469 6470 PetscCall(PetscSFCreate(comm, &sf1)); 6471 PetscCall(PetscMalloc1(nsend, &iremote)); 6472 PetscCall(PetscMalloc1(nsend, &offsets)); 6473 for (k = 0; k < nsend; k++) { 6474 iremote[k].rank = sendto[k]; 6475 iremote[k].index = 0; 6476 nleaves2 += nentries[k]; 6477 PetscCheck(nleaves2 >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF leaves is too large for PetscInt"); 6478 } 6479 PetscCall(PetscSFSetGraph(sf1, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 6480 PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1, MPIU_INT, PETSC_MEMTYPE_HOST, &nroots2 /*rootdata*/, PETSC_MEMTYPE_HOST, nentries /*leafdata*/, PETSC_MEMTYPE_HOST, offsets /*leafupdate*/, MPI_SUM)); 6481 PetscCall(PetscSFFetchAndOpEnd(sf1, MPIU_INT, &nroots2, nentries, offsets, MPI_SUM)); /* Would nroots2 overflow, we check offsets[] below */ 6482 PetscCall(PetscSFDestroy(&sf1)); 6483 PetscAssert(nleaves2 == n1 - rem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT, nleaves2, n1 - rem); 6484 6485 /* Build 2nd SF to send remote COOs to their owner */ 6486 PetscSF sf2; 6487 nroots = nroots2; 6488 nleaves = nleaves2; 6489 PetscCall(PetscSFCreate(comm, &sf2)); 6490 PetscCall(PetscSFSetFromOptions(sf2)); 6491 
PetscCall(PetscMalloc1(nleaves, &iremote)); 6492 p = 0; 6493 for (k = 0; k < nsend; k++) { 6494 PetscCheck(offsets[k] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF roots is too large for PetscInt"); 6495 for (q = 0; q < nentries[k]; q++, p++) { 6496 iremote[p].rank = sendto[k]; 6497 PetscCall(PetscIntCast(offsets[k] + q, &iremote[p].index)); 6498 } 6499 } 6500 PetscCall(PetscSFSetGraph(sf2, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 6501 6502 /* Send the remote COOs to their owner */ 6503 PetscInt n2 = nroots, *i2, *j2; /* Buffers for received COOs from other ranks, along with a permutation array */ 6504 PetscCount *perm2; /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */ 6505 PetscCall(PetscMalloc3(n2, &i2, n2, &j2, n2, &perm2)); 6506 PetscAssert(rem == 0 || i1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null"); 6507 PetscAssert(rem == 0 || j1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null"); 6508 PetscInt *i1prem = PetscSafePointerPlusOffset(i1, rem); 6509 PetscInt *j1prem = PetscSafePointerPlusOffset(j1, rem); 6510 PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, i1prem, PETSC_MEMTYPE_HOST, i2, MPI_REPLACE)); 6511 PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, i1prem, i2, MPI_REPLACE)); 6512 PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, j1prem, PETSC_MEMTYPE_HOST, j2, MPI_REPLACE)); 6513 PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, j1prem, j2, MPI_REPLACE)); 6514 6515 PetscCall(PetscFree(offsets)); 6516 PetscCall(PetscFree2(sendto, nentries)); 6517 6518 /* Sort received COOs by row along with the permutation array */ 6519 for (k = 0; k < n2; k++) perm2[k] = k; 6520 PetscCall(PetscSortIntWithIntCountArrayPair(n2, i2, j2, perm2)); 6521 6522 /* sf2 only sends contiguous leafdata to contiguous rootdata. 
We record the permutation which will be used to fill leafdata */ 6523 PetscCount *Cperm1; 6524 PetscAssert(rem == 0 || perm1 != NULL, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot add nonzero offset to null"); 6525 PetscCount *perm1prem = PetscSafePointerPlusOffset(perm1, rem); 6526 PetscCall(PetscMalloc1(nleaves, &Cperm1)); 6527 PetscCall(PetscArraycpy(Cperm1, perm1prem, nleaves)); 6528 6529 /* Support for HYPRE matrices, kind of a hack. 6530 Swap min column with diagonal so that diagonal values will go first */ 6531 PetscBool hypre; 6532 PetscCall(PetscStrcmp("_internal_COO_mat_for_hypre", ((PetscObject)mat)->name, &hypre)); 6533 if (hypre) { 6534 PetscInt *minj; 6535 PetscBT hasdiag; 6536 6537 PetscCall(PetscBTCreate(m, &hasdiag)); 6538 PetscCall(PetscMalloc1(m, &minj)); 6539 for (k = 0; k < m; k++) minj[k] = PETSC_INT_MAX; 6540 for (k = i1start; k < rem; k++) { 6541 if (j1[k] < cstart || j1[k] >= cend) continue; 6542 const PetscInt rindex = i1[k] - rstart; 6543 if ((j1[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex)); 6544 minj[rindex] = PetscMin(minj[rindex], j1[k]); 6545 } 6546 for (k = 0; k < n2; k++) { 6547 if (j2[k] < cstart || j2[k] >= cend) continue; 6548 const PetscInt rindex = i2[k] - rstart; 6549 if ((j2[k] - cstart) == rindex) PetscCall(PetscBTSet(hasdiag, rindex)); 6550 minj[rindex] = PetscMin(minj[rindex], j2[k]); 6551 } 6552 for (k = i1start; k < rem; k++) { 6553 const PetscInt rindex = i1[k] - rstart; 6554 if (j1[k] < cstart || j1[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue; 6555 if (j1[k] == minj[rindex]) j1[k] = i1[k] + (cstart - rstart); 6556 else if ((j1[k] - cstart) == rindex) j1[k] = minj[rindex]; 6557 } 6558 for (k = 0; k < n2; k++) { 6559 const PetscInt rindex = i2[k] - rstart; 6560 if (j2[k] < cstart || j2[k] >= cend || !PetscBTLookup(hasdiag, rindex)) continue; 6561 if (j2[k] == minj[rindex]) j2[k] = i2[k] + (cstart - rstart); 6562 else if ((j2[k] - cstart) == rindex) j2[k] = minj[rindex]; 6563 } 6564 
PetscCall(PetscBTDestroy(&hasdiag)); 6565 PetscCall(PetscFree(minj)); 6566 } 6567 6568 /* Split local COOs and received COOs into diag/offdiag portions */ 6569 PetscCount *rowBegin1, *rowMid1, *rowEnd1; 6570 PetscCount *Ajmap1, *Aperm1, *Bjmap1, *Bperm1; 6571 PetscCount Annz1, Bnnz1, Atot1, Btot1; 6572 PetscCount *rowBegin2, *rowMid2, *rowEnd2; 6573 PetscCount *Ajmap2, *Aperm2, *Bjmap2, *Bperm2; 6574 PetscCount Annz2, Bnnz2, Atot2, Btot2; 6575 6576 PetscCall(PetscCalloc3(m, &rowBegin1, m, &rowMid1, m, &rowEnd1)); 6577 PetscCall(PetscCalloc3(m, &rowBegin2, m, &rowMid2, m, &rowEnd2)); 6578 PetscCall(MatSplitEntries_Internal(mat, rem, i1, j1, perm1, rowBegin1, rowMid1, rowEnd1, &Atot1, &Aperm1, &Annz1, &Ajmap1, &Btot1, &Bperm1, &Bnnz1, &Bjmap1)); 6579 PetscCall(MatSplitEntries_Internal(mat, n2, i2, j2, perm2, rowBegin2, rowMid2, rowEnd2, &Atot2, &Aperm2, &Annz2, &Ajmap2, &Btot2, &Bperm2, &Bnnz2, &Bjmap2)); 6580 6581 /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */ 6582 PetscInt *Ai, *Bi; 6583 PetscInt *Aj, *Bj; 6584 6585 PetscCall(PetscMalloc1(m + 1, &Ai)); 6586 PetscCall(PetscMalloc1(m + 1, &Bi)); 6587 PetscCall(PetscMalloc1(Annz1 + Annz2, &Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */ 6588 PetscCall(PetscMalloc1(Bnnz1 + Bnnz2, &Bj)); 6589 6590 PetscCount *Aimap1, *Bimap1, *Aimap2, *Bimap2; 6591 PetscCall(PetscMalloc1(Annz1, &Aimap1)); 6592 PetscCall(PetscMalloc1(Bnnz1, &Bimap1)); 6593 PetscCall(PetscMalloc1(Annz2, &Aimap2)); 6594 PetscCall(PetscMalloc1(Bnnz2, &Bimap2)); 6595 6596 PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowBegin1, rowMid1, rowBegin2, rowMid2, Ajmap1, Ajmap2, Aimap1, Aimap2, Ai, Aj)); 6597 PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowMid1, rowEnd1, rowMid2, rowEnd2, Bjmap1, Bjmap2, Bimap1, Bimap2, Bi, Bj)); 6598 6599 /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we */ 6600 /* expect nonzeros in A/B most likely have local contributing 
entries */ 6601 PetscInt Annz = Ai[m]; 6602 PetscInt Bnnz = Bi[m]; 6603 PetscCount *Ajmap1_new, *Bjmap1_new; 6604 6605 PetscCall(PetscMalloc1(Annz + 1, &Ajmap1_new)); 6606 PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap1_new)); 6607 6608 PetscCall(ExpandJmap_Internal(Annz1, Annz, Aimap1, Ajmap1, Ajmap1_new)); 6609 PetscCall(ExpandJmap_Internal(Bnnz1, Bnnz, Bimap1, Bjmap1, Bjmap1_new)); 6610 6611 PetscCall(PetscFree(Aimap1)); 6612 PetscCall(PetscFree(Ajmap1)); 6613 PetscCall(PetscFree(Bimap1)); 6614 PetscCall(PetscFree(Bjmap1)); 6615 PetscCall(PetscFree3(rowBegin1, rowMid1, rowEnd1)); 6616 PetscCall(PetscFree3(rowBegin2, rowMid2, rowEnd2)); 6617 PetscCall(PetscFree(perm1)); 6618 PetscCall(PetscFree3(i2, j2, perm2)); 6619 6620 Ajmap1 = Ajmap1_new; 6621 Bjmap1 = Bjmap1_new; 6622 6623 /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */ 6624 if (Annz < Annz1 + Annz2) { 6625 PetscInt *Aj_new; 6626 PetscCall(PetscMalloc1(Annz, &Aj_new)); 6627 PetscCall(PetscArraycpy(Aj_new, Aj, Annz)); 6628 PetscCall(PetscFree(Aj)); 6629 Aj = Aj_new; 6630 } 6631 6632 if (Bnnz < Bnnz1 + Bnnz2) { 6633 PetscInt *Bj_new; 6634 PetscCall(PetscMalloc1(Bnnz, &Bj_new)); 6635 PetscCall(PetscArraycpy(Bj_new, Bj, Bnnz)); 6636 PetscCall(PetscFree(Bj)); 6637 Bj = Bj_new; 6638 } 6639 6640 /* Create new submatrices for on-process and off-process coupling */ 6641 PetscScalar *Aa, *Ba; 6642 MatType rtype; 6643 Mat_SeqAIJ *a, *b; 6644 PetscObjectState state; 6645 PetscCall(PetscCalloc1(Annz, &Aa)); /* Zero matrix on device */ 6646 PetscCall(PetscCalloc1(Bnnz, &Ba)); 6647 /* make Aj[] local, i.e, based off the start column of the diagonal portion */ 6648 if (cstart) { 6649 for (k = 0; k < Annz; k++) Aj[k] -= cstart; 6650 } 6651 6652 PetscCall(MatGetRootType_Private(mat, &rtype)); 6653 6654 MatSeqXAIJGetOptions_Private(mpiaij->A); 6655 PetscCall(MatDestroy(&mpiaij->A)); 6656 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, Ai, Aj, Aa, &mpiaij->A)); 6657 
PetscCall(MatSetBlockSizesFromMats(mpiaij->A, mat, mat)); 6658 MatSeqXAIJRestoreOptions_Private(mpiaij->A); 6659 6660 MatSeqXAIJGetOptions_Private(mpiaij->B); 6661 PetscCall(MatDestroy(&mpiaij->B)); 6662 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, mat->cmap->N, Bi, Bj, Ba, &mpiaij->B)); 6663 PetscCall(MatSetBlockSizesFromMats(mpiaij->B, mat, mat)); 6664 MatSeqXAIJRestoreOptions_Private(mpiaij->B); 6665 6666 PetscCall(MatSetUpMultiply_MPIAIJ(mat)); 6667 mat->was_assembled = PETSC_TRUE; // was_assembled in effect means the Mvctx is built; doing so avoids redundant MatSetUpMultiply_MPIAIJ 6668 state = mpiaij->A->nonzerostate + mpiaij->B->nonzerostate; 6669 PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat))); 6670 6671 a = (Mat_SeqAIJ *)mpiaij->A->data; 6672 b = (Mat_SeqAIJ *)mpiaij->B->data; 6673 a->free_a = PETSC_TRUE; 6674 a->free_ij = PETSC_TRUE; 6675 b->free_a = PETSC_TRUE; 6676 b->free_ij = PETSC_TRUE; 6677 a->maxnz = a->nz; 6678 b->maxnz = b->nz; 6679 6680 /* conversion must happen AFTER multiply setup */ 6681 PetscCall(MatConvert(mpiaij->A, rtype, MAT_INPLACE_MATRIX, &mpiaij->A)); 6682 PetscCall(MatConvert(mpiaij->B, rtype, MAT_INPLACE_MATRIX, &mpiaij->B)); 6683 PetscCall(VecDestroy(&mpiaij->lvec)); 6684 PetscCall(MatCreateVecs(mpiaij->B, &mpiaij->lvec, NULL)); 6685 6686 // Put the COO struct in a container and then attach that to the matrix 6687 PetscCall(PetscMalloc1(1, &coo)); 6688 coo->n = coo_n; 6689 coo->sf = sf2; 6690 coo->sendlen = nleaves; 6691 coo->recvlen = nroots; 6692 coo->Annz = Annz; 6693 coo->Bnnz = Bnnz; 6694 coo->Annz2 = Annz2; 6695 coo->Bnnz2 = Bnnz2; 6696 coo->Atot1 = Atot1; 6697 coo->Atot2 = Atot2; 6698 coo->Btot1 = Btot1; 6699 coo->Btot2 = Btot2; 6700 coo->Ajmap1 = Ajmap1; 6701 coo->Aperm1 = Aperm1; 6702 coo->Bjmap1 = Bjmap1; 6703 coo->Bperm1 = Bperm1; 6704 coo->Aimap2 = Aimap2; 6705 coo->Ajmap2 = Ajmap2; 6706 coo->Aperm2 = Aperm2; 6707 coo->Bimap2 = Bimap2; 6708 
coo->Bjmap2 = Bjmap2; 6709 coo->Bperm2 = Bperm2; 6710 coo->Cperm1 = Cperm1; 6711 // Allocate in preallocation. If not used, it has zero cost on host 6712 PetscCall(PetscMalloc2(coo->sendlen, &coo->sendbuf, coo->recvlen, &coo->recvbuf)); 6713 PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container)); 6714 PetscCall(PetscContainerSetPointer(container, coo)); 6715 PetscCall(PetscContainerSetCtxDestroy(container, MatCOOStructDestroy_MPIAIJ)); 6716 PetscCall(PetscObjectCompose((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject)container)); 6717 PetscCall(PetscContainerDestroy(&container)); 6718 PetscFunctionReturn(PETSC_SUCCESS); 6719 } 6720 6721 static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat, const PetscScalar v[], InsertMode imode) 6722 { 6723 Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)mat->data; 6724 Mat A = mpiaij->A, B = mpiaij->B; 6725 PetscScalar *Aa, *Ba; 6726 PetscScalar *sendbuf, *recvbuf; 6727 const PetscCount *Ajmap1, *Ajmap2, *Aimap2; 6728 const PetscCount *Bjmap1, *Bjmap2, *Bimap2; 6729 const PetscCount *Aperm1, *Aperm2, *Bperm1, *Bperm2; 6730 const PetscCount *Cperm1; 6731 PetscContainer container; 6732 MatCOOStruct_MPIAIJ *coo; 6733 6734 PetscFunctionBegin; 6735 PetscCall(PetscObjectQuery((PetscObject)mat, "__PETSc_MatCOOStruct_Host", (PetscObject *)&container)); 6736 PetscCheck(container, PetscObjectComm((PetscObject)mat), PETSC_ERR_PLIB, "Not found MatCOOStruct on this matrix"); 6737 PetscCall(PetscContainerGetPointer(container, (void **)&coo)); 6738 sendbuf = coo->sendbuf; 6739 recvbuf = coo->recvbuf; 6740 Ajmap1 = coo->Ajmap1; 6741 Ajmap2 = coo->Ajmap2; 6742 Aimap2 = coo->Aimap2; 6743 Bjmap1 = coo->Bjmap1; 6744 Bjmap2 = coo->Bjmap2; 6745 Bimap2 = coo->Bimap2; 6746 Aperm1 = coo->Aperm1; 6747 Aperm2 = coo->Aperm2; 6748 Bperm1 = coo->Bperm1; 6749 Bperm2 = coo->Bperm2; 6750 Cperm1 = coo->Cperm1; 6751 6752 PetscCall(MatSeqAIJGetArray(A, &Aa)); /* Might read and write matrix values */ 6753 PetscCall(MatSeqAIJGetArray(B, &Ba)); 6754 6755 /* Pack 
entries to be sent to remote */ 6756 for (PetscCount i = 0; i < coo->sendlen; i++) sendbuf[i] = v[Cperm1[i]]; 6757 6758 /* Send remote entries to their owner and overlap the communication with local computation */ 6759 PetscCall(PetscSFReduceWithMemTypeBegin(coo->sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, sendbuf, PETSC_MEMTYPE_HOST, recvbuf, MPI_REPLACE)); 6760 /* Add local entries to A and B */ 6761 for (PetscCount i = 0; i < coo->Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */ 6762 PetscScalar sum = 0.0; /* Do partial summation first to improve numerical stability */ 6763 for (PetscCount k = Ajmap1[i]; k < Ajmap1[i + 1]; k++) sum += v[Aperm1[k]]; 6764 Aa[i] = (imode == INSERT_VALUES ? 0.0 : Aa[i]) + sum; 6765 } 6766 for (PetscCount i = 0; i < coo->Bnnz; i++) { 6767 PetscScalar sum = 0.0; 6768 for (PetscCount k = Bjmap1[i]; k < Bjmap1[i + 1]; k++) sum += v[Bperm1[k]]; 6769 Ba[i] = (imode == INSERT_VALUES ? 0.0 : Ba[i]) + sum; 6770 } 6771 PetscCall(PetscSFReduceEnd(coo->sf, MPIU_SCALAR, sendbuf, recvbuf, MPI_REPLACE)); 6772 6773 /* Add received remote entries to A and B */ 6774 for (PetscCount i = 0; i < coo->Annz2; i++) { 6775 for (PetscCount k = Ajmap2[i]; k < Ajmap2[i + 1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]]; 6776 } 6777 for (PetscCount i = 0; i < coo->Bnnz2; i++) { 6778 for (PetscCount k = Bjmap2[i]; k < Bjmap2[i + 1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]]; 6779 } 6780 PetscCall(MatSeqAIJRestoreArray(A, &Aa)); 6781 PetscCall(MatSeqAIJRestoreArray(B, &Ba)); 6782 PetscFunctionReturn(PETSC_SUCCESS); 6783 } 6784 6785 /*MC 6786 MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices. 6787 6788 Options Database Keys: 6789 . 
-mat_type mpiaij - sets the matrix type to `MATMPIAIJ` during a call to `MatSetFromOptions()` 6790 6791 Level: beginner 6792 6793 Notes: 6794 `MatSetValues()` may be called for this matrix type with a `NULL` argument for the numerical values, 6795 in this case the values associated with the rows and columns one passes in are set to zero 6796 in the matrix 6797 6798 `MatSetOptions`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this no 6799 space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored 6800 6801 .seealso: [](ch_matrices), `Mat`, `MATSEQAIJ`, `MATAIJ`, `MatCreateAIJ()` 6802 M*/ 6803 PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) 6804 { 6805 Mat_MPIAIJ *b; 6806 PetscMPIInt size; 6807 6808 PetscFunctionBegin; 6809 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 6810 6811 PetscCall(PetscNew(&b)); 6812 B->data = (void *)b; 6813 B->ops[0] = MatOps_Values; 6814 B->assembled = PETSC_FALSE; 6815 B->insertmode = NOT_SET_VALUES; 6816 b->size = size; 6817 6818 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank)); 6819 6820 /* build cache for off array entries formed */ 6821 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash)); 6822 6823 b->donotstash = PETSC_FALSE; 6824 b->colmap = NULL; 6825 b->garray = NULL; 6826 b->roworiented = PETSC_TRUE; 6827 6828 /* stuff used for matrix vector multiply */ 6829 b->lvec = NULL; 6830 b->Mvctx = NULL; 6831 6832 /* stuff for MatGetRow() */ 6833 b->rowindices = NULL; 6834 b->rowvalues = NULL; 6835 b->getrowactive = PETSC_FALSE; 6836 6837 /* flexible pointer used in CUSPARSE classes */ 6838 b->spptr = NULL; 6839 6840 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetUseScalableIncreaseOverlap_C", MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ)); 6841 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIAIJ)); 6842 
PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIAIJ)); 6843 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_MPIAIJ)); 6844 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocation_C", MatMPIAIJSetPreallocation_MPIAIJ)); 6845 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetPreallocation_C", MatResetPreallocation_MPIAIJ)); 6846 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetHash_C", MatResetHash_MPIAIJ)); 6847 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocationCSR_C", MatMPIAIJSetPreallocationCSR_MPIAIJ)); 6848 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIAIJ)); 6849 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijperm_C", MatConvert_MPIAIJ_MPIAIJPERM)); 6850 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijsell_C", MatConvert_MPIAIJ_MPIAIJSELL)); 6851 #if defined(PETSC_HAVE_CUDA) 6852 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcusparse_C", MatConvert_MPIAIJ_MPIAIJCUSPARSE)); 6853 #endif 6854 #if defined(PETSC_HAVE_HIP) 6855 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijhipsparse_C", MatConvert_MPIAIJ_MPIAIJHIPSPARSE)); 6856 #endif 6857 #if defined(PETSC_HAVE_KOKKOS_KERNELS) 6858 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijkokkos_C", MatConvert_MPIAIJ_MPIAIJKokkos)); 6859 #endif 6860 #if defined(PETSC_HAVE_MKL_SPARSE) 6861 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijmkl_C", MatConvert_MPIAIJ_MPIAIJMKL)); 6862 #endif 6863 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcrl_C", MatConvert_MPIAIJ_MPIAIJCRL)); 6864 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpibaij_C", MatConvert_MPIAIJ_MPIBAIJ)); 
6865 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisbaij_C", MatConvert_MPIAIJ_MPISBAIJ)); 6866 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense)); 6867 #if defined(PETSC_HAVE_ELEMENTAL) 6868 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_elemental_C", MatConvert_MPIAIJ_Elemental)); 6869 #endif 6870 #if defined(PETSC_HAVE_SCALAPACK) 6871 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_scalapack_C", MatConvert_AIJ_ScaLAPACK)); 6872 #endif 6873 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_is_C", MatConvert_XAIJ_IS)); 6874 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisell_C", MatConvert_MPIAIJ_MPISELL)); 6875 #if defined(PETSC_HAVE_HYPRE) 6876 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_hypre_C", MatConvert_AIJ_HYPRE)); 6877 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", MatProductSetFromOptions_Transpose_AIJ_AIJ)); 6878 #endif 6879 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_is_mpiaij_C", MatProductSetFromOptions_IS_XAIJ)); 6880 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpiaij_C", MatProductSetFromOptions_MPIAIJ)); 6881 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetPreallocationCOO_C", MatSetPreallocationCOO_MPIAIJ)); 6882 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetValuesCOO_C", MatSetValuesCOO_MPIAIJ)); 6883 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIAIJ)); 6884 PetscFunctionReturn(PETSC_SUCCESS); 6885 } 6886 6887 /*@ 6888 MatCreateMPIAIJWithSplitArrays - creates a `MATMPIAIJ` matrix using arrays that contain the "diagonal" 6889 and "off-diagonal" part of the matrix in CSR format. 
6890 6891 Collective 6892 6893 Input Parameters: 6894 + comm - MPI communicator 6895 . m - number of local rows (Cannot be `PETSC_DECIDE`) 6896 . n - This value should be the same as the local size used in creating the 6897 x vector for the matrix-vector product $y = Ax$. (or `PETSC_DECIDE` to have 6898 calculated if `N` is given) For square matrices `n` is almost always `m`. 6899 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given) 6900 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given) 6901 . i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix 6902 . j - column indices, which must be local, i.e., based off the start column of the diagonal portion 6903 . a - matrix values 6904 . oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix 6905 . oj - column indices, which must be global, representing global columns in the `MATMPIAIJ` matrix 6906 - oa - matrix values 6907 6908 Output Parameter: 6909 . mat - the matrix 6910 6911 Level: advanced 6912 6913 Notes: 6914 The `i`, `j`, and `a` arrays ARE NOT copied by this routine into the internal format used by PETSc (even in Fortran). The user 6915 must free the arrays once the matrix has been destroyed and not before. 6916 6917 The `i` and `j` indices are 0 based 6918 6919 See `MatCreateAIJ()` for the definition of "diagonal" and "off-diagonal" portion of the matrix 6920 6921 This sets local rows and cannot be used to set off-processor values. 6922 6923 Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a 6924 legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does 6925 not easily support in-place reassembly. 
It is recommended to use MatSetValues() (or a variant thereof) because 6926 the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to 6927 keep track of the underlying array. Use `MatSetOption`(A,`MAT_NO_OFF_PROC_ENTRIES`,`PETSC_TRUE`) to disable all 6928 communication if it is known that only local entries will be set. 6929 6930 .seealso: [](ch_matrices), `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 6931 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()` 6932 @*/ 6933 PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt i[], PetscInt j[], PetscScalar a[], PetscInt oi[], PetscInt oj[], PetscScalar oa[], Mat *mat) 6934 { 6935 Mat_MPIAIJ *maij; 6936 6937 PetscFunctionBegin; 6938 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 6939 PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 6940 PetscCheck(oi[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "oi (row indices) must start with 0"); 6941 PetscCall(MatCreate(comm, mat)); 6942 PetscCall(MatSetSizes(*mat, m, n, M, N)); 6943 PetscCall(MatSetType(*mat, MATMPIAIJ)); 6944 maij = (Mat_MPIAIJ *)(*mat)->data; 6945 6946 (*mat)->preallocated = PETSC_TRUE; 6947 6948 PetscCall(PetscLayoutSetUp((*mat)->rmap)); 6949 PetscCall(PetscLayoutSetUp((*mat)->cmap)); 6950 6951 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, i, j, a, &maij->A)); 6952 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, (*mat)->cmap->N, oi, oj, oa, &maij->B)); 6953 6954 PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 6955 PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY)); 6956 PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY)); 6957 PetscCall(MatSetOption(*mat, 
MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE)); 6958 PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 6959 PetscFunctionReturn(PETSC_SUCCESS); 6960 } 6961 6962 typedef struct { 6963 Mat *mp; /* intermediate products */ 6964 PetscBool *mptmp; /* is the intermediate product temporary ? */ 6965 PetscInt cp; /* number of intermediate products */ 6966 6967 /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */ 6968 PetscInt *startsj_s, *startsj_r; 6969 PetscScalar *bufa; 6970 Mat P_oth; 6971 6972 /* may take advantage of merging product->B */ 6973 Mat Bloc; /* B-local by merging diag and off-diag */ 6974 6975 /* cusparse does not have support to split between symbolic and numeric phases. 6976 When api_user is true, we don't need to update the numerical values 6977 of the temporary storage */ 6978 PetscBool reusesym; 6979 6980 /* support for COO values insertion */ 6981 PetscScalar *coo_v, *coo_w; /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */ 6982 PetscInt **own; /* own[i] points to address of on-process COO indices for Mat mp[i] */ 6983 PetscInt **off; /* off[i] points to address of off-process COO indices for Mat mp[i] */ 6984 PetscBool hasoffproc; /* if true, have off-process values insertion (i.e. 
AtB or PtAP) */ 6985 PetscSF sf; /* used for non-local values insertion and memory malloc */ 6986 PetscMemType mtype; 6987 6988 /* customization */ 6989 PetscBool abmerge; 6990 PetscBool P_oth_bind; 6991 } MatMatMPIAIJBACKEND; 6992 6993 static PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data) 6994 { 6995 MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND *)data; 6996 PetscInt i; 6997 6998 PetscFunctionBegin; 6999 PetscCall(PetscFree2(mmdata->startsj_s, mmdata->startsj_r)); 7000 PetscCall(PetscFree(mmdata->bufa)); 7001 PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_v)); 7002 PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_w)); 7003 PetscCall(MatDestroy(&mmdata->P_oth)); 7004 PetscCall(MatDestroy(&mmdata->Bloc)); 7005 PetscCall(PetscSFDestroy(&mmdata->sf)); 7006 for (i = 0; i < mmdata->cp; i++) PetscCall(MatDestroy(&mmdata->mp[i])); 7007 PetscCall(PetscFree2(mmdata->mp, mmdata->mptmp)); 7008 PetscCall(PetscFree(mmdata->own[0])); 7009 PetscCall(PetscFree(mmdata->own)); 7010 PetscCall(PetscFree(mmdata->off[0])); 7011 PetscCall(PetscFree(mmdata->off)); 7012 PetscCall(PetscFree(mmdata)); 7013 PetscFunctionReturn(PETSC_SUCCESS); 7014 } 7015 7016 /* Copy selected n entries with indices in idx[] of A to v[]. 
7017 If idx is NULL, copy the whole data array of A to v[] 7018 */ 7019 static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[]) 7020 { 7021 PetscErrorCode (*f)(Mat, PetscInt, const PetscInt[], PetscScalar[]); 7022 7023 PetscFunctionBegin; 7024 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJCopySubArray_C", &f)); 7025 if (f) { 7026 PetscCall((*f)(A, n, idx, v)); 7027 } else { 7028 const PetscScalar *vv; 7029 7030 PetscCall(MatSeqAIJGetArrayRead(A, &vv)); 7031 if (n && idx) { 7032 PetscScalar *w = v; 7033 const PetscInt *oi = idx; 7034 PetscInt j; 7035 7036 for (j = 0; j < n; j++) *w++ = vv[*oi++]; 7037 } else { 7038 PetscCall(PetscArraycpy(v, vv, n)); 7039 } 7040 PetscCall(MatSeqAIJRestoreArrayRead(A, &vv)); 7041 } 7042 PetscFunctionReturn(PETSC_SUCCESS); 7043 } 7044 7045 static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C) 7046 { 7047 MatMatMPIAIJBACKEND *mmdata; 7048 PetscInt i, n_d, n_o; 7049 7050 PetscFunctionBegin; 7051 MatCheckProduct(C, 1); 7052 PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty"); 7053 mmdata = (MatMatMPIAIJBACKEND *)C->product->data; 7054 if (!mmdata->reusesym) { /* update temporary matrices */ 7055 if (mmdata->P_oth) PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A, C->product->B, MAT_REUSE_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7056 if (mmdata->Bloc) PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B, MAT_REUSE_MATRIX, NULL, &mmdata->Bloc)); 7057 } 7058 mmdata->reusesym = PETSC_FALSE; 7059 7060 for (i = 0; i < mmdata->cp; i++) { 7061 PetscCheck(mmdata->mp[i]->ops->productnumeric, PetscObjectComm((PetscObject)mmdata->mp[i]), PETSC_ERR_PLIB, "Missing numeric op for %s", MatProductTypes[mmdata->mp[i]->product->type]); 7062 PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i])); 7063 } 7064 for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) { 7065 PetscInt noff; 7066 7067 
PetscCall(PetscIntCast(mmdata->off[i + 1] - mmdata->off[i], &noff)); 7068 if (mmdata->mptmp[i]) continue; 7069 if (noff) { 7070 PetscInt nown; 7071 7072 PetscCall(PetscIntCast(mmdata->own[i + 1] - mmdata->own[i], &nown)); 7073 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], noff, mmdata->off[i], mmdata->coo_w + n_o)); 7074 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], nown, mmdata->own[i], mmdata->coo_v + n_d)); 7075 n_o += noff; 7076 n_d += nown; 7077 } else { 7078 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mmdata->mp[i]->data; 7079 7080 PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], mm->nz, NULL, mmdata->coo_v + n_d)); 7081 n_d += mm->nz; 7082 } 7083 } 7084 if (mmdata->hasoffproc) { /* offprocess insertion */ 7085 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d)); 7086 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d)); 7087 } 7088 PetscCall(MatSetValuesCOO(C, mmdata->coo_v, INSERT_VALUES)); 7089 PetscFunctionReturn(PETSC_SUCCESS); 7090 } 7091 7092 /* Support for Pt * A, A * P, or Pt * A * P */ 7093 #define MAX_NUMBER_INTERMEDIATE 4 7094 PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C) 7095 { 7096 Mat_Product *product = C->product; 7097 Mat A, P, mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */ 7098 Mat_MPIAIJ *a, *p; 7099 MatMatMPIAIJBACKEND *mmdata; 7100 ISLocalToGlobalMapping P_oth_l2g = NULL; 7101 IS glob = NULL; 7102 const char *prefix; 7103 char pprefix[256]; 7104 const PetscInt *globidx, *P_oth_idx; 7105 PetscInt i, j, cp, m, n, M, N, *coo_i, *coo_j; 7106 PetscCount ncoo, ncoo_d, ncoo_o, ncoo_oown; 7107 PetscInt cmapt[MAX_NUMBER_INTERMEDIATE], rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[]. 
*/ 7108 /* type-0: consecutive, start from 0; type-1: consecutive with */ 7109 /* a base offset; type-2: sparse with a local to global map table */ 7110 const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE], *rmapa[MAX_NUMBER_INTERMEDIATE]; /* col/row local to global map array (table) for type-2 map type */ 7111 7112 MatProductType ptype; 7113 PetscBool mptmp[MAX_NUMBER_INTERMEDIATE], hasoffproc = PETSC_FALSE, iscuda, iship, iskokk; 7114 PetscMPIInt size; 7115 7116 PetscFunctionBegin; 7117 MatCheckProduct(C, 1); 7118 PetscCheck(!product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 7119 ptype = product->type; 7120 if (product->A->symmetric == PETSC_BOOL3_TRUE && ptype == MATPRODUCT_AtB) { 7121 ptype = MATPRODUCT_AB; 7122 product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE; 7123 } 7124 switch (ptype) { 7125 case MATPRODUCT_AB: 7126 A = product->A; 7127 P = product->B; 7128 m = A->rmap->n; 7129 n = P->cmap->n; 7130 M = A->rmap->N; 7131 N = P->cmap->N; 7132 hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */ 7133 break; 7134 case MATPRODUCT_AtB: 7135 P = product->A; 7136 A = product->B; 7137 m = P->cmap->n; 7138 n = A->cmap->n; 7139 M = P->cmap->N; 7140 N = A->cmap->N; 7141 hasoffproc = PETSC_TRUE; 7142 break; 7143 case MATPRODUCT_PtAP: 7144 A = product->A; 7145 P = product->B; 7146 m = P->cmap->n; 7147 n = P->cmap->n; 7148 M = P->cmap->N; 7149 N = P->cmap->N; 7150 hasoffproc = PETSC_TRUE; 7151 break; 7152 default: 7153 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]); 7154 } 7155 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C), &size)); 7156 if (size == 1) hasoffproc = PETSC_FALSE; 7157 7158 /* defaults */ 7159 for (i = 0; i < MAX_NUMBER_INTERMEDIATE; i++) { 7160 mp[i] = NULL; 7161 mptmp[i] = PETSC_FALSE; 7162 rmapt[i] = -1; 7163 cmapt[i] = -1; 7164 rmapa[i] = NULL; 7165 cmapa[i] = NULL; 7166 } 7167 7168 /* customization */ 
7169 PetscCall(PetscNew(&mmdata)); 7170 mmdata->reusesym = product->api_user; 7171 if (ptype == MATPRODUCT_AB) { 7172 if (product->api_user) { 7173 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatMult", "Mat"); 7174 PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL)); 7175 PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7176 PetscOptionsEnd(); 7177 } else { 7178 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat"); 7179 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL)); 7180 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7181 PetscOptionsEnd(); 7182 } 7183 } else if (ptype == MATPRODUCT_PtAP) { 7184 if (product->api_user) { 7185 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatPtAP", "Mat"); 7186 PetscCall(PetscOptionsBool("-matptap_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7187 PetscOptionsEnd(); 7188 } else { 7189 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat"); 7190 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 7191 PetscOptionsEnd(); 7192 } 7193 } 7194 a = (Mat_MPIAIJ *)A->data; 7195 p = (Mat_MPIAIJ *)P->data; 7196 PetscCall(MatSetSizes(C, m, n, M, N)); 7197 PetscCall(PetscLayoutSetUp(C->rmap)); 7198 PetscCall(PetscLayoutSetUp(C->cmap)); 7199 PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 7200 
PetscCall(MatGetOptionsPrefix(C, &prefix)); 7201 7202 cp = 0; 7203 switch (ptype) { 7204 case MATPRODUCT_AB: /* A * P */ 7205 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7206 7207 /* A_diag * P_local (merged or not) */ 7208 if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */ 7209 /* P is product->B */ 7210 PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7211 PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp])); 7212 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7213 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7214 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7215 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7216 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7217 mp[cp]->product->api_user = product->api_user; 7218 PetscCall(MatProductSetFromOptions(mp[cp])); 7219 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7220 PetscCall(ISGetIndices(glob, &globidx)); 7221 rmapt[cp] = 1; 7222 cmapt[cp] = 2; 7223 cmapa[cp] = globidx; 7224 mptmp[cp] = PETSC_FALSE; 7225 cp++; 7226 } else { /* A_diag * P_diag and A_diag * P_off */ 7227 PetscCall(MatProductCreate(a->A, p->A, NULL, &mp[cp])); 7228 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7229 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7230 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7231 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7232 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7233 mp[cp]->product->api_user = product->api_user; 7234 PetscCall(MatProductSetFromOptions(mp[cp])); 7235 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7236 rmapt[cp] = 1; 7237 cmapt[cp] = 1; 7238 mptmp[cp] = PETSC_FALSE; 7239 cp++; 7240 PetscCall(MatProductCreate(a->A, p->B, NULL, &mp[cp])); 7241 
PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7242 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7243 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7244 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7245 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7246 mp[cp]->product->api_user = product->api_user; 7247 PetscCall(MatProductSetFromOptions(mp[cp])); 7248 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7249 rmapt[cp] = 1; 7250 cmapt[cp] = 2; 7251 cmapa[cp] = p->garray; 7252 mptmp[cp] = PETSC_FALSE; 7253 cp++; 7254 } 7255 7256 /* A_off * P_other */ 7257 if (mmdata->P_oth) { 7258 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); /* make P_oth use local col ids */ 7259 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx)); 7260 PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)a->B)->type_name)); 7261 PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind)); 7262 PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp])); 7263 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7264 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7265 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7266 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7267 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7268 mp[cp]->product->api_user = product->api_user; 7269 PetscCall(MatProductSetFromOptions(mp[cp])); 7270 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7271 rmapt[cp] = 1; 7272 cmapt[cp] = 2; 7273 cmapa[cp] = P_oth_idx; 7274 mptmp[cp] = PETSC_FALSE; 7275 cp++; 7276 } 7277 break; 7278 7279 case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */ 7280 /* A is product->B */ 7281 PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7282 if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */ 7283 PetscCall(MatProductCreate(mmdata->Bloc, mmdata->Bloc, 
NULL, &mp[cp])); 7284 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7285 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7286 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7287 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7288 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7289 mp[cp]->product->api_user = product->api_user; 7290 PetscCall(MatProductSetFromOptions(mp[cp])); 7291 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7292 PetscCall(ISGetIndices(glob, &globidx)); 7293 rmapt[cp] = 2; 7294 rmapa[cp] = globidx; 7295 cmapt[cp] = 2; 7296 cmapa[cp] = globidx; 7297 mptmp[cp] = PETSC_FALSE; 7298 cp++; 7299 } else { 7300 PetscCall(MatProductCreate(p->A, mmdata->Bloc, NULL, &mp[cp])); 7301 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7302 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7303 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7304 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7305 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7306 mp[cp]->product->api_user = product->api_user; 7307 PetscCall(MatProductSetFromOptions(mp[cp])); 7308 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7309 PetscCall(ISGetIndices(glob, &globidx)); 7310 rmapt[cp] = 1; 7311 cmapt[cp] = 2; 7312 cmapa[cp] = globidx; 7313 mptmp[cp] = PETSC_FALSE; 7314 cp++; 7315 PetscCall(MatProductCreate(p->B, mmdata->Bloc, NULL, &mp[cp])); 7316 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7317 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7318 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7319 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7320 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7321 mp[cp]->product->api_user = product->api_user; 7322 PetscCall(MatProductSetFromOptions(mp[cp])); 7323 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7324 rmapt[cp] = 2; 7325 rmapa[cp] = p->garray; 7326 cmapt[cp] = 
2; 7327 cmapa[cp] = globidx; 7328 mptmp[cp] = PETSC_FALSE; 7329 cp++; 7330 } 7331 break; 7332 case MATPRODUCT_PtAP: 7333 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7334 /* P is product->B */ 7335 PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7336 PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp])); 7337 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_PtAP)); 7338 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7339 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7340 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7341 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7342 mp[cp]->product->api_user = product->api_user; 7343 PetscCall(MatProductSetFromOptions(mp[cp])); 7344 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7345 PetscCall(ISGetIndices(glob, &globidx)); 7346 rmapt[cp] = 2; 7347 rmapa[cp] = globidx; 7348 cmapt[cp] = 2; 7349 cmapa[cp] = globidx; 7350 mptmp[cp] = PETSC_FALSE; 7351 cp++; 7352 if (mmdata->P_oth) { 7353 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); 7354 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx)); 7355 PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)a->B)->type_name)); 7356 PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind)); 7357 PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp])); 7358 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7359 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7360 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7361 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7362 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7363 mp[cp]->product->api_user = product->api_user; 7364 PetscCall(MatProductSetFromOptions(mp[cp])); 7365 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7366 mptmp[cp] = PETSC_TRUE; 
7367 cp++; 7368 PetscCall(MatProductCreate(mmdata->Bloc, mp[1], NULL, &mp[cp])); 7369 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7370 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7371 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7372 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7373 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7374 mp[cp]->product->api_user = product->api_user; 7375 PetscCall(MatProductSetFromOptions(mp[cp])); 7376 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7377 rmapt[cp] = 2; 7378 rmapa[cp] = globidx; 7379 cmapt[cp] = 2; 7380 cmapa[cp] = P_oth_idx; 7381 mptmp[cp] = PETSC_FALSE; 7382 cp++; 7383 } 7384 break; 7385 default: 7386 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]); 7387 } 7388 /* sanity check */ 7389 if (size > 1) 7390 for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected offproc map type for product %" PetscInt_FMT, i); 7391 7392 PetscCall(PetscMalloc2(cp, &mmdata->mp, cp, &mmdata->mptmp)); 7393 for (i = 0; i < cp; i++) { 7394 mmdata->mp[i] = mp[i]; 7395 mmdata->mptmp[i] = mptmp[i]; 7396 } 7397 mmdata->cp = cp; 7398 C->product->data = mmdata; 7399 C->product->destroy = MatDestroy_MatMatMPIAIJBACKEND; 7400 C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND; 7401 7402 /* memory type */ 7403 mmdata->mtype = PETSC_MEMTYPE_HOST; 7404 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iscuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, "")); 7405 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iship, MATSEQAIJHIPSPARSE, MATMPIAIJHIPSPARSE, "")); 7406 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iskokk, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, "")); 7407 if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA; 7408 else if (iship) mmdata->mtype = PETSC_MEMTYPE_HIP; 7409 else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS; 7410 7411 /* prepare coo 
coordinates for values insertion */ 7412 7413 /* count total nonzeros of those intermediate seqaij Mats 7414 ncoo_d: # of nonzeros of matrices that do not have offproc entries 7415 ncoo_o: # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs 7416 ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally 7417 */ 7418 for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) { 7419 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7420 if (mptmp[cp]) continue; 7421 if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scatter to all processes (might include self) */ 7422 const PetscInt *rmap = rmapa[cp]; 7423 const PetscInt mr = mp[cp]->rmap->n; 7424 const PetscInt rs = C->rmap->rstart; 7425 const PetscInt re = C->rmap->rend; 7426 const PetscInt *ii = mm->i; 7427 for (i = 0; i < mr; i++) { 7428 const PetscInt gr = rmap[i]; 7429 const PetscInt nz = ii[i + 1] - ii[i]; 7430 if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */ 7431 else ncoo_oown += nz; /* this row is local */ 7432 } 7433 } else ncoo_d += mm->nz; 7434 } 7435 7436 /* 7437 ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc 7438 7439 ncoo = ncoo_d + ncoo_oown + ncoo2, which ncoo2 is number of nonzeros inserted to me by other procs. 7440 7441 off[0] points to a big index array, which is shared by off[1,2,...]. Similarly, for own[0]. 7442 7443 off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others 7444 own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally 7445 so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others. 7446 7447 coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc. 7448 Ex. 
coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaining part stores i of nonzeros I will receive. 7449 */ 7450 PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->off)); /* +1 to make a csr-like data structure */ 7451 PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->own)); 7452 7453 /* gather (i,j) of nonzeros inserted by remote procs */ 7454 if (hasoffproc) { 7455 PetscSF msf; 7456 PetscInt ncoo2, *coo_i2, *coo_j2; 7457 7458 PetscCall(PetscMalloc1(ncoo_o, &mmdata->off[0])); 7459 PetscCall(PetscMalloc1(ncoo_oown, &mmdata->own[0])); 7460 PetscCall(PetscMalloc2(ncoo_o, &coo_i, ncoo_o, &coo_j)); /* to collect (i,j) of entries to be sent to others */ 7461 7462 for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) { 7463 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7464 PetscInt *idxoff = mmdata->off[cp]; 7465 PetscInt *idxown = mmdata->own[cp]; 7466 if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */ 7467 const PetscInt *rmap = rmapa[cp]; 7468 const PetscInt *cmap = cmapa[cp]; 7469 const PetscInt *ii = mm->i; 7470 PetscInt *coi = coo_i + ncoo_o; 7471 PetscInt *coj = coo_j + ncoo_o; 7472 const PetscInt mr = mp[cp]->rmap->n; 7473 const PetscInt rs = C->rmap->rstart; 7474 const PetscInt re = C->rmap->rend; 7475 const PetscInt cs = C->cmap->rstart; 7476 for (i = 0; i < mr; i++) { 7477 const PetscInt *jj = mm->j + ii[i]; 7478 const PetscInt gr = rmap[i]; 7479 const PetscInt nz = ii[i + 1] - ii[i]; 7480 if (gr < rs || gr >= re) { /* this is an offproc row */ 7481 for (j = ii[i]; j < ii[i + 1]; j++) { 7482 *coi++ = gr; 7483 *idxoff++ = j; 7484 } 7485 if (!cmapt[cp]) { /* already global */ 7486 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7487 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7488 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7489 } else { /* offdiag */ 7490 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7491 } 7492 ncoo_o += nz; 7493 } else { /* this is a local row */ 7494 for (j = ii[i]; j < 
ii[i + 1]; j++) *idxown++ = j; 7495 } 7496 } 7497 } 7498 mmdata->off[cp + 1] = idxoff; 7499 mmdata->own[cp + 1] = idxown; 7500 } 7501 7502 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf)); 7503 PetscInt incoo_o; 7504 PetscCall(PetscIntCast(ncoo_o, &incoo_o)); 7505 PetscCall(PetscSFSetGraphLayout(mmdata->sf, C->rmap, incoo_o /*nleaves*/, NULL /*ilocal*/, PETSC_OWN_POINTER, coo_i)); 7506 PetscCall(PetscSFGetMultiSF(mmdata->sf, &msf)); 7507 PetscCall(PetscSFGetGraph(msf, &ncoo2 /*nroots*/, NULL, NULL, NULL)); 7508 ncoo = ncoo_d + ncoo_oown + ncoo2; 7509 PetscCall(PetscMalloc2(ncoo, &coo_i2, ncoo, &coo_j2)); 7510 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */ 7511 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); 7512 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown)); 7513 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown)); 7514 PetscCall(PetscFree2(coo_i, coo_j)); 7515 /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */ 7516 PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo_o * sizeof(PetscScalar), (void **)&mmdata->coo_w)); 7517 coo_i = coo_i2; 7518 coo_j = coo_j2; 7519 } else { /* no offproc values insertion */ 7520 ncoo = ncoo_d; 7521 PetscCall(PetscMalloc2(ncoo, &coo_i, ncoo, &coo_j)); 7522 7523 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf)); 7524 PetscCall(PetscSFSetGraph(mmdata->sf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER)); 7525 PetscCall(PetscSFSetUp(mmdata->sf)); 7526 } 7527 mmdata->hasoffproc = hasoffproc; 7528 7529 /* gather (i,j) of nonzeros inserted locally */ 7530 for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) { 7531 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7532 PetscInt *coi = coo_i + ncoo_d; 7533 PetscInt *coj = coo_j + ncoo_d; 7534 const PetscInt *jj = 
mm->j; 7535 const PetscInt *ii = mm->i; 7536 const PetscInt *cmap = cmapa[cp]; 7537 const PetscInt *rmap = rmapa[cp]; 7538 const PetscInt mr = mp[cp]->rmap->n; 7539 const PetscInt rs = C->rmap->rstart; 7540 const PetscInt re = C->rmap->rend; 7541 const PetscInt cs = C->cmap->rstart; 7542 7543 if (mptmp[cp]) continue; 7544 if (rmapt[cp] == 1) { /* consecutive rows */ 7545 /* fill coo_i */ 7546 for (i = 0; i < mr; i++) { 7547 const PetscInt gr = i + rs; 7548 for (j = ii[i]; j < ii[i + 1]; j++) coi[j] = gr; 7549 } 7550 /* fill coo_j */ 7551 if (!cmapt[cp]) { /* type-0, already global */ 7552 PetscCall(PetscArraycpy(coj, jj, mm->nz)); 7553 } else if (cmapt[cp] == 1) { /* type-1, local to global for consecutive columns of C */ 7554 for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */ 7555 } else { /* type-2, local to global for sparse columns */ 7556 for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]]; 7557 } 7558 ncoo_d += mm->nz; 7559 } else if (rmapt[cp] == 2) { /* sparse rows */ 7560 for (i = 0; i < mr; i++) { 7561 const PetscInt *jj = mm->j + ii[i]; 7562 const PetscInt gr = rmap[i]; 7563 const PetscInt nz = ii[i + 1] - ii[i]; 7564 if (gr >= rs && gr < re) { /* local rows */ 7565 for (j = ii[i]; j < ii[i + 1]; j++) *coi++ = gr; 7566 if (!cmapt[cp]) { /* type-0, already global */ 7567 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7568 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7569 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7570 } else { /* type-2, local to global for sparse columns */ 7571 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7572 } 7573 ncoo_d += nz; 7574 } 7575 } 7576 } 7577 } 7578 if (glob) PetscCall(ISRestoreIndices(glob, &globidx)); 7579 PetscCall(ISDestroy(&glob)); 7580 if (P_oth_l2g) PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g, &P_oth_idx)); 7581 PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g)); 7582 /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to 
this proc */ 7583 PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo * sizeof(PetscScalar), (void **)&mmdata->coo_v)); 7584 7585 /* set block sizes */ 7586 A = product->A; 7587 P = product->B; 7588 switch (ptype) { 7589 case MATPRODUCT_PtAP: 7590 PetscCall(MatSetBlockSizes(C, P->cmap->bs, P->cmap->bs)); 7591 break; 7592 case MATPRODUCT_RARt: 7593 PetscCall(MatSetBlockSizes(C, P->rmap->bs, P->rmap->bs)); 7594 break; 7595 case MATPRODUCT_ABC: 7596 PetscCall(MatSetBlockSizesFromMats(C, A, product->C)); 7597 break; 7598 case MATPRODUCT_AB: 7599 PetscCall(MatSetBlockSizesFromMats(C, A, P)); 7600 break; 7601 case MATPRODUCT_AtB: 7602 PetscCall(MatSetBlockSizes(C, A->cmap->bs, P->cmap->bs)); 7603 break; 7604 case MATPRODUCT_ABt: 7605 PetscCall(MatSetBlockSizes(C, A->rmap->bs, P->rmap->bs)); 7606 break; 7607 default: 7608 SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for ProductType %s", MatProductTypes[ptype]); 7609 } 7610 7611 /* preallocate with COO data */ 7612 PetscCall(MatSetPreallocationCOO(C, ncoo, coo_i, coo_j)); 7613 PetscCall(PetscFree2(coo_i, coo_j)); 7614 PetscFunctionReturn(PETSC_SUCCESS); 7615 } 7616 7617 PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat) 7618 { 7619 Mat_Product *product = mat->product; 7620 #if defined(PETSC_HAVE_DEVICE) 7621 PetscBool match = PETSC_FALSE; 7622 PetscBool usecpu = PETSC_FALSE; 7623 #else 7624 PetscBool match = PETSC_TRUE; 7625 #endif 7626 7627 PetscFunctionBegin; 7628 MatCheckProduct(mat, 1); 7629 #if defined(PETSC_HAVE_DEVICE) 7630 if (!product->A->boundtocpu && !product->B->boundtocpu) PetscCall(PetscObjectTypeCompare((PetscObject)product->B, ((PetscObject)product->A)->type_name, &match)); 7631 if (match) { /* we can always fallback to the CPU if requested */ 7632 switch (product->type) { 7633 case MATPRODUCT_AB: 7634 if (product->api_user) { 7635 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatMatMult", "Mat"); 7636 
PetscCall(PetscOptionsBool("-matmatmult_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL)); 7637 PetscOptionsEnd(); 7638 } else { 7639 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AB", "Mat"); 7640 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL)); 7641 PetscOptionsEnd(); 7642 } 7643 break; 7644 case MATPRODUCT_AtB: 7645 if (product->api_user) { 7646 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatTransposeMatMult", "Mat"); 7647 PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL)); 7648 PetscOptionsEnd(); 7649 } else { 7650 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AtB", "Mat"); 7651 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL)); 7652 PetscOptionsEnd(); 7653 } 7654 break; 7655 case MATPRODUCT_PtAP: 7656 if (product->api_user) { 7657 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatPtAP", "Mat"); 7658 PetscCall(PetscOptionsBool("-matptap_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL)); 7659 PetscOptionsEnd(); 7660 } else { 7661 PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_PtAP", "Mat"); 7662 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL)); 7663 PetscOptionsEnd(); 7664 } 7665 break; 7666 default: 7667 break; 7668 } 7669 match = (PetscBool)!usecpu; 7670 } 7671 #endif 7672 if (match) { 7673 switch (product->type) { 7674 case MATPRODUCT_AB: 7675 case MATPRODUCT_AtB: 7676 case MATPRODUCT_PtAP: 7677 mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND; 7678 break; 7679 default: 7680 break; 7681 } 7682 } 7683 /* fallback to 
MPIAIJ ops */ 7684 if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat)); 7685 PetscFunctionReturn(PETSC_SUCCESS); 7686 } 7687 7688 /* 7689 Produces a set of block column indices of the matrix row, one for each block represented in the original row 7690 7691 n - the number of block indices in cc[] 7692 cc - the block indices (must be large enough to contain the indices) 7693 */ 7694 static inline PetscErrorCode MatCollapseRow(Mat Amat, PetscInt row, PetscInt bs, PetscInt *n, PetscInt *cc) 7695 { 7696 PetscInt cnt = -1, nidx, j; 7697 const PetscInt *idx; 7698 7699 PetscFunctionBegin; 7700 PetscCall(MatGetRow(Amat, row, &nidx, &idx, NULL)); 7701 if (nidx) { 7702 cnt = 0; 7703 cc[cnt] = idx[0] / bs; 7704 for (j = 1; j < nidx; j++) { 7705 if (cc[cnt] < idx[j] / bs) cc[++cnt] = idx[j] / bs; 7706 } 7707 } 7708 PetscCall(MatRestoreRow(Amat, row, &nidx, &idx, NULL)); 7709 *n = cnt + 1; 7710 PetscFunctionReturn(PETSC_SUCCESS); 7711 } 7712 7713 /* 7714 Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows 7715 7716 ncollapsed - the number of block indices 7717 collapsed - the block indices (must be large enough to contain the indices) 7718 */ 7719 static inline PetscErrorCode MatCollapseRows(Mat Amat, PetscInt start, PetscInt bs, PetscInt *w0, PetscInt *w1, PetscInt *w2, PetscInt *ncollapsed, PetscInt **collapsed) 7720 { 7721 PetscInt i, nprev, *cprev = w0, ncur = 0, *ccur = w1, *merged = w2, *cprevtmp; 7722 7723 PetscFunctionBegin; 7724 PetscCall(MatCollapseRow(Amat, start, bs, &nprev, cprev)); 7725 for (i = start + 1; i < start + bs; i++) { 7726 PetscCall(MatCollapseRow(Amat, i, bs, &ncur, ccur)); 7727 PetscCall(PetscMergeIntArray(nprev, cprev, ncur, ccur, &nprev, &merged)); 7728 cprevtmp = cprev; 7729 cprev = merged; 7730 merged = cprevtmp; 7731 } 7732 *ncollapsed = nprev; 7733 if (collapsed) *collapsed = cprev; 7734 PetscFunctionReturn(PETSC_SUCCESS); 7735 } 7736 7737 /* 7738 
MatCreateGraph_Simple_AIJ - create simple scalar matrix (graph) from potentially blocked matrix 7739 7740 Input Parameter: 7741 . Amat - matrix 7742 - symmetrize - make the result symmetric 7743 + scale - scale with diagonal 7744 7745 Output Parameter: 7746 . a_Gmat - output scalar graph >= 0 7747 7748 */ 7749 PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, PetscReal filter, PetscInt index_size, PetscInt index[], Mat *a_Gmat) 7750 { 7751 PetscInt Istart, Iend, Ii, jj, kk, ncols, nloc, NN, MM, bs; 7752 MPI_Comm comm; 7753 Mat Gmat; 7754 PetscBool ismpiaij, isseqaij; 7755 Mat a, b, c; 7756 MatType jtype; 7757 7758 PetscFunctionBegin; 7759 PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm)); 7760 PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend)); 7761 PetscCall(MatGetSize(Amat, &MM, &NN)); 7762 PetscCall(MatGetBlockSize(Amat, &bs)); 7763 nloc = (Iend - Istart) / bs; 7764 7765 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATSEQAIJ, &isseqaij)); 7766 PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATMPIAIJ, &ismpiaij)); 7767 PetscCheck(isseqaij || ismpiaij, comm, PETSC_ERR_USER, "Require (MPI)AIJ matrix type"); 7768 7769 /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */ 7770 /* A solution consists in providing a new API, MatAIJGetCollapsedAIJ, and each class can provide a fast 7771 implementation */ 7772 if (bs > 1) { 7773 PetscCall(MatGetType(Amat, &jtype)); 7774 PetscCall(MatCreate(comm, &Gmat)); 7775 PetscCall(MatSetType(Gmat, jtype)); 7776 PetscCall(MatSetSizes(Gmat, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE)); 7777 PetscCall(MatSetBlockSizes(Gmat, 1, 1)); 7778 if (isseqaij || ((Mat_MPIAIJ *)Amat->data)->garray) { 7779 PetscInt *d_nnz, *o_nnz; 7780 MatScalar *aa, val, *AA; 7781 PetscInt *aj, *ai, *AJ, nc, nmax = 0; 7782 7783 if (isseqaij) { 7784 a = Amat; 7785 b = NULL; 7786 } else { 7787 Mat_MPIAIJ *d = (Mat_MPIAIJ *)Amat->data; 
7788 a = d->A; 7789 b = d->B; 7790 } 7791 PetscCall(PetscInfo(Amat, "New bs>1 Graph. nloc=%" PetscInt_FMT "\n", nloc)); 7792 PetscCall(PetscMalloc2(nloc, &d_nnz, (isseqaij ? 0 : nloc), &o_nnz)); 7793 for (c = a, kk = 0; c && kk < 2; c = b, kk++) { 7794 PetscInt *nnz = (c == a) ? d_nnz : o_nnz; 7795 const PetscInt *cols1, *cols2; 7796 7797 for (PetscInt brow = 0, nc1, nc2, ok = 1; brow < nloc * bs; brow += bs) { // block rows 7798 PetscCall(MatGetRow(c, brow, &nc2, &cols2, NULL)); 7799 nnz[brow / bs] = nc2 / bs; 7800 if (nc2 % bs) ok = 0; 7801 if (nnz[brow / bs] > nmax) nmax = nnz[brow / bs]; 7802 for (PetscInt ii = 1; ii < bs; ii++) { // check for non-dense blocks 7803 PetscCall(MatGetRow(c, brow + ii, &nc1, &cols1, NULL)); 7804 if (nc1 != nc2) ok = 0; 7805 else { 7806 for (PetscInt jj = 0; jj < nc1 && ok == 1; jj++) { 7807 if (cols1[jj] != cols2[jj]) ok = 0; 7808 if (cols1[jj] % bs != jj % bs) ok = 0; 7809 } 7810 } 7811 PetscCall(MatRestoreRow(c, brow + ii, &nc1, &cols1, NULL)); 7812 } 7813 PetscCall(MatRestoreRow(c, brow, &nc2, &cols2, NULL)); 7814 if (!ok) { 7815 PetscCall(PetscFree2(d_nnz, o_nnz)); 7816 PetscCall(PetscInfo(Amat, "Found sparse blocks - revert to slow method\n")); 7817 goto old_bs; 7818 } 7819 } 7820 } 7821 PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz)); 7822 PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz)); 7823 PetscCall(PetscFree2(d_nnz, o_nnz)); 7824 PetscCall(PetscMalloc2(nmax, &AA, nmax, &AJ)); 7825 // diag 7826 for (PetscInt brow = 0, n, grow; brow < nloc * bs; brow += bs) { // block rows 7827 Mat_SeqAIJ *aseq = (Mat_SeqAIJ *)a->data; 7828 7829 ai = aseq->i; 7830 n = ai[brow + 1] - ai[brow]; 7831 aj = aseq->j + ai[brow]; 7832 for (PetscInt k = 0; k < n; k += bs) { // block columns 7833 AJ[k / bs] = aj[k] / bs + Istart / bs; // diag starts at (Istart,Istart) 7834 val = 0; 7835 if (index_size == 0) { 7836 for (PetscInt ii = 0; ii < bs; ii++) { // rows in block 7837 aa = aseq->a + ai[brow + ii] + k; 7838 for (PetscInt jj = 
0; jj < bs; jj++) { // columns in block 7839 val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm 7840 } 7841 } 7842 } else { // use (index,index) value if provided 7843 for (PetscInt iii = 0; iii < index_size; iii++) { // rows in block 7844 PetscInt ii = index[iii]; 7845 aa = aseq->a + ai[brow + ii] + k; 7846 for (PetscInt jjj = 0; jjj < index_size; jjj++) { // columns in block 7847 PetscInt jj = index[jjj]; 7848 val += PetscAbs(PetscRealPart(aa[jj])); 7849 } 7850 } 7851 } 7852 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%" PetscInt_FMT ") >= nmax (%" PetscInt_FMT ")", k / bs, nmax); 7853 AA[k / bs] = val; 7854 } 7855 grow = Istart / bs + brow / bs; 7856 PetscCall(MatSetValues(Gmat, 1, &grow, n / bs, AJ, AA, ADD_VALUES)); 7857 } 7858 // off-diag 7859 if (ismpiaij) { 7860 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Amat->data; 7861 const PetscScalar *vals; 7862 const PetscInt *cols, *garray = aij->garray; 7863 7864 PetscCheck(garray, PETSC_COMM_SELF, PETSC_ERR_USER, "No garray ?"); 7865 for (PetscInt brow = 0, grow; brow < nloc * bs; brow += bs) { // block rows 7866 PetscCall(MatGetRow(b, brow, &ncols, &cols, NULL)); 7867 for (PetscInt k = 0, cidx = 0; k < ncols; k += bs, cidx++) { 7868 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs >= nmax"); 7869 AA[k / bs] = 0; 7870 AJ[cidx] = garray[cols[k]] / bs; 7871 } 7872 nc = ncols / bs; 7873 PetscCall(MatRestoreRow(b, brow, &ncols, &cols, NULL)); 7874 if (index_size == 0) { 7875 for (PetscInt ii = 0; ii < bs; ii++) { // rows in block 7876 PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals)); 7877 for (PetscInt k = 0; k < ncols; k += bs) { 7878 for (PetscInt jj = 0; jj < bs; jj++) { // cols in block 7879 PetscAssert(k / bs < nmax, comm, PETSC_ERR_USER, "k / bs (%" PetscInt_FMT ") >= nmax (%" PetscInt_FMT ")", k / bs, nmax); 7880 AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj])); 7881 } 7882 } 7883 PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals)); 7884 } 7885 } else { // use (index,index) 
value if provided 7886 for (PetscInt iii = 0; iii < index_size; iii++) { // rows in block 7887 PetscInt ii = index[iii]; 7888 PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals)); 7889 for (PetscInt k = 0; k < ncols; k += bs) { 7890 for (PetscInt jjj = 0; jjj < index_size; jjj++) { // cols in block 7891 PetscInt jj = index[jjj]; 7892 AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj])); 7893 } 7894 } 7895 PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals)); 7896 } 7897 } 7898 grow = Istart / bs + brow / bs; 7899 PetscCall(MatSetValues(Gmat, 1, &grow, nc, AJ, AA, ADD_VALUES)); 7900 } 7901 } 7902 PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY)); 7903 PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY)); 7904 PetscCall(PetscFree2(AA, AJ)); 7905 } else { 7906 const PetscScalar *vals; 7907 const PetscInt *idx; 7908 PetscInt *d_nnz, *o_nnz, *w0, *w1, *w2; 7909 old_bs: 7910 /* 7911 Determine the preallocation needed for the scalar matrix derived from the vector matrix. 7912 */ 7913 PetscCall(PetscInfo(Amat, "OLD bs>1 CreateGraph\n")); 7914 PetscCall(PetscMalloc2(nloc, &d_nnz, (isseqaij ? 
0 : nloc), &o_nnz)); 7915 if (isseqaij) { 7916 PetscInt max_d_nnz; 7917 7918 /* 7919 Determine exact preallocation count for (sequential) scalar matrix 7920 */ 7921 PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat, &max_d_nnz)); 7922 max_d_nnz = PetscMin(nloc, bs * max_d_nnz); 7923 PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2)); 7924 for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) PetscCall(MatCollapseRows(Amat, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL)); 7925 PetscCall(PetscFree3(w0, w1, w2)); 7926 } else if (ismpiaij) { 7927 Mat Daij, Oaij; 7928 const PetscInt *garray; 7929 PetscInt max_d_nnz; 7930 7931 PetscCall(MatMPIAIJGetSeqAIJ(Amat, &Daij, &Oaij, &garray)); 7932 /* 7933 Determine exact preallocation count for diagonal block portion of scalar matrix 7934 */ 7935 PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij, &max_d_nnz)); 7936 max_d_nnz = PetscMin(nloc, bs * max_d_nnz); 7937 PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2)); 7938 for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) PetscCall(MatCollapseRows(Daij, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL)); 7939 PetscCall(PetscFree3(w0, w1, w2)); 7940 /* 7941 Over estimate (usually grossly over), preallocation count for off-diagonal portion of scalar matrix 7942 */ 7943 for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) { 7944 o_nnz[jj] = 0; 7945 for (kk = 0; kk < bs; kk++) { /* rows that get collapsed to a single row */ 7946 PetscCall(MatGetRow(Oaij, Ii + kk, &ncols, NULL, NULL)); 7947 o_nnz[jj] += ncols; 7948 PetscCall(MatRestoreRow(Oaij, Ii + kk, &ncols, NULL, NULL)); 7949 } 7950 if (o_nnz[jj] > (NN / bs - nloc)) o_nnz[jj] = NN / bs - nloc; 7951 } 7952 } else SETERRQ(comm, PETSC_ERR_USER, "Require AIJ matrix type"); 7953 /* get scalar copy (norms) of matrix */ 7954 PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz)); 7955 PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz)); 7956 PetscCall(PetscFree2(d_nnz, o_nnz)); 7957 for (Ii = Istart; Ii < Iend; Ii++) { 
7958 PetscInt dest_row = Ii / bs; 7959 7960 PetscCall(MatGetRow(Amat, Ii, &ncols, &idx, &vals)); 7961 for (jj = 0; jj < ncols; jj++) { 7962 PetscInt dest_col = idx[jj] / bs; 7963 PetscScalar sv = PetscAbs(PetscRealPart(vals[jj])); 7964 7965 PetscCall(MatSetValues(Gmat, 1, &dest_row, 1, &dest_col, &sv, ADD_VALUES)); 7966 } 7967 PetscCall(MatRestoreRow(Amat, Ii, &ncols, &idx, &vals)); 7968 } 7969 PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY)); 7970 PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY)); 7971 } 7972 } else { 7973 if (symmetrize || filter >= 0 || scale) PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat)); 7974 else { 7975 Gmat = Amat; 7976 PetscCall(PetscObjectReference((PetscObject)Gmat)); 7977 } 7978 if (isseqaij) { 7979 a = Gmat; 7980 b = NULL; 7981 } else { 7982 Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data; 7983 a = d->A; 7984 b = d->B; 7985 } 7986 if (filter >= 0 || scale) { 7987 /* take absolute value of each entry */ 7988 for (c = a, kk = 0; c && kk < 2; c = b, kk++) { 7989 MatInfo info; 7990 PetscScalar *avals; 7991 7992 PetscCall(MatGetInfo(c, MAT_LOCAL, &info)); 7993 PetscCall(MatSeqAIJGetArray(c, &avals)); 7994 for (int jj = 0; jj < info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]); 7995 PetscCall(MatSeqAIJRestoreArray(c, &avals)); 7996 } 7997 } 7998 } 7999 if (symmetrize) { 8000 PetscBool isset, issym; 8001 8002 PetscCall(MatIsSymmetricKnown(Amat, &isset, &issym)); 8003 if (!isset || !issym) { 8004 Mat matTrans; 8005 8006 PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans)); 8007 PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric == PETSC_BOOL3_TRUE ? 
SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN)); 8008 PetscCall(MatDestroy(&matTrans)); 8009 } 8010 PetscCall(MatSetOption(Gmat, MAT_SYMMETRIC, PETSC_TRUE)); 8011 } else if (Amat != Gmat) PetscCall(MatPropagateSymmetryOptions(Amat, Gmat)); 8012 if (scale) { 8013 /* scale c for all diagonal values = 1 or -1 */ 8014 Vec diag; 8015 8016 PetscCall(MatCreateVecs(Gmat, &diag, NULL)); 8017 PetscCall(MatGetDiagonal(Gmat, diag)); 8018 PetscCall(VecReciprocal(diag)); 8019 PetscCall(VecSqrtAbs(diag)); 8020 PetscCall(MatDiagonalScale(Gmat, diag, diag)); 8021 PetscCall(VecDestroy(&diag)); 8022 } 8023 PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view")); 8024 if (filter >= 0) { 8025 PetscCall(MatFilter(Gmat, filter, PETSC_TRUE, PETSC_TRUE)); 8026 PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_filter_graph_view")); 8027 } 8028 *a_Gmat = Gmat; 8029 PetscFunctionReturn(PETSC_SUCCESS); 8030 } 8031 8032 /* 8033 Special version for direct calls from Fortran 8034 */ 8035 8036 /* Change these macros so can be used in void function */ 8037 /* Identical to PetscCallVoid, except it assigns to *_ierr */ 8038 #undef PetscCall 8039 #define PetscCall(...) \ 8040 do { \ 8041 PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \ 8042 if (PetscUnlikely(ierr_msv_mpiaij)) { \ 8043 *_ierr = PetscError(PETSC_COMM_SELF, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr_msv_mpiaij, PETSC_ERROR_REPEAT, " "); \ 8044 return; \ 8045 } \ 8046 } while (0) 8047 8048 #undef SETERRQ 8049 #define SETERRQ(comm, ierr, ...) 
\ 8050 do { \ 8051 *_ierr = PetscError(comm, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr, PETSC_ERROR_INITIAL, __VA_ARGS__); \ 8052 return; \ 8053 } while (0) 8054 8055 #if defined(PETSC_HAVE_FORTRAN_CAPS) 8056 #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ 8057 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 8058 #define matsetvaluesmpiaij_ matsetvaluesmpiaij 8059 #else 8060 #endif 8061 PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat, PetscInt *mm, const PetscInt im[], PetscInt *mn, const PetscInt in[], const PetscScalar v[], InsertMode *maddv, PetscErrorCode *_ierr) 8062 { 8063 Mat mat = *mmat; 8064 PetscInt m = *mm, n = *mn; 8065 InsertMode addv = *maddv; 8066 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 8067 PetscScalar value; 8068 8069 MatCheckPreallocated(mat, 1); 8070 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 8071 else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values"); 8072 { 8073 PetscInt i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend; 8074 PetscInt cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col; 8075 PetscBool roworiented = aij->roworiented; 8076 8077 /* Some Variables required in the macro */ 8078 Mat A = aij->A; 8079 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 8080 PetscInt *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j; 8081 MatScalar *aa; 8082 PetscBool ignorezeroentries = ((a->ignorezeroentries && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE); 8083 Mat B = aij->B; 8084 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 8085 PetscInt *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n; 8086 MatScalar *ba; 8087 /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we 8088 * cannot use "#if defined" inside a macro. 
*/ 8089 PETSC_UNUSED PetscBool inserted = PETSC_FALSE; 8090 8091 PetscInt *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2; 8092 PetscInt nonew = a->nonew; 8093 MatScalar *ap1, *ap2; 8094 8095 PetscFunctionBegin; 8096 PetscCall(MatSeqAIJGetArray(A, &aa)); 8097 PetscCall(MatSeqAIJGetArray(B, &ba)); 8098 for (i = 0; i < m; i++) { 8099 if (im[i] < 0) continue; 8100 PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1); 8101 if (im[i] >= rstart && im[i] < rend) { 8102 row = im[i] - rstart; 8103 lastcol1 = -1; 8104 rp1 = aj + ai[row]; 8105 ap1 = aa + ai[row]; 8106 rmax1 = aimax[row]; 8107 nrow1 = ailen[row]; 8108 low1 = 0; 8109 high1 = nrow1; 8110 lastcol2 = -1; 8111 rp2 = bj + bi[row]; 8112 ap2 = ba + bi[row]; 8113 rmax2 = bimax[row]; 8114 nrow2 = bilen[row]; 8115 low2 = 0; 8116 high2 = nrow2; 8117 8118 for (j = 0; j < n; j++) { 8119 if (roworiented) value = v[i * n + j]; 8120 else value = v[i + j * m]; 8121 if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue; 8122 if (in[j] >= cstart && in[j] < cend) { 8123 col = in[j] - cstart; 8124 MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]); 8125 } else if (in[j] < 0) continue; 8126 else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) { 8127 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1); 8128 } else { 8129 if (mat->was_assembled) { 8130 if (!aij->colmap) PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); 8131 #if defined(PETSC_USE_CTABLE) 8132 PetscCall(PetscHMapIGetWithDefault(aij->colmap, in[j] + 1, 0, &col)); 8133 col--; 8134 #else 8135 col = aij->colmap[in[j]] - 1; 8136 #endif 8137 if (col < 0 && !((Mat_SeqAIJ *)aij->A->data)->nonew) { 8138 PetscCall(MatDisAssemble_MPIAIJ(mat, PETSC_FALSE)); 8139 col = in[j]; 8140 /* 
Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */ 8141 B = aij->B; 8142 b = (Mat_SeqAIJ *)B->data; 8143 bimax = b->imax; 8144 bi = b->i; 8145 bilen = b->ilen; 8146 bj = b->j; 8147 rp2 = bj + bi[row]; 8148 ap2 = ba + bi[row]; 8149 rmax2 = bimax[row]; 8150 nrow2 = bilen[row]; 8151 low2 = 0; 8152 high2 = nrow2; 8153 bm = aij->B->rmap->n; 8154 ba = b->a; 8155 inserted = PETSC_FALSE; 8156 } 8157 } else col = in[j]; 8158 MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]); 8159 } 8160 } 8161 } else if (!aij->donotstash) { 8162 if (roworiented) { 8163 PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 8164 } else { 8165 PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES)))); 8166 } 8167 } 8168 } 8169 PetscCall(MatSeqAIJRestoreArray(A, &aa)); 8170 PetscCall(MatSeqAIJRestoreArray(B, &ba)); 8171 } 8172 PetscFunctionReturnVoid(); 8173 } 8174 8175 /* Undefining these here since they were redefined from their original definition above! No 8176 * other PETSc functions should be defined past this point, as it is impossible to recover the 8177 * original definitions */ 8178 #undef PetscCall 8179 #undef SETERRQ 8180