#include <../src/mat/impls/aij/mpi/mpiaij.h> /*I "petscmat.h" I*/
#include <petsc/private/vecimpl.h>
#include <petsc/private/sfimpl.h>
#include <petsc/private/isimpl.h>
#include <petscblaslapack.h>
#include <petscsf.h>
#include <petsc/private/hashmapi.h>

/*
  Returns the row/column IJ structure of the merged (diagonal + off-diagonal) local rows.
  The temporary sequential matrix B is composed on A under the key "MatGetRowIJ_MPIAIJ" so
  that it outlives the MatDestroy() below (the composition holds a reference) and can be
  retrieved again by MatRestoreRowIJ_MPIAIJ().
*/
PetscErrorCode MatGetRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) {
  Mat B;

  PetscFunctionBegin;
  PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &B));
  PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject)B));
  PetscCall(MatGetRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
  PetscCall(MatDestroy(&B)); /* the composition above still references B */
  PetscFunctionReturn(0);
}

/*
  Releases the IJ structure obtained with MatGetRowIJ_MPIAIJ() and drops the composed
  temporary matrix (composing NULL removes the reference, destroying B).
*/
PetscErrorCode MatRestoreRowIJ_MPIAIJ(Mat A, PetscInt oshift, PetscBool symmetric, PetscBool inodecompressed, PetscInt *m, const PetscInt *ia[], const PetscInt *ja[], PetscBool *done) {
  Mat B;

  PetscFunctionBegin;
  PetscCall(PetscObjectQuery((PetscObject)A, "MatGetRowIJ_MPIAIJ", (PetscObject *)&B));
  PetscCall(MatRestoreRowIJ(B, oshift, symmetric, inodecompressed, m, ia, ja, done));
  PetscCall(PetscObjectCompose((PetscObject)A, "MatGetRowIJ_MPIAIJ", NULL));
  PetscFunctionReturn(0);
}

/*MC
   MATAIJ - MATAIJ = "aij" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJ when constructed with a single process communicator,
   and MATMPIAIJ otherwise. As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
   for communicators controlling multiple processes. It is recommended that you call both of
   the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aij - sets the matrix type to "aij" during a call to MatSetFromOptions()

  Developer Notes:
    Subclasses include MATAIJCUSPARSE, MATAIJPERM, MATAIJSELL, MATAIJMKL, MATAIJCRL, and also automatically switches over to use inodes when
   enough exist.

  Level: beginner

.seealso: `MatCreateAIJ()`, `MatCreateSeqAIJ()`, `MATSEQAIJ`, `MATMPIAIJ`
M*/

/*MC
   MATAIJCRL - MATAIJCRL = "aijcrl" - A matrix type to be used for sparse matrices.

   This matrix type is identical to MATSEQAIJCRL when constructed with a single process communicator,
   and MATMPIAIJCRL otherwise. As a result, for single process communicators,
   MatSeqAIJSetPreallocation() is supported, and similarly MatMPIAIJSetPreallocation() is supported
  for communicators controlling multiple processes. It is recommended that you call both of
  the above preallocation routines for simplicity.

   Options Database Keys:
. -mat_type aijcrl - sets the matrix type to "aijcrl" during a call to MatSetFromOptions()

  Level: beginner

.seealso: `MatCreateMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL`, `MATSEQAIJCRL`, `MATMPIAIJCRL`
M*/

/* Binds (or unbinds) the matrix and its helper objects to the CPU. */
static PetscErrorCode MatBindToCPU_MPIAIJ(Mat A, PetscBool flg) {
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
#if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_VIENNACL)
  /* boundtocpu is only meaningful when a device backend is available */
  A->boundtocpu = flg;
#endif
  if (a->A) PetscCall(MatBindToCPU(a->A, flg));
  if (a->B) PetscCall(MatBindToCPU(a->B, flg));

  /* In addition to binding the diagonal and off-diagonal matrices, bind the local vectors used for matrix-vector products.
   * This maybe seems a little odd for a MatBindToCPU() call to do, but it makes no sense for the binding of these vectors
   * to differ from the parent matrix.
   */
  if (a->lvec) PetscCall(VecBindToCPU(a->lvec, flg));
  if (a->diag) PetscCall(VecBindToCPU(a->diag, flg));

  PetscFunctionReturn(0);
}

/* Propagates block sizes to the diagonal (A) and off-diagonal (B) blocks.
   B always gets column block size 1 because its columns are the compressed
   off-process columns, which carry no block structure. */
PetscErrorCode MatSetBlockSizes_MPIAIJ(Mat M, PetscInt rbs, PetscInt cbs) {
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)M->data;

  PetscFunctionBegin;
  if (mat->A) {
    PetscCall(MatSetBlockSizes(mat->A, rbs, cbs));
    PetscCall(MatSetBlockSizes(mat->B, rbs, 1));
  }
  PetscFunctionReturn(0);
}

/*
  Builds an index set of the locally owned rows that contain at least one stored nonzero value.
  Collective: if no process has an all-zero row (global count n0rows == 0), *keptrows is left
  NULL, meaning "all rows kept".
*/
PetscErrorCode MatFindNonzeroRows_MPIAIJ(Mat M, IS *keptrows) {
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ *)M->data;
  Mat_SeqAIJ      *a   = (Mat_SeqAIJ *)mat->A->data;
  Mat_SeqAIJ      *b   = (Mat_SeqAIJ *)mat->B->data;
  const PetscInt  *ia, *ib;
  const MatScalar *aa, *bb, *aav, *bav;
  PetscInt         na, nb, i, j, *rows, cnt = 0, n0rows;
  PetscInt         m = M->rmap->n, rstart = M->rmap->rstart;

  PetscFunctionBegin;
  *keptrows = NULL;

  ia = a->i;
  ib = b->i;
  PetscCall(MatSeqAIJGetArrayRead(mat->A, &aav));
  PetscCall(MatSeqAIJGetArrayRead(mat->B, &bav));
  /* first pass: count rows whose stored entries are all (structurally or numerically) zero */
  for (i = 0; i < m; i++) {
    na = ia[i + 1] - ia[i];
    nb = ib[i + 1] - ib[i];
    if (!na && !nb) {
      cnt++;
      goto ok1;
    }
    aa = aav + ia[i];
    for (j = 0; j < na; j++) {
      if (aa[j] != 0.0) goto ok1;
    }
    bb = bav + ib[i];
    for (j = 0; j < nb; j++) {
      if (bb[j] != 0.0) goto ok1;
    }
    cnt++;
  ok1:;
  }
  PetscCall(MPIU_Allreduce(&cnt, &n0rows, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)M)));
  if (!n0rows) {
    PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
    PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
    PetscFunctionReturn(0);
  }
  /* second pass: collect the global indices of the rows that do have a nonzero */
  PetscCall(PetscMalloc1(M->rmap->n - cnt, &rows));
  cnt = 0;
  for (i = 0; i < m; i++) {
    na = ia[i + 1] - ia[i];
    nb = ib[i + 1] - ib[i];
    if (!na && !nb) continue;
    aa = aav + ia[i];
    for (j = 0; j < na; j++) {
      if (aa[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
    bb = bav + ib[i];
    for (j = 0; j < nb; j++) {
      if (bb[j] != 0.0) {
        rows[cnt++] = rstart + i;
        goto ok2;
      }
    }
  ok2:;
  }
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), cnt, rows, PETSC_OWN_POINTER, keptrows));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->A, &aav));
  PetscCall(MatSeqAIJRestoreArrayRead(mat->B, &bav));
  PetscFunctionReturn(0);
}

/* Sets the diagonal of Y from vector D; fast path goes straight to the diagonal
   block when Y is assembled and its row/column layouts are congruent. */
PetscErrorCode MatDiagonalSet_MPIAIJ(Mat Y, Vec D, InsertMode is) {
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)Y->data;
  PetscBool   cong;

  PetscFunctionBegin;
  PetscCall(MatHasCongruentLayouts(Y, &cong));
  if (Y->assembled && cong) {
    PetscCall(MatDiagonalSet(aij->A, D, is));
  } else {
    PetscCall(MatDiagonalSet_Default(Y, D, is));
  }
  PetscFunctionReturn(0);
}

/* Returns an IS of global row indices whose diagonal entry is (structurally or
   numerically) zero; only the local diagonal block needs to be searched. */
PetscErrorCode MatFindZeroDiagonals_MPIAIJ(Mat M, IS *zrows) {
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)M->data;
  PetscInt    i, rstart, nrows, *rows;

  PetscFunctionBegin;
  *zrows = NULL;
  PetscCall(MatFindZeroDiagonals_SeqAIJ_Private(aij->A, &nrows, &rows));
  PetscCall(MatGetOwnershipRange(M, &rstart, NULL));
  for (i = 0; i < nrows; i++) rows[i] += rstart; /* local -> global row indices */
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)M), nrows, rows, PETSC_OWN_POINTER, zrows));
  PetscFunctionReturn(0);
}

/*
  Computes per-column reductions (norms, sums, or means) over the whole matrix.
  Each rank accumulates its local contributions into a length-n work array indexed by
  global column (diagonal block offset by cmap->rstart, off-diagonal mapped via garray),
  then an Allreduce combines them (MAX for the infinity norm, SUM otherwise).
*/
PetscErrorCode MatGetColumnReductions_MPIAIJ(Mat A, PetscInt type, PetscReal *reductions) {
  Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)A->data;
  PetscInt           i, m, n, *garray = aij->garray;
  Mat_SeqAIJ        *a_aij = (Mat_SeqAIJ *)aij->A->data;
  Mat_SeqAIJ        *b_aij = (Mat_SeqAIJ *)aij->B->data;
  PetscReal         *work;
  const PetscScalar *dummy;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A, &m, &n));
  PetscCall(PetscCalloc1(n, &work));
  /* Get/Restore pairs with no use of the array: presumably to force any device copies of
     the values back to the host before a_aij->a / b_aij->a are read below -- TODO confirm */
  PetscCall(MatSeqAIJGetArrayRead(aij->A, &dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &dummy));
  PetscCall(MatSeqAIJGetArrayRead(aij->B, &dummy));
  PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &dummy));
  if (type == NORM_2) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) { work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i] * a_aij->a[i]); }
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) { work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i] * b_aij->a[i]); }
  } else if (type == NORM_1) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) { work[A->cmap->rstart + a_aij->j[i]] += PetscAbsScalar(a_aij->a[i]); }
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) { work[garray[b_aij->j[i]]] += PetscAbsScalar(b_aij->a[i]); }
  } else if (type == NORM_INFINITY) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) { work[A->cmap->rstart + a_aij->j[i]] = PetscMax(PetscAbsScalar(a_aij->a[i]), work[A->cmap->rstart + a_aij->j[i]]); }
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) { work[garray[b_aij->j[i]]] = PetscMax(PetscAbsScalar(b_aij->a[i]), work[garray[b_aij->j[i]]]); }
  } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) { work[A->cmap->rstart + a_aij->j[i]] += PetscRealPart(a_aij->a[i]); }
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) { work[garray[b_aij->j[i]]] += PetscRealPart(b_aij->a[i]); }
  } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < a_aij->i[aij->A->rmap->n]; i++) { work[A->cmap->rstart + a_aij->j[i]] += PetscImaginaryPart(a_aij->a[i]); }
    for (i = 0; i < b_aij->i[aij->B->rmap->n]; i++) { work[garray[b_aij->j[i]]] += PetscImaginaryPart(b_aij->a[i]); }
  } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
  if (type == NORM_INFINITY) {
    PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
  } else {
    PetscCall(MPIU_Allreduce(work, reductions, n, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscCall(PetscFree(work));
  if (type == NORM_2) {
    for (i = 0; i < n; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < n; i++) reductions[i] /= m; /* mean over the global number of rows */
  }
  PetscFunctionReturn(0);
}

/*
  Builds an IS of local rows that have entries outside the block diagonal: the union of
  the off-block-diagonal rows of the diagonal block A and the nonzero rows of the
  off-diagonal block B, sorted with duplicates removed, shifted to global indices.
*/
PetscErrorCode MatFindOffBlockDiagonalEntries_MPIAIJ(Mat A, IS *is) {
  Mat_MPIAIJ     *a = (Mat_MPIAIJ *)A->data;
  IS              sis, gis;
  const PetscInt *isis, *igis;
  PetscInt        n, *iis, nsis, ngis, rstart, i;

  PetscFunctionBegin;
  PetscCall(MatFindOffBlockDiagonalEntries(a->A, &sis));
  PetscCall(MatFindNonzeroRows(a->B, &gis));
  PetscCall(ISGetSize(gis, &ngis));
  PetscCall(ISGetSize(sis, &nsis));
  PetscCall(ISGetIndices(sis, &isis));
  PetscCall(ISGetIndices(gis, &igis));

  PetscCall(PetscMalloc1(ngis + nsis, &iis));
  PetscCall(PetscArraycpy(iis, igis, ngis));
  PetscCall(PetscArraycpy(iis + ngis, isis, nsis));
  n = ngis + nsis;
  PetscCall(PetscSortRemoveDupsInt(&n, iis));
  PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
  for (i = 0; i < n; i++) iis[i] += rstart;
  PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)A), n, iis, PETSC_OWN_POINTER, is));

  PetscCall(ISRestoreIndices(sis, &isis));
  PetscCall(ISRestoreIndices(gis, &igis));
  PetscCall(ISDestroy(&sis));
  PetscCall(ISDestroy(&gis));
  PetscFunctionReturn(0);
}

/*
    Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix.  When PETSC_USE_CTABLE is used this is scalable at
  a slightly higher hash table cost; without it it is not scalable (each processor
  has an order N integer array) but is fast to access.
*/
PetscErrorCode MatCreateColmap_MPIAIJ_Private(Mat mat) {
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt    n = aij->B->cmap->n, i;

  PetscFunctionBegin;
  PetscCheck(!n || aij->garray, PETSC_COMM_SELF, PETSC_ERR_PLIB, "MPIAIJ Matrix was assembled but is missing garray");
#if defined(PETSC_USE_CTABLE)
  /* table stores (global col + 1) -> (local col + 1); +1 keeps 0 free as "not found" */
  PetscCall(PetscTableCreate(n, mat->cmap->N + 1, &aij->colmap));
  for (i = 0; i < n; i++) { PetscCall(PetscTableAdd(aij->colmap, aij->garray[i] + 1, i + 1, INSERT_VALUES)); }
#else
  /* dense array of length N; entry 0 means "column not present locally" */
  PetscCall(PetscCalloc1(mat->cmap->N + 1, &aij->colmap));
  PetscCall(PetscLogObjectMemory((PetscObject)mat, (mat->cmap->N + 1) * sizeof(PetscInt)));
  for (i = 0; i < n; i++) aij->colmap[aij->garray[i]] = i + 1;
#endif
  PetscFunctionReturn(0);
}

/* Inserts (row,col,value) into the diagonal block A's row "row" (local indices).
   Expands in the caller's scope: relies on rp1/ap1/nrow1/low1/high1/lastcol1/aimax/ailen/
   nonew/ignorezeroentries etc. being set up by MatSetValues_MPIAIJ. Binary-search then
   linear scan locates the column; a missing entry triggers reallocation unless nonew. */
#define MatSetValues_SeqAIJ_A_Private(row, col, value, addv, orow, ocol) \
  { \
    if (col <= lastcol1) low1 = 0; \
    else high1 = nrow1; \
    lastcol1 = col; \
    while (high1 - low1 > 5) { \
      t = (low1 + high1) / 2; \
      if (rp1[t] > col) high1 = t; \
      else low1 = t; \
    } \
    for (_i = low1; _i < high1; _i++) { \
      if (rp1[_i] > col) break; \
      if (rp1[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap1[_i] += value; \
          /* Not sure LogFlops will slow down the code or not */ \
          (void)PetscLogFlops(1.0); \
        } else ap1[_i] = value; \
        goto a_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries && row != col) { \
      low1  = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    if (nonew == 1) { \
      low1  = 0; \
      high1 = nrow1; \
      goto a_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A, am, 1, nrow1, row, col, rmax1, aa, ai, aj, rp1, ap1, aimax, nonew, MatScalar); \
    N = nrow1++ - 1; \
    a->nz++; \
    high1++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp1 + _i + 1, rp1 + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap1 + _i + 1, ap1 + _i, N - _i + 1)); \
    rp1[_i] = col; \
    ap1[_i] = value; \
    A->nonzerostate++; \
  a_noinsert:; \
    ailen[row] = nrow1; \
  }

/* Same as MatSetValues_SeqAIJ_A_Private but for the off-diagonal block B
   (rp2/ap2/nrow2/... variable set, b_noinsert label). */
#define MatSetValues_SeqAIJ_B_Private(row, col, value, addv, orow, ocol) \
  { \
    if (col <= lastcol2) low2 = 0; \
    else high2 = nrow2; \
    lastcol2 = col; \
    while (high2 - low2 > 5) { \
      t = (low2 + high2) / 2; \
      if (rp2[t] > col) high2 = t; \
      else low2 = t; \
    } \
    for (_i = low2; _i < high2; _i++) { \
      if (rp2[_i] > col) break; \
      if (rp2[_i] == col) { \
        if (addv == ADD_VALUES) { \
          ap2[_i] += value; \
          (void)PetscLogFlops(1.0); \
        } else ap2[_i] = value; \
        goto b_noinsert; \
      } \
    } \
    if (value == 0.0 && ignorezeroentries) { \
      low2  = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    if (nonew == 1) { \
      low2  = 0; \
      high2 = nrow2; \
      goto b_noinsert; \
    } \
    PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B, bm, 1, nrow2, row, col, rmax2, ba, bi, bj, rp2, ap2, bimax, nonew, MatScalar); \
    N = nrow2++ - 1; \
    b->nz++; \
    high2++; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp2 + _i + 1, rp2 + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap2 + _i + 1, ap2 + _i, N - _i + 1)); \
    rp2[_i] = col; \
    ap2[_i] = value; \
    B->nonzerostate++; \
  b_noinsert:; \
    bilen[row] = nrow2; \
  }

/*
  Overwrites the values of one locally owned row, given in the global column order
  (left-of-diagonal off-block entries, then diagonal block, then right-of-diagonal
  off-block entries). The row's nonzero pattern must already exist.
*/
PetscErrorCode MatSetValuesRow_MPIAIJ(Mat A, PetscInt row, const PetscScalar v[]) {
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ  *a = (Mat_SeqAIJ *)mat->A->data, *b = (Mat_SeqAIJ *)mat->B->data;
  PetscInt     l, *garray = mat->garray, diag;
  PetscScalar *aa, *ba;

  PetscFunctionBegin;
  /* code only works for square matrices A */

  /* find size of row to the left of the diagonal part */
  PetscCall(MatGetOwnershipRange(A, &diag, NULL));
  row = row - diag; /* global -> local row */
  for (l = 0; l < b->i[row + 1] - b->i[row]; l++) {
    if (garray[b->j[b->i[row] + l]] > diag) break;
  }
  if (l) {
    PetscCall(MatSeqAIJGetArray(mat->B, &ba));
    PetscCall(PetscArraycpy(ba + b->i[row], v, l));
    PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
  }

  /* diagonal part */
  if (a->i[row + 1] - a->i[row]) {
    PetscCall(MatSeqAIJGetArray(mat->A, &aa));
    PetscCall(PetscArraycpy(aa + a->i[row], v + l, (a->i[row + 1] - a->i[row])));
    PetscCall(MatSeqAIJRestoreArray(mat->A, &aa));
  }

  /* right of diagonal part */
  if (b->i[row + 1] - b->i[row] - l) {
    PetscCall(MatSeqAIJGetArray(mat->B, &ba));
    PetscCall(PetscArraycpy(ba + b->i[row] + l, v + l + a->i[row + 1] - a->i[row], b->i[row + 1] - b->i[row] - l));
    PetscCall(MatSeqAIJRestoreArray(mat->B, &ba));
  }
  PetscFunctionReturn(0);
}

/*
  Inserts/adds a logically dense block of values. Locally owned rows go straight into the
  diagonal (A) or off-diagonal (B) sequential blocks via the *_Private macros above;
  rows owned by other processes are stashed for communication during assembly.
  If a new off-process column appears in B after assembly, B is disassembled back to
  global column indices first (MatDisAssemble_MPIAIJ) and the B-related locals reloaded.
*/
PetscErrorCode MatSetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) {
  Mat_MPIAIJ *aij   = (Mat_MPIAIJ *)mat->data;
  PetscScalar value = 0.0;
  PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
  PetscBool   roworiented = aij->roworiented;

  /* Some Variables required in the macro */
  Mat         A     = aij->A;
  Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
  PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
  PetscBool   ignorezeroentries = a->ignorezeroentries;
  Mat         B                 = aij->B;
  Mat_SeqAIJ *b                 = (Mat_SeqAIJ *)B->data;
  PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
  MatScalar  *aa, *ba;
  PetscInt   *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
  PetscInt    nonew;
  MatScalar  *ap1, *ap2;

  PetscFunctionBegin;
  PetscCall(MatSeqAIJGetArray(A, &aa));
  PetscCall(MatSeqAIJGetArray(B, &ba));
  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue; /* negative rows are ignored */
    PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    if (im[i] >= rstart && im[i] < rend) {
      /* locally owned row: set up the macro state for both blocks */
      row      = im[i] - rstart;
      lastcol1 = -1;
      rp1      = aj + ai[row];
      ap1      = aa + ai[row];
      rmax1    = aimax[row];
      nrow1    = ailen[row];
      low1     = 0;
      high1    = nrow1;
      lastcol2 = -1;
      rp2      = bj + bi[row];
      ap2      = ba + bi[row];
      rmax2    = bimax[row];
      nrow2    = bilen[row];
      low2     = 0;
      high2    = nrow2;

      for (j = 0; j < n; j++) {
        if (v) value = roworiented ? v[i * n + j] : v[i + j * m];
        if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
        if (in[j] >= cstart && in[j] < cend) {
          /* column falls in the diagonal block */
          col   = in[j] - cstart;
          nonew = a->nonew;
          MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
        } else if (in[j] < 0) {
          continue;
        } else {
          PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          if (mat->was_assembled) {
            if (!aij->colmap) { PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); }
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscTableFind(aij->colmap, in[j] + 1, &col)); /* map global col ids to local ones */
            col--;
#else
            col = aij->colmap[in[j]] - 1;
#endif
            if (col < 0 && !((Mat_SeqAIJ *)(aij->B->data))->nonew) { /* col < 0 means in[j] is a new col for B */
              PetscCall(MatDisAssemble_MPIAIJ(mat));                 /* Change aij->B from reduced/local format to expanded/global format */
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
              B     = aij->B;
              b     = (Mat_SeqAIJ *)B->data;
              bimax = b->imax;
              bi    = b->i;
              bilen = b->ilen;
              bj    = b->j;
              ba    = b->a;
              rp2   = bj + bi[row];
              ap2   = ba + bi[row];
              rmax2 = bimax[row];
              nrow2 = bilen[row];
              low2  = 0;
              high2 = nrow2;
              bm    = aij->B->rmap->n;
              ba    = b->a;
            } else if (col < 0 && !(ignorezeroentries && value == 0.0)) {
              if (1 == ((Mat_SeqAIJ *)(aij->B->data))->nonew) {
                PetscCall(PetscInfo(mat, "Skipping of insertion of new nonzero location in off-diagonal portion of matrix %g(%" PetscInt_FMT ",%" PetscInt_FMT ")\n", (double)PetscRealPart(value), im[i], in[j]));
              } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
            }
          } else col = in[j]; /* not yet assembled: B still uses global column indices */
          nonew = b->nonew;
          MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
        }
      }
    } else {
      /* off-process row: stash for MatAssemblyBegin/End communication */
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!aij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
  }
  PetscCall(MatSeqAIJRestoreArray(A, &aa)); /* aa, bb might have been free'd due to reallocation above. But we don't access them here */
  PetscCall(MatSeqAIJRestoreArray(B, &ba));
  PetscFunctionReturn(0);
}

/*
  This function sets the j and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
  The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
  No off-processor parts of the matrix are allowed here and mat->was_assembled has to be PETSC_FALSE.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat_Symbolic(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[]) {
  Mat_MPIAIJ *aij    = (Mat_MPIAIJ *)mat->data;
  Mat         A      = aij->A; /* diagonal part of the matrix */
  Mat         B      = aij->B; /* offdiagonal part of the matrix */
  Mat_SeqAIJ *a      = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJ *b      = (Mat_SeqAIJ *)B->data;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, col;
  PetscInt   *ailen = a->ilen, *aj = a->j;
  PetscInt   *bilen = b->ilen, *bj = b->j;
  PetscInt    am = aij->A->rmap->n, j;
  PetscInt    diag_so_far = 0, dnz;
  PetscInt    offd_so_far = 0, onz;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j = 0; j < am; j++) {
    dnz = onz = 0;
    /* Iterate over all non-zero columns of the current row */
    for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[diag_so_far++] = mat_j[col] - cstart;
        dnz++;
      } else { /* off-diagonal entries */
        bj[offd_so_far++] = mat_j[col];
        onz++;
      }
    }
    ailen[j] = dnz;
    bilen[j] = onz;
  }
  PetscFunctionReturn(0);
}

/*
  This function sets the local j, a and ilen arrays (of the diagonal and off-diagonal part) of an MPIAIJ-matrix.
  The values in mat_i have to be sorted and the values in mat_j have to be sorted for each row (CSR-like).
  No off-processor parts of the matrix are allowed here, they are set at a later point by MatSetValues_MPIAIJ.
  Also, mat->was_assembled has to be false, otherwise the statement aj[rowstart_diag+dnz_row] = mat_j[col] - cstart;
  would not be true and the more complex MatSetValues_MPIAIJ has to be used.
*/
PetscErrorCode MatSetValues_MPIAIJ_CopyFromCSRFormat(Mat mat, const PetscInt mat_j[], const PetscInt mat_i[], const PetscScalar mat_a[]) {
  Mat_MPIAIJ *aij  = (Mat_MPIAIJ *)mat->data;
  Mat         A    = aij->A; /* diagonal part of the matrix */
  Mat         B    = aij->B; /* offdiagonal part of the matrix */
  Mat_SeqAIJ *aijd = (Mat_SeqAIJ *)(aij->A)->data, *aijo = (Mat_SeqAIJ *)(aij->B)->data;
  Mat_SeqAIJ *a      = (Mat_SeqAIJ *)A->data;
  Mat_SeqAIJ *b      = (Mat_SeqAIJ *)B->data;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend;
  PetscInt   *ailen = a->ilen, *aj = a->j;
  PetscInt   *bilen = b->ilen, *bj = b->j;
  PetscInt    am          = aij->A->rmap->n, j;
  PetscInt   *full_diag_i = aijd->i, *full_offd_i = aijo->i; /* These variables can also include non-local elements, which are set at a later point. */
  PetscInt    col, dnz_row, onz_row, rowstart_diag, rowstart_offd;
  PetscScalar *aa = a->a, *ba = b->a;

  PetscFunctionBegin;
  /* Iterate over all rows of the matrix */
  for (j = 0; j < am; j++) {
    dnz_row = onz_row = 0;
    rowstart_offd     = full_offd_i[j];
    rowstart_diag     = full_diag_i[j];
    /* Iterate over all non-zero columns of the current row */
    for (col = mat_i[j]; col < mat_i[j + 1]; col++) {
      /* If column is in the diagonal */
      if (mat_j[col] >= cstart && mat_j[col] < cend) {
        aj[rowstart_diag + dnz_row] = mat_j[col] - cstart;
        aa[rowstart_diag + dnz_row] = mat_a[col];
        dnz_row++;
      } else { /* off-diagonal entries */
        bj[rowstart_offd + onz_row] = mat_j[col];
        ba[rowstart_offd + onz_row] = mat_a[col];
        onz_row++;
      }
    }
    ailen[j] = dnz_row;
    bilen[j] = onz_row;
  }
  PetscFunctionReturn(0);
}

/*
  Retrieves values at (idxm[i], idxn[j]); only locally owned rows are supported.
  Off-diagonal columns are mapped to B's compressed local indices via colmap; a column
  not present in B yields 0.0.
*/
PetscErrorCode MatGetValues_MPIAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[]) {
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt    i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
  PetscInt    cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
    if (idxm[i] >= rstart && idxm[i] < rend) {
      row = idxm[i] - rstart;
      for (j = 0; j < n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
        if (idxn[j] >= cstart && idxn[j] < cend) {
          col = idxn[j] - cstart;
          PetscCall(MatGetValues(aij->A, 1, &row, 1, &col, v + i * n + j));
        } else {
          if (!aij->colmap) { PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); }
#if defined(PETSC_USE_CTABLE)
          PetscCall(PetscTableFind(aij->colmap, idxn[j] + 1, &col));
          col--;
#else
          col = aij->colmap[idxn[j]] - 1;
#endif
          if ((col < 0) || (aij->garray[col] != idxn[j])) *(v + i * n + j) = 0.0;
          else { PetscCall(MatGetValues(aij->B, 1, &row, 1, &col, v + i * n + j)); }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

/* Starts communication of the stashed off-process entries (no-op if stashing is disabled). */
PetscErrorCode MatAssemblyBegin_MPIAIJ(Mat mat, MatAssemblyType mode) {
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  PetscInt    nstash, reallocs;

  PetscFunctionBegin;
  if (aij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
  PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
  PetscCall(PetscInfo(aij->A, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscFunctionReturn(0);
}

/*
  Completes assembly: drains the stash into local blocks, assembles A and B, handles
  collective disassembly when any rank's B changed structure, and (on first final
  assembly) builds the scatter machinery for matrix-vector products.
*/
PetscErrorCode MatAssemblyEnd_MPIAIJ(Mat mat, MatAssemblyType mode) {
  Mat_MPIAIJ  *aij = (Mat_MPIAIJ *)mat->data;
  PetscMPIInt  n;
  PetscInt     i, j, rstart, ncols, flg;
  PetscInt    *row, *col;
  PetscBool    other_disassembled;
  PetscScalar *val;

  /* do not use 'b = (Mat_SeqAIJ*)aij->B->data' as B can be reset in disassembly */

  PetscFunctionBegin;
  if (!aij->donotstash && !mat->nooffprocentries) {
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
  }
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU) aij->A->offloadmask = PETSC_OFFLOAD_CPU;
  /* We call MatBindToCPU() on aij->A and aij->B here, because if MatBindToCPU_MPIAIJ() is called before assembly, it cannot bind these. */
  if (mat->boundtocpu) {
    PetscCall(MatBindToCPU(aij->A, PETSC_TRUE));
    PetscCall(MatBindToCPU(aij->B, PETSC_TRUE));
  }
#endif
  PetscCall(MatAssemblyBegin(aij->A, mode));
  PetscCall(MatAssemblyEnd(aij->A, mode));

  /* determine if any processor has disassembled, if so we must
     also disassemble ourself, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqAIJ *)aij->B->data)->nonew) {
    PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !other_disassembled) { /* mat on this rank has reduced off-diag B with local col ids, but globally it does not */
      PetscCall(MatDisAssemble_MPIAIJ(mat));
    }
  }
  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) { PetscCall(MatSetUpMultiply_MPIAIJ(mat)); }
  PetscCall(MatSetOption(aij->B, MAT_USE_INODES, PETSC_FALSE));
#if defined(PETSC_HAVE_DEVICE)
  if (mat->offloadmask == PETSC_OFFLOAD_CPU && aij->B->offloadmask != PETSC_OFFLOAD_UNALLOCATED) aij->B->offloadmask = PETSC_OFFLOAD_CPU;
#endif
  PetscCall(MatAssemblyBegin(aij->B, mode));
  PetscCall(MatAssemblyEnd(aij->B, mode));

  PetscCall(PetscFree2(aij->rowvalues, aij->rowindices));

  aij->rowvalues = NULL;

  PetscCall(VecDestroy(&aij->diag)); /* cached diagonal is stale after assembly */

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqAIJ *)(aij->A->data))->nonew) {
    PetscObjectState state = aij->A->nonzerostate + aij->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
  }
#if defined(PETSC_HAVE_DEVICE)
  mat->offloadmask = PETSC_OFFLOAD_BOTH;
#endif
  PetscFunctionReturn(0);
}

/* Zeros all stored values in both the diagonal and off-diagonal blocks. */
PetscErrorCode MatZeroEntries_MPIAIJ(Mat A) {
  Mat_MPIAIJ *l = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatZeroEntries(l->A));
  PetscCall(MatZeroEntries(l->B));
  PetscFunctionReturn(0);
}

/*
  Zeros the (globally indexed) rows, optionally placing diag on the diagonal and fixing
  the right-hand side b = diag*x for those rows. For non-congruent layouts the diagonal
  entry may be a new nonzero, so "nonew" is temporarily lifted unless the caller asked to
  keep the nonzero pattern. The global nonzerostate is bumped collectively if any rank's
  block structure changed.
*/
PetscErrorCode MatZeroRows_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) {
  Mat_MPIAIJ      *mat = (Mat_MPIAIJ *)A->data;
  PetscObjectState sA, sB;
  PetscInt        *lrows;
  PetscInt         r, len;
  PetscBool        cong, lch, gch;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows));
  PetscCall(MatHasCongruentLayouts(A, &cong));
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout");
    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArray(b, &bb));
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArray(b, &bb));
  }

  sA = mat->A->nonzerostate;
  sB = mat->B->nonzerostate;

  if (diag != 0.0 && cong) {
    PetscCall(MatZeroRows(mat->A, len, lrows, diag, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  } else if (diag != 0.0) { /* non-square or non congruent layouts -> if keepnonzeropattern is false, we allow for new insertion */
    Mat_SeqAIJ *aijA = (Mat_SeqAIJ *)mat->A->data;
    Mat_SeqAIJ *aijB = (Mat_SeqAIJ *)mat->B->data;
    PetscInt    nnwA, nnwB;
    PetscBool   nnzA, nnzB;

    nnwA = aijA->nonew;
    nnwB = aijB->nonew;
    nnzA = aijA->keepnonzeropattern;
    nnzB = aijB->keepnonzeropattern;
    if (!nnzA) {
      PetscCall(PetscInfo(mat->A, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on diagonal block.\n"));
      aijA->nonew = 0;
    }
    if (!nnzB) {
      PetscCall(PetscInfo(mat->B, "Requested to not keep the pattern and add a nonzero diagonal; may encounter reallocations on off-diagonal block.\n"));
      aijB->nonew = 0;
    }
    /* Must zero here before the next loop */
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      if (row >= A->cmap->N) continue; /* rectangular matrix: no diagonal entry past the last column */
      PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
    }
    /* restore the original nonew settings */
    aijA->nonew = nnwA;
    aijB->nonew = nnwB;
  } else {
    PetscCall(MatZeroRows(mat->A, len, lrows, 0.0, NULL, NULL));
    PetscCall(MatZeroRows(mat->B, len, lrows, 0.0, NULL, NULL));
  }
  PetscCall(PetscFree(lrows));
  PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));

  /* reduce nonzerostate */
  lch = (PetscBool)(sA != mat->A->nonzerostate || sB != mat->B->nonzerostate);
  PetscCall(MPIU_Allreduce(&lch, &gch, 1, MPIU_BOOL, MPI_LOR, PetscObjectComm((PetscObject)A)));
  if (gch) A->nonzerostate++;
  PetscFunctionReturn(0);
}

PetscErrorCode MatZeroRowsColumns_MPIAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) {
  Mat_MPIAIJ        *l = (Mat_MPIAIJ *)A->data;
  PetscMPIInt        n = A->rmap->n;
  PetscInt           i, j, r, m, len = 0;
  PetscInt          *lrows, *owners = A->rmap->range;
  PetscMPIInt        p = 0;
  PetscSFNode       *rrows;
  PetscSF            sf;
  const PetscScalar *xx;
  PetscScalar       *bb, *mask, *aij_a;
  Vec                xmask, lmask;
  Mat_SeqAIJ        *aij = (Mat_SeqAIJ *)l->B->data;
  const PetscInt    *aj, *ii, *ridx;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  PetscCall(PetscMalloc1(n, &lrows));
  for (r = 0; r < n; ++r) lrows[r] = -1;
  PetscCall(PetscMalloc1(N, &rrows));
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N);
    if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */
      PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p));
    }
    rrows[r].rank = p;
rrows[r].index = rows[r] - owners[p]; 849 } 850 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 851 PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER)); 852 /* Collect flags for rows to be zeroed */ 853 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR)); 854 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR)); 855 PetscCall(PetscSFDestroy(&sf)); 856 /* Compress and put in row numbers */ 857 for (r = 0; r < n; ++r) 858 if (lrows[r] >= 0) lrows[len++] = r; 859 /* zero diagonal part of matrix */ 860 PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b)); 861 /* handle off diagonal part of matrix */ 862 PetscCall(MatCreateVecs(A, &xmask, NULL)); 863 PetscCall(VecDuplicate(l->lvec, &lmask)); 864 PetscCall(VecGetArray(xmask, &bb)); 865 for (i = 0; i < len; i++) bb[lrows[i]] = 1; 866 PetscCall(VecRestoreArray(xmask, &bb)); 867 PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD)); 868 PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD)); 869 PetscCall(VecDestroy(&xmask)); 870 if (x && b) { /* this code is buggy when the row and column layout don't match */ 871 PetscBool cong; 872 873 PetscCall(MatHasCongruentLayouts(A, &cong)); 874 PetscCheck(cong, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Need matching row/col layout"); 875 PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD)); 876 PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD)); 877 PetscCall(VecGetArrayRead(l->lvec, &xx)); 878 PetscCall(VecGetArray(b, &bb)); 879 } 880 PetscCall(VecGetArray(lmask, &mask)); 881 /* remove zeroed rows of off diagonal matrix */ 882 PetscCall(MatSeqAIJGetArray(l->B, &aij_a)); 883 ii = aij->i; 884 for (i = 0; i < len; i++) { PetscCall(PetscArrayzero(aij_a + ii[lrows[i]], ii[lrows[i] + 1] - ii[lrows[i]])); } 885 /* loop over all elements of off process part of matrix 
zeroing removed columns*/ 886 if (aij->compressedrow.use) { 887 m = aij->compressedrow.nrows; 888 ii = aij->compressedrow.i; 889 ridx = aij->compressedrow.rindex; 890 for (i = 0; i < m; i++) { 891 n = ii[i + 1] - ii[i]; 892 aj = aij->j + ii[i]; 893 aa = aij_a + ii[i]; 894 895 for (j = 0; j < n; j++) { 896 if (PetscAbsScalar(mask[*aj])) { 897 if (b) bb[*ridx] -= *aa * xx[*aj]; 898 *aa = 0.0; 899 } 900 aa++; 901 aj++; 902 } 903 ridx++; 904 } 905 } else { /* do not use compressed row format */ 906 m = l->B->rmap->n; 907 for (i = 0; i < m; i++) { 908 n = ii[i + 1] - ii[i]; 909 aj = aij->j + ii[i]; 910 aa = aij_a + ii[i]; 911 for (j = 0; j < n; j++) { 912 if (PetscAbsScalar(mask[*aj])) { 913 if (b) bb[i] -= *aa * xx[*aj]; 914 *aa = 0.0; 915 } 916 aa++; 917 aj++; 918 } 919 } 920 } 921 if (x && b) { 922 PetscCall(VecRestoreArray(b, &bb)); 923 PetscCall(VecRestoreArrayRead(l->lvec, &xx)); 924 } 925 PetscCall(MatSeqAIJRestoreArray(l->B, &aij_a)); 926 PetscCall(VecRestoreArray(lmask, &mask)); 927 PetscCall(VecDestroy(&lmask)); 928 PetscCall(PetscFree(lrows)); 929 930 /* only change matrix nonzero state if pattern was allowed to be changed */ 931 if (!((Mat_SeqAIJ *)(l->A->data))->keepnonzeropattern) { 932 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 933 PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A))); 934 } 935 PetscFunctionReturn(0); 936 } 937 938 PetscErrorCode MatMult_MPIAIJ(Mat A, Vec xx, Vec yy) { 939 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 940 PetscInt nt; 941 VecScatter Mvctx = a->Mvctx; 942 943 PetscFunctionBegin; 944 PetscCall(VecGetLocalSize(xx, &nt)); 945 PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A (%" PetscInt_FMT ") and xx (%" PetscInt_FMT ")", A->cmap->n, nt); 946 PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 947 PetscUseTypeMethod(a->A, mult, xx, yy); 948 PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, 
INSERT_VALUES, SCATTER_FORWARD)); 949 PetscUseTypeMethod(a->B, multadd, a->lvec, yy, yy); 950 PetscFunctionReturn(0); 951 } 952 953 PetscErrorCode MatMultDiagonalBlock_MPIAIJ(Mat A, Vec bb, Vec xx) { 954 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 955 956 PetscFunctionBegin; 957 PetscCall(MatMultDiagonalBlock(a->A, bb, xx)); 958 PetscFunctionReturn(0); 959 } 960 961 PetscErrorCode MatMultAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz) { 962 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 963 VecScatter Mvctx = a->Mvctx; 964 965 PetscFunctionBegin; 966 PetscCall(VecScatterBegin(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 967 PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz)); 968 PetscCall(VecScatterEnd(Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 969 PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz)); 970 PetscFunctionReturn(0); 971 } 972 973 PetscErrorCode MatMultTranspose_MPIAIJ(Mat A, Vec xx, Vec yy) { 974 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 975 976 PetscFunctionBegin; 977 /* do nondiagonal part */ 978 PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec)); 979 /* do local part */ 980 PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy)); 981 /* add partial results together */ 982 PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE)); 983 PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE)); 984 PetscFunctionReturn(0); 985 } 986 987 PetscErrorCode MatIsTranspose_MPIAIJ(Mat Amat, Mat Bmat, PetscReal tol, PetscBool *f) { 988 MPI_Comm comm; 989 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)Amat->data, *Bij; 990 Mat Adia = Aij->A, Bdia, Aoff, Boff, *Aoffs, *Boffs; 991 IS Me, Notme; 992 PetscInt M, N, first, last, *notme, i; 993 PetscBool lf; 994 PetscMPIInt size; 995 996 PetscFunctionBegin; 997 /* Easy test: symmetric diagonal block */ 998 Bij = (Mat_MPIAIJ *)Bmat->data; 999 Bdia = Bij->A; 1000 PetscCall(MatIsTranspose(Adia, Bdia, tol, &lf)); 1001 PetscCall(MPIU_Allreduce(&lf, f, 1, MPIU_BOOL, MPI_LAND, 
PetscObjectComm((PetscObject)Amat))); 1002 if (!*f) PetscFunctionReturn(0); 1003 PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm)); 1004 PetscCallMPI(MPI_Comm_size(comm, &size)); 1005 if (size == 1) PetscFunctionReturn(0); 1006 1007 /* Hard test: off-diagonal block. This takes a MatCreateSubMatrix. */ 1008 PetscCall(MatGetSize(Amat, &M, &N)); 1009 PetscCall(MatGetOwnershipRange(Amat, &first, &last)); 1010 PetscCall(PetscMalloc1(N - last + first, ¬me)); 1011 for (i = 0; i < first; i++) notme[i] = i; 1012 for (i = last; i < M; i++) notme[i - last + first] = i; 1013 PetscCall(ISCreateGeneral(MPI_COMM_SELF, N - last + first, notme, PETSC_COPY_VALUES, &Notme)); 1014 PetscCall(ISCreateStride(MPI_COMM_SELF, last - first, first, 1, &Me)); 1015 PetscCall(MatCreateSubMatrices(Amat, 1, &Me, &Notme, MAT_INITIAL_MATRIX, &Aoffs)); 1016 Aoff = Aoffs[0]; 1017 PetscCall(MatCreateSubMatrices(Bmat, 1, &Notme, &Me, MAT_INITIAL_MATRIX, &Boffs)); 1018 Boff = Boffs[0]; 1019 PetscCall(MatIsTranspose(Aoff, Boff, tol, f)); 1020 PetscCall(MatDestroyMatrices(1, &Aoffs)); 1021 PetscCall(MatDestroyMatrices(1, &Boffs)); 1022 PetscCall(ISDestroy(&Me)); 1023 PetscCall(ISDestroy(&Notme)); 1024 PetscCall(PetscFree(notme)); 1025 PetscFunctionReturn(0); 1026 } 1027 1028 PetscErrorCode MatIsSymmetric_MPIAIJ(Mat A, PetscReal tol, PetscBool *f) { 1029 PetscFunctionBegin; 1030 PetscCall(MatIsTranspose_MPIAIJ(A, A, tol, f)); 1031 PetscFunctionReturn(0); 1032 } 1033 1034 PetscErrorCode MatMultTransposeAdd_MPIAIJ(Mat A, Vec xx, Vec yy, Vec zz) { 1035 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1036 1037 PetscFunctionBegin; 1038 /* do nondiagonal part */ 1039 PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec)); 1040 /* do local part */ 1041 PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz)); 1042 /* add partial results together */ 1043 PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE)); 1044 PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, 
SCATTER_REVERSE)); 1045 PetscFunctionReturn(0); 1046 } 1047 1048 /* 1049 This only works correctly for square matrices where the subblock A->A is the 1050 diagonal block 1051 */ 1052 PetscErrorCode MatGetDiagonal_MPIAIJ(Mat A, Vec v) { 1053 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1054 1055 PetscFunctionBegin; 1056 PetscCheck(A->rmap->N == A->cmap->N, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block"); 1057 PetscCheck(A->rmap->rstart == A->cmap->rstart && A->rmap->rend == A->cmap->rend, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "row partition must equal col partition"); 1058 PetscCall(MatGetDiagonal(a->A, v)); 1059 PetscFunctionReturn(0); 1060 } 1061 1062 PetscErrorCode MatScale_MPIAIJ(Mat A, PetscScalar aa) { 1063 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 1064 1065 PetscFunctionBegin; 1066 PetscCall(MatScale(a->A, aa)); 1067 PetscCall(MatScale(a->B, aa)); 1068 PetscFunctionReturn(0); 1069 } 1070 1071 /* Free COO stuff; must match allocation methods in MatSetPreallocationCOO_MPIAIJ() */ 1072 PETSC_INTERN PetscErrorCode MatResetPreallocationCOO_MPIAIJ(Mat mat) { 1073 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1074 1075 PetscFunctionBegin; 1076 PetscCall(PetscSFDestroy(&aij->coo_sf)); 1077 PetscCall(PetscFree(aij->Aperm1)); 1078 PetscCall(PetscFree(aij->Bperm1)); 1079 PetscCall(PetscFree(aij->Ajmap1)); 1080 PetscCall(PetscFree(aij->Bjmap1)); 1081 1082 PetscCall(PetscFree(aij->Aimap2)); 1083 PetscCall(PetscFree(aij->Bimap2)); 1084 PetscCall(PetscFree(aij->Aperm2)); 1085 PetscCall(PetscFree(aij->Bperm2)); 1086 PetscCall(PetscFree(aij->Ajmap2)); 1087 PetscCall(PetscFree(aij->Bjmap2)); 1088 1089 PetscCall(PetscFree2(aij->sendbuf, aij->recvbuf)); 1090 PetscCall(PetscFree(aij->Cperm1)); 1091 PetscFunctionReturn(0); 1092 } 1093 1094 PetscErrorCode MatDestroy_MPIAIJ(Mat mat) { 1095 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1096 1097 PetscFunctionBegin; 1098 #if defined(PETSC_USE_LOG) 1099 PetscLogObjectState((PetscObject)mat, 
"Rows=%" PetscInt_FMT ", Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N); 1100 #endif 1101 PetscCall(MatStashDestroy_Private(&mat->stash)); 1102 PetscCall(VecDestroy(&aij->diag)); 1103 PetscCall(MatDestroy(&aij->A)); 1104 PetscCall(MatDestroy(&aij->B)); 1105 #if defined(PETSC_USE_CTABLE) 1106 PetscCall(PetscTableDestroy(&aij->colmap)); 1107 #else 1108 PetscCall(PetscFree(aij->colmap)); 1109 #endif 1110 PetscCall(PetscFree(aij->garray)); 1111 PetscCall(VecDestroy(&aij->lvec)); 1112 PetscCall(VecScatterDestroy(&aij->Mvctx)); 1113 PetscCall(PetscFree2(aij->rowvalues, aij->rowindices)); 1114 PetscCall(PetscFree(aij->ld)); 1115 1116 /* Free COO */ 1117 PetscCall(MatResetPreallocationCOO_MPIAIJ(mat)); 1118 1119 PetscCall(PetscFree(mat->data)); 1120 1121 /* may be created by MatCreateMPIAIJSumSeqAIJSymbolic */ 1122 PetscCall(PetscObjectCompose((PetscObject)mat, "MatMergeSeqsToMPI", NULL)); 1123 1124 PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL)); 1125 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL)); 1126 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL)); 1127 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatIsTranspose_C", NULL)); 1128 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocation_C", NULL)); 1129 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatResetPreallocation_C", NULL)); 1130 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetPreallocationCSR_C", NULL)); 1131 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL)); 1132 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpibaij_C", NULL)); 1133 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisbaij_C", NULL)); 1134 #if defined(PETSC_HAVE_CUDA) 1135 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcusparse_C", NULL)); 1136 #endif 1137 #if 
defined(PETSC_HAVE_KOKKOS_KERNELS) 1138 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijkokkos_C", NULL)); 1139 #endif 1140 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpidense_C", NULL)); 1141 #if defined(PETSC_HAVE_ELEMENTAL) 1142 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_elemental_C", NULL)); 1143 #endif 1144 #if defined(PETSC_HAVE_SCALAPACK) 1145 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_scalapack_C", NULL)); 1146 #endif 1147 #if defined(PETSC_HAVE_HYPRE) 1148 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_hypre_C", NULL)); 1149 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", NULL)); 1150 #endif 1151 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL)); 1152 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_is_mpiaij_C", NULL)); 1153 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatProductSetFromOptions_mpiaij_mpiaij_C", NULL)); 1154 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIAIJSetUseScalableIncreaseOverlap_C", NULL)); 1155 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijperm_C", NULL)); 1156 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijsell_C", NULL)); 1157 #if defined(PETSC_HAVE_MKL_SPARSE) 1158 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijmkl_C", NULL)); 1159 #endif 1160 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpiaijcrl_C", NULL)); 1161 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_is_C", NULL)); 1162 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpiaij_mpisell_C", NULL)); 1163 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetPreallocationCOO_C", NULL)); 1164 
PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetValuesCOO_C", NULL)); 1165 PetscFunctionReturn(0); 1166 } 1167 1168 PetscErrorCode MatView_MPIAIJ_Binary(Mat mat, PetscViewer viewer) { 1169 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1170 Mat_SeqAIJ *A = (Mat_SeqAIJ *)aij->A->data; 1171 Mat_SeqAIJ *B = (Mat_SeqAIJ *)aij->B->data; 1172 const PetscInt *garray = aij->garray; 1173 const PetscScalar *aa, *ba; 1174 PetscInt header[4], M, N, m, rs, cs, nz, cnt, i, ja, jb; 1175 PetscInt *rowlens; 1176 PetscInt *colidxs; 1177 PetscScalar *matvals; 1178 1179 PetscFunctionBegin; 1180 PetscCall(PetscViewerSetUp(viewer)); 1181 1182 M = mat->rmap->N; 1183 N = mat->cmap->N; 1184 m = mat->rmap->n; 1185 rs = mat->rmap->rstart; 1186 cs = mat->cmap->rstart; 1187 nz = A->nz + B->nz; 1188 1189 /* write matrix header */ 1190 header[0] = MAT_FILE_CLASSID; 1191 header[1] = M; 1192 header[2] = N; 1193 header[3] = nz; 1194 PetscCallMPI(MPI_Reduce(&nz, &header[3], 1, MPIU_INT, MPI_SUM, 0, PetscObjectComm((PetscObject)mat))); 1195 PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT)); 1196 1197 /* fill in and store row lengths */ 1198 PetscCall(PetscMalloc1(m, &rowlens)); 1199 for (i = 0; i < m; i++) rowlens[i] = A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i]; 1200 PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT)); 1201 PetscCall(PetscFree(rowlens)); 1202 1203 /* fill in and store column indices */ 1204 PetscCall(PetscMalloc1(nz, &colidxs)); 1205 for (cnt = 0, i = 0; i < m; i++) { 1206 for (jb = B->i[i]; jb < B->i[i + 1]; jb++) { 1207 if (garray[B->j[jb]] > cs) break; 1208 colidxs[cnt++] = garray[B->j[jb]]; 1209 } 1210 for (ja = A->i[i]; ja < A->i[i + 1]; ja++) colidxs[cnt++] = A->j[ja] + cs; 1211 for (; jb < B->i[i + 1]; jb++) colidxs[cnt++] = garray[B->j[jb]]; 1212 } 1213 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz); 1214 
PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 1215 PetscCall(PetscFree(colidxs)); 1216 1217 /* fill in and store nonzero values */ 1218 PetscCall(MatSeqAIJGetArrayRead(aij->A, &aa)); 1219 PetscCall(MatSeqAIJGetArrayRead(aij->B, &ba)); 1220 PetscCall(PetscMalloc1(nz, &matvals)); 1221 for (cnt = 0, i = 0; i < m; i++) { 1222 for (jb = B->i[i]; jb < B->i[i + 1]; jb++) { 1223 if (garray[B->j[jb]] > cs) break; 1224 matvals[cnt++] = ba[jb]; 1225 } 1226 for (ja = A->i[i]; ja < A->i[i + 1]; ja++) matvals[cnt++] = aa[ja]; 1227 for (; jb < B->i[i + 1]; jb++) matvals[cnt++] = ba[jb]; 1228 } 1229 PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &aa)); 1230 PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &ba)); 1231 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz); 1232 PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 1233 PetscCall(PetscFree(matvals)); 1234 1235 /* write block size option to the viewer's .info file */ 1236 PetscCall(MatView_Binary_BlockSizes(mat, viewer)); 1237 PetscFunctionReturn(0); 1238 } 1239 1240 #include <petscdraw.h> 1241 PetscErrorCode MatView_MPIAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer) { 1242 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 1243 PetscMPIInt rank = aij->rank, size = aij->size; 1244 PetscBool isdraw, iascii, isbinary; 1245 PetscViewer sviewer; 1246 PetscViewerFormat format; 1247 1248 PetscFunctionBegin; 1249 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1250 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 1251 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1252 if (iascii) { 1253 PetscCall(PetscViewerGetFormat(viewer, &format)); 1254 if (format == PETSC_VIEWER_LOAD_BALANCE) { 1255 PetscInt i, nmax = 0, nmin = PETSC_MAX_INT, navg = 0, 
*nz, nzlocal = ((Mat_SeqAIJ *)(aij->A->data))->nz + ((Mat_SeqAIJ *)(aij->B->data))->nz;
      /* gather per-rank nonzero counts (diag + off-diag blocks) and report min/avg/max */
      PetscCall(PetscMalloc1(size, &nz));
      PetscCallMPI(MPI_Allgather(&nzlocal, 1, MPIU_INT, nz, 1, MPIU_INT, PetscObjectComm((PetscObject)mat)));
      for (i = 0; i < (PetscInt)size; i++) {
        nmax = PetscMax(nmax, nz[i]);
        nmin = PetscMin(nmin, nz[i]);
        navg += nz[i];
      }
      PetscCall(PetscFree(nz));
      navg = navg / size;
      PetscCall(PetscViewerASCIIPrintf(viewer, "Load Balance - Nonzeros: Min %" PetscInt_FMT " avg %" PetscInt_FMT " max %" PetscInt_FMT "\n", nmin, navg, nmax));
      PetscFunctionReturn(0);
    }
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* per-rank summary: local rows, nonzeros, memory, I-node usage, and the
         split between the on- and off-diagonal blocks; printed synchronized so
         ranks appear in order */
      MatInfo info;
      PetscInt *inodes = NULL;

      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(MatInodeGetInodeSizes(aij->A, NULL, &inodes, NULL));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      if (!inodes) {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, not using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                     (double)info.memory));
      } else {
        PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " mem %g, using I-node routines\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                     (double)info.memory));
      }
      PetscCall(MatGetInfo(aij->A, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(MatGetInfo(aij->B, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(aij->Mvctx, viewer));
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscInt inodecount, inodelimit, *inodes;
      PetscCall(MatInodeGetInodeSizes(aij->A, &inodecount, &inodes, &inodelimit));
      if (inodes) {
        PetscCall(PetscViewerASCIIPrintf(viewer, "using I-node (on process 0) routines: found %" PetscInt_FMT " nodes, limit used is %" PetscInt_FMT "\n", inodecount, inodelimit));
      } else {
        PetscCall(PetscViewerASCIIPrintf(viewer, "not using I-node (on process 0) routines\n"));
      }
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  } else if (isbinary) {
    if (size == 1) {
      PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
      PetscCall(MatView(aij->A, viewer));
    } else {
      PetscCall(MatView_MPIAIJ_Binary(mat, viewer));
    }
    PetscFunctionReturn(0);
  } else if (iascii && size == 1) {
    PetscCall(PetscObjectSetName((PetscObject)aij->A, ((PetscObject)mat)->name));
    PetscCall(MatView(aij->A, viewer));
    PetscFunctionReturn(0);
  } else if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(0);
  }

  { /* assemble the entire matrix onto first processor */
    Mat A = NULL, Av;
    IS isrow, iscol;

    /* rank 0 requests all rows/cols, other ranks request none */
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->rmap->N : 0, 0, 1, &isrow));
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), rank == 0 ? mat->cmap->N : 0, 0, 1, &iscol));
    PetscCall(MatCreateSubMatrix(mat, isrow, iscol, MAT_INITIAL_MATRIX, &A));
    PetscCall(MatMPIAIJGetSeqAIJ(A, &Av, NULL, NULL));
    /* The commented code uses MatCreateSubMatrices instead */
    /*
      Mat *AA, A = NULL, Av;
      IS isrow,iscol;

      PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->rmap->N : 0,0,1,&isrow));
      PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat),rank == 0 ? mat->cmap->N : 0,0,1,&iscol));
      PetscCall(MatCreateSubMatrices(mat,1,&isrow,&iscol,MAT_INITIAL_MATRIX,&AA));
      if (rank == 0) {
        PetscCall(PetscObjectReference((PetscObject)AA[0]));
        A = AA[0];
        Av = AA[0];
      }
      PetscCall(MatDestroySubMatrices(1,&AA));
    */
    PetscCall(ISDestroy(&iscol));
    PetscCall(ISDestroy(&isrow));
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    if (rank == 0) {
      if (((PetscObject)mat)->name) { PetscCall(PetscObjectSetName((PetscObject)Av, ((PetscObject)mat)->name)); }
      PetscCall(MatView_SeqAIJ(Av, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(PetscViewerFlush(viewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(0);
}

/* Dispatches viewing to the common ASCII/draw/socket/binary implementation */
PetscErrorCode MatView_MPIAIJ(Mat mat, PetscViewer viewer) {
  PetscBool iascii, isdraw, issocket, isbinary;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
  if (iascii || isdraw || isbinary || issocket) { PetscCall(MatView_MPIAIJ_ASCIIorDraworSocket(mat, viewer)); }
  PetscFunctionReturn(0);
}

/*
   Parallel (processor-local) SOR: each sweep scatters the current iterate's ghost
   values, folds the off-diagonal contribution into a modified right-hand side bb1,
   and then runs the sequential SOR kernel on the diagonal block only.
*/
PetscErrorCode MatSOR_MPIAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) {
  Mat_MPIAIJ *mat = (Mat_MPIAIJ *)matin->data;
  Vec bb1 = NULL;
  PetscBool hasop;

  PetscFunctionBegin;
  if (flag == SOR_APPLY_UPPER) {
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
    PetscFunctionReturn(0);
  }

  /* ~flag & SOR_ZERO_INITIAL_GUESS is nonzero when the zero-initial-guess bit is NOT set */
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS || flag & SOR_EISENSTAT) { PetscCall(VecDuplicate(bb, &bb1)); }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* first sweep needs no ghost values since x starts at zero */
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }

    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_EISENSTAT) {
    Vec xx1;

    PetscCall(VecDuplicate(bb, &xx1));
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP), fshift, lits, 1, xx));

    PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
    if (!mat->diag) {
      /* cache the diagonal; reused on subsequent Eisenstat applications */
      PetscCall(MatCreateVecs(matin, &mat->diag, NULL));
      PetscCall(MatGetDiagonal(matin, mat->diag));
    }
    PetscCall(MatHasOperation(matin, MATOP_MULT_DIAGONAL_BLOCK, &hasop));
    if (hasop) {
      PetscCall(MatMultDiagonalBlock(matin, xx, bb1));
    } else {
      PetscCall(VecPointwiseMult(bb1, mat->diag, xx));
    }
    PetscCall(VecAYPX(bb1, (omega - 2.0) / omega, bb));

    PetscCall(MatMultAdd(mat->B, mat->lvec, bb1, bb1));

    /* local sweep */
    PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, (MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP), fshift, lits, 1, xx1));
    PetscCall(VecAXPY(xx, 1.0, xx1));
    PetscCall(VecDestroy(&xx1));
  } else SETERRQ(PetscObjectComm((PetscObject)matin),
PETSC_ERR_SUP, "Parallel SOR not supported");

  PetscCall(VecDestroy(&bb1));

  matin->factorerrortype = mat->A->factorerrortype;
  PetscFunctionReturn(0);
}

/* Permute rows and columns of a parallel AIJ matrix, returning the permuted matrix in B.
   The row/column permutations are inverted with star forests (PetscSF) so that each process
   learns the destination (global) index of its local rows and columns, then nonzeros are
   counted per destination row to preallocate and finally inserted with MatSetValues(). */
PetscErrorCode MatPermute_MPIAIJ(Mat A, IS rowp, IS colp, Mat *B) {
  Mat             aA, aB, Aperm;
  const PetscInt *rwant, *cwant, *gcols, *ai, *bi, *aj, *bj;
  PetscScalar    *aa, *ba;
  PetscInt        i, j, m, n, ng, anz, bnz, *dnnz, *onnz, *tdnnz, *tonnz, *rdest, *cdest, *work, *gcdest;
  PetscSF         rowsf, sf;
  IS              parcolp = NULL;
  PetscBool       done;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(A, &m, &n));
  PetscCall(ISGetIndices(rowp, &rwant));
  PetscCall(ISGetIndices(colp, &cwant));
  /* work is shared between the row pass (length m) and column pass (length n) */
  PetscCall(PetscMalloc3(PetscMax(m, n), &work, m, &rdest, n, &cdest));

  /* Invert row permutation to find out where my rows should go */
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &rowsf));
  PetscCall(PetscSFSetGraphLayout(rowsf, A->rmap, A->rmap->n, NULL, PETSC_OWN_POINTER, rwant));
  PetscCall(PetscSFSetFromOptions(rowsf));
  for (i = 0; i < m; i++) work[i] = A->rmap->rstart + i;
  PetscCall(PetscSFReduceBegin(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(rowsf, MPIU_INT, work, rdest, MPI_REPLACE));

  /* Invert column permutation to find out where my columns should go */
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraphLayout(sf, A->cmap, A->cmap->n, NULL, PETSC_OWN_POINTER, cwant));
  PetscCall(PetscSFSetFromOptions(sf));
  for (i = 0; i < n; i++) work[i] = A->cmap->rstart + i;
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, work, cdest, MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, work, cdest, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));

  PetscCall(ISRestoreIndices(rowp, &rwant));
  PetscCall(ISRestoreIndices(colp, &cwant));
  PetscCall(MatMPIAIJGetSeqAIJ(A, &aA, &aB, &gcols));

  /* Find out where my gcols should go */
  PetscCall(MatGetSize(aB, NULL, &ng));
  PetscCall(PetscMalloc1(ng, &gcdest));
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraphLayout(sf, A->cmap, ng, NULL, PETSC_OWN_POINTER, gcols));
  PetscCall(PetscSFSetFromOptions(sf));
  PetscCall(PetscSFBcastBegin(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(sf, MPIU_INT, cdest, gcdest, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&sf));

  /* Count diagonal/off-diagonal nonzeros per source row, measured at the destination layout */
  PetscCall(PetscCalloc4(m, &dnnz, m, &onnz, m, &tdnnz, m, &tonnz));
  PetscCall(MatGetRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
  PetscCall(MatGetRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
  for (i = 0; i < m; i++) {
    PetscInt    row = rdest[i];
    PetscMPIInt rowner;
    PetscCall(PetscLayoutFindOwner(A->rmap, row, &rowner));
    for (j = ai[i]; j < ai[i + 1]; j++) {
      PetscInt    col = cdest[aj[j]];
      PetscMPIInt cowner;
      PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner)); /* Could build an index for the columns to eliminate this search */
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
    for (j = bi[i]; j < bi[i + 1]; j++) {
      PetscInt    col = gcdest[bj[j]];
      PetscMPIInt cowner;
      PetscCall(PetscLayoutFindOwner(A->cmap, col, &cowner));
      if (rowner == cowner) dnnz[i]++;
      else onnz[i]++;
    }
  }
  /* Ship the counts to the processes that will own the destination rows */
  PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, dnnz, tdnnz, MPI_REPLACE));
  PetscCall(PetscSFBcastBegin(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
  PetscCall(PetscSFBcastEnd(rowsf, MPIU_INT, onnz, tonnz, MPI_REPLACE));
  PetscCall(PetscSFDestroy(&rowsf));

  PetscCall(MatCreateAIJ(PetscObjectComm((PetscObject)A), A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N, 0, tdnnz, 0, tonnz, &Aperm));
  PetscCall(MatSeqAIJGetArray(aA, &aa));
  PetscCall(MatSeqAIJGetArray(aB, &ba));
  for (i = 0; i < m; i++) {
    PetscInt *acols = dnnz, *bcols = onnz; /* Repurpose now-unneeded arrays */
    PetscInt  j0, rowlen;
    rowlen = ai[i + 1] - ai[i];
    for (j0 = j = 0; j < rowlen; j0 = j) { /* rowlen could be larger than number of rows m, so sum in batches */
      for (; j < PetscMin(rowlen, j0 + m); j++) acols[j - j0] = cdest[aj[ai[i] + j]];
      PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, acols, aa + ai[i] + j0, INSERT_VALUES));
    }
    rowlen = bi[i + 1] - bi[i];
    for (j0 = j = 0; j < rowlen; j0 = j) {
      for (; j < PetscMin(rowlen, j0 + m); j++) bcols[j - j0] = gcdest[bj[bi[i] + j]];
      PetscCall(MatSetValues(Aperm, 1, &rdest[i], j - j0, bcols, ba + bi[i] + j0, INSERT_VALUES));
    }
  }
  PetscCall(MatAssemblyBegin(Aperm, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(Aperm, MAT_FINAL_ASSEMBLY));
  PetscCall(MatRestoreRowIJ(aA, 0, PETSC_FALSE, PETSC_FALSE, &anz, &ai, &aj, &done));
  PetscCall(MatRestoreRowIJ(aB, 0, PETSC_FALSE, PETSC_FALSE, &bnz, &bi, &bj, &done));
  PetscCall(MatSeqAIJRestoreArray(aA, &aa));
  PetscCall(MatSeqAIJRestoreArray(aB, &ba));
  PetscCall(PetscFree4(dnnz, onnz, tdnnz, tonnz));
  PetscCall(PetscFree3(work, rdest, cdest));
  PetscCall(PetscFree(gcdest));
  /* NOTE(review): parcolp is never assigned in this version, so this destroy is dead code — confirm against upstream */
  if (parcolp) PetscCall(ISDestroy(&colp));
  *B = Aperm;
  PetscFunctionReturn(0);
}

/* Return the number of ghost (off-process) columns of mat and, optionally, their global indices */
PetscErrorCode MatGetGhosts_MPIAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[]) {
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatGetSize(aij->B, NULL, nghosts));
  if (ghosts) *ghosts = aij->garray;
  PetscFunctionReturn(0);
}

/* Gather matrix statistics (nonzeros, memory, mallocs) from the diagonal (A) and
   off-diagonal (B) blocks, either locally or reduced (max/sum) over the communicator */
PetscErrorCode MatGetInfo_MPIAIJ(Mat matin, MatInfoType flag, MatInfo *info) {
  Mat_MPIAIJ    *mat = (Mat_MPIAIJ *)matin->data;
  Mat            A = mat->A, B = mat->B;
  PetscLogDouble isend[5], irecv[5];

  PetscFunctionBegin;
  info->block_size = 1.0;
  PetscCall(MatGetInfo(A, MAT_LOCAL, info));

  /* accumulate diagonal-block statistics */
  isend[0] = info->nz_used;
  isend[1] = info->nz_allocated;
  isend[2] = info->nz_unneeded;
  isend[3] = info->memory;
  isend[4] = info->mallocs;

  PetscCall(MatGetInfo(B, MAT_LOCAL, info));

  /* add off-diagonal-block statistics */
  isend[0] += info->nz_used;
  isend[1] += info->nz_allocated;
  isend[2] += info->nz_unneeded;
  isend[3] += info->memory;
  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

/* Set a matrix option; most options are forwarded to both the diagonal (a->A) and
   off-diagonal (a->B) sequential blocks, a few are handled on the parallel level only */
PetscErrorCode MatSetOption_MPIAIJ(Mat A, MatOption op, PetscBool flg) {
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
  case MAT_USE_INODES:
  case MAT_IGNORE_ZERO_ENTRIES:
  case MAT_FORM_EXPLICIT_TRANSPOSE:
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A, 1);
    a->roworiented = flg;

    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_SORTED_FULL: PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); break;
  case MAT_IGNORE_OFF_PROC_ENTRIES: a->donotstash = flg; break;
  /* Symmetry flags are handled directly by MatSetOption() and they don't affect preallocation */
  case MAT_SPD:
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
  case MAT_SPD_ETERNAL:
    /* if the diagonal matrix is square it inherits some of the properties above */
    break;
  case MAT_SUBMAT_SINGLEIS: A->submat_singleis = flg; break;
  case MAT_STRUCTURE_ONLY:
    /* The option is handled directly by MatSetOption() */
    break;
  default: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "unknown option %d", op);
  }
  PetscFunctionReturn(0);
}

/* Return one local row of the parallel matrix, merging the diagonal (A) and off-diagonal (B)
   block rows into a single row sorted by global column index */
PetscErrorCode MatGetRow_MPIAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) {
  Mat_MPIAIJ  *mat = (Mat_MPIAIJ *)matin->data;
  PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
  PetscInt     i, *cworkA, *cworkB, **pcA, **pcB, cstart = matin->cmap->rstart;
  PetscInt     nztot, nzA, nzB, lrow, rstart = matin->rmap->rstart, rend = matin->rmap->rend;
  PetscInt    *cmap, *idx_p;

  PetscFunctionBegin;
  PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqAIJ *Aa = (Mat_SeqAIJ *)mat->A->data, *Ba = (Mat_SeqAIJ *)mat->B->data;
    PetscInt    max = 1, tmp;
    /* longest combined row length over both blocks */
    for (i = 0; i < matin->rmap->n; i++) {
      tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    PetscCall(PetscMalloc2(max, &mat->rowvalues, max, &mat->rowindices));
  }

  PetscCheck(row >= rstart && row < rend, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Only local rows");
  lrow = row - rstart;

  /* request only what the caller asked for from the sequential blocks */
  pvA = &vworkA;
  pcA = &cworkA;
  pvB = &vworkB;
  pcB = &cworkB;
  if (!v) {
    pvA = NULL;
    pvB = NULL;
  }
  if (!idx) {
    pcA = NULL;
    if (!v) pcB = NULL;
  }
  PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1; /* number of B entries whose global column precedes the diagonal block */
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i = 0; i < nzB; i++) {
          if (cmap[cworkB[i]] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
        for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i = 0; i < imark; i++) { idx_p[i] = cmap[cworkB[i]]; }
        } else {
          /* v was not requested, so imark must be computed here */
          for (i = 0; i < nzB; i++) {
            if (cmap[cworkB[i]] < cstart) idx_p[i] = cmap[cworkB[i]];
            else break;
          }
          imark = i;
        }
        for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart + cworkA[i];
        for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i]];
      }
    } else {
      if (idx) *idx = NULL;
      if (v) *v = NULL;
    }
  }
  *nz = nztot;
  PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
  PetscFunctionReturn(0);
}

/* Release the row obtained with MatGetRow_MPIAIJ(); only clears the active flag since
   the row data lives in buffers owned by the matrix */
PetscErrorCode MatRestoreRow_MPIAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) {
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCheck(aij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow() must be called first");
  aij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/* Compute the Frobenius, 1- (max column sum) or infinity- (max row sum) norm of the
   parallel matrix; the 2-norm is not supported */
PetscErrorCode MatNorm_MPIAIJ(Mat mat, NormType type, PetscReal *norm) {
  Mat_MPIAIJ      *aij  = (Mat_MPIAIJ *)mat->data;
  Mat_SeqAIJ      *amat = (Mat_SeqAIJ *)aij->A->data, *bmat = (Mat_SeqAIJ *)aij->B->data;
  PetscInt         i, j, cstart = mat->cmap->rstart;
  PetscReal        sum = 0.0;
  const MatScalar *v, *amata, *bmata;

  PetscFunctionBegin;
  if (aij->size == 1) {
    PetscCall(MatNorm(aij->A, type, norm));
  } else {
    PetscCall(MatSeqAIJGetArrayRead(aij->A, &amata));
    PetscCall(MatSeqAIJGetArrayRead(aij->B, &bmata));
    if (type == NORM_FROBENIUS) {
      v = amata;
      for (i = 0; i < amat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      v = bmata;
      for (i = 0; i < bmat->nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      PetscCall(MPIU_Allreduce(&sum, norm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      *norm = PetscSqrtReal(*norm);
      PetscCall(PetscLogFlops(2.0 * amat->nz + 2.0 * bmat->nz));
    } else if (type == NORM_1) { /* max column norm */
      PetscReal *tmp, *tmp2;
      PetscInt  *jj, *garray = aij->garray;
      /* tmp accumulates |a_ij| per global column; summed over all ranks below */
      PetscCall(PetscCalloc1(mat->cmap->N + 1, &tmp));
      PetscCall(PetscMalloc1(mat->cmap->N + 1, &tmp2));
      *norm = 0.0;
      v     = amata;
      jj    = amat->j;
      for (j = 0; j < amat->nz; j++) {
        tmp[cstart + *jj++] += PetscAbsScalar(*v);
        v++;
      }
      v  = bmata;
      jj = bmat->j;
      for (j = 0; j < bmat->nz; j++) {
        tmp[garray[*jj++]] += PetscAbsScalar(*v);
        v++;
      }
      PetscCall(MPIU_Allreduce(tmp, tmp2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      for (j = 0; j < mat->cmap->N; j++) {
        if (tmp2[j] > *norm) *norm = tmp2[j];
      }
      PetscCall(PetscFree(tmp));
      PetscCall(PetscFree(tmp2));
      PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
    } else if (type == NORM_INFINITY) { /* max row norm */
      PetscReal ntemp = 0.0;
      for (j = 0; j < aij->A->rmap->n; j++) {
        v   = amata + amat->i[j];
        sum = 0.0;
        for (i = 0; i < amat->i[j + 1] - amat->i[j]; i++) {
          sum += PetscAbsScalar(*v);
          v++;
        }
        v = bmata + bmat->i[j];
        for (i = 0; i < bmat->i[j + 1] - bmat->i[j]; i++) {
          sum += PetscAbsScalar(*v);
          v++;
        }
        if (sum > ntemp) ntemp = sum;
      }
      PetscCall(MPIU_Allreduce(&ntemp, norm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat)));
      PetscCall(PetscLogFlops(PetscMax(amat->nz + bmat->nz - 1, 0)));
    } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for two norm");
    PetscCall(MatSeqAIJRestoreArrayRead(aij->A, &amata));
    PetscCall(MatSeqAIJRestoreArrayRead(aij->B, &bmata));
  }
  PetscFunctionReturn(0);
}

/* Transpose a parallel AIJ matrix. The diagonal block is transposed locally (fast path);
   the off-diagonal block entries are inserted through MatSetValues() since their
   destination rows live on other processes. */
PetscErrorCode MatTranspose_MPIAIJ(Mat A, MatReuse reuse, Mat *matout) {
  Mat_MPIAIJ      *a = (Mat_MPIAIJ *)A->data, *b;
  Mat_SeqAIJ      *Aloc = (Mat_SeqAIJ *)a->A->data, *Bloc = (Mat_SeqAIJ *)a->B->data, *sub_B_diag;
  PetscInt         M = A->rmap->N, N = A->cmap->N, ma, na, mb, nb, row, *cols, *cols_tmp, *B_diag_ilen, i, ncol, A_diag_ncol;
  const PetscInt  *ai, *aj, *bi, *bj, *B_diag_i;
  Mat              B, A_diag, *B_diag;
  const MatScalar *pbv, *bv;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
  ma = A->rmap->n;
  na = A->cmap->n;
  mb = a->B->rmap->n;
  nb = a->B->cmap->n;
  ai = Aloc->i;
  aj = Aloc->j;
  bi = Bloc->i;
  bj = Bloc->j;
  if (reuse == MAT_INITIAL_MATRIX || *matout == A) {
    PetscInt            *d_nnz, *g_nnz, *o_nnz;
    PetscSFNode         *oloc;
    PETSC_UNUSED PetscSF sf;

    PetscCall(PetscMalloc4(na, &d_nnz, na, &o_nnz, nb, &g_nnz, nb, &oloc));
    /* compute d_nnz for preallocation */
    PetscCall(PetscArrayzero(d_nnz, na));
    for (i = 0; i < ai[ma]; i++) d_nnz[aj[i]]++;
    /* compute local off-diagonal contributions */
    PetscCall(PetscArrayzero(g_nnz, nb));
    for (i = 0; i < bi[ma]; i++) g_nnz[bj[i]]++;
    /* map those to global */
    PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
    PetscCall(PetscSFSetGraphLayout(sf, A->cmap, nb, NULL, PETSC_USE_POINTER, a->garray));
    PetscCall(PetscSFSetFromOptions(sf));
    PetscCall(PetscArrayzero(o_nnz, na));
    PetscCall(PetscSFReduceBegin(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
    PetscCall(PetscSFReduceEnd(sf, MPIU_INT, g_nnz, o_nnz, MPI_SUM));
    PetscCall(PetscSFDestroy(&sf));

    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
    PetscCall(MatSetBlockSizes(B, PetscAbs(A->cmap->bs), PetscAbs(A->rmap->bs)));
    PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
    PetscCall(PetscFree4(d_nnz, o_nnz, g_nnz, oloc));
  } else {
    B = *matout;
    PetscCall(MatSetOption(B, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
  }

  b           = (Mat_MPIAIJ *)B->data;
  A_diag      = a->A;
  B_diag      = &b->A;
  sub_B_diag  = (Mat_SeqAIJ *)(*B_diag)->data;
  A_diag_ncol = A_diag->cmap->N;
  B_diag_ilen = sub_B_diag->ilen;
  B_diag_i    = sub_B_diag->i;

  /* Set ilen for diagonal of B */
  for (i = 0; i < A_diag_ncol; i++) { B_diag_ilen[i] = B_diag_i[i + 1] - B_diag_i[i]; }

  /* Transpose the diagonal part of the matrix. In contrast to the offdiagonal part, this can be done
     very quickly (=without using MatSetValues), because all writes are local. */
  PetscCall(MatTransposeSetPrecursor(A_diag, *B_diag));
  PetscCall(MatTranspose(A_diag, MAT_REUSE_MATRIX, B_diag));

  /* copy over the B part */
  PetscCall(PetscMalloc1(bi[mb], &cols));
  PetscCall(MatSeqAIJGetArrayRead(a->B, &bv));
  pbv = bv;
  row = A->rmap->rstart;
  for (i = 0; i < bi[mb]; i++) cols[i] = a->garray[bj[i]];
  cols_tmp = cols;
  for (i = 0; i < mb; i++) {
    ncol = bi[i + 1] - bi[i];
    /* one row of A becomes one column of the transpose */
    PetscCall(MatSetValues(B, ncol, cols_tmp, 1, &row, pbv, INSERT_VALUES));
    row++;
    pbv += ncol;
    cols_tmp += ncol;
  }
  PetscCall(PetscFree(cols));
  PetscCall(MatSeqAIJRestoreArrayRead(a->B, &bv));

  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) {
    *matout = B;
  } else {
    /* MAT_INPLACE_MATRIX: replace A's content with the transpose */
    PetscCall(MatHeaderMerge(A, &B));
  }
  PetscFunctionReturn(0);
}

/* Scale the matrix rows by ll and columns by rr (either may be NULL); the scatter of rr
   into the ghost vector overlaps with scaling of the local blocks */
PetscErrorCode MatDiagonalScale_MPIAIJ(Mat mat, Vec ll, Vec rr) {
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;
  Mat         a = aij->A, b = aij->B;
  PetscInt    s1, s2, s3;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(mat, &s2, &s3));
  if (rr) {
    PetscCall(VecGetLocalSize(rr, &s1));
    PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size");
    /* Overlap communication with computation.
    */
    PetscCall(VecScatterBegin(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
  }
  if (ll) {
    PetscCall(VecGetLocalSize(ll, &s1));
    PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size");
    PetscUseTypeMethod(b, diagonalscale, ll, NULL);
  }
  /* scale the diagonal block */
  PetscUseTypeMethod(a, diagonalscale, ll, rr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    PetscCall(VecScatterEnd(aij->Mvctx, rr, aij->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscUseTypeMethod(b, diagonalscale, NULL, aij->lvec);
  }
  PetscFunctionReturn(0);
}

/* Mark the matrix as unfactored; only the diagonal block carries factorization state */
PetscErrorCode MatSetUnfactored_MPIAIJ(Mat A) {
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatSetUnfactored(a->A));
  PetscFunctionReturn(0);
}

/* Compare two parallel matrices; local blocks are compared pairwise and the result is
   reduced with a logical AND over the communicator */
PetscErrorCode MatEqual_MPIAIJ(Mat A, Mat B, PetscBool *flag) {
  Mat_MPIAIJ *matB = (Mat_MPIAIJ *)B->data, *matA = (Mat_MPIAIJ *)A->data;
  Mat         a, b, c, d;
  PetscBool   flg;

  PetscFunctionBegin;
  a = matA->A;
  b = matA->B;
  c = matB->A;
  d = matB->B;

  PetscCall(MatEqual(a, c, &flg));
  /* only compare off-diagonal blocks if the diagonal blocks already match */
  if (flg) { PetscCall(MatEqual(b, d, &flg)); }
  PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
  PetscFunctionReturn(0);
}

/* Copy A into B; uses the fast block-wise copy only when both matrices share the same
   nonzero pattern and copy implementation */
PetscErrorCode MatCopy_MPIAIJ(Mat A, Mat B, MatStructure str) {
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  Mat_MPIAIJ *b = (Mat_MPIAIJ *)B->data;

  PetscFunctionBegin;
  /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
  if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
    /* because of the column compression in the off-processor part of the matrix a->B,
       the number of columns in a->B and b->B may be different, hence we cannot call
       the MatCopy() directly on the two parts. If need be, we can provide a more
       efficient copy than the MatCopy_Basic() by first uncompressing the a->B matrices
       then copying the submatrices */
    PetscCall(MatCopy_Basic(A, B, str));
  } else {
    PetscCall(MatCopy(a->A, b->A, str));
    PetscCall(MatCopy(a->B, b->B, str));
  }
  PetscCall(PetscObjectStateIncrease((PetscObject)B));
  PetscFunctionReturn(0);
}

/* Default setup: preallocate with default parameters */
PetscErrorCode MatSetUp_MPIAIJ(Mat A) {
  PetscFunctionBegin;
  PetscCall(MatMPIAIJSetPreallocation(A, PETSC_DEFAULT, NULL, PETSC_DEFAULT, NULL));
  PetscFunctionReturn(0);
}

/*
   Computes the number of nonzeros per row needed for preallocation when X and Y
   have different nonzero structure. xi/xj and yi/yj are CSR row pointers and column
   indices; xltog/yltog map local column indices to global ones so the two column
   sets can be merged (union with duplicates counted once).
*/
PetscErrorCode MatAXPYGetPreallocation_MPIX_private(PetscInt m, const PetscInt *xi, const PetscInt *xj, const PetscInt *xltog, const PetscInt *yi, const PetscInt *yj, const PetscInt *yltog, PetscInt *nnz) {
  PetscInt i, j, k, nzx, nzy;

  PetscFunctionBegin;
  /* Set the number of nonzeros in the new matrix */
  for (i = 0; i < m; i++) {
    const PetscInt *xjj = xj + xi[i], *yjj = yj + yi[i];
    nzx    = xi[i + 1] - xi[i];
    nzy    = yi[i + 1] - yi[i];
    nnz[i] = 0;
    for (j = 0, k = 0; j < nzx; j++) {                                /* Point in X */
      for (; k < nzy && yltog[yjj[k]] < xltog[xjj[j]]; k++) nnz[i]++; /* Catch up to X */
      if (k < nzy && yltog[yjj[k]] == xltog[xjj[j]]) k++;             /* Skip duplicate */
      nnz[i]++;
    }
    for (; k < nzy; k++) nnz[i]++; /* remaining Y columns past the last X column */
  }
  PetscFunctionReturn(0);
}

/* This is the same as MatAXPYGetPreallocation_SeqAIJ, except that the local-to-global map is provided */
static PetscErrorCode MatAXPYGetPreallocation_MPIAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz) {
  PetscInt    m = Y->rmap->N;
  Mat_SeqAIJ *x = (Mat_SeqAIJ *)X->data;
  Mat_SeqAIJ *y = (Mat_SeqAIJ *)Y->data;

  PetscFunctionBegin;
  PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz));
  PetscFunctionReturn(0);
}

/* Y = a*X + Y; fast path for identical patterns, MatAXPY_Basic for subset patterns,
   and a freshly preallocated matrix (merged back into Y) otherwise */
PetscErrorCode MatAXPY_MPIAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str) {
  Mat_MPIAIJ *xx = (Mat_MPIAIJ *)X->data, *yy = (Mat_MPIAIJ *)Y->data;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscCall(MatAXPY(yy->A, a, xx->A, str));
    PetscCall(MatAXPY(yy->B, a, xx->B, str));
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    PetscCall(MatAXPY_Basic(Y, a, X, str));
  } else {
    Mat       B;
    PetscInt *nnz_d, *nnz_o;

    PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
    PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B)); 2079 PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name)); 2080 PetscCall(MatSetLayouts(B, Y->rmap, Y->cmap)); 2081 PetscCall(MatSetType(B, ((PetscObject)Y)->type_name)); 2082 PetscCall(MatAXPYGetPreallocation_SeqAIJ(yy->A, xx->A, nnz_d)); 2083 PetscCall(MatAXPYGetPreallocation_MPIAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o)); 2084 PetscCall(MatMPIAIJSetPreallocation(B, 0, nnz_d, 0, nnz_o)); 2085 PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str)); 2086 PetscCall(MatHeaderMerge(Y, &B)); 2087 PetscCall(PetscFree(nnz_d)); 2088 PetscCall(PetscFree(nnz_o)); 2089 } 2090 PetscFunctionReturn(0); 2091 } 2092 2093 PETSC_INTERN PetscErrorCode MatConjugate_SeqAIJ(Mat); 2094 2095 PetscErrorCode MatConjugate_MPIAIJ(Mat mat) { 2096 PetscFunctionBegin; 2097 if (PetscDefined(USE_COMPLEX)) { 2098 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data; 2099 2100 PetscCall(MatConjugate_SeqAIJ(aij->A)); 2101 PetscCall(MatConjugate_SeqAIJ(aij->B)); 2102 } 2103 PetscFunctionReturn(0); 2104 } 2105 2106 PetscErrorCode MatRealPart_MPIAIJ(Mat A) { 2107 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2108 2109 PetscFunctionBegin; 2110 PetscCall(MatRealPart(a->A)); 2111 PetscCall(MatRealPart(a->B)); 2112 PetscFunctionReturn(0); 2113 } 2114 2115 PetscErrorCode MatImaginaryPart_MPIAIJ(Mat A) { 2116 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2117 2118 PetscFunctionBegin; 2119 PetscCall(MatImaginaryPart(a->A)); 2120 PetscCall(MatImaginaryPart(a->B)); 2121 PetscFunctionReturn(0); 2122 } 2123 2124 PetscErrorCode MatGetRowMaxAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) { 2125 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2126 PetscInt i, *idxb = NULL, m = A->rmap->n; 2127 PetscScalar *va, *vv; 2128 Vec vB, vA; 2129 const PetscScalar *vb; 2130 2131 PetscFunctionBegin; 2132 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vA)); 2133 PetscCall(MatGetRowMaxAbs(a->A, vA, idx)); 2134 2135 PetscCall(VecGetArrayWrite(vA, &va)); 2136 if (idx) { 2137 for (i = 0; i 
< m; i++) { 2138 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 2139 } 2140 } 2141 2142 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vB)); 2143 PetscCall(PetscMalloc1(m, &idxb)); 2144 PetscCall(MatGetRowMaxAbs(a->B, vB, idxb)); 2145 2146 PetscCall(VecGetArrayWrite(v, &vv)); 2147 PetscCall(VecGetArrayRead(vB, &vb)); 2148 for (i = 0; i < m; i++) { 2149 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 2150 vv[i] = vb[i]; 2151 if (idx) idx[i] = a->garray[idxb[i]]; 2152 } else { 2153 vv[i] = va[i]; 2154 if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > a->garray[idxb[i]]) idx[i] = a->garray[idxb[i]]; 2155 } 2156 } 2157 PetscCall(VecRestoreArrayWrite(vA, &vv)); 2158 PetscCall(VecRestoreArrayWrite(vA, &va)); 2159 PetscCall(VecRestoreArrayRead(vB, &vb)); 2160 PetscCall(PetscFree(idxb)); 2161 PetscCall(VecDestroy(&vA)); 2162 PetscCall(VecDestroy(&vB)); 2163 PetscFunctionReturn(0); 2164 } 2165 2166 PetscErrorCode MatGetRowMinAbs_MPIAIJ(Mat A, Vec v, PetscInt idx[]) { 2167 Mat_MPIAIJ *mat = (Mat_MPIAIJ *)A->data; 2168 PetscInt m = A->rmap->n, n = A->cmap->n; 2169 PetscInt cstart = A->cmap->rstart, cend = A->cmap->rend; 2170 PetscInt *cmap = mat->garray; 2171 PetscInt *diagIdx, *offdiagIdx; 2172 Vec diagV, offdiagV; 2173 PetscScalar *a, *diagA, *offdiagA; 2174 const PetscScalar *ba, *bav; 2175 PetscInt r, j, col, ncols, *bi, *bj; 2176 Mat B = mat->B; 2177 Mat_SeqAIJ *b = (Mat_SeqAIJ *)B->data; 2178 2179 PetscFunctionBegin; 2180 /* When a process holds entire A and other processes have no entry */ 2181 if (A->cmap->N == n) { 2182 PetscCall(VecGetArrayWrite(v, &diagA)); 2183 PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV)); 2184 PetscCall(MatGetRowMinAbs(mat->A, diagV, idx)); 2185 PetscCall(VecDestroy(&diagV)); 2186 PetscCall(VecRestoreArrayWrite(v, &diagA)); 2187 PetscFunctionReturn(0); 2188 } else if (n == 0) { 2189 if (m) { 2190 PetscCall(VecGetArrayWrite(v, &a)); 2191 for (r = 0; r < m; r++) { 2192 a[r] = 
0.0;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(0);
  }

  PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMinAbs(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r]   = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    /* compare the implicit-zero candidate against the stored off-diagonal entries */
    for (j = 0; j < ncols; j++) {
      if (PetscAbsScalar(offdiagA[r]) > PetscAbsScalar(*ba)) {
        offdiagA[r]   = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  /* merge diagonal-block and off-diagonal-block minima; ties go to the smaller column index */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscAbsScalar(diagA[r]) < PetscAbsScalar(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscAbsScalar(diagA[r]) == PetscAbsScalar(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(0);
}

/* For each local row, find the minimum entry (comparing real parts; implicit zeros in
   the compressed off-diagonal block count) and optionally its global column index */
PetscErrorCode MatGetRowMin_MPIAIJ(Mat A, Vec v, PetscInt idx[]) {
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
  PetscInt           m = A->rmap->n, n = A->cmap->n;
  PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray;
  PetscInt          *diagIdx, *offdiagIdx;
  Vec                diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt           r, j, col, ncols, *bi, *bj;
  Mat                B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMin(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(0);
  } else if (n == 0) {
    /* no local columns: every local row is empty */
    if (m) {
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = PETSC_MAX_REAL;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(0);
  }

  /* NOTE(review): this uses PetscCalloc2 while MatGetRowMax_MPIAIJ uses PetscMalloc2 for
     the same arrays — confirm whether the zero-initialization is actually required here */
  PetscCall(PetscCalloc2(m, &diagIdx, m, &offdiagIdx));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV));
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV));
  PetscCall(MatGetRowMin(mat->A, diagV, diagIdx));

  /* Get offdiagIdx[] for implicit 0.0 */
  PetscCall(MatSeqAIJGetArrayRead(B, &bav));
  ba = bav;
  bi = b->i;
  bj = b->j;
  PetscCall(VecGetArrayWrite(offdiagV, &offdiagA));
  for (r = 0; r < m; r++) {
    ncols = bi[r + 1] - bi[r];
    if (ncols == A->cmap->N - n) { /* Brow is dense */
      offdiagA[r]   = *ba;
      offdiagIdx[r] = cmap[0];
    } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */
      offdiagA[r] = 0.0;

      /* Find first hole in the cmap */
      for (j = 0; j < ncols; j++) {
        col = cmap[bj[j]]; /* global column number = cmap[B column number] */
        if (col > j && j < cstart) {
          offdiagIdx[r] = j; /* global column number of first implicit 0.0 */
          break;
        } else if (col > j + n && j >= cstart) {
          offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */
          break;
        }
      }
      if (j == ncols && ncols < A->cmap->N - n) {
        /* a hole is outside compressed Bcols */
        if (ncols == 0) {
          if (cstart) {
            offdiagIdx[r] = 0;
          } else offdiagIdx[r] = cend;
        } else { /* ncols > 0 */
          offdiagIdx[r] = cmap[ncols - 1] + 1;
          if (offdiagIdx[r] == cstart) offdiagIdx[r] += n;
        }
      }
    }

    /* compare the implicit-zero candidate against the stored off-diagonal entries */
    for (j = 0; j < ncols; j++) {
      if (PetscRealPart(offdiagA[r]) > PetscRealPart(*ba)) {
        offdiagA[r]   = *ba;
        offdiagIdx[r] = cmap[*bj];
      }
      ba++;
      bj++;
    }
  }

  /* merge diagonal-block and off-diagonal-block minima; ties go to the smaller column index */
  PetscCall(VecGetArrayWrite(v, &a));
  PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA));
  for (r = 0; r < m; ++r) {
    if (PetscRealPart(diagA[r]) < PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) idx[r] = cstart + diagIdx[r];
    } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) {
      a[r] = diagA[r];
      if (idx) {
        if (cstart + diagIdx[r] <= offdiagIdx[r]) {
          idx[r] = cstart + diagIdx[r];
        } else idx[r] = offdiagIdx[r];
      }
    } else {
      a[r] = offdiagA[r];
      if (idx) idx[r] = offdiagIdx[r];
    }
  }
  PetscCall(MatSeqAIJRestoreArrayRead(B, &bav));
  PetscCall(VecRestoreArrayWrite(v, &a));
  PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA));
  PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA));
  PetscCall(VecDestroy(&diagV));
  PetscCall(VecDestroy(&offdiagV));
  PetscCall(PetscFree2(diagIdx, offdiagIdx));
  PetscFunctionReturn(0);
}

/* For each local row, find the maximum entry (comparing real parts; implicit zeros in
   the compressed off-diagonal block count) and optionally its global column index */
PetscErrorCode MatGetRowMax_MPIAIJ(Mat A, Vec v, PetscInt idx[]) {
  Mat_MPIAIJ        *mat = (Mat_MPIAIJ *)A->data;
  PetscInt           m = A->rmap->n, n = A->cmap->n;
  PetscInt           cstart = A->cmap->rstart, cend = A->cmap->rend;
  PetscInt          *cmap = mat->garray;
  PetscInt          *diagIdx, *offdiagIdx;
  Vec                diagV, offdiagV;
  PetscScalar       *a, *diagA, *offdiagA;
  const PetscScalar *ba, *bav;
  PetscInt           r, j, col, ncols, *bi, *bj;
  Mat                B = mat->B;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data;

  PetscFunctionBegin;
  /* When a process holds entire A and other processes have no entry */
  if (A->cmap->N == n) {
    PetscCall(VecGetArrayWrite(v, &diagA));
    PetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, m, diagA, &diagV));
    PetscCall(MatGetRowMax(mat->A, diagV, idx));
    PetscCall(VecDestroy(&diagV));
    PetscCall(VecRestoreArrayWrite(v, &diagA));
    PetscFunctionReturn(0);
  } else if (n == 0) {
    /* no local columns: every local row is empty */
    if (m) {
      PetscCall(VecGetArrayWrite(v, &a));
      for (r = 0; r < m; r++) {
        a[r] = PETSC_MIN_REAL;
        if (idx) idx[r] = -1;
      }
      PetscCall(VecRestoreArrayWrite(v, &a));
    }
    PetscFunctionReturn(0);
  }

  PetscCall(PetscMalloc2(m, &diagIdx, m, &offdiagIdx));
PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &diagV)); 2432 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &offdiagV)); 2433 PetscCall(MatGetRowMax(mat->A, diagV, diagIdx)); 2434 2435 /* Get offdiagIdx[] for implicit 0.0 */ 2436 PetscCall(MatSeqAIJGetArrayRead(B, &bav)); 2437 ba = bav; 2438 bi = b->i; 2439 bj = b->j; 2440 PetscCall(VecGetArrayWrite(offdiagV, &offdiagA)); 2441 for (r = 0; r < m; r++) { 2442 ncols = bi[r + 1] - bi[r]; 2443 if (ncols == A->cmap->N - n) { /* Brow is dense */ 2444 offdiagA[r] = *ba; 2445 offdiagIdx[r] = cmap[0]; 2446 } else { /* Brow is sparse so already KNOW maximum is 0.0 or higher */ 2447 offdiagA[r] = 0.0; 2448 2449 /* Find first hole in the cmap */ 2450 for (j = 0; j < ncols; j++) { 2451 col = cmap[bj[j]]; /* global column number = cmap[B column number] */ 2452 if (col > j && j < cstart) { 2453 offdiagIdx[r] = j; /* global column number of first implicit 0.0 */ 2454 break; 2455 } else if (col > j + n && j >= cstart) { 2456 offdiagIdx[r] = j + n; /* global column number of first implicit 0.0 */ 2457 break; 2458 } 2459 } 2460 if (j == ncols && ncols < A->cmap->N - n) { 2461 /* a hole is outside compressed Bcols */ 2462 if (ncols == 0) { 2463 if (cstart) { 2464 offdiagIdx[r] = 0; 2465 } else offdiagIdx[r] = cend; 2466 } else { /* ncols > 0 */ 2467 offdiagIdx[r] = cmap[ncols - 1] + 1; 2468 if (offdiagIdx[r] == cstart) offdiagIdx[r] += n; 2469 } 2470 } 2471 } 2472 2473 for (j = 0; j < ncols; j++) { 2474 if (PetscRealPart(offdiagA[r]) < PetscRealPart(*ba)) { 2475 offdiagA[r] = *ba; 2476 offdiagIdx[r] = cmap[*bj]; 2477 } 2478 ba++; 2479 bj++; 2480 } 2481 } 2482 2483 PetscCall(VecGetArrayWrite(v, &a)); 2484 PetscCall(VecGetArrayRead(diagV, (const PetscScalar **)&diagA)); 2485 for (r = 0; r < m; ++r) { 2486 if (PetscRealPart(diagA[r]) > PetscRealPart(offdiagA[r])) { 2487 a[r] = diagA[r]; 2488 if (idx) idx[r] = cstart + diagIdx[r]; 2489 } else if (PetscRealPart(diagA[r]) == PetscRealPart(offdiagA[r])) { 2490 a[r] = diagA[r]; 2491 if (idx) { 2492 
if (cstart + diagIdx[r] <= offdiagIdx[r]) { 2493 idx[r] = cstart + diagIdx[r]; 2494 } else idx[r] = offdiagIdx[r]; 2495 } 2496 } else { 2497 a[r] = offdiagA[r]; 2498 if (idx) idx[r] = offdiagIdx[r]; 2499 } 2500 } 2501 PetscCall(MatSeqAIJRestoreArrayRead(B, &bav)); 2502 PetscCall(VecRestoreArrayWrite(v, &a)); 2503 PetscCall(VecRestoreArrayRead(diagV, (const PetscScalar **)&diagA)); 2504 PetscCall(VecRestoreArrayWrite(offdiagV, &offdiagA)); 2505 PetscCall(VecDestroy(&diagV)); 2506 PetscCall(VecDestroy(&offdiagV)); 2507 PetscCall(PetscFree2(diagIdx, offdiagIdx)); 2508 PetscFunctionReturn(0); 2509 } 2510 2511 PetscErrorCode MatGetSeqNonzeroStructure_MPIAIJ(Mat mat, Mat *newmat) { 2512 Mat *dummy; 2513 2514 PetscFunctionBegin; 2515 PetscCall(MatCreateSubMatrix_MPIAIJ_All(mat, MAT_DO_NOT_GET_VALUES, MAT_INITIAL_MATRIX, &dummy)); 2516 *newmat = *dummy; 2517 PetscCall(PetscFree(dummy)); 2518 PetscFunctionReturn(0); 2519 } 2520 2521 PetscErrorCode MatInvertBlockDiagonal_MPIAIJ(Mat A, const PetscScalar **values) { 2522 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 2523 2524 PetscFunctionBegin; 2525 PetscCall(MatInvertBlockDiagonal(a->A, values)); 2526 A->factorerrortype = a->A->factorerrortype; 2527 PetscFunctionReturn(0); 2528 } 2529 2530 static PetscErrorCode MatSetRandom_MPIAIJ(Mat x, PetscRandom rctx) { 2531 Mat_MPIAIJ *aij = (Mat_MPIAIJ *)x->data; 2532 2533 PetscFunctionBegin; 2534 PetscCheck(x->assembled || x->preallocated, PetscObjectComm((PetscObject)x), PETSC_ERR_ARG_WRONGSTATE, "MatSetRandom on an unassembled and unpreallocated MATMPIAIJ is not allowed"); 2535 PetscCall(MatSetRandom(aij->A, rctx)); 2536 if (x->assembled) { 2537 PetscCall(MatSetRandom(aij->B, rctx)); 2538 } else { 2539 PetscCall(MatSetRandomSkipColumnRange_SeqAIJ_Private(aij->B, x->cmap->rstart, x->cmap->rend, rctx)); 2540 } 2541 PetscCall(MatAssemblyBegin(x, MAT_FINAL_ASSEMBLY)); 2542 PetscCall(MatAssemblyEnd(x, MAT_FINAL_ASSEMBLY)); 2543 PetscFunctionReturn(0); 2544 } 2545 2546 PetscErrorCode 
MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ(Mat A, PetscBool sc) {
  PetscFunctionBegin;
  /* swap the increase-overlap implementation in the ops table */
  if (sc) A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ_Scalable;
  else A->ops->increaseoverlap = MatIncreaseOverlap_MPIAIJ;
  PetscFunctionReturn(0);
}

/*@
  MatMPIAIJGetNumberNonzeros - gets the number of nonzeros in the matrix on this MPI rank

  Not collective

  Input Parameter:
. A - the matrix

  Output Parameter:
. nz - the number of nonzeros

  Level: advanced

@*/
PetscErrorCode MatMPIAIJGetNumberNonzeros(Mat A, PetscCount *nz) {
  Mat_MPIAIJ *maij = (Mat_MPIAIJ *)A->data;
  Mat_SeqAIJ *aaij = (Mat_SeqAIJ *)maij->A->data, *baij = (Mat_SeqAIJ *)maij->B->data;

  PetscFunctionBegin;
  /* i[nlocalrows] is the CSR row-offset past the last row, i.e. the local nonzero count */
  *nz = aaij->i[A->rmap->n] + baij->i[A->rmap->n];
  PetscFunctionReturn(0);
}

/*@
  MatMPIAIJSetUseScalableIncreaseOverlap - Determine if the matrix uses a scalable algorithm to compute the overlap

  Collective on Mat

  Input Parameters:
+ A - the matrix
- sc - PETSC_TRUE indicates use the scalable algorithm (default is not to use the scalable algorithm)

  Level: advanced

@*/
PetscErrorCode MatMPIAIJSetUseScalableIncreaseOverlap(Mat A, PetscBool sc) {
  PetscFunctionBegin;
  /* PetscTryMethod: silently a no-op for subtypes that do not register the method */
  PetscTryMethod(A, "MatMPIAIJSetUseScalableIncreaseOverlap_C", (Mat, PetscBool), (A, sc));
  PetscFunctionReturn(0);
}

/* Process MPIAIJ-specific runtime options (currently only the scalable-overlap switch). */
PetscErrorCode MatSetFromOptions_MPIAIJ(Mat A, PetscOptionItems *PetscOptionsObject) {
  PetscBool sc = PETSC_FALSE, flg;

  PetscFunctionBegin;
  PetscOptionsHeadBegin(PetscOptionsObject, "MPIAIJ options");
  /* default the option to the currently installed implementation */
  if (A->ops->increaseoverlap == MatIncreaseOverlap_MPIAIJ_Scalable) sc = PETSC_TRUE;
  PetscCall(PetscOptionsBool("-mat_increase_overlap_scalable", "Use a scalable algorithm to compute the overlap", "MatIncreaseOverlap", sc, &sc, &flg));
  if (flg) PetscCall(MatMPIAIJSetUseScalableIncreaseOverlap(A, sc));
  PetscOptionsHeadEnd();
  PetscFunctionReturn(0);
}

/* Y += a*I. Ensures the diagonal block has at least one entry per row preallocated
   before delegating to MatShift_Basic(). */
PetscErrorCode MatShift_MPIAIJ(Mat Y, PetscScalar a) {
  Mat_MPIAIJ *maij = (Mat_MPIAIJ *)Y->data;
  Mat_SeqAIJ *aij  = (Mat_SeqAIJ *)maij->A->data;

  PetscFunctionBegin;
  if (!Y->preallocated) {
    PetscCall(MatMPIAIJSetPreallocation(Y, 1, NULL, 0, NULL));
  } else if (!aij->nz) { /* It does not matter if diagonals of Y only partially lie in maij->A. We just need an estimated preallocation. */
    PetscInt nonew = aij->nonew;
    PetscCall(MatSeqAIJSetPreallocation(maij->A, 1, NULL));
    aij->nonew = nonew; /* preserve the new-nonzero policy clobbered by re-preallocation */
  }
  PetscCall(MatShift_Basic(Y, a));
  PetscFunctionReturn(0);
}

/* Reports whether a diagonal entry is missing; the check is local to the diagonal block,
   so the returned row index only needs shifting by this rank's row start. */
PetscErrorCode MatMissingDiagonal_MPIAIJ(Mat A, PetscBool *missing, PetscInt *d) {
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
  PetscCall(MatMissingDiagonal(a->A, missing, d));
  if (d) {
    PetscInt rstart;
    PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
    *d += rstart; /* convert local row index to global */
  }
  PetscFunctionReturn(0);
}

/* Thin wrapper: variable-block diagonal inversion acts only on the local diagonal block. */
PetscErrorCode MatInvertVariableBlockDiagonal_MPIAIJ(Mat A, PetscInt nblocks, const PetscInt *bsizes, PetscScalar *diag) {
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatInvertVariableBlockDiagonal(a->A, nblocks, bsizes, diag));
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------*/
/* Virtual function table for MATMPIAIJ; the numeric comments are the slot indices in _MatOps. */
static struct _MatOps MatOps_Values = {MatSetValues_MPIAIJ,
                                       MatGetRow_MPIAIJ,
                                       MatRestoreRow_MPIAIJ,
                                       MatMult_MPIAIJ,
                                       /* 4*/ MatMultAdd_MPIAIJ,
                                       MatMultTranspose_MPIAIJ,
                                       MatMultTransposeAdd_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*10*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_MPIAIJ,
                                       MatTranspose_MPIAIJ,
                                       /*15*/ MatGetInfo_MPIAIJ,
                                       MatEqual_MPIAIJ,
                                       MatGetDiagonal_MPIAIJ,
                                       MatDiagonalScale_MPIAIJ,
                                       MatNorm_MPIAIJ,
                                       /*20*/ MatAssemblyBegin_MPIAIJ,
                                       MatAssemblyEnd_MPIAIJ,
                                       MatSetOption_MPIAIJ,
                                       MatZeroEntries_MPIAIJ,
                                       /*24*/ MatZeroRows_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*29*/ MatSetUp_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIAIJ,
                                       NULL,
                                       /*34*/ MatDuplicate_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*39*/ MatAXPY_MPIAIJ,
                                       MatCreateSubMatrices_MPIAIJ,
                                       MatIncreaseOverlap_MPIAIJ,
                                       MatGetValues_MPIAIJ,
                                       MatCopy_MPIAIJ,
                                       /*44*/ MatGetRowMax_MPIAIJ,
                                       MatScale_MPIAIJ,
                                       MatShift_MPIAIJ,
                                       MatDiagonalSet_MPIAIJ,
                                       MatZeroRowsColumns_MPIAIJ,
                                       /*49*/ MatSetRandom_MPIAIJ,
                                       MatGetRowIJ_MPIAIJ,
                                       MatRestoreRowIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*54*/ MatFDColoringCreate_MPIXAIJ,
                                       NULL,
                                       MatSetUnfactored_MPIAIJ,
                                       MatPermute_MPIAIJ,
                                       NULL,
                                       /*59*/ MatCreateSubMatrix_MPIAIJ,
                                       MatDestroy_MPIAIJ,
                                       MatView_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*64*/ NULL,
                                       MatMatMatMultNumeric_MPIAIJ_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*69*/ MatGetRowMaxAbs_MPIAIJ,
                                       MatGetRowMinAbs_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*75*/ MatFDColoringApply_AIJ,
                                       MatSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFindZeroDiagonals_MPIAIJ,
                                       /*80*/ NULL,
                                       NULL,
                                       NULL,
                                       /*83*/ MatLoad_MPIAIJ,
                                       MatIsSymmetric_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*89*/ NULL,
                                       NULL,
                                       MatMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*94*/ MatPtAPNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatBindToCPU_MPIAIJ,
                                       /*99*/ MatProductSetFromOptions_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIAIJ,
                                       NULL,
                                       /*104*/ MatSetValuesRow_MPIAIJ,
                                       MatRealPart_MPIAIJ,
                                       MatImaginaryPart_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*109*/ NULL,
                                       NULL,
                                       MatGetRowMin_MPIAIJ,
                                       NULL,
                                       MatMissingDiagonal_MPIAIJ,
                                       /*114*/ MatGetSeqNonzeroStructure_MPIAIJ,
                                       NULL,
                                       MatGetGhosts_MPIAIJ,
                                       NULL,
                                       NULL,
                                       /*119*/ MatMultDiagonalBlock_MPIAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatGetMultiProcBlock_MPIAIJ,
                                       /*124*/ MatFindNonzeroRows_MPIAIJ,
                                       MatGetColumnReductions_MPIAIJ,
                                       MatInvertBlockDiagonal_MPIAIJ,
                                       MatInvertVariableBlockDiagonal_MPIAIJ,
                                       MatCreateSubMatricesMPI_MPIAIJ,
                                       /*129*/ NULL,
                                       NULL,
                                       NULL,
                                       MatTransposeMatMultNumeric_MPIAIJ_MPIAIJ,
                                       NULL,
                                       /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*139*/ MatSetBlockSizes_MPIAIJ,
                                       NULL,
                                       NULL,
                                       MatFDColoringSetUp_MPIXAIJ,
                                       MatFindOffBlockDiagonalEntries_MPIAIJ,
                                       MatCreateMPIMatConcatenateSeqMat_MPIAIJ,
                                       /*145*/ NULL,
                                       NULL,
                                       NULL,
                                       MatCreateGraph_Simple_AIJ,
                                       MatFilter_AIJ,
                                       /*150*/ NULL};

/* ----------------------------------------------------------------------------------------*/

/* Stash a copy of the current numerical values of both local blocks (pairs with
   MatRetrieveValues_MPIAIJ below). */
PetscErrorCode MatStoreValues_MPIAIJ(Mat mat) {
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatStoreValues(aij->A));
  PetscCall(MatStoreValues(aij->B));
  PetscFunctionReturn(0);
}

/* Restore the numerical values previously saved by MatStoreValues_MPIAIJ. */
PetscErrorCode MatRetrieveValues_MPIAIJ(Mat mat) {
  Mat_MPIAIJ *aij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatRetrieveValues(aij->A));
  PetscCall(MatRetrieveValues(aij->B));
  PetscFunctionReturn(0);
}

/* Preallocate the diagonal (d_nz/d_nnz) and off-diagonal (o_nz/o_nnz) local blocks of an
   MPIAIJ matrix; tears down any previous column map / scatter state so the matrix can be
   re-preallocated. */
PetscErrorCode MatMPIAIJSetPreallocation_MPIAIJ(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[]) {
  Mat_MPIAIJ *b;
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  b = (Mat_MPIAIJ *)B->data;

#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscTableDestroy(&b->colmap));
#else
  PetscCall(PetscFree(b->colmap));
#endif
  PetscCall(PetscFree(b->garray));
  PetscCall(VecDestroy(&b->lvec));
  PetscCall(VecScatterDestroy(&b->Mvctx));

  /* Because the B will have been resized we simply destroy it and create a new one each time */
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
  PetscCall(MatDestroy(&b->B));
  PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
  /* uniprocess case: everything lives in the diagonal block, so B gets zero columns */
  PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
  PetscCall(MatSetBlockSizesFromMats(b->B, B, B));
  PetscCall(MatSetType(b->B, MATSEQAIJ));
  PetscCall(PetscLogObjectParent((PetscObject)B, (PetscObject)b->B));

  if (!B->preallocated) {
    /* first preallocation: the diagonal block does not exist yet */
    PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
    PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
    PetscCall(MatSetBlockSizesFromMats(b->A, B, B));
    PetscCall(MatSetType(b->A, MATSEQAIJ));
    PetscCall(PetscLogObjectParent((PetscObject)B, (PetscObject)b->A));
  }

  PetscCall(MatSeqAIJSetPreallocation(b->A, d_nz, d_nnz));
  PetscCall(MatSeqAIJSetPreallocation(b->B, o_nz, o_nnz));
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/* Reset (not destroy) the preallocation of both local blocks so the matrix can be refilled;
   the column map, ghost vector, and scatter are rebuilt at the next assembly. */
PetscErrorCode MatResetPreallocation_MPIAIJ(Mat B) {
  Mat_MPIAIJ *b;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  b = (Mat_MPIAIJ *)B->data;

#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscTableDestroy(&b->colmap));
#else
  PetscCall(PetscFree(b->colmap));
#endif
  PetscCall(PetscFree(b->garray));
  PetscCall(VecDestroy(&b->lvec));
  PetscCall(VecScatterDestroy(&b->Mvctx));

  PetscCall(MatResetPreallocation(b->A));
  PetscCall(MatResetPreallocation(b->B));
  PetscFunctionReturn(0);
}

/* end of MatResetPreallocation_MPIAIJ state flags, then deep-copy duplication */
B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/* Duplicate an MPIAIJ matrix: copies layouts, the (optional) column map and garray,
   the ghost vector/scatter when they exist, and both local blocks (values per cpvalues). */
PetscErrorCode MatDuplicate_MPIAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat) {
  Mat         mat;
  Mat_MPIAIJ *a, *oldmat = (Mat_MPIAIJ *)matin->data;

  PetscFunctionBegin;
  *newmat = NULL;
  PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
  PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
  PetscCall(MatSetBlockSizesFromMats(mat, matin, matin));
  PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));
  a = (Mat_MPIAIJ *)mat->data;

  mat->factortype   = matin->factortype;
  mat->assembled    = matin->assembled;
  mat->insertmode   = NOT_SET_VALUES;
  mat->preallocated = matin->preallocated;

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = NULL; /* per-matrix MatGetRow() scratch is not shared */
  a->rowvalues    = NULL;
  a->getrowactive = PETSC_FALSE;

  PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
  PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));

  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    PetscCall(PetscTableCreateCopy(oldmat->colmap, &a->colmap));
#else
    PetscCall(PetscMalloc1(mat->cmap->N, &a->colmap));
    PetscCall(PetscLogObjectMemory((PetscObject)mat, (mat->cmap->N) * sizeof(PetscInt)));
    PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, mat->cmap->N));
#endif
  } else a->colmap = NULL;
  if (oldmat->garray) {
    PetscInt len;
    len = oldmat->B->cmap->n;
    PetscCall(PetscMalloc1(len + 1, &a->garray));
    PetscCall(PetscLogObjectMemory((PetscObject)mat, len * sizeof(PetscInt)));
    if (len) PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
  } else a->garray = NULL;

  /* It may happen MatDuplicate is called with a
     non-assembled matrix
     In fact, MatDuplicate only requires the matrix to be preallocated
     This may happen inside a DMCreateMatrix_Shell */
  if (oldmat->lvec) {
    PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
    PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)a->lvec));
  }
  if (oldmat->Mvctx) {
    PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx));
    PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)a->Mvctx));
  }
  PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
  PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)a->A));
  PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
  PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)a->B));
  PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
  *newmat = mat;
  PetscFunctionReturn(0);
}

/* Load an MPIAIJ matrix from a viewer; dispatches on viewer type (binary or HDF5). */
PetscErrorCode MatLoad_MPIAIJ(Mat newMat, PetscViewer viewer) {
  PetscBool isbinary, ishdf5;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(newMat, MAT_CLASSID, 1);
  PetscValidHeaderSpecific(viewer, PETSC_VIEWER_CLASSID, 2);
  /* force binary viewer to load .info file if it has not yet done so */
  PetscCall(PetscViewerSetUp(viewer));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERHDF5, &ishdf5));
  if (isbinary) {
    PetscCall(MatLoad_MPIAIJ_Binary(newMat, viewer));
  } else if (ishdf5) {
#if defined(PETSC_HAVE_HDF5)
    PetscCall(MatLoad_AIJ_HDF5(newMat, viewer));
#else
    SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "HDF5 not supported in this build.\nPlease reconfigure using --download-hdf5");
#endif
  } else {
    SETERRQ(PetscObjectComm((PetscObject)newMat), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)newMat)->type_name);
  }
  PetscFunctionReturn(0);
}

/* Load an MPIAIJ matrix from PETSc's binary format: header (classid, M, N, nz), per-row
   lengths, then column indices and values; each rank reads its own row slice collectively. */
PetscErrorCode MatLoad_MPIAIJ_Binary(Mat mat, PetscViewer viewer) {
  PetscInt     header[4], M, N, m, nz, rows, cols, sum, i;
  PetscInt    *rowidxs, *colidxs;
  PetscScalar *matvals;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  /* read in matrix header */
  PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
  PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
  M  = header[1];
  N  = header[2];
  nz = header[3];
  PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
  PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
  PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIAIJ");

  /* set block sizes from the viewer's .info file */
  PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
  /* set global sizes if not set already */
  if (mat->rmap->N < 0) mat->rmap->N = M;
  if (mat->cmap->N < 0) mat->cmap->N = N;
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));

  /* check if the matrix sizes are correct */
  PetscCall(MatGetSize(mat, &rows, &cols));
  PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);

  /* read in row lengths and build row indices */
  PetscCall(MatGetLocalSize(mat, &m, NULL));
  PetscCall(PetscMalloc1(m + 1, &rowidxs));
  PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT));
  rowidxs[0] = 0;
  /* prefix-sum the row lengths into local CSR row offsets */
  for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
  PetscCall(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer)));
  PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);
  /* read in column indices and matrix values */
  PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals));
  PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
  PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));
  /* store matrix indices and values */
  PetscCall(MatMPIAIJSetPreallocationCSR(mat, rowidxs, colidxs, matvals));
  PetscCall(PetscFree(rowidxs));
  PetscCall(PetscFree2(colidxs, matvals));
  PetscFunctionReturn(0);
}

/* Not scalable because of ISAllGather() unless getting all columns.
*/
/* Gather iscol onto every rank as a sequential IS; when iscol is the full column range on
   all ranks (detected collectively), the gather is skipped and an identity stride is built. */
PetscErrorCode ISGetSeqIS_Private(Mat mat, IS iscol, IS *isseq) {
  IS          iscol_local;
  PetscBool   isstride;
  PetscMPIInt lisstride = 0, gisstride;

  PetscFunctionBegin;
  /* check if we are grabbing all columns*/
  PetscCall(PetscObjectTypeCompare((PetscObject)iscol, ISSTRIDE, &isstride));

  if (isstride) {
    PetscInt start, len, mstart, mlen;
    PetscCall(ISStrideGetInfo(iscol, &start, NULL));
    PetscCall(ISGetLocalSize(iscol, &len));
    PetscCall(MatGetOwnershipRangeColumn(mat, &mstart, &mlen));
    if (mstart == start && mlen - mstart == len) lisstride = 1;
  }

  /* collective: all ranks must agree the selection is the identity before optimizing */
  PetscCall(MPIU_Allreduce(&lisstride, &gisstride, 1, MPI_INT, MPI_MIN, PetscObjectComm((PetscObject)mat)));
  if (gisstride) {
    PetscInt N;
    PetscCall(MatGetSize(mat, NULL, &N));
    PetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &iscol_local));
    PetscCall(ISSetIdentity(iscol_local));
    PetscCall(PetscInfo(mat, "Optimizing for obtaining all columns of the matrix; skipping ISAllGather()\n"));
  } else {
    PetscInt cbs;
    PetscCall(ISGetBlockSize(iscol, &cbs));
    PetscCall(ISAllGather(iscol, &iscol_local));
    PetscCall(ISSetBlockSize(iscol_local, cbs));
  }

  *isseq = iscol_local;
  PetscFunctionReturn(0);
}

/*
 Used by MatCreateSubMatrix_MPIAIJ_SameRowColDist() to avoid ISAllGather() and global size of iscol_local
 (see MatCreateSubMatrix_MPIAIJ_nonscalable)

 Input Parameters:
   mat - matrix
   isrow - parallel row index set; its local indices are a subset of local columns of mat,
           i.e., mat->rstart <= isrow[i] < mat->rend
   iscol - parallel column index set; its local indices are a subset of local columns of mat,
           i.e., mat->cstart <= iscol[i] < mat->cend
 Output Parameter:
   isrow_d,iscol_d - sequential row and column index sets for retrieving mat->A
   iscol_o - sequential column index set for retrieving mat->B
   garray - column map; garray[i] indicates global location of iscol_o[i] in iscol
*/
PetscErrorCode ISGetSeqIS_SameColDist_Private(Mat mat, IS isrow, IS iscol, IS *isrow_d, IS *iscol_d, IS *iscol_o, const PetscInt *garray[]) {
  Vec             x, cmap;
  const PetscInt *is_idx;
  PetscScalar    *xarray, *cmaparray;
  PetscInt        ncols, isstart, *idx, m, rstart, *cmap1, count;
  Mat_MPIAIJ     *a    = (Mat_MPIAIJ *)mat->data;
  Mat             B    = a->B;
  Vec             lvec = a->lvec, lcmap;
  PetscInt        i, cstart, cend, Bn = B->cmap->N;
  MPI_Comm        comm;
  VecScatter      Mvctx = a->Mvctx;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  PetscCall(ISGetLocalSize(iscol, &ncols));

  /* (1) iscol is a sub-column vector of mat, pad it with '-1.' to form a full vector x */
  PetscCall(MatCreateVecs(mat, &x, NULL));
  PetscCall(VecSet(x, -1.0));
  PetscCall(VecDuplicate(x, &cmap));
  PetscCall(VecSet(cmap, -1.0));

  /* Get start indices */
  PetscCallMPI(MPI_Scan(&ncols, &isstart, 1, MPIU_INT, MPI_SUM, comm));
  isstart -= ncols; /* exclusive prefix sum: global offset of this rank's first iscol entry */
  PetscCall(MatGetOwnershipRangeColumn(mat, &cstart, &cend));

  PetscCall(ISGetIndices(iscol, &is_idx));
  PetscCall(VecGetArray(x, &xarray));
  PetscCall(VecGetArray(cmap, &cmaparray));
  PetscCall(PetscMalloc1(ncols, &idx));
  for (i = 0; i < ncols; i++) {
    xarray[is_idx[i] - cstart]    = (PetscScalar)is_idx[i];
    cmaparray[is_idx[i] - cstart] = i + isstart; /* global index of iscol[i] */
    idx[i]                        = is_idx[i] - cstart; /* local index of iscol[i]  */
  }
  PetscCall(VecRestoreArray(x, &xarray));
  PetscCall(VecRestoreArray(cmap, &cmaparray));
  PetscCall(ISRestoreIndices(iscol, &is_idx));

  /* Get iscol_d */
  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, iscol_d));
  PetscCall(ISGetBlockSize(iscol, &i));
  PetscCall(ISSetBlockSize(*iscol_d, i));

  /* Get isrow_d */
  PetscCall(ISGetLocalSize(isrow, &m));
  rstart = mat->rmap->rstart;
  PetscCall(PetscMalloc1(m, &idx));
  PetscCall(ISGetIndices(isrow, &is_idx));
  for (i = 0; i < m; i++) idx[i] = is_idx[i] - rstart;
  PetscCall(ISRestoreIndices(isrow, &is_idx));

  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, m, idx, PETSC_OWN_POINTER, isrow_d));
  PetscCall(ISGetBlockSize(isrow, &i));
  PetscCall(ISSetBlockSize(*isrow_d, i));

  /* (2) Scatter x and cmap using aij->Mvctx to get their off-process portions (see MatMult_MPIAIJ) */
  PetscCall(VecScatterBegin(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(Mvctx, x, lvec, INSERT_VALUES, SCATTER_FORWARD));

  PetscCall(VecDuplicate(lvec, &lcmap));

  PetscCall(VecScatterBegin(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(Mvctx, cmap, lcmap, INSERT_VALUES, SCATTER_FORWARD));

  /* (3) create sequential iscol_o (a subset of iscol) and isgarray */
  /* off-process column indices */
  count = 0;
  PetscCall(PetscMalloc1(Bn, &idx));
  PetscCall(PetscMalloc1(Bn, &cmap1));

  PetscCall(VecGetArray(lvec, &xarray));
  PetscCall(VecGetArray(lcmap, &cmaparray));
  for (i = 0; i < Bn; i++) {
    /* entries still at -1 were not selected by any rank's iscol */
    if (PetscRealPart(xarray[i]) > -1.0) {
      idx[count]   = i;                                     /* local column index in off-diagonal part B */
      cmap1[count] = (PetscInt)PetscRealPart(cmaparray[i]); /* column index in submat */
      count++;
    }
  }
  PetscCall(VecRestoreArray(lvec, &xarray));
  PetscCall(VecRestoreArray(lcmap, &cmaparray));

  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_COPY_VALUES, iscol_o));
  /* cannot ensure iscol_o has same blocksize as iscol!
*/

  PetscCall(PetscFree(idx));
  *garray = cmap1; /* ownership of cmap1 transfers to the caller */

  PetscCall(VecDestroy(&x));
  PetscCall(VecDestroy(&cmap));
  PetscCall(VecDestroy(&lcmap));
  PetscFunctionReturn(0);
}

/* isrow and iscol have same processor distribution as mat, output *submat is a submatrix of local mat */
PetscErrorCode MatCreateSubMatrix_MPIAIJ_SameRowColDist(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *submat) {
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)mat->data, *asub;
  Mat         M = NULL;
  MPI_Comm    comm;
  IS          iscol_d, isrow_d, iscol_o;
  Mat         Asub = NULL, Bsub = NULL;
  PetscInt    n;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));

  if (call == MAT_REUSE_MATRIX) {
    /* Retrieve isrow_d, iscol_d and iscol_o from submat */
    PetscCall(PetscObjectQuery((PetscObject)*submat, "isrow_d", (PetscObject *)&isrow_d));
    PetscCheck(isrow_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "isrow_d passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_d", (PetscObject *)&iscol_d));
    PetscCheck(iscol_d, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_d passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*submat, "iscol_o", (PetscObject *)&iscol_o));
    PetscCheck(iscol_o, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "iscol_o passed in was not used before, cannot reuse");

    /* Update diagonal and off-diagonal portions of submat */
    asub = (Mat_MPIAIJ *)(*submat)->data;
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->A));
    PetscCall(ISGetLocalSize(iscol_o, &n));
    if (n) { PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_REUSE_MATRIX, &asub->B)); }
    PetscCall(MatAssemblyBegin(*submat, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(*submat, MAT_FINAL_ASSEMBLY));

  } else { /* call == MAT_INITIAL_MATRIX) */
    const PetscInt *garray;
    PetscInt        BsubN;

    /* Create isrow_d, iscol_d, iscol_o and isgarray (replace isgarray with array?) */
    PetscCall(ISGetSeqIS_SameColDist_Private(mat, isrow, iscol, &isrow_d, &iscol_d, &iscol_o, &garray));

    /* Create local submatrices Asub and Bsub */
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->A, isrow_d, iscol_d, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Asub));
    PetscCall(MatCreateSubMatrix_SeqAIJ(a->B, isrow_d, iscol_o, PETSC_DECIDE, MAT_INITIAL_MATRIX, &Bsub));

    /* Create submatrix M */
    PetscCall(MatCreateMPIAIJWithSeqAIJ(comm, Asub, Bsub, garray, &M));

    /* If Bsub has empty columns, compress iscol_o such that it will retrieve condensed Bsub from a->B during reuse */
    asub = (Mat_MPIAIJ *)M->data;

    PetscCall(ISGetLocalSize(iscol_o, &BsubN));
    n = asub->B->cmap->N;
    if (BsubN > n) {
      /* This case can be tested using ~petsc/src/tao/bound/tutorials/runplate2_3 */
      const PetscInt *idx;
      PetscInt        i, j, *idx_new, *subgarray = asub->garray;
      PetscCall(PetscInfo(M, "submatrix Bn %" PetscInt_FMT " != BsubN %" PetscInt_FMT ", update iscol_o\n", n, BsubN));

      /* walk both (sorted) garrays in lockstep, keeping only iscol_o entries whose
         global column survived in the condensed Bsub */
      PetscCall(PetscMalloc1(n, &idx_new));
      j = 0;
      PetscCall(ISGetIndices(iscol_o, &idx));
      for (i = 0; i < n; i++) {
        if (j >= BsubN) break;
        while (subgarray[i] > garray[j]) j++;

        if (subgarray[i] == garray[j]) {
          idx_new[i] = idx[j++];
        } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "subgarray[%" PetscInt_FMT "]=%" PetscInt_FMT " cannot < garray[%" PetscInt_FMT "]=%" PetscInt_FMT, i, subgarray[i], j, garray[j]);
      }
      PetscCall(ISRestoreIndices(iscol_o, &idx));

      PetscCall(ISDestroy(&iscol_o));
      PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idx_new, PETSC_OWN_POINTER, &iscol_o));

    } else if (BsubN < n) {
      SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Columns of Bsub (%" PetscInt_FMT ") cannot be smaller than B's (%" PetscInt_FMT ")", BsubN, asub->B->cmap->N);
    }

    PetscCall(PetscFree(garray));
    *submat = M;

    /* Save isrow_d, iscol_d and iscol_o used in processor for next request */
    PetscCall(PetscObjectCompose((PetscObject)M, "isrow_d", (PetscObject)isrow_d));
    PetscCall(ISDestroy(&isrow_d));

    PetscCall(PetscObjectCompose((PetscObject)M, "iscol_d", (PetscObject)iscol_d));
    PetscCall(ISDestroy(&iscol_d));

    PetscCall(PetscObjectCompose((PetscObject)M, "iscol_o", (PetscObject)iscol_o));
    PetscCall(ISDestroy(&iscol_o));
  }
  PetscFunctionReturn(0);
}

/* Extract a parallel submatrix; dispatches to a fast path when isrow (and possibly iscol)
   have the same processor distribution as mat, otherwise falls back to the non-scalable
   gather-based algorithm. */
PetscErrorCode MatCreateSubMatrix_MPIAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat) {
  IS        iscol_local = NULL, isrow_d;
  PetscInt  csize;
  PetscInt  n, i, j, start, end;
  PetscBool sameRowDist = PETSC_FALSE, sameDist[2], tsameDist[2];
  MPI_Comm  comm;

  PetscFunctionBegin;
  /* If isrow has same processor distribution as mat,
     call MatCreateSubMatrix_MPIAIJ_SameRowDist() to avoid using a hash table with global size of iscol */
  if (call == MAT_REUSE_MATRIX) {
    /* the composed objects record which algorithm built *newmat the first time */
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "isrow_d", (PetscObject *)&isrow_d));
    if (isrow_d) {
      sameRowDist  = PETSC_TRUE;
      tsameDist[1] = PETSC_TRUE; /* sameColDist */
    } else {
      PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_local));
      if (iscol_local) {
        sameRowDist  = PETSC_TRUE;
        tsameDist[1] = PETSC_FALSE; /* !sameColDist */
      }
    }
  } else {
    /* Check if isrow has same processor distribution as mat */
    sameDist[0] = PETSC_FALSE;
    PetscCall(ISGetLocalSize(isrow, &n));
    if (!n) {
      sameDist[0] = PETSC_TRUE;
    } else {
      PetscCall(ISGetMinMax(isrow, &i, &j));
      PetscCall(MatGetOwnershipRange(mat, &start, &end));
      if (i >= start && j < end) { sameDist[0] = PETSC_TRUE; }
    }

    /* Check
if iscol has same processor distribution as mat */ 3298 sameDist[1] = PETSC_FALSE; 3299 PetscCall(ISGetLocalSize(iscol, &n)); 3300 if (!n) { 3301 sameDist[1] = PETSC_TRUE; 3302 } else { 3303 PetscCall(ISGetMinMax(iscol, &i, &j)); 3304 PetscCall(MatGetOwnershipRangeColumn(mat, &start, &end)); 3305 if (i >= start && j < end) sameDist[1] = PETSC_TRUE; 3306 } 3307 3308 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 3309 PetscCall(MPIU_Allreduce(&sameDist, &tsameDist, 2, MPIU_BOOL, MPI_LAND, comm)); 3310 sameRowDist = tsameDist[0]; 3311 } 3312 3313 if (sameRowDist) { 3314 if (tsameDist[1]) { /* sameRowDist & sameColDist */ 3315 /* isrow and iscol have same processor distribution as mat */ 3316 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowColDist(mat, isrow, iscol, call, newmat)); 3317 PetscFunctionReturn(0); 3318 } else { /* sameRowDist */ 3319 /* isrow has same processor distribution as mat */ 3320 if (call == MAT_INITIAL_MATRIX) { 3321 PetscBool sorted; 3322 PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local)); 3323 PetscCall(ISGetLocalSize(iscol_local, &n)); /* local size of iscol_local = global columns of newmat */ 3324 PetscCall(ISGetSize(iscol, &i)); 3325 PetscCheck(n == i, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "n %" PetscInt_FMT " != size of iscol %" PetscInt_FMT, n, i); 3326 3327 PetscCall(ISSorted(iscol_local, &sorted)); 3328 if (sorted) { 3329 /* MatCreateSubMatrix_MPIAIJ_SameRowDist() requires iscol_local be sorted; it can have duplicate indices */ 3330 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, iscol_local, MAT_INITIAL_MATRIX, newmat)); 3331 PetscFunctionReturn(0); 3332 } 3333 } else { /* call == MAT_REUSE_MATRIX */ 3334 IS iscol_sub; 3335 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub)); 3336 if (iscol_sub) { 3337 PetscCall(MatCreateSubMatrix_MPIAIJ_SameRowDist(mat, isrow, iscol, NULL, call, newmat)); 3338 PetscFunctionReturn(0); 3339 } 3340 } 3341 } 3342 } 3343 3344 /* General case: 
iscol -> iscol_local which has global size of iscol */ 3345 if (call == MAT_REUSE_MATRIX) { 3346 PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local)); 3347 PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 3348 } else { 3349 if (!iscol_local) { PetscCall(ISGetSeqIS_Private(mat, iscol, &iscol_local)); } 3350 } 3351 3352 PetscCall(ISGetLocalSize(iscol, &csize)); 3353 PetscCall(MatCreateSubMatrix_MPIAIJ_nonscalable(mat, isrow, iscol_local, csize, call, newmat)); 3354 3355 if (call == MAT_INITIAL_MATRIX) { 3356 PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local)); 3357 PetscCall(ISDestroy(&iscol_local)); 3358 } 3359 PetscFunctionReturn(0); 3360 } 3361 3362 /*@C 3363 MatCreateMPIAIJWithSeqAIJ - creates a MPIAIJ matrix using SeqAIJ matrices that contain the "diagonal" 3364 and "off-diagonal" part of the matrix in CSR format. 3365 3366 Collective 3367 3368 Input Parameters: 3369 + comm - MPI communicator 3370 . A - "diagonal" portion of matrix 3371 . B - "off-diagonal" portion of matrix, may have empty columns, will be destroyed by this routine 3372 - garray - global index of B columns 3373 3374 Output Parameter: 3375 . mat - the matrix, with input A as its local diagonal matrix 3376 Level: advanced 3377 3378 Notes: 3379 See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix. 3380 A becomes part of output mat, B is destroyed by this routine. The user cannot use A and B anymore. 
.seealso: `MatCreateMPIAIJWithSplitArrays()`
@*/
PetscErrorCode MatCreateMPIAIJWithSeqAIJ(MPI_Comm comm, Mat A, Mat B, const PetscInt garray[], Mat *mat) {
  Mat_MPIAIJ        *maij;
  Mat_SeqAIJ        *b = (Mat_SeqAIJ *)B->data, *bnew;
  PetscInt          *oi = b->i, *oj = b->j, i, nz, col;
  const PetscScalar *oa;
  Mat                Bnew;
  PetscInt           m, n, N;
  MatType            mpi_mat_type;

  PetscFunctionBegin;
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatGetSize(A, &m, &n));
  PetscCheck(m == B->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Am %" PetscInt_FMT " != Bm %" PetscInt_FMT, m, B->rmap->N);
  PetscCheck(A->rmap->bs == B->rmap->bs, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "A row bs %" PetscInt_FMT " != B row bs %" PetscInt_FMT, A->rmap->bs, B->rmap->bs);
  /* remove check below; When B is created using iscol_o from ISGetSeqIS_SameColDist_Private(), its bs may not be same as A */
  /* PetscCheck(A->cmap->bs == B->cmap->bs,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"A column bs %" PetscInt_FMT " != B column bs %" PetscInt_FMT,A->cmap->bs,B->cmap->bs); */

  /* Get global columns of mat: sum of the local "diagonal" widths over the communicator */
  PetscCall(MPIU_Allreduce(&n, &N, 1, MPIU_INT, MPI_SUM, comm));

  PetscCall(MatSetSizes(*mat, m, n, PETSC_DECIDE, N));
  /* Determine the type of MPI matrix that should be created from the type of matrix A, which holds the "diagonal" portion. */
  PetscCall(MatGetMPIMatType_Private(A, &mpi_mat_type));
  PetscCall(MatSetType(*mat, mpi_mat_type));

  PetscCall(MatSetBlockSizes(*mat, A->rmap->bs, A->cmap->bs));
  maij = (Mat_MPIAIJ *)(*mat)->data;

  (*mat)->preallocated = PETSC_TRUE;

  PetscCall(PetscLayoutSetUp((*mat)->rmap));
  PetscCall(PetscLayoutSetUp((*mat)->cmap));

  /* Set A as diagonal portion of *mat; ownership transfers, caller must not use A afterwards */
  maij->A = A;

  /* Translate B's column indices to global indices via garray, in place */
  nz = oi[m];
  for (i = 0; i < nz; i++) {
    col   = oj[i];
    oj[i] = garray[col];
  }

  /* Set Bnew as off-diagonal portion of *mat; Bnew shares B's i/j/a arrays */
  PetscCall(MatSeqAIJGetArrayRead(B, &oa));
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, N, oi, oj, (PetscScalar *)oa, &Bnew));
  PetscCall(MatSeqAIJRestoreArrayRead(B, &oa));
  bnew        = (Mat_SeqAIJ *)Bnew->data;
  bnew->maxnz = b->maxnz; /* allocated nonzeros of B */
  maij->B     = Bnew;

  PetscCheck(B->rmap->N == Bnew->rmap->N, PETSC_COMM_SELF, PETSC_ERR_PLIB, "BN %" PetscInt_FMT " != BnewN %" PetscInt_FMT, B->rmap->N, Bnew->rmap->N);

  /* Hand the shared arrays over: B must not free them when destroyed below */
  b->singlemalloc = PETSC_FALSE; /* B arrays are shared by Bnew */
  b->free_a       = PETSC_FALSE;
  b->free_ij      = PETSC_FALSE;
  PetscCall(MatDestroy(&B));

  bnew->singlemalloc = PETSC_TRUE; /* arrays will be freed by MatDestroy(&Bnew) */
  bnew->free_a       = PETSC_TRUE;
  bnew->free_ij      = PETSC_TRUE;

  /* condense columns of maij->B */
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
  PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatCreateSubMatrices_MPIAIJ_SingleIS_Local(Mat, PetscInt, const IS[], const IS[], MatReuse, PetscBool, Mat *);

PetscErrorCode
/* Implements MatCreateSubMatrix() when isrow matches mat's row layout.
   iscol_local is the sequential gather of iscol; it must be sorted (duplicates allowed)
   for MAT_INITIAL_MATRIX and may be NULL on MAT_REUSE_MATRIX, where the objects
   composed on *newmat ("SubIScol", "Subcmap", "SubMatrix") are used instead. */
MatCreateSubMatrix_MPIAIJ_SameRowDist(Mat mat, IS isrow, IS iscol, IS iscol_local, MatReuse call, Mat *newmat) {
  PetscInt        i, m, n, rstart, row, rend, nz, j, bs, cbs;
  PetscInt       *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
  Mat_MPIAIJ     *a = (Mat_MPIAIJ *)mat->data;
  Mat             M, Msub, B = a->B;
  MatScalar      *aa;
  Mat_SeqAIJ     *aij;
  PetscInt       *garray = a->garray, *colsub, Ncols;
  PetscInt        count, Bn = B->cmap->N, cstart = mat->cmap->rstart, cend = mat->cmap->rend;
  IS              iscol_sub, iscmap;
  const PetscInt *is_idx, *cmap;
  PetscBool       allcolumns = PETSC_FALSE;
  MPI_Comm        comm;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  if (call == MAT_REUSE_MATRIX) {
    /* Recover the cached column selection, column map and local submatrix */
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubIScol", (PetscObject *)&iscol_sub));
    PetscCheck(iscol_sub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "SubIScol passed in was not used before, cannot reuse");
    PetscCall(ISGetLocalSize(iscol_sub, &count));

    PetscCall(PetscObjectQuery((PetscObject)*newmat, "Subcmap", (PetscObject *)&iscmap));
    PetscCheck(iscmap, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Subcmap passed in was not used before, cannot reuse");

    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Msub));
    PetscCheck(Msub, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");

    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_REUSE_MATRIX, PETSC_FALSE, &Msub));

  } else { /* call == MAT_INITIAL_MATRIX */
    PetscBool flg;

    PetscCall(ISGetLocalSize(iscol, &n));
    PetscCall(ISGetSize(iscol, &Ncols));

    /* (1) iscol -> nonscalable iscol_local */
    /* Check for special case: each processor gets entire matrix columns */
    PetscCall(ISIdentity(iscol_local, &flg));
    if (flg && n == mat->cmap->N) allcolumns = PETSC_TRUE;
    PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (allcolumns) {
      iscol_sub = iscol_local;
      PetscCall(PetscObjectReference((PetscObject)iscol_local));
      PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &iscmap));

    } else {
      /* (2) iscol_local -> iscol_sub and iscmap. Implementation below requires iscol_local be sorted, it can have duplicate indices */
      PetscInt *idx, *cmap1, k;
      PetscCall(PetscMalloc1(Ncols, &idx));
      PetscCall(PetscMalloc1(Ncols, &cmap1));
      PetscCall(ISGetIndices(iscol_local, &is_idx));
      count = 0;
      k     = 0;
      for (i = 0; i < Ncols; i++) {
        j = is_idx[i];
        if (j >= cstart && j < cend) {
          /* diagonal part of mat */
          idx[count]     = j;
          cmap1[count++] = i; /* column index in submat */
        } else if (Bn) {
          /* off-diagonal part of mat */
          if (j == garray[k]) {
            idx[count]     = j;
            cmap1[count++] = i; /* column index in submat */
          } else if (j > garray[k]) {
            while (j > garray[k] && k < Bn - 1) k++;
            if (j == garray[k]) {
              idx[count]     = j;
              cmap1[count++] = i; /* column index in submat */
            }
          }
        }
      }
      PetscCall(ISRestoreIndices(iscol_local, &is_idx));

      PetscCall(ISCreateGeneral(PETSC_COMM_SELF, count, idx, PETSC_OWN_POINTER, &iscol_sub));
      PetscCall(ISGetBlockSize(iscol, &cbs));
      PetscCall(ISSetBlockSize(iscol_sub, cbs));

      PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)iscol_local), count, cmap1, PETSC_OWN_POINTER, &iscmap));
    }

    /* (3) Create sequential Msub */
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol_sub, MAT_INITIAL_MATRIX, allcolumns, &Msub));
  }

  PetscCall(ISGetLocalSize(iscol_sub, &count));
  aij = (Mat_SeqAIJ *)(Msub)->data;
  ii  = aij->i;
  PetscCall(ISGetIndices(iscmap, &cmap));

  /*
      m - number of local rows
      Ncols - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetSize(Msub, &m, NULL));

  if (call == MAT_INITIAL_MATRIX) {
    /* (4) Create parallel newmat */
    PetscMPIInt rank, size;
    PetscInt    csize;

    PetscCallMPI(MPI_Comm_size(comm, &size));
    PetscCallMPI(MPI_Comm_rank(comm, &rank));

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    PetscCall(ISGetLocalSize(iscol, &csize));
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow, &mglobal));
      if (mglobal == Ncols) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = Ncols / size + ((Ncols % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
    rstart = rend - nlocal;
    PetscCheck(rank != size - 1 || rend == Ncols, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, Ncols);

    /* next, compute all the lengths */
    jj = aij->j;
    PetscCall(PetscMalloc1(2 * m + 1, &dlens));
    olens = dlens + m; /* olens shares the single allocation with dlens */
    for (i = 0; i < m; i++) {
      jend = ii[i + 1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j = 0; j < jend; j++) {
        if (cmap[*jj] < rstart || cmap[*jj] >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }

    PetscCall(ISGetBlockSize(isrow, &bs));
    PetscCall(ISGetBlockSize(iscol, &cbs));

    PetscCall(MatCreate(comm, &M));
    PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, Ncols));
    PetscCall(MatSetBlockSizes(M, bs, cbs));
    PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
    PetscCall(PetscFree(dlens));

  } else { /* call == MAT_REUSE_MATRIX */
    M = *newmat;
    PetscCall(MatGetLocalSize(M, &i, NULL));
    PetscCheck(i == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
        The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
        rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }

  /* (5) Set values of Msub to *newmat */
  PetscCall(PetscMalloc1(count, &colsub));
  PetscCall(MatGetOwnershipRange(M, &rstart, NULL));

  jj = aij->j;
  PetscCall(MatSeqAIJGetArrayRead(Msub, (const PetscScalar **)&aa));
  for (i = 0; i < m; i++) {
    row = rstart + i;
    nz  = ii[i + 1] - ii[i];
    for (j = 0; j < nz; j++) colsub[j] = cmap[jj[j]]; /* map Msub columns to global columns of M */
    PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, colsub, aa, INSERT_VALUES));
    jj += nz;
    aa += nz;
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Msub, (const PetscScalar **)&aa));
  PetscCall(ISRestoreIndices(iscmap, &cmap));

  PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));

  PetscCall(PetscFree(colsub));

  /* save Msub, iscol_sub and iscmap used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    *newmat = M;
    PetscCall(PetscObjectCompose((PetscObject)(*newmat), "SubMatrix", (PetscObject)Msub));
    PetscCall(MatDestroy(&Msub));

    PetscCall(PetscObjectCompose((PetscObject)(*newmat), "SubIScol", (PetscObject)iscol_sub));
    PetscCall(ISDestroy(&iscol_sub));

    PetscCall(PetscObjectCompose((PetscObject)(*newmat), "Subcmap", (PetscObject)iscmap));
    PetscCall(ISDestroy(&iscmap));

    if (iscol_local) {
      PetscCall(PetscObjectCompose((PetscObject)(*newmat), "ISAllGather", (PetscObject)iscol_local));
      PetscCall(ISDestroy(&iscol_local));
    }
  }
  PetscFunctionReturn(0);
}

/*
    Not great since it makes two copies of the submatrix, first an SeqAIJ
  in local and then by concatenating the local matrices the end result.
  Writing it directly would be much like MatCreateSubMatrices_MPIAIJ()

    Note: This requires a sequential iscol with all indices.
*/
PetscErrorCode MatCreateSubMatrix_MPIAIJ_nonscalable(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat) {
  PetscMPIInt rank, size;
  PetscInt    i, m, n, rstart, row, rend, nz, *cwork, j, bs, cbs;
  PetscInt   *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
  Mat         M, Mreuse;
  MatScalar  *aa, *vwork;
  MPI_Comm    comm;
  Mat_SeqAIJ *aij;
  PetscBool   colflag, allcolumns = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));

  /* Check for special case: each processor gets entire matrix columns */
  PetscCall(ISIdentity(iscol, &colflag));
  PetscCall(ISGetLocalSize(iscol, &n));
  if (colflag && n == mat->cmap->N) allcolumns = PETSC_TRUE;
  PetscCall(MPIU_Allreduce(MPI_IN_PLACE, &allcolumns, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));

  if (call == MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse));
    PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_REUSE_MATRIX, allcolumns, &Mreuse));
  } else {
    PetscCall(MatCreateSubMatrices_MPIAIJ_SingleIS_Local(mat, 1, &isrow, &iscol, MAT_INITIAL_MATRIX, allcolumns, &Mreuse));
  }

  /*
      m - number of local rows
      n - number of columns (same on all
 processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetSize(Mreuse, &m, &n));
  PetscCall(MatGetBlockSizes(Mreuse, &bs, &cbs));
  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqAIJ *)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow, &mglobal));
      if (mglobal == n) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = n / size + ((n % size) > rank);
      }
    } else {
      nlocal = csize;
    }
    PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
    rstart = rend - nlocal;
    PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n);

    /* next, compute all the lengths */
    PetscCall(PetscMalloc1(2 * m + 1, &dlens));
    olens = dlens + m; /* olens shares the single allocation with dlens */
    for (i = 0; i < m; i++) {
      jend = ii[i + 1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j = 0; j < jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    PetscCall(MatCreate(comm, &M));
    PetscCall(MatSetSizes(M, m, nlocal, PETSC_DECIDE, n));
    PetscCall(MatSetBlockSizes(M, bs, cbs));
    PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
    PetscCall(MatMPIAIJSetPreallocation(M, 0, dlens, 0, olens));
    PetscCall(PetscFree(dlens));
  } else {
    PetscInt ml, nl;

    M = *newmat;
    PetscCall(MatGetLocalSize(M, &ml, &nl));
    PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
        The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
        rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  PetscCall(MatGetOwnershipRange(M, &rstart, &rend));
  aij = (Mat_SeqAIJ *)(Mreuse)->data;
  ii  = aij->i;
  jj  = aij->j;

  /* trigger copy to CPU if needed */
  PetscCall(MatSeqAIJGetArrayRead(Mreuse, (const PetscScalar **)&aa));
  for (i = 0; i < m; i++) {
    row   = rstart + i;
    nz    = ii[i + 1] - ii[i];
    cwork = jj;
    jj += nz;
    vwork = aa;
    aa += nz;
    PetscCall(MatSetValues_MPIAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES));
  }
  PetscCall(MatSeqAIJRestoreArrayRead(Mreuse, (const PetscScalar **)&aa));

  PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse));
    PetscCall(MatDestroy(&Mreuse));
  }
  PetscFunctionReturn(0);
}

/* Type-specific implementation behind MatMPIAIJSetPreallocationCSR(): preallocates B from a
   local CSR description (Ii, J, optional v), inserts the values, assembles, and records the
   per-row count of entries left of the diagonal block in Aij->ld. */
PetscErrorCode MatMPIAIJSetPreallocationCSR_MPIAIJ(Mat B, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[]) {
  PetscInt        m, cstart, cend, j, nnz, i, d, *ld;
  PetscInt       *d_nnz, *o_nnz, nnz_max = 0, rstart, ii;
  const PetscInt *JJ;
  PetscBool       nooffprocentries;
  Mat_MPIAIJ     *Aij = (Mat_MPIAIJ *)B->data;

  PetscFunctionBegin;
  PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Ii[0] must be 0 it is %" PetscInt_FMT, Ii[0]);

  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  m      = B->rmap->n;
  cstart = B->cmap->rstart;
  cend   = B->cmap->rend;
  rstart = B->rmap->rstart;

  PetscCall(PetscCalloc2(m, &d_nnz, m, &o_nnz));

  if (PetscDefined(USE_DEBUG)) {
    /* Sanity-check each row's extent and first/last column index (last-index check
       assumes column indices within a row are ascending — TODO confirm contract) */
    for (i = 0; i < m; i++) {
      nnz = Ii[i + 1] - Ii[i];
      JJ  = J + Ii[i];
      PetscCheck(nnz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative %" PetscInt_FMT " number of columns", i, nnz);
      PetscCheck(!nnz || !(JJ[0] < 0), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " starts with negative column index %" PetscInt_FMT, i, JJ[0]);
      PetscCheck(!nnz || !(JJ[nnz - 1] >= B->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Row %" PetscInt_FMT " ends with too large a column index %" PetscInt_FMT " (max allowed %" PetscInt_FMT ")", i, JJ[nnz - 1], B->cmap->N);
    }
  }

  /* Count diagonal-block vs off-diagonal entries per row for exact preallocation */
  for (i = 0; i < m; i++) {
    nnz     = Ii[i + 1] - Ii[i];
    JJ      = J + Ii[i];
    nnz_max = PetscMax(nnz_max, nnz);
    d       = 0;
    for (j = 0; j < nnz; j++) {
      if (cstart <= JJ[j] && JJ[j] < cend) d++;
    }
    d_nnz[i] = d;
    o_nnz[i] = nnz - d;
  }
  PetscCall(MatMPIAIJSetPreallocation(B, 0, d_nnz, 0, o_nnz));
  PetscCall(PetscFree2(d_nnz, o_nnz));

  for (i = 0; i < m; i++) {
    ii = i + rstart;
    PetscCall(MatSetValues_MPIAIJ(B, 1, &ii, Ii[i + 1] - Ii[i], J + Ii[i], v ? v + Ii[i] : NULL, INSERT_VALUES));
  }
  /* All entries are local by construction, so skip off-process communication during assembly */
  nooffprocentries    = B->nooffprocentries;
  B->nooffprocentries = PETSC_TRUE;
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  B->nooffprocentries = nooffprocentries;

  /* count number of entries below block diagonal (leading indices < cstart in each row;
     assumes per-row column indices are ascending — TODO confirm) */
  PetscCall(PetscFree(Aij->ld));
  PetscCall(PetscCalloc1(m, &ld));
  Aij->ld = ld;
  for (i = 0; i < m; i++) {
    nnz = Ii[i + 1] - Ii[i];
    j   = 0;
    while (j < nnz && J[j] < cstart) { j++; }
    ld[i] = j;
    J += nnz;
  }

  PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(0);
}

/*@
   MatMPIAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).

   Collective

   Input Parameters:
+  B - the matrix
.  i - the indices into j for the start of each local row (starts with zero)
.  j - the column indices for each local row (starts with zero)
-  v - optional values in the matrix

   Level: developer

   Notes:
   The i, j, and v arrays ARE copied by this routine into the internal format used by PETSc;
   thus you CANNOT change the matrix entries by changing the values of v[] after you have
   called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

   The i and j indices are 0 based, and i indices are indices corresponding to the local j array.

   The format which is used for the sparse matrix input, is equivalent to a
   row-major ordering..
 i.e for the following matrix, the input data expected is
   as shown

$        1 0 0
$        2 0 3     P0
$       -------
$        4 5 6     P1
$
$     Process0 [P0]: rows_owned=[0,1]
$        i =  {0,1,3}  [size = nrow+1 = 2+1]
$        j =  {0,0,2}  [size = 3]
$        v =  {1,2,3}  [size = 3]
$
$     Process1 [P1]: rows_owned=[2]
$        i =  {0,3}    [size = nrow+1 = 1+1]
$        j =  {0,1,2}  [size = 3]
$        v =  {4,5,6}  [size = 3]

.seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatCreateAIJ()`, `MATMPIAIJ`,
          `MatCreateSeqAIJWithArrays()`, `MatCreateMPIAIJWithSplitArrays()`
@*/
/* Thin dispatcher: forwards to the matrix type's "MatMPIAIJSetPreallocationCSR_C" method if one is registered. */
PetscErrorCode MatMPIAIJSetPreallocationCSR(Mat B, const PetscInt i[], const PetscInt j[], const PetscScalar v[]) {
  PetscFunctionBegin;
  PetscTryMethod(B, "MatMPIAIJSetPreallocationCSR_C", (Mat, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, i, j, v));
  PetscFunctionReturn(0);
}

/*@C
   MatMPIAIJSetPreallocation - Preallocates memory for a sparse parallel matrix in AIJ format
   (the default parallel PETSc format).  For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective

   Input Parameters:
+  B - the matrix
.  d_nz  - number of nonzeros per row in DIAGONAL portion of local submatrix
           (same value is used for all local rows)
.  d_nnz - array containing the number of nonzeros in the various rows of the
           DIAGONAL portion of the local submatrix (possibly different for each row)
           or NULL (PETSC_NULL_INTEGER in Fortran), if d_nz is used to specify the nonzero structure.
           The size of this array is equal to the number of local rows, i.e 'm'.
           For matrices that will be factored, you must leave room for (and set)
           the diagonal entry even if it is zero.
.  o_nz  - number of nonzeros per row in the OFF-DIAGONAL portion of local
           submatrix (same value is used for all local rows).
-  o_nnz - array containing the number of nonzeros in the various rows of the
           OFF-DIAGONAL portion of the local submatrix (possibly different for
           each row) or NULL (PETSC_NULL_INTEGER in Fortran), if o_nz is used to specify the nonzero
           structure. The size of this array is equal to the number
           of local rows, i.e 'm'.

   If the *_nnz parameter is given then the *_nz parameter is ignored

   The AIJ format (also called the Yale sparse matrix format or
   compressed row storage (CSR)), is fully compatible with standard Fortran 77
   storage.  The stored row and column indices begin with zero.
   See Users-Manual: ch_mat for details.

   The parallel matrix is partitioned such that the first m0 rows belong to
   process 0, the next m1 rows belong to process 1, the next m2 rows belong
   to process 2 etc.. where m0,m1,m2... are the input parameter 'm'.

   The DIAGONAL portion of the local submatrix of a processor can be defined
   as the submatrix which is obtained by extracting the part corresponding to
   the rows r1-r2 and columns c1-c2 of the global matrix, where r1 is the
   first row that belongs to the processor, r2 is the last row belonging to
   this processor, and c1-c2 is the range of indices of the local part of a
   vector suitable for applying the matrix to. This is an mxn matrix. In the
   common case of a square matrix, the row and column ranges are the same and
   the DIAGONAL part is also square. The remaining portion of the local
   submatrix (mxN) constitute the OFF-DIAGONAL portion.

   If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.

   You can call MatGetInfo() to get information on how effective the preallocation was;
   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
   You can also run with the option -info and look for messages with the string
   malloc in them to see if additional memory allocation was needed.

   Example usage:

   Consider the following 8x8 matrix with 34 non-zero values, that is
   assembled across 3 processors. Lets assume that proc0 owns 3 rows,
   proc1 owns 3 rows, proc2 owns 2 rows. This division can be shown
   as follows:

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as:

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. For example, proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   row for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are:
.vb
     proc0 : d_nz = 2, o_nz = 2
     proc1 : d_nz = 3, o_nz = 2
     proc2 : d_nz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2. i.e we are using 12+15+10=37 storage locations to store
   34 values.

   When d_nnz, o_nnz parameters are specified, the storage is specified
   for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices.
   In the above case the values for d_nnz,o_nnz are:
.vb
     proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2]
     proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1]
     proc2: d_nnz = [1,1]   and o_nnz = [4,4]
.ve
   Here the space allocated is sum of all the above values i.e 34, and
   hence pre-allocation is perfect.

   Level: intermediate

.seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatCreateAIJ()`, `MatMPIAIJSetPreallocationCSR()`,
          `MATMPIAIJ`, `MatGetInfo()`, `PetscSplitOwnership()`
@*/
/* Thin dispatcher: validates B and forwards to the matrix type's "MatMPIAIJSetPreallocation_C" method if one is registered. */
PetscErrorCode MatMPIAIJSetPreallocation(Mat B, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[]) {
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidType(B, 1);
  PetscTryMethod(B, "MatMPIAIJSetPreallocation_C", (Mat, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, d_nz, d_nnz, o_nz, o_nnz));
  PetscFunctionReturn(0);
}

/*@
   MatCreateMPIAIJWithArrays - creates a MPI AIJ matrix using arrays that contain in standard
   CSR format for the local rows.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
.  j - column indices
-  a - optional matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
   The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
   thus you CANNOT change the matrix entries by changing the values of a[] after you have
   called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

   The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
4082 4083 The format which is used for the sparse matrix input, is equivalent to a 4084 row-major ordering.. i.e for the following matrix, the input data expected is 4085 as shown 4086 4087 Once you have created the matrix you can update it with new numerical values using MatUpdateMPIAIJWithArrays 4088 4089 $ 1 0 0 4090 $ 2 0 3 P0 4091 $ ------- 4092 $ 4 5 6 P1 4093 $ 4094 $ Process0 [P0]: rows_owned=[0,1] 4095 $ i = {0,1,3} [size = nrow+1 = 2+1] 4096 $ j = {0,0,2} [size = 3] 4097 $ v = {1,2,3} [size = 3] 4098 $ 4099 $ Process1 [P1]: rows_owned=[2] 4100 $ i = {0,3} [size = nrow+1 = 1+1] 4101 $ j = {0,1,2} [size = 3] 4102 $ v = {4,5,6} [size = 3] 4103 4104 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4105 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()` 4106 @*/ 4107 PetscErrorCode MatCreateMPIAIJWithArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat) { 4108 PetscFunctionBegin; 4109 PetscCheck(!i || !i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 4110 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4111 PetscCall(MatCreate(comm, mat)); 4112 PetscCall(MatSetSizes(*mat, m, n, M, N)); 4113 /* PetscCall(MatSetBlockSizes(M,bs,cbs)); */ 4114 PetscCall(MatSetType(*mat, MATMPIAIJ)); 4115 PetscCall(MatMPIAIJSetPreallocationCSR(*mat, i, j, a)); 4116 PetscFunctionReturn(0); 4117 } 4118 4119 /*@ 4120 MatUpdateMPIAIJWithArrays - updates a MPI AIJ matrix using arrays that contain in standard 4121 CSR format for the local rows. 
Only the numerical values are updated the other arrays must be identical to what was passed from MatCreateMPIAIJWithArrays() 4122 4123 Deprecated: Use `MatUpdateMPIAIJWithArray()` 4124 4125 Collective 4126 4127 Input Parameters: 4128 + mat - the matrix 4129 . m - number of local rows (Cannot be PETSC_DECIDE) 4130 . n - This value should be the same as the local size used in creating the 4131 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4132 calculated if N is given) For square matrices n is almost always m. 4133 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4134 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4135 . Ii - row indices; that is Ii[0] = 0, Ii[row] = Ii[row-1] + number of elements in that row of the matrix 4136 . J - column indices 4137 - v - matrix values 4138 4139 Level: intermediate 4140 4141 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4142 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatUpdateMPIAIJWithArray()` 4143 @*/ 4144 PetscErrorCode MatUpdateMPIAIJWithArrays(Mat mat, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt Ii[], const PetscInt J[], const PetscScalar v[]) { 4145 PetscInt nnz, i; 4146 PetscBool nooffprocentries; 4147 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)mat->data; 4148 Mat_SeqAIJ *Ad = (Mat_SeqAIJ *)Aij->A->data; 4149 PetscScalar *ad, *ao; 4150 PetscInt ldi, Iii, md; 4151 const PetscInt *Adi = Ad->i; 4152 PetscInt *ld = Aij->ld; 4153 4154 PetscFunctionBegin; 4155 PetscCheck(Ii[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 4156 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 4157 PetscCheck(m == mat->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of rows 
cannot change from call to MatUpdateMPIAIJWithArrays()"); 4158 PetscCheck(n == mat->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Local number of columns cannot change from call to MatUpdateMPIAIJWithArrays()"); 4159 4160 PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad)); 4161 PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao)); 4162 4163 for (i = 0; i < m; i++) { 4164 nnz = Ii[i + 1] - Ii[i]; 4165 Iii = Ii[i]; 4166 ldi = ld[i]; 4167 md = Adi[i + 1] - Adi[i]; 4168 PetscCall(PetscArraycpy(ao, v + Iii, ldi)); 4169 PetscCall(PetscArraycpy(ad, v + Iii + ldi, md)); 4170 PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md)); 4171 ad += md; 4172 ao += nnz - md; 4173 } 4174 nooffprocentries = mat->nooffprocentries; 4175 mat->nooffprocentries = PETSC_TRUE; 4176 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad)); 4177 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao)); 4178 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A)); 4179 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B)); 4180 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 4181 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 4182 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 4183 mat->nooffprocentries = nooffprocentries; 4184 PetscFunctionReturn(0); 4185 } 4186 4187 /*@ 4188 MatUpdateMPIAIJWithArray - updates an MPI AIJ matrix using an array that contains the nonzero values 4189 4190 Collective 4191 4192 Input Parameters: 4193 + mat - the matrix 4194 - v - matrix values, stored by row 4195 4196 Level: intermediate 4197 4198 Notes: 4199 The matrix must have been obtained with `MatCreateMPIAIJWithArrays()` or `MatMPIAIJSetPreallocationCSR()` 4200 4201 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4202 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`, `MatUpdateMPIAIJWithArrays()`, `MatUpdateMPIAIJWithArrays()` 4203 @*/ 4204 PetscErrorCode MatUpdateMPIAIJWithArray(Mat mat, const 
PetscScalar v[]) { 4205 PetscInt nnz, i, m; 4206 PetscBool nooffprocentries; 4207 Mat_MPIAIJ *Aij = (Mat_MPIAIJ *)mat->data; 4208 Mat_SeqAIJ *Ad = (Mat_SeqAIJ *)Aij->A->data; 4209 Mat_SeqAIJ *Ao = (Mat_SeqAIJ *)Aij->B->data; 4210 PetscScalar *ad, *ao; 4211 const PetscInt *Adi = Ad->i, *Adj = Ao->i; 4212 PetscInt ldi, Iii, md; 4213 PetscInt *ld = Aij->ld; 4214 4215 PetscFunctionBegin; 4216 m = mat->rmap->n; 4217 4218 PetscCall(MatSeqAIJGetArrayWrite(Aij->A, &ad)); 4219 PetscCall(MatSeqAIJGetArrayWrite(Aij->B, &ao)); 4220 Iii = 0; 4221 for (i = 0; i < m; i++) { 4222 nnz = Adi[i + 1] - Adi[i] + Adj[i + 1] - Adj[i]; 4223 ldi = ld[i]; 4224 md = Adi[i + 1] - Adi[i]; 4225 PetscCall(PetscArraycpy(ao, v + Iii, ldi)); 4226 PetscCall(PetscArraycpy(ad, v + Iii + ldi, md)); 4227 PetscCall(PetscArraycpy(ao + ldi, v + Iii + ldi + md, nnz - ldi - md)); 4228 ad += md; 4229 ao += nnz - md; 4230 Iii += nnz; 4231 } 4232 nooffprocentries = mat->nooffprocentries; 4233 mat->nooffprocentries = PETSC_TRUE; 4234 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->A, &ad)); 4235 PetscCall(MatSeqAIJRestoreArrayWrite(Aij->B, &ao)); 4236 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->A)); 4237 PetscCall(PetscObjectStateIncrease((PetscObject)Aij->B)); 4238 PetscCall(PetscObjectStateIncrease((PetscObject)mat)); 4239 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 4240 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 4241 mat->nooffprocentries = nooffprocentries; 4242 PetscFunctionReturn(0); 4243 } 4244 4245 /*@C 4246 MatCreateAIJ - Creates a sparse parallel matrix in AIJ format 4247 (the default parallel PETSc format). For good matrix assembly performance 4248 the user should preallocate the matrix storage by setting the parameters 4249 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 4250 performance can be increased by more than a factor of 50. 4251 4252 Collective 4253 4254 Input Parameters: 4255 + comm - MPI communicator 4256 . 
m - number of local rows (or PETSC_DECIDE to have calculated if M is given) 4257 This value should be the same as the local size used in creating the 4258 y vector for the matrix-vector product y = Ax. 4259 . n - This value should be the same as the local size used in creating the 4260 x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have 4261 calculated if N is given) For square matrices n is almost always m. 4262 . M - number of global rows (or PETSC_DETERMINE to have calculated if m is given) 4263 . N - number of global columns (or PETSC_DETERMINE to have calculated if n is given) 4264 . d_nz - number of nonzeros per row in DIAGONAL portion of local submatrix 4265 (same value is used for all local rows) 4266 . d_nnz - array containing the number of nonzeros in the various rows of the 4267 DIAGONAL portion of the local submatrix (possibly different for each row) 4268 or NULL, if d_nz is used to specify the nonzero structure. 4269 The size of this array is equal to the number of local rows, i.e 'm'. 4270 . o_nz - number of nonzeros per row in the OFF-DIAGONAL portion of local 4271 submatrix (same value is used for all local rows). 4272 - o_nnz - array containing the number of nonzeros in the various rows of the 4273 OFF-DIAGONAL portion of the local submatrix (possibly different for 4274 each row) or NULL, if o_nz is used to specify the nonzero 4275 structure. The size of this array is equal to the number 4276 of local rows, i.e 'm'. 4277 4278 Output Parameter: 4279 . A - the matrix 4280 4281 It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(), 4282 MatXXXXSetPreallocation() paradigm instead of this routine directly. 
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]

   Notes:
   If the *_nnz parameter is given then the *_nz parameter is ignored

   m,n,M,N parameters specify the size of the matrix, and its partitioning across
   processors, while d_nz,d_nnz,o_nz,o_nnz parameters specify the approximate
   storage requirements for this matrix.

   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one
   processor then it must be used on all processors that share the object for
   that argument.

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   The parallel matrix is partitioned across processors such that the
   first m0 rows belong to process 0, the next m1 rows belong to
   process 1, the next m2 rows belong to process 2 etc.. where
   m0,m1,m2,.. are the input parameter 'm'. i.e each processor stores
   values corresponding to [m x N] submatrix.

   The columns are logically partitioned with the n0 columns belonging
   to 0th partition, the next n1 columns belonging to the next
   partition etc.. where n0,n1,n2... are the input parameter 'n'.

   The DIAGONAL portion of the local submatrix on any given processor
   is the submatrix corresponding to the rows and columns m,n
   corresponding to the given processor. i.e diagonal matrix on
   process 0 is [m0 x n0], diagonal matrix on process 1 is [m1 x n1]
   etc. The remaining portion of the local submatrix [m x (N-n)]
   constitute the OFF-DIAGONAL portion. The example below better
   illustrates this concept.

   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   If o_nnz, d_nnz are specified, then o_nz, and d_nz are ignored.
4323 4324 When calling this routine with a single process communicator, a matrix of 4325 type SEQAIJ is returned. If a matrix of type MPIAIJ is desired for this 4326 type of communicator, use the construction mechanism 4327 .vb 4328 MatCreate(...,&A); MatSetType(A,MATMPIAIJ); MatSetSizes(A, m,n,M,N); MatMPIAIJSetPreallocation(A,...); 4329 .ve 4330 4331 $ MatCreate(...,&A); 4332 $ MatSetType(A,MATMPIAIJ); 4333 $ MatSetSizes(A, m,n,M,N); 4334 $ MatMPIAIJSetPreallocation(A,...); 4335 4336 By default, this format uses inodes (identical nodes) when possible. 4337 We search for consecutive rows with the same nonzero structure, thereby 4338 reusing matrix information to achieve increased efficiency. 4339 4340 Options Database Keys: 4341 + -mat_no_inode - Do not use inodes 4342 . -mat_inode_limit <limit> - Sets inode limit (max limit=5) 4343 - -matmult_vecscatter_view <viewer> - View the vecscatter (i.e., communication pattern) used in MatMult() of sparse parallel matrices. 4344 See viewer types in manual of MatView(). Of them, ascii_matlab, draw or binary cause the vecscatter be viewed as a matrix. 4345 Entry (i,j) is the size of message (in bytes) rank i sends to rank j in one MatMult() call. 4346 4347 Example usage: 4348 4349 Consider the following 8x8 matrix with 34 non-zero values, that is 4350 assembled across 3 processors. Lets assume that proc0 owns 3 rows, 4351 proc1 owns 3 rows, proc2 owns 2 rows. 
This division can be shown
   as follows

.vb
            1  2  0  |  0  3  0  |  0  4
    Proc0   0  5  6  |  7  0  0  |  8  0
            9  0 10  | 11  0  0  | 12  0
    -------------------------------------
           13  0 14  | 15 16 17  |  0  0
    Proc1   0 18  0  | 19 20 21  |  0  0
            0  0  0  | 22 23  0  | 24  0
    -------------------------------------
    Proc2  25 26 27  |  0  0 28  | 29  0
           30  0  0  | 31 32 33  |  0 34
.ve

   This can be represented as a collection of submatrices as

.vb
      A B C
      D E F
      G H I
.ve

   Where the submatrices A,B,C are owned by proc0, D,E,F are
   owned by proc1, G,H,I are owned by proc2.

   The 'm' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'n' parameters for proc0,proc1,proc2 are 3,3,2 respectively.
   The 'M','N' parameters are 8,8, and have the same values on all procs.

   The DIAGONAL submatrices corresponding to proc0,proc1,proc2 are
   submatrices [A], [E], [I] respectively. The OFF-DIAGONAL submatrices
   corresponding to proc0,proc1,proc2 are [BC], [DF], [GH] respectively.
   Internally, each processor stores the DIAGONAL part, and the OFF-DIAGONAL
   part as SeqAIJ matrices. for eg: proc1 will store [E] as a SeqAIJ
   matrix, and [DF] as another SeqAIJ matrix.

   When d_nz, o_nz parameters are specified, d_nz storage elements are
   allocated for every row of the local diagonal submatrix, and o_nz
   storage locations are allocated for every row of the OFF-DIAGONAL submat.
   One way to choose d_nz and o_nz is to use the max nonzeros per local
   rows for each of the local DIAGONAL, and the OFF-DIAGONAL submatrices.
   In this case, the values of d_nz,o_nz are
.vb
     proc0 : dnz = 2, o_nz = 2
     proc1 : dnz = 3, o_nz = 2
     proc2 : dnz = 1, o_nz = 4
.ve
   We are allocating m*(d_nz+o_nz) storage locations for every proc. This
   translates to 3*(2+2)=12 for proc0, 3*(3+2)=15 for proc1, 2*(1+4)=10
   for proc2.
i.e we are using 12+15+10=37 storage locations to store 4403 34 values. 4404 4405 When d_nnz, o_nnz parameters are specified, the storage is specified 4406 for every row, corresponding to both DIAGONAL and OFF-DIAGONAL submatrices. 4407 In the above case the values for d_nnz,o_nnz are 4408 .vb 4409 proc0: d_nnz = [2,2,2] and o_nnz = [2,2,2] 4410 proc1: d_nnz = [3,3,2] and o_nnz = [2,1,1] 4411 proc2: d_nnz = [1,1] and o_nnz = [4,4] 4412 .ve 4413 Here the space allocated is sum of all the above values i.e 34, and 4414 hence pre-allocation is perfect. 4415 4416 Level: intermediate 4417 4418 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 4419 `MATMPIAIJ`, `MatCreateMPIAIJWithArrays()` 4420 @*/ 4421 PetscErrorCode MatCreateAIJ(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A) { 4422 PetscMPIInt size; 4423 4424 PetscFunctionBegin; 4425 PetscCall(MatCreate(comm, A)); 4426 PetscCall(MatSetSizes(*A, m, n, M, N)); 4427 PetscCallMPI(MPI_Comm_size(comm, &size)); 4428 if (size > 1) { 4429 PetscCall(MatSetType(*A, MATMPIAIJ)); 4430 PetscCall(MatMPIAIJSetPreallocation(*A, d_nz, d_nnz, o_nz, o_nnz)); 4431 } else { 4432 PetscCall(MatSetType(*A, MATSEQAIJ)); 4433 PetscCall(MatSeqAIJSetPreallocation(*A, d_nz, d_nnz)); 4434 } 4435 PetscFunctionReturn(0); 4436 } 4437 4438 /*@C 4439 MatMPIAIJGetSeqAIJ - Returns the local piece of this distributed matrix 4440 4441 Not collective 4442 4443 Input Parameter: 4444 . A - The MPIAIJ matrix 4445 4446 Output Parameters: 4447 + Ad - The local diagonal block as a SeqAIJ matrix 4448 . Ao - The local off-diagonal block as a SeqAIJ matrix 4449 - colmap - An array mapping local column numbers of Ao to global column numbers of the parallel matrix 4450 4451 Note: The rows in Ad and Ao are in [0, Nr), where Nr is the number of local rows on this process. 
The columns 4452 in Ad are in [0, Nc) where Nc is the number of local columns. The columns are Ao are in [0, Nco), where Nco is 4453 the number of nonzero columns in the local off-diagonal piece of the matrix A. The array colmap maps these 4454 local column numbers to global column numbers in the original matrix. 4455 4456 Level: intermediate 4457 4458 .seealso: `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()`, `MatCreateAIJ()`, `MATMPIAIJ`, `MATSEQAIJ` 4459 @*/ 4460 PetscErrorCode MatMPIAIJGetSeqAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[]) { 4461 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 4462 PetscBool flg; 4463 4464 PetscFunctionBegin; 4465 PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &flg)); 4466 PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIAIJ matrix as input"); 4467 if (Ad) *Ad = a->A; 4468 if (Ao) *Ao = a->B; 4469 if (colmap) *colmap = a->garray; 4470 PetscFunctionReturn(0); 4471 } 4472 4473 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) { 4474 PetscInt m, N, i, rstart, nnz, Ii; 4475 PetscInt *indx; 4476 PetscScalar *values; 4477 MatType rootType; 4478 4479 PetscFunctionBegin; 4480 PetscCall(MatGetSize(inmat, &m, &N)); 4481 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 4482 PetscInt *dnz, *onz, sum, bs, cbs; 4483 4484 if (n == PETSC_DECIDE) { PetscCall(PetscSplitOwnership(comm, &n, &N)); } 4485 /* Check sum(n) = N */ 4486 PetscCall(MPIU_Allreduce(&n, &sum, 1, MPIU_INT, MPI_SUM, comm)); 4487 PetscCheck(sum == N, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local columns %" PetscInt_FMT " != global columns %" PetscInt_FMT, sum, N); 4488 4489 PetscCallMPI(MPI_Scan(&m, &rstart, 1, MPIU_INT, MPI_SUM, comm)); 4490 rstart -= m; 4491 4492 MatPreallocateBegin(comm, m, n, dnz, onz); 4493 for (i = 0; i < m; i++) { 4494 PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, NULL)); 4495 
PetscCall(MatPreallocateSet(i + rstart, nnz, indx, dnz, onz)); 4496 PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, NULL)); 4497 } 4498 4499 PetscCall(MatCreate(comm, outmat)); 4500 PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 4501 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 4502 PetscCall(MatSetBlockSizes(*outmat, bs, cbs)); 4503 PetscCall(MatGetRootType_Private(inmat, &rootType)); 4504 PetscCall(MatSetType(*outmat, rootType)); 4505 PetscCall(MatSeqAIJSetPreallocation(*outmat, 0, dnz)); 4506 PetscCall(MatMPIAIJSetPreallocation(*outmat, 0, dnz, 0, onz)); 4507 MatPreallocateEnd(dnz, onz); 4508 PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 4509 } 4510 4511 /* numeric phase */ 4512 PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL)); 4513 for (i = 0; i < m; i++) { 4514 PetscCall(MatGetRow_SeqAIJ(inmat, i, &nnz, &indx, &values)); 4515 Ii = i + rstart; 4516 PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES)); 4517 PetscCall(MatRestoreRow_SeqAIJ(inmat, i, &nnz, &indx, &values)); 4518 } 4519 PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY)); 4520 PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY)); 4521 PetscFunctionReturn(0); 4522 } 4523 4524 PetscErrorCode MatFileSplit(Mat A, char *outfile) { 4525 PetscMPIInt rank; 4526 PetscInt m, N, i, rstart, nnz; 4527 size_t len; 4528 const PetscInt *indx; 4529 PetscViewer out; 4530 char *name; 4531 Mat B; 4532 const PetscScalar *values; 4533 4534 PetscFunctionBegin; 4535 PetscCall(MatGetLocalSize(A, &m, NULL)); 4536 PetscCall(MatGetSize(A, NULL, &N)); 4537 /* Should this be the type of the diagonal block of A? 
*/ 4538 PetscCall(MatCreate(PETSC_COMM_SELF, &B)); 4539 PetscCall(MatSetSizes(B, m, N, m, N)); 4540 PetscCall(MatSetBlockSizesFromMats(B, A, A)); 4541 PetscCall(MatSetType(B, MATSEQAIJ)); 4542 PetscCall(MatSeqAIJSetPreallocation(B, 0, NULL)); 4543 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 4544 for (i = 0; i < m; i++) { 4545 PetscCall(MatGetRow(A, i + rstart, &nnz, &indx, &values)); 4546 PetscCall(MatSetValues(B, 1, &i, nnz, indx, values, INSERT_VALUES)); 4547 PetscCall(MatRestoreRow(A, i + rstart, &nnz, &indx, &values)); 4548 } 4549 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 4550 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 4551 4552 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank)); 4553 PetscCall(PetscStrlen(outfile, &len)); 4554 PetscCall(PetscMalloc1(len + 6, &name)); 4555 PetscCall(PetscSNPrintf(name, len + 6, "%s.%d", outfile, rank)); 4556 PetscCall(PetscViewerBinaryOpen(PETSC_COMM_SELF, name, FILE_MODE_APPEND, &out)); 4557 PetscCall(PetscFree(name)); 4558 PetscCall(MatView(B, out)); 4559 PetscCall(PetscViewerDestroy(&out)); 4560 PetscCall(MatDestroy(&B)); 4561 PetscFunctionReturn(0); 4562 } 4563 4564 static PetscErrorCode MatDestroy_MPIAIJ_SeqsToMPI(void *data) { 4565 Mat_Merge_SeqsToMPI *merge = (Mat_Merge_SeqsToMPI *)data; 4566 4567 PetscFunctionBegin; 4568 if (!merge) PetscFunctionReturn(0); 4569 PetscCall(PetscFree(merge->id_r)); 4570 PetscCall(PetscFree(merge->len_s)); 4571 PetscCall(PetscFree(merge->len_r)); 4572 PetscCall(PetscFree(merge->bi)); 4573 PetscCall(PetscFree(merge->bj)); 4574 PetscCall(PetscFree(merge->buf_ri[0])); 4575 PetscCall(PetscFree(merge->buf_ri)); 4576 PetscCall(PetscFree(merge->buf_rj[0])); 4577 PetscCall(PetscFree(merge->buf_rj)); 4578 PetscCall(PetscFree(merge->coi)); 4579 PetscCall(PetscFree(merge->coj)); 4580 PetscCall(PetscFree(merge->owners_co)); 4581 PetscCall(PetscLayoutDestroy(&merge->rowmap)); 4582 PetscCall(PetscFree(merge)); 4583 PetscFunctionReturn(0); 4584 } 4585 4586 #include 
<../src/mat/utils/freespace.h> 4587 #include <petscbt.h> 4588 4589 PetscErrorCode MatCreateMPIAIJSumSeqAIJNumeric(Mat seqmat, Mat mpimat) { 4590 MPI_Comm comm; 4591 Mat_SeqAIJ *a = (Mat_SeqAIJ *)seqmat->data; 4592 PetscMPIInt size, rank, taga, *len_s; 4593 PetscInt N = mpimat->cmap->N, i, j, *owners, *ai = a->i, *aj; 4594 PetscInt proc, m; 4595 PetscInt **buf_ri, **buf_rj; 4596 PetscInt k, anzi, *bj_i, *bi, *bj, arow, bnzi, nextaj; 4597 PetscInt nrows, **buf_ri_k, **nextrow, **nextai; 4598 MPI_Request *s_waits, *r_waits; 4599 MPI_Status *status; 4600 const MatScalar *aa, *a_a; 4601 MatScalar **abuf_r, *ba_i; 4602 Mat_Merge_SeqsToMPI *merge; 4603 PetscContainer container; 4604 4605 PetscFunctionBegin; 4606 PetscCall(PetscObjectGetComm((PetscObject)mpimat, &comm)); 4607 PetscCall(PetscLogEventBegin(MAT_Seqstompinum, seqmat, 0, 0, 0)); 4608 4609 PetscCallMPI(MPI_Comm_size(comm, &size)); 4610 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 4611 4612 PetscCall(PetscObjectQuery((PetscObject)mpimat, "MatMergeSeqsToMPI", (PetscObject *)&container)); 4613 PetscCheck(container, PetscObjectComm((PetscObject)mpimat), PETSC_ERR_PLIB, "Mat not created from MatCreateMPIAIJSumSeqAIJSymbolic"); 4614 PetscCall(PetscContainerGetPointer(container, (void **)&merge)); 4615 PetscCall(MatSeqAIJGetArrayRead(seqmat, &a_a)); 4616 aa = a_a; 4617 4618 bi = merge->bi; 4619 bj = merge->bj; 4620 buf_ri = merge->buf_ri; 4621 buf_rj = merge->buf_rj; 4622 4623 PetscCall(PetscMalloc1(size, &status)); 4624 owners = merge->rowmap->range; 4625 len_s = merge->len_s; 4626 4627 /* send and recv matrix values */ 4628 /*-----------------------------*/ 4629 PetscCall(PetscObjectGetNewTag((PetscObject)mpimat, &taga)); 4630 PetscCall(PetscPostIrecvScalar(comm, taga, merge->nrecv, merge->id_r, merge->len_r, &abuf_r, &r_waits)); 4631 4632 PetscCall(PetscMalloc1(merge->nsend + 1, &s_waits)); 4633 for (proc = 0, k = 0; proc < size; proc++) { 4634 if (!len_s[proc]) continue; 4635 i = owners[proc]; 4636 
PetscCallMPI(MPI_Isend(aa + ai[i], len_s[proc], MPIU_MATSCALAR, proc, taga, comm, s_waits + k)); 4637 k++; 4638 } 4639 4640 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, r_waits, status)); 4641 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, s_waits, status)); 4642 PetscCall(PetscFree(status)); 4643 4644 PetscCall(PetscFree(s_waits)); 4645 PetscCall(PetscFree(r_waits)); 4646 4647 /* insert mat values of mpimat */ 4648 /*----------------------------*/ 4649 PetscCall(PetscMalloc1(N, &ba_i)); 4650 PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai)); 4651 4652 for (k = 0; k < merge->nrecv; k++) { 4653 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4654 nrows = *(buf_ri_k[k]); 4655 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4656 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4657 } 4658 4659 /* set values of ba */ 4660 m = merge->rowmap->n; 4661 for (i = 0; i < m; i++) { 4662 arow = owners[rank] + i; 4663 bj_i = bj + bi[i]; /* col indices of the i-th row of mpimat */ 4664 bnzi = bi[i + 1] - bi[i]; 4665 PetscCall(PetscArrayzero(ba_i, bnzi)); 4666 4667 /* add local non-zero vals of this proc's seqmat into ba */ 4668 anzi = ai[arow + 1] - ai[arow]; 4669 aj = a->j + ai[arow]; 4670 aa = a_a + ai[arow]; 4671 nextaj = 0; 4672 for (j = 0; nextaj < anzi; j++) { 4673 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4674 ba_i[j] += aa[nextaj++]; 4675 } 4676 } 4677 4678 /* add received vals into ba */ 4679 for (k = 0; k < merge->nrecv; k++) { /* k-th received message */ 4680 /* i-th row */ 4681 if (i == *nextrow[k]) { 4682 anzi = *(nextai[k] + 1) - *nextai[k]; 4683 aj = buf_rj[k] + *(nextai[k]); 4684 aa = abuf_r[k] + *(nextai[k]); 4685 nextaj = 0; 4686 for (j = 0; nextaj < anzi; j++) { 4687 if (*(bj_i + j) == aj[nextaj]) { /* bcol == acol */ 4688 ba_i[j] += aa[nextaj++]; 4689 } 4690 } 4691 nextrow[k]++; 
4692 nextai[k]++; 4693 } 4694 } 4695 PetscCall(MatSetValues(mpimat, 1, &arow, bnzi, bj_i, ba_i, INSERT_VALUES)); 4696 } 4697 PetscCall(MatSeqAIJRestoreArrayRead(seqmat, &a_a)); 4698 PetscCall(MatAssemblyBegin(mpimat, MAT_FINAL_ASSEMBLY)); 4699 PetscCall(MatAssemblyEnd(mpimat, MAT_FINAL_ASSEMBLY)); 4700 4701 PetscCall(PetscFree(abuf_r[0])); 4702 PetscCall(PetscFree(abuf_r)); 4703 PetscCall(PetscFree(ba_i)); 4704 PetscCall(PetscFree3(buf_ri_k, nextrow, nextai)); 4705 PetscCall(PetscLogEventEnd(MAT_Seqstompinum, seqmat, 0, 0, 0)); 4706 PetscFunctionReturn(0); 4707 } 4708 4709 PetscErrorCode MatCreateMPIAIJSumSeqAIJSymbolic(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, Mat *mpimat) { 4710 Mat B_mpi; 4711 Mat_SeqAIJ *a = (Mat_SeqAIJ *)seqmat->data; 4712 PetscMPIInt size, rank, tagi, tagj, *len_s, *len_si, *len_ri; 4713 PetscInt **buf_rj, **buf_ri, **buf_ri_k; 4714 PetscInt M = seqmat->rmap->n, N = seqmat->cmap->n, i, *owners, *ai = a->i, *aj = a->j; 4715 PetscInt len, proc, *dnz, *onz, bs, cbs; 4716 PetscInt k, anzi, *bi, *bj, *lnk, nlnk, arow, bnzi; 4717 PetscInt nrows, *buf_s, *buf_si, *buf_si_i, **nextrow, **nextai; 4718 MPI_Request *si_waits, *sj_waits, *ri_waits, *rj_waits; 4719 MPI_Status *status; 4720 PetscFreeSpaceList free_space = NULL, current_space = NULL; 4721 PetscBT lnkbt; 4722 Mat_Merge_SeqsToMPI *merge; 4723 PetscContainer container; 4724 4725 PetscFunctionBegin; 4726 PetscCall(PetscLogEventBegin(MAT_Seqstompisym, seqmat, 0, 0, 0)); 4727 4728 /* make sure it is a PETSc comm */ 4729 PetscCall(PetscCommDuplicate(comm, &comm, NULL)); 4730 PetscCallMPI(MPI_Comm_size(comm, &size)); 4731 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 4732 4733 PetscCall(PetscNew(&merge)); 4734 PetscCall(PetscMalloc1(size, &status)); 4735 4736 /* determine row ownership */ 4737 /*---------------------------------------------------------*/ 4738 PetscCall(PetscLayoutCreate(comm, &merge->rowmap)); 4739 PetscCall(PetscLayoutSetLocalSize(merge->rowmap, m)); 4740 
PetscCall(PetscLayoutSetSize(merge->rowmap, M)); 4741 PetscCall(PetscLayoutSetBlockSize(merge->rowmap, 1)); 4742 PetscCall(PetscLayoutSetUp(merge->rowmap)); 4743 PetscCall(PetscMalloc1(size, &len_si)); 4744 PetscCall(PetscMalloc1(size, &merge->len_s)); 4745 4746 m = merge->rowmap->n; 4747 owners = merge->rowmap->range; 4748 4749 /* determine the number of messages to send, their lengths */ 4750 /*---------------------------------------------------------*/ 4751 len_s = merge->len_s; 4752 4753 len = 0; /* length of buf_si[] */ 4754 merge->nsend = 0; 4755 for (proc = 0; proc < size; proc++) { 4756 len_si[proc] = 0; 4757 if (proc == rank) { 4758 len_s[proc] = 0; 4759 } else { 4760 len_si[proc] = owners[proc + 1] - owners[proc] + 1; 4761 len_s[proc] = ai[owners[proc + 1]] - ai[owners[proc]]; /* num of rows to be sent to [proc] */ 4762 } 4763 if (len_s[proc]) { 4764 merge->nsend++; 4765 nrows = 0; 4766 for (i = owners[proc]; i < owners[proc + 1]; i++) { 4767 if (ai[i + 1] > ai[i]) nrows++; 4768 } 4769 len_si[proc] = 2 * (nrows + 1); 4770 len += len_si[proc]; 4771 } 4772 } 4773 4774 /* determine the number and length of messages to receive for ij-structure */ 4775 /*-------------------------------------------------------------------------*/ 4776 PetscCall(PetscGatherNumberOfMessages(comm, NULL, len_s, &merge->nrecv)); 4777 PetscCall(PetscGatherMessageLengths2(comm, merge->nsend, merge->nrecv, len_s, len_si, &merge->id_r, &merge->len_r, &len_ri)); 4778 4779 /* post the Irecv of j-structure */ 4780 /*-------------------------------*/ 4781 PetscCall(PetscCommGetNewTag(comm, &tagj)); 4782 PetscCall(PetscPostIrecvInt(comm, tagj, merge->nrecv, merge->id_r, merge->len_r, &buf_rj, &rj_waits)); 4783 4784 /* post the Isend of j-structure */ 4785 /*--------------------------------*/ 4786 PetscCall(PetscMalloc2(merge->nsend, &si_waits, merge->nsend, &sj_waits)); 4787 4788 for (proc = 0, k = 0; proc < size; proc++) { 4789 if (!len_s[proc]) continue; 4790 i = owners[proc]; 4791 
PetscCallMPI(MPI_Isend(aj + ai[i], len_s[proc], MPIU_INT, proc, tagj, comm, sj_waits + k)); 4792 k++; 4793 } 4794 4795 /* receives and sends of j-structure are complete */ 4796 /*------------------------------------------------*/ 4797 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, rj_waits, status)); 4798 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, sj_waits, status)); 4799 4800 /* send and recv i-structure */ 4801 /*---------------------------*/ 4802 PetscCall(PetscCommGetNewTag(comm, &tagi)); 4803 PetscCall(PetscPostIrecvInt(comm, tagi, merge->nrecv, merge->id_r, len_ri, &buf_ri, &ri_waits)); 4804 4805 PetscCall(PetscMalloc1(len + 1, &buf_s)); 4806 buf_si = buf_s; /* points to the beginning of k-th msg to be sent */ 4807 for (proc = 0, k = 0; proc < size; proc++) { 4808 if (!len_s[proc]) continue; 4809 /* form outgoing message for i-structure: 4810 buf_si[0]: nrows to be sent 4811 [1:nrows]: row index (global) 4812 [nrows+1:2*nrows+1]: i-structure index 4813 */ 4814 /*-------------------------------------------*/ 4815 nrows = len_si[proc] / 2 - 1; 4816 buf_si_i = buf_si + nrows + 1; 4817 buf_si[0] = nrows; 4818 buf_si_i[0] = 0; 4819 nrows = 0; 4820 for (i = owners[proc]; i < owners[proc + 1]; i++) { 4821 anzi = ai[i + 1] - ai[i]; 4822 if (anzi) { 4823 buf_si_i[nrows + 1] = buf_si_i[nrows] + anzi; /* i-structure */ 4824 buf_si[nrows + 1] = i - owners[proc]; /* local row index */ 4825 nrows++; 4826 } 4827 } 4828 PetscCallMPI(MPI_Isend(buf_si, len_si[proc], MPIU_INT, proc, tagi, comm, si_waits + k)); 4829 k++; 4830 buf_si += len_si[proc]; 4831 } 4832 4833 if (merge->nrecv) PetscCallMPI(MPI_Waitall(merge->nrecv, ri_waits, status)); 4834 if (merge->nsend) PetscCallMPI(MPI_Waitall(merge->nsend, si_waits, status)); 4835 4836 PetscCall(PetscInfo(seqmat, "nsend: %d, nrecv: %d\n", merge->nsend, merge->nrecv)); 4837 for (i = 0; i < merge->nrecv; i++) { PetscCall(PetscInfo(seqmat, "recv len_ri=%d, len_rj=%d from [%d]\n", len_ri[i], merge->len_r[i], 
merge->id_r[i])); } 4838 4839 PetscCall(PetscFree(len_si)); 4840 PetscCall(PetscFree(len_ri)); 4841 PetscCall(PetscFree(rj_waits)); 4842 PetscCall(PetscFree2(si_waits, sj_waits)); 4843 PetscCall(PetscFree(ri_waits)); 4844 PetscCall(PetscFree(buf_s)); 4845 PetscCall(PetscFree(status)); 4846 4847 /* compute a local seq matrix in each processor */ 4848 /*----------------------------------------------*/ 4849 /* allocate bi array and free space for accumulating nonzero column info */ 4850 PetscCall(PetscMalloc1(m + 1, &bi)); 4851 bi[0] = 0; 4852 4853 /* create and initialize a linked list */ 4854 nlnk = N + 1; 4855 PetscCall(PetscLLCreate(N, N, nlnk, lnk, lnkbt)); 4856 4857 /* initial FreeSpace size is 2*(num of local nnz(seqmat)) */ 4858 len = ai[owners[rank + 1]] - ai[owners[rank]]; 4859 PetscCall(PetscFreeSpaceGet(PetscIntMultTruncate(2, len) + 1, &free_space)); 4860 4861 current_space = free_space; 4862 4863 /* determine symbolic info for each local row */ 4864 PetscCall(PetscMalloc3(merge->nrecv, &buf_ri_k, merge->nrecv, &nextrow, merge->nrecv, &nextai)); 4865 4866 for (k = 0; k < merge->nrecv; k++) { 4867 buf_ri_k[k] = buf_ri[k]; /* beginning of k-th recved i-structure */ 4868 nrows = *buf_ri_k[k]; 4869 nextrow[k] = buf_ri_k[k] + 1; /* next row number of k-th recved i-structure */ 4870 nextai[k] = buf_ri_k[k] + (nrows + 1); /* points to the next i-structure of k-th recved i-structure */ 4871 } 4872 4873 MatPreallocateBegin(comm, m, n, dnz, onz); 4874 len = 0; 4875 for (i = 0; i < m; i++) { 4876 bnzi = 0; 4877 /* add local non-zero cols of this proc's seqmat into lnk */ 4878 arow = owners[rank] + i; 4879 anzi = ai[arow + 1] - ai[arow]; 4880 aj = a->j + ai[arow]; 4881 PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt)); 4882 bnzi += nlnk; 4883 /* add received col data into lnk */ 4884 for (k = 0; k < merge->nrecv; k++) { /* k-th received message */ 4885 if (i == *nextrow[k]) { /* i-th row */ 4886 anzi = *(nextai[k] + 1) - *nextai[k]; 4887 aj = buf_rj[k] + 
*nextai[k]; 4888 PetscCall(PetscLLAddSorted(anzi, aj, N, &nlnk, lnk, lnkbt)); 4889 bnzi += nlnk; 4890 nextrow[k]++; 4891 nextai[k]++; 4892 } 4893 } 4894 if (len < bnzi) len = bnzi; /* =max(bnzi) */ 4895 4896 /* if free space is not available, make more free space */ 4897 if (current_space->local_remaining < bnzi) { PetscCall(PetscFreeSpaceGet(PetscIntSumTruncate(bnzi, current_space->total_array_size), ¤t_space)); } 4898 /* copy data into free space, then initialize lnk */ 4899 PetscCall(PetscLLClean(N, N, bnzi, lnk, current_space->array, lnkbt)); 4900 PetscCall(MatPreallocateSet(i + owners[rank], bnzi, current_space->array, dnz, onz)); 4901 4902 current_space->array += bnzi; 4903 current_space->local_used += bnzi; 4904 current_space->local_remaining -= bnzi; 4905 4906 bi[i + 1] = bi[i] + bnzi; 4907 } 4908 4909 PetscCall(PetscFree3(buf_ri_k, nextrow, nextai)); 4910 4911 PetscCall(PetscMalloc1(bi[m] + 1, &bj)); 4912 PetscCall(PetscFreeSpaceContiguous(&free_space, bj)); 4913 PetscCall(PetscLLDestroy(lnk, lnkbt)); 4914 4915 /* create symbolic parallel matrix B_mpi */ 4916 /*---------------------------------------*/ 4917 PetscCall(MatGetBlockSizes(seqmat, &bs, &cbs)); 4918 PetscCall(MatCreate(comm, &B_mpi)); 4919 if (n == PETSC_DECIDE) { 4920 PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, N)); 4921 } else { 4922 PetscCall(MatSetSizes(B_mpi, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 4923 } 4924 PetscCall(MatSetBlockSizes(B_mpi, bs, cbs)); 4925 PetscCall(MatSetType(B_mpi, MATMPIAIJ)); 4926 PetscCall(MatMPIAIJSetPreallocation(B_mpi, 0, dnz, 0, onz)); 4927 MatPreallocateEnd(dnz, onz); 4928 PetscCall(MatSetOption(B_mpi, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE)); 4929 4930 /* B_mpi is not ready for use - assembly will be done by MatCreateMPIAIJSumSeqAIJNumeric() */ 4931 B_mpi->assembled = PETSC_FALSE; 4932 merge->bi = bi; 4933 merge->bj = bj; 4934 merge->buf_ri = buf_ri; 4935 merge->buf_rj = buf_rj; 4936 merge->coi = NULL; 4937 merge->coj = NULL; 4938 
merge->owners_co = NULL; 4939 4940 PetscCall(PetscCommDestroy(&comm)); 4941 4942 /* attach the supporting struct to B_mpi for reuse */ 4943 PetscCall(PetscContainerCreate(PETSC_COMM_SELF, &container)); 4944 PetscCall(PetscContainerSetPointer(container, merge)); 4945 PetscCall(PetscContainerSetUserDestroy(container, MatDestroy_MPIAIJ_SeqsToMPI)); 4946 PetscCall(PetscObjectCompose((PetscObject)B_mpi, "MatMergeSeqsToMPI", (PetscObject)container)); 4947 PetscCall(PetscContainerDestroy(&container)); 4948 *mpimat = B_mpi; 4949 4950 PetscCall(PetscLogEventEnd(MAT_Seqstompisym, seqmat, 0, 0, 0)); 4951 PetscFunctionReturn(0); 4952 } 4953 4954 /*@C 4955 MatCreateMPIAIJSumSeqAIJ - Creates a MATMPIAIJ matrix by adding sequential 4956 matrices from each processor 4957 4958 Collective 4959 4960 Input Parameters: 4961 + comm - the communicators the parallel matrix will live on 4962 . seqmat - the input sequential matrices 4963 . m - number of local rows (or PETSC_DECIDE) 4964 . n - number of local columns (or PETSC_DECIDE) 4965 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 4966 4967 Output Parameter: 4968 . mpimat - the parallel matrix generated 4969 4970 Level: advanced 4971 4972 Notes: 4973 The dimensions of the sequential matrix in each processor MUST be the same. 4974 The input seqmat is included into the container "Mat_Merge_SeqsToMPI", and will be 4975 destroyed when mpimat is destroyed. Call PetscObjectQuery() to access seqmat. 
4976 @*/ 4977 PetscErrorCode MatCreateMPIAIJSumSeqAIJ(MPI_Comm comm, Mat seqmat, PetscInt m, PetscInt n, MatReuse scall, Mat *mpimat) { 4978 PetscMPIInt size; 4979 4980 PetscFunctionBegin; 4981 PetscCallMPI(MPI_Comm_size(comm, &size)); 4982 if (size == 1) { 4983 PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0)); 4984 if (scall == MAT_INITIAL_MATRIX) { 4985 PetscCall(MatDuplicate(seqmat, MAT_COPY_VALUES, mpimat)); 4986 } else { 4987 PetscCall(MatCopy(seqmat, *mpimat, SAME_NONZERO_PATTERN)); 4988 } 4989 PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0)); 4990 PetscFunctionReturn(0); 4991 } 4992 PetscCall(PetscLogEventBegin(MAT_Seqstompi, seqmat, 0, 0, 0)); 4993 if (scall == MAT_INITIAL_MATRIX) { PetscCall(MatCreateMPIAIJSumSeqAIJSymbolic(comm, seqmat, m, n, mpimat)); } 4994 PetscCall(MatCreateMPIAIJSumSeqAIJNumeric(seqmat, *mpimat)); 4995 PetscCall(PetscLogEventEnd(MAT_Seqstompi, seqmat, 0, 0, 0)); 4996 PetscFunctionReturn(0); 4997 } 4998 4999 /*@ 5000 MatAIJGetLocalMat - Creates a SeqAIJ from a MATAIJ matrix by taking all its local rows and putting them into a sequential matrix with 5001 mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 5002 with MatGetSize() 5003 5004 Not Collective 5005 5006 Input Parameters: 5007 + A - the matrix 5008 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5009 5010 Output Parameter: 5011 . A_loc - the local sequential matrix generated 5012 5013 Level: developer 5014 5015 Notes: 5016 In other words combines the two parts of a parallel MPIAIJ matrix on each process to a single matrix. 
5017 5018 Destroy the matrix with MatDestroy() 5019 5020 .seealso: MatMPIAIJGetLocalMat() 5021 5022 @*/ 5023 PetscErrorCode MatAIJGetLocalMat(Mat A, Mat *A_loc) { 5024 PetscBool mpi; 5025 5026 PetscFunctionBegin; 5027 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &mpi)); 5028 if (mpi) { 5029 PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, A_loc)); 5030 } else { 5031 *A_loc = A; 5032 PetscCall(PetscObjectReference((PetscObject)*A_loc)); 5033 } 5034 PetscFunctionReturn(0); 5035 } 5036 5037 /*@ 5038 MatMPIAIJGetLocalMat - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential matrix with 5039 mlocal rows and n columns. Where mlocal is the row count obtained with MatGetLocalSize() and n is the global column count obtained 5040 with MatGetSize() 5041 5042 Not Collective 5043 5044 Input Parameters: 5045 + A - the matrix 5046 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5047 5048 Output Parameter: 5049 . A_loc - the local sequential matrix generated 5050 5051 Level: developer 5052 5053 Notes: 5054 In other words combines the two parts of a parallel MPIAIJ matrix on each process to a single matrix. 5055 5056 When the communicator associated with A has size 1 and MAT_INITIAL_MATRIX is requested, the matrix returned is the diagonal part of A. 5057 If MAT_REUSE_MATRIX is requested with comm size 1, MatCopy(Adiag,*A_loc,SAME_NONZERO_PATTERN) is called. 5058 This means that one can preallocate the proper sequential matrix first and then call this routine with MAT_REUSE_MATRIX to safely 5059 modify the values of the returned A_loc. 
5060 5061 .seealso: `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMatCondensed()`, `MatMPIAIJGetLocalMatMerge()` 5062 @*/ 5063 PetscErrorCode MatMPIAIJGetLocalMat(Mat A, MatReuse scall, Mat *A_loc) { 5064 Mat_MPIAIJ *mpimat = (Mat_MPIAIJ *)A->data; 5065 Mat_SeqAIJ *mat, *a, *b; 5066 PetscInt *ai, *aj, *bi, *bj, *cmap = mpimat->garray; 5067 const PetscScalar *aa, *ba, *aav, *bav; 5068 PetscScalar *ca, *cam; 5069 PetscMPIInt size; 5070 PetscInt am = A->rmap->n, i, j, k, cstart = A->cmap->rstart; 5071 PetscInt *ci, *cj, col, ncols_d, ncols_o, jo; 5072 PetscBool match; 5073 5074 PetscFunctionBegin; 5075 PetscCall(PetscStrbeginswith(((PetscObject)A)->type_name, MATMPIAIJ, &match)); 5076 PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input"); 5077 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 5078 if (size == 1) { 5079 if (scall == MAT_INITIAL_MATRIX) { 5080 PetscCall(PetscObjectReference((PetscObject)mpimat->A)); 5081 *A_loc = mpimat->A; 5082 } else if (scall == MAT_REUSE_MATRIX) { 5083 PetscCall(MatCopy(mpimat->A, *A_loc, SAME_NONZERO_PATTERN)); 5084 } 5085 PetscFunctionReturn(0); 5086 } 5087 5088 PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0)); 5089 a = (Mat_SeqAIJ *)(mpimat->A)->data; 5090 b = (Mat_SeqAIJ *)(mpimat->B)->data; 5091 ai = a->i; 5092 aj = a->j; 5093 bi = b->i; 5094 bj = b->j; 5095 PetscCall(MatSeqAIJGetArrayRead(mpimat->A, &aav)); 5096 PetscCall(MatSeqAIJGetArrayRead(mpimat->B, &bav)); 5097 aa = aav; 5098 ba = bav; 5099 if (scall == MAT_INITIAL_MATRIX) { 5100 PetscCall(PetscMalloc1(1 + am, &ci)); 5101 ci[0] = 0; 5102 for (i = 0; i < am; i++) { ci[i + 1] = ci[i] + (ai[i + 1] - ai[i]) + (bi[i + 1] - bi[i]); } 5103 PetscCall(PetscMalloc1(1 + ci[am], &cj)); 5104 PetscCall(PetscMalloc1(1 + ci[am], &ca)); 5105 k = 0; 5106 for (i = 0; i < am; i++) { 5107 ncols_o = bi[i + 1] - bi[i]; 5108 ncols_d = ai[i + 1] - ai[i]; 5109 /* off-diagonal portion of A */ 5110 for (jo = 0; jo < 
ncols_o; jo++) { 5111 col = cmap[*bj]; 5112 if (col >= cstart) break; 5113 cj[k] = col; 5114 bj++; 5115 ca[k++] = *ba++; 5116 } 5117 /* diagonal portion of A */ 5118 for (j = 0; j < ncols_d; j++) { 5119 cj[k] = cstart + *aj++; 5120 ca[k++] = *aa++; 5121 } 5122 /* off-diagonal portion of A */ 5123 for (j = jo; j < ncols_o; j++) { 5124 cj[k] = cmap[*bj++]; 5125 ca[k++] = *ba++; 5126 } 5127 } 5128 /* put together the new matrix */ 5129 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, A->cmap->N, ci, cj, ca, A_loc)); 5130 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5131 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5132 mat = (Mat_SeqAIJ *)(*A_loc)->data; 5133 mat->free_a = PETSC_TRUE; 5134 mat->free_ij = PETSC_TRUE; 5135 mat->nonew = 0; 5136 } else if (scall == MAT_REUSE_MATRIX) { 5137 mat = (Mat_SeqAIJ *)(*A_loc)->data; 5138 ci = mat->i; 5139 cj = mat->j; 5140 PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &cam)); 5141 for (i = 0; i < am; i++) { 5142 /* off-diagonal portion of A */ 5143 ncols_o = bi[i + 1] - bi[i]; 5144 for (jo = 0; jo < ncols_o; jo++) { 5145 col = cmap[*bj]; 5146 if (col >= cstart) break; 5147 *cam++ = *ba++; 5148 bj++; 5149 } 5150 /* diagonal portion of A */ 5151 ncols_d = ai[i + 1] - ai[i]; 5152 for (j = 0; j < ncols_d; j++) *cam++ = *aa++; 5153 /* off-diagonal portion of A */ 5154 for (j = jo; j < ncols_o; j++) { 5155 *cam++ = *ba++; 5156 bj++; 5157 } 5158 } 5159 PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &cam)); 5160 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall); 5161 PetscCall(MatSeqAIJRestoreArrayRead(mpimat->A, &aav)); 5162 PetscCall(MatSeqAIJRestoreArrayRead(mpimat->B, &bav)); 5163 PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0)); 5164 PetscFunctionReturn(0); 5165 } 5166 5167 /*@ 5168 MatMPIAIJGetLocalMatMerge - Creates a SeqAIJ from a MATMPIAIJ matrix by taking all its local rows and putting them into a sequential 
matrix with 5169 mlocal rows and n columns. Where n is the sum of the number of columns of the diagonal and offdiagonal part 5170 5171 Not Collective 5172 5173 Input Parameters: 5174 + A - the matrix 5175 - scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5176 5177 Output Parameters: 5178 + glob - sequential IS with global indices associated with the columns of the local sequential matrix generated (can be NULL) 5179 - A_loc - the local sequential matrix generated 5180 5181 Level: developer 5182 5183 Notes: 5184 This is different from MatMPIAIJGetLocalMat() since the first columns in the returning matrix are those associated with the diagonal part, then those associated with the offdiagonal part (in its local ordering) 5185 5186 .seealso: `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()`, `MatMPIAIJGetLocalMatCondensed()` 5187 5188 @*/ 5189 PetscErrorCode MatMPIAIJGetLocalMatMerge(Mat A, MatReuse scall, IS *glob, Mat *A_loc) { 5190 Mat Ao, Ad; 5191 const PetscInt *cmap; 5192 PetscMPIInt size; 5193 PetscErrorCode (*f)(Mat, MatReuse, IS *, Mat *); 5194 5195 PetscFunctionBegin; 5196 PetscCall(MatMPIAIJGetSeqAIJ(A, &Ad, &Ao, &cmap)); 5197 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 5198 if (size == 1) { 5199 if (scall == MAT_INITIAL_MATRIX) { 5200 PetscCall(PetscObjectReference((PetscObject)Ad)); 5201 *A_loc = Ad; 5202 } else if (scall == MAT_REUSE_MATRIX) { 5203 PetscCall(MatCopy(Ad, *A_loc, SAME_NONZERO_PATTERN)); 5204 } 5205 if (glob) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)Ad), Ad->cmap->n, Ad->cmap->rstart, 1, glob)); 5206 PetscFunctionReturn(0); 5207 } 5208 PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatMPIAIJGetLocalMatMerge_C", &f)); 5209 PetscCall(PetscLogEventBegin(MAT_Getlocalmat, A, 0, 0, 0)); 5210 if (f) { 5211 PetscCall((*f)(A, scall, glob, A_loc)); 5212 } else { 5213 Mat_SeqAIJ *a = (Mat_SeqAIJ *)Ad->data; 5214 Mat_SeqAIJ *b = (Mat_SeqAIJ *)Ao->data; 5215 Mat_SeqAIJ *c; 5216 PetscInt *ai = a->i, *aj = 
a->j; 5217 PetscInt *bi = b->i, *bj = b->j; 5218 PetscInt *ci, *cj; 5219 const PetscScalar *aa, *ba; 5220 PetscScalar *ca; 5221 PetscInt i, j, am, dn, on; 5222 5223 PetscCall(MatGetLocalSize(Ad, &am, &dn)); 5224 PetscCall(MatGetLocalSize(Ao, NULL, &on)); 5225 PetscCall(MatSeqAIJGetArrayRead(Ad, &aa)); 5226 PetscCall(MatSeqAIJGetArrayRead(Ao, &ba)); 5227 if (scall == MAT_INITIAL_MATRIX) { 5228 PetscInt k; 5229 PetscCall(PetscMalloc1(1 + am, &ci)); 5230 PetscCall(PetscMalloc1(ai[am] + bi[am], &cj)); 5231 PetscCall(PetscMalloc1(ai[am] + bi[am], &ca)); 5232 ci[0] = 0; 5233 for (i = 0, k = 0; i < am; i++) { 5234 const PetscInt ncols_o = bi[i + 1] - bi[i]; 5235 const PetscInt ncols_d = ai[i + 1] - ai[i]; 5236 ci[i + 1] = ci[i] + ncols_o + ncols_d; 5237 /* diagonal portion of A */ 5238 for (j = 0; j < ncols_d; j++, k++) { 5239 cj[k] = *aj++; 5240 ca[k] = *aa++; 5241 } 5242 /* off-diagonal portion of A */ 5243 for (j = 0; j < ncols_o; j++, k++) { 5244 cj[k] = dn + *bj++; 5245 ca[k] = *ba++; 5246 } 5247 } 5248 /* put together the new matrix */ 5249 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, am, dn + on, ci, cj, ca, A_loc)); 5250 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. */ 5251 /* Since these are PETSc arrays, change flags to free them as necessary. 
*/ 5252 c = (Mat_SeqAIJ *)(*A_loc)->data; 5253 c->free_a = PETSC_TRUE; 5254 c->free_ij = PETSC_TRUE; 5255 c->nonew = 0; 5256 PetscCall(MatSetType(*A_loc, ((PetscObject)Ad)->type_name)); 5257 } else if (scall == MAT_REUSE_MATRIX) { 5258 PetscCall(MatSeqAIJGetArrayWrite(*A_loc, &ca)); 5259 for (i = 0; i < am; i++) { 5260 const PetscInt ncols_d = ai[i + 1] - ai[i]; 5261 const PetscInt ncols_o = bi[i + 1] - bi[i]; 5262 /* diagonal portion of A */ 5263 for (j = 0; j < ncols_d; j++) *ca++ = *aa++; 5264 /* off-diagonal portion of A */ 5265 for (j = 0; j < ncols_o; j++) *ca++ = *ba++; 5266 } 5267 PetscCall(MatSeqAIJRestoreArrayWrite(*A_loc, &ca)); 5268 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid MatReuse %d", (int)scall); 5269 PetscCall(MatSeqAIJRestoreArrayRead(Ad, &aa)); 5270 PetscCall(MatSeqAIJRestoreArrayRead(Ao, &aa)); 5271 if (glob) { 5272 PetscInt cst, *gidx; 5273 5274 PetscCall(MatGetOwnershipRangeColumn(A, &cst, NULL)); 5275 PetscCall(PetscMalloc1(dn + on, &gidx)); 5276 for (i = 0; i < dn; i++) gidx[i] = cst + i; 5277 for (i = 0; i < on; i++) gidx[i + dn] = cmap[i]; 5278 PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)Ad), dn + on, gidx, PETSC_OWN_POINTER, glob)); 5279 } 5280 } 5281 PetscCall(PetscLogEventEnd(MAT_Getlocalmat, A, 0, 0, 0)); 5282 PetscFunctionReturn(0); 5283 } 5284 5285 /*@C 5286 MatMPIAIJGetLocalMatCondensed - Creates a SeqAIJ matrix from an MATMPIAIJ matrix by taking all its local rows and NON-ZERO columns 5287 5288 Not Collective 5289 5290 Input Parameters: 5291 + A - the matrix 5292 . scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX 5293 - row, col - index sets of rows and columns to extract (or NULL) 5294 5295 Output Parameter: 5296 . 
A_loc - the local sequential matrix generated 5297 5298 Level: developer 5299 5300 .seealso: `MatGetOwnershipRange()`, `MatMPIAIJGetLocalMat()` 5301 5302 @*/ 5303 PetscErrorCode MatMPIAIJGetLocalMatCondensed(Mat A, MatReuse scall, IS *row, IS *col, Mat *A_loc) { 5304 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5305 PetscInt i, start, end, ncols, nzA, nzB, *cmap, imark, *idx; 5306 IS isrowa, iscola; 5307 Mat *aloc; 5308 PetscBool match; 5309 5310 PetscFunctionBegin; 5311 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIAIJ, &match)); 5312 PetscCheck(match, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Requires MATMPIAIJ matrix as input"); 5313 PetscCall(PetscLogEventBegin(MAT_Getlocalmatcondensed, A, 0, 0, 0)); 5314 if (!row) { 5315 start = A->rmap->rstart; 5316 end = A->rmap->rend; 5317 PetscCall(ISCreateStride(PETSC_COMM_SELF, end - start, start, 1, &isrowa)); 5318 } else { 5319 isrowa = *row; 5320 } 5321 if (!col) { 5322 start = A->cmap->rstart; 5323 cmap = a->garray; 5324 nzA = a->A->cmap->n; 5325 nzB = a->B->cmap->n; 5326 PetscCall(PetscMalloc1(nzA + nzB, &idx)); 5327 ncols = 0; 5328 for (i = 0; i < nzB; i++) { 5329 if (cmap[i] < start) idx[ncols++] = cmap[i]; 5330 else break; 5331 } 5332 imark = i; 5333 for (i = 0; i < nzA; i++) idx[ncols++] = start + i; 5334 for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; 5335 PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &iscola)); 5336 } else { 5337 iscola = *col; 5338 } 5339 if (scall != MAT_INITIAL_MATRIX) { 5340 PetscCall(PetscMalloc1(1, &aloc)); 5341 aloc[0] = *A_loc; 5342 } 5343 PetscCall(MatCreateSubMatrices(A, 1, &isrowa, &iscola, scall, &aloc)); 5344 if (!col) { /* attach global id of condensed columns */ 5345 PetscCall(PetscObjectCompose((PetscObject)aloc[0], "_petsc_GetLocalMatCondensed_iscol", (PetscObject)iscola)); 5346 } 5347 *A_loc = aloc[0]; 5348 PetscCall(PetscFree(aloc)); 5349 if (!row) { PetscCall(ISDestroy(&isrowa)); } 5350 if (!col) { PetscCall(ISDestroy(&iscola)); } 
5351 PetscCall(PetscLogEventEnd(MAT_Getlocalmatcondensed, A, 0, 0, 0)); 5352 PetscFunctionReturn(0); 5353 } 5354 5355 /* 5356 * Create a sequential AIJ matrix based on row indices. a whole column is extracted once a row is matched. 5357 * Row could be local or remote.The routine is designed to be scalable in memory so that nothing is based 5358 * on a global size. 5359 * */ 5360 PetscErrorCode MatCreateSeqSubMatrixWithRows_Private(Mat P, IS rows, Mat *P_oth) { 5361 Mat_MPIAIJ *p = (Mat_MPIAIJ *)P->data; 5362 Mat_SeqAIJ *pd = (Mat_SeqAIJ *)(p->A)->data, *po = (Mat_SeqAIJ *)(p->B)->data, *p_oth; 5363 PetscInt plocalsize, nrows, *ilocal, *oilocal, i, lidx, *nrcols, *nlcols, ncol; 5364 PetscMPIInt owner; 5365 PetscSFNode *iremote, *oiremote; 5366 const PetscInt *lrowindices; 5367 PetscSF sf, osf; 5368 PetscInt pcstart, *roffsets, *loffsets, *pnnz, j; 5369 PetscInt ontotalcols, dntotalcols, ntotalcols, nout; 5370 MPI_Comm comm; 5371 ISLocalToGlobalMapping mapping; 5372 const PetscScalar *pd_a, *po_a; 5373 5374 PetscFunctionBegin; 5375 PetscCall(PetscObjectGetComm((PetscObject)P, &comm)); 5376 /* plocalsize is the number of roots 5377 * nrows is the number of leaves 5378 * */ 5379 PetscCall(MatGetLocalSize(P, &plocalsize, NULL)); 5380 PetscCall(ISGetLocalSize(rows, &nrows)); 5381 PetscCall(PetscCalloc1(nrows, &iremote)); 5382 PetscCall(ISGetIndices(rows, &lrowindices)); 5383 for (i = 0; i < nrows; i++) { 5384 /* Find a remote index and an owner for a row 5385 * The row could be local or remote 5386 * */ 5387 owner = 0; 5388 lidx = 0; 5389 PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, &lidx)); 5390 iremote[i].index = lidx; 5391 iremote[i].rank = owner; 5392 } 5393 /* Create SF to communicate how many nonzero columns for each row */ 5394 PetscCall(PetscSFCreate(comm, &sf)); 5395 /* SF will figure out the number of nonzero colunms for each row, and their 5396 * offsets 5397 * */ 5398 PetscCall(PetscSFSetGraph(sf, plocalsize, nrows, NULL, 
PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 5399 PetscCall(PetscSFSetFromOptions(sf)); 5400 PetscCall(PetscSFSetUp(sf)); 5401 5402 PetscCall(PetscCalloc1(2 * (plocalsize + 1), &roffsets)); 5403 PetscCall(PetscCalloc1(2 * plocalsize, &nrcols)); 5404 PetscCall(PetscCalloc1(nrows, &pnnz)); 5405 roffsets[0] = 0; 5406 roffsets[1] = 0; 5407 for (i = 0; i < plocalsize; i++) { 5408 /* diag */ 5409 nrcols[i * 2 + 0] = pd->i[i + 1] - pd->i[i]; 5410 /* off diag */ 5411 nrcols[i * 2 + 1] = po->i[i + 1] - po->i[i]; 5412 /* compute offsets so that we relative location for each row */ 5413 roffsets[(i + 1) * 2 + 0] = roffsets[i * 2 + 0] + nrcols[i * 2 + 0]; 5414 roffsets[(i + 1) * 2 + 1] = roffsets[i * 2 + 1] + nrcols[i * 2 + 1]; 5415 } 5416 PetscCall(PetscCalloc1(2 * nrows, &nlcols)); 5417 PetscCall(PetscCalloc1(2 * nrows, &loffsets)); 5418 /* 'r' means root, and 'l' means leaf */ 5419 PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE)); 5420 PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE)); 5421 PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, nrcols, nlcols, MPI_REPLACE)); 5422 PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, roffsets, loffsets, MPI_REPLACE)); 5423 PetscCall(PetscSFDestroy(&sf)); 5424 PetscCall(PetscFree(roffsets)); 5425 PetscCall(PetscFree(nrcols)); 5426 dntotalcols = 0; 5427 ontotalcols = 0; 5428 ncol = 0; 5429 for (i = 0; i < nrows; i++) { 5430 pnnz[i] = nlcols[i * 2 + 0] + nlcols[i * 2 + 1]; 5431 ncol = PetscMax(pnnz[i], ncol); 5432 /* diag */ 5433 dntotalcols += nlcols[i * 2 + 0]; 5434 /* off diag */ 5435 ontotalcols += nlcols[i * 2 + 1]; 5436 } 5437 /* We do not need to figure the right number of columns 5438 * since all the calculations will be done by going through the raw data 5439 * */ 5440 PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, nrows, ncol, 0, pnnz, P_oth)); 5441 PetscCall(MatSetUp(*P_oth)); 5442 PetscCall(PetscFree(pnnz)); 5443 p_oth = (Mat_SeqAIJ *)(*P_oth)->data; 5444 /* diag */ 5445 
PetscCall(PetscCalloc1(dntotalcols, &iremote)); 5446 /* off diag */ 5447 PetscCall(PetscCalloc1(ontotalcols, &oiremote)); 5448 /* diag */ 5449 PetscCall(PetscCalloc1(dntotalcols, &ilocal)); 5450 /* off diag */ 5451 PetscCall(PetscCalloc1(ontotalcols, &oilocal)); 5452 dntotalcols = 0; 5453 ontotalcols = 0; 5454 ntotalcols = 0; 5455 for (i = 0; i < nrows; i++) { 5456 owner = 0; 5457 PetscCall(PetscLayoutFindOwnerIndex(P->rmap, lrowindices[i], &owner, NULL)); 5458 /* Set iremote for diag matrix */ 5459 for (j = 0; j < nlcols[i * 2 + 0]; j++) { 5460 iremote[dntotalcols].index = loffsets[i * 2 + 0] + j; 5461 iremote[dntotalcols].rank = owner; 5462 /* P_oth is seqAIJ so that ilocal need to point to the first part of memory */ 5463 ilocal[dntotalcols++] = ntotalcols++; 5464 } 5465 /* off diag */ 5466 for (j = 0; j < nlcols[i * 2 + 1]; j++) { 5467 oiremote[ontotalcols].index = loffsets[i * 2 + 1] + j; 5468 oiremote[ontotalcols].rank = owner; 5469 oilocal[ontotalcols++] = ntotalcols++; 5470 } 5471 } 5472 PetscCall(ISRestoreIndices(rows, &lrowindices)); 5473 PetscCall(PetscFree(loffsets)); 5474 PetscCall(PetscFree(nlcols)); 5475 PetscCall(PetscSFCreate(comm, &sf)); 5476 /* P serves as roots and P_oth is leaves 5477 * Diag matrix 5478 * */ 5479 PetscCall(PetscSFSetGraph(sf, pd->i[plocalsize], dntotalcols, ilocal, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER)); 5480 PetscCall(PetscSFSetFromOptions(sf)); 5481 PetscCall(PetscSFSetUp(sf)); 5482 5483 PetscCall(PetscSFCreate(comm, &osf)); 5484 /* Off diag */ 5485 PetscCall(PetscSFSetGraph(osf, po->i[plocalsize], ontotalcols, oilocal, PETSC_OWN_POINTER, oiremote, PETSC_OWN_POINTER)); 5486 PetscCall(PetscSFSetFromOptions(osf)); 5487 PetscCall(PetscSFSetUp(osf)); 5488 PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a)); 5489 PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a)); 5490 /* We operate on the matrix internal data for saving memory */ 5491 PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5492 
PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5493 PetscCall(MatGetOwnershipRangeColumn(P, &pcstart, NULL)); 5494 /* Convert to global indices for diag matrix */ 5495 for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] += pcstart; 5496 PetscCall(PetscSFBcastBegin(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE)); 5497 /* We want P_oth store global indices */ 5498 PetscCall(ISLocalToGlobalMappingCreate(comm, 1, p->B->cmap->n, p->garray, PETSC_COPY_VALUES, &mapping)); 5499 /* Use memory scalable approach */ 5500 PetscCall(ISLocalToGlobalMappingSetType(mapping, ISLOCALTOGLOBALMAPPINGHASH)); 5501 PetscCall(ISLocalToGlobalMappingApply(mapping, po->i[plocalsize], po->j, po->j)); 5502 PetscCall(PetscSFBcastBegin(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE)); 5503 PetscCall(PetscSFBcastEnd(sf, MPIU_INT, pd->j, p_oth->j, MPI_REPLACE)); 5504 /* Convert back to local indices */ 5505 for (i = 0; i < pd->i[plocalsize]; i++) pd->j[i] -= pcstart; 5506 PetscCall(PetscSFBcastEnd(osf, MPIU_INT, po->j, p_oth->j, MPI_REPLACE)); 5507 nout = 0; 5508 PetscCall(ISGlobalToLocalMappingApply(mapping, IS_GTOLM_DROP, po->i[plocalsize], po->j, &nout, po->j)); 5509 PetscCheck(nout == po->i[plocalsize], comm, PETSC_ERR_ARG_INCOMP, "n %" PetscInt_FMT " does not equal to nout %" PetscInt_FMT " ", po->i[plocalsize], nout); 5510 PetscCall(ISLocalToGlobalMappingDestroy(&mapping)); 5511 /* Exchange values */ 5512 PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5513 PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5514 PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a)); 5515 PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a)); 5516 /* Stop PETSc from shrinking memory */ 5517 for (i = 0; i < nrows; i++) p_oth->ilen[i] = p_oth->imax[i]; 5518 PetscCall(MatAssemblyBegin(*P_oth, MAT_FINAL_ASSEMBLY)); 5519 PetscCall(MatAssemblyEnd(*P_oth, MAT_FINAL_ASSEMBLY)); 5520 /* Attach PetscSF objects to P_oth so that we can reuse it later */ 5521 
PetscCall(PetscObjectCompose((PetscObject)*P_oth, "diagsf", (PetscObject)sf)); 5522 PetscCall(PetscObjectCompose((PetscObject)*P_oth, "offdiagsf", (PetscObject)osf)); 5523 PetscCall(PetscSFDestroy(&sf)); 5524 PetscCall(PetscSFDestroy(&osf)); 5525 PetscFunctionReturn(0); 5526 } 5527 5528 /* 5529 * Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A 5530 * This supports MPIAIJ and MAIJ 5531 * */ 5532 PetscErrorCode MatGetBrowsOfAcols_MPIXAIJ(Mat A, Mat P, PetscInt dof, MatReuse reuse, Mat *P_oth) { 5533 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data, *p = (Mat_MPIAIJ *)P->data; 5534 Mat_SeqAIJ *p_oth; 5535 IS rows, map; 5536 PetscHMapI hamp; 5537 PetscInt i, htsize, *rowindices, off, *mapping, key, count; 5538 MPI_Comm comm; 5539 PetscSF sf, osf; 5540 PetscBool has; 5541 5542 PetscFunctionBegin; 5543 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 5544 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, P, 0, 0)); 5545 /* If it is the first time, create an index set of off-diag nonzero columns of A, 5546 * and then create a submatrix (that often is an overlapping matrix) 5547 * */ 5548 if (reuse == MAT_INITIAL_MATRIX) { 5549 /* Use a hash table to figure out unique keys */ 5550 PetscCall(PetscHMapICreate(&hamp)); 5551 PetscCall(PetscHMapIResize(hamp, a->B->cmap->n)); 5552 PetscCall(PetscCalloc1(a->B->cmap->n, &mapping)); 5553 count = 0; 5554 /* Assume that a->g is sorted, otherwise the following does not make sense */ 5555 for (i = 0; i < a->B->cmap->n; i++) { 5556 key = a->garray[i] / dof; 5557 PetscCall(PetscHMapIHas(hamp, key, &has)); 5558 if (!has) { 5559 mapping[i] = count; 5560 PetscCall(PetscHMapISet(hamp, key, count++)); 5561 } else { 5562 /* Current 'i' has the same value the previous step */ 5563 mapping[i] = count - 1; 5564 } 5565 } 5566 PetscCall(ISCreateGeneral(comm, a->B->cmap->n, mapping, PETSC_OWN_POINTER, &map)); 5567 PetscCall(PetscHMapIGetSize(hamp, &htsize)); 5568 PetscCheck(htsize == count, comm, 
PETSC_ERR_ARG_INCOMP, " Size of hash map %" PetscInt_FMT " is inconsistent with count %" PetscInt_FMT " ", htsize, count); 5569 PetscCall(PetscCalloc1(htsize, &rowindices)); 5570 off = 0; 5571 PetscCall(PetscHMapIGetKeys(hamp, &off, rowindices)); 5572 PetscCall(PetscHMapIDestroy(&hamp)); 5573 PetscCall(PetscSortInt(htsize, rowindices)); 5574 PetscCall(ISCreateGeneral(comm, htsize, rowindices, PETSC_OWN_POINTER, &rows)); 5575 /* In case, the matrix was already created but users want to recreate the matrix */ 5576 PetscCall(MatDestroy(P_oth)); 5577 PetscCall(MatCreateSeqSubMatrixWithRows_Private(P, rows, P_oth)); 5578 PetscCall(PetscObjectCompose((PetscObject)*P_oth, "aoffdiagtopothmapping", (PetscObject)map)); 5579 PetscCall(ISDestroy(&map)); 5580 PetscCall(ISDestroy(&rows)); 5581 } else if (reuse == MAT_REUSE_MATRIX) { 5582 /* If matrix was already created, we simply update values using SF objects 5583 * that as attached to the matrix ealier. 5584 */ 5585 const PetscScalar *pd_a, *po_a; 5586 5587 PetscCall(PetscObjectQuery((PetscObject)*P_oth, "diagsf", (PetscObject *)&sf)); 5588 PetscCall(PetscObjectQuery((PetscObject)*P_oth, "offdiagsf", (PetscObject *)&osf)); 5589 PetscCheck(sf && osf, comm, PETSC_ERR_ARG_NULL, "Matrix is not initialized yet"); 5590 p_oth = (Mat_SeqAIJ *)(*P_oth)->data; 5591 /* Update values in place */ 5592 PetscCall(MatSeqAIJGetArrayRead(p->A, &pd_a)); 5593 PetscCall(MatSeqAIJGetArrayRead(p->B, &po_a)); 5594 PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5595 PetscCall(PetscSFBcastBegin(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5596 PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, pd_a, p_oth->a, MPI_REPLACE)); 5597 PetscCall(PetscSFBcastEnd(osf, MPIU_SCALAR, po_a, p_oth->a, MPI_REPLACE)); 5598 PetscCall(MatSeqAIJRestoreArrayRead(p->A, &pd_a)); 5599 PetscCall(MatSeqAIJRestoreArrayRead(p->B, &po_a)); 5600 } else SETERRQ(comm, PETSC_ERR_ARG_UNKNOWN_TYPE, "Unknown reuse type"); 5601 
  /* NOTE(review): the two statements below close out a routine whose beginning lies above this chunk; only its tail is visible here */
  PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, P, 0, 0));
  PetscFunctionReturn(0);
}

/*@C
  MatGetBrowsOfAcols - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns of local A

  Collective on Mat

  Input Parameters:
+ A - the first matrix in mpiaij format
. B - the second matrix in mpiaij format
- scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

  Output Parameters:
+ rowb - On input index sets of rows of B to extract (or NULL), modified on output
. colb - On input index sets of columns of B to extract (or NULL), modified on output
- B_seq - the sequential matrix generated

  Level: developer

@*/
PetscErrorCode MatGetBrowsOfAcols(Mat A, Mat B, MatReuse scall, IS *rowb, IS *colb, Mat *B_seq) {
  Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data;
  PetscInt   *idx, i, start, ncols, nzA, nzB, *cmap, imark;
  IS          isrowb, iscolb;
  Mat        *bseq = NULL;

  PetscFunctionBegin;
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend) {
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);
  }
  PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAcols, A, B, 0, 0));

  if (scall == MAT_INITIAL_MATRIX) {
    /* Build the row index set: the global column indices of local A become the rows of B to extract.
       a->garray[] holds the sorted global indices of A's off-diagonal columns; merge them with the
       contiguous diagonal-block column range [start, start+nzA) to keep the result sorted. */
    start = A->cmap->rstart;
    cmap  = a->garray;
    nzA   = a->A->cmap->n;
    nzB   = a->B->cmap->n;
    PetscCall(PetscMalloc1(nzA + nzB, &idx));
    ncols = 0;
    for (i = 0; i < nzB; i++) { /* row < local row index */
      if (cmap[i] < start) idx[ncols++] = cmap[i];
      else break;
    }
    imark = i;
    for (i = 0; i < nzA; i++) idx[ncols++] = start + i;   /* local rows */
    for (i = imark; i < nzB; i++) idx[ncols++] = cmap[i]; /* row > local row index */
    PetscCall(ISCreateGeneral(PETSC_COMM_SELF, ncols, idx, PETSC_OWN_POINTER, &isrowb));
    PetscCall(ISCreateStride(PETSC_COMM_SELF, B->cmap->N, 0, 1, &iscolb));
  } else {
    /* MAT_REUSE_MATRIX: the caller must hand back the index sets and matrix from the initial call */
    PetscCheck(rowb && colb, PETSC_COMM_SELF, PETSC_ERR_SUP, "IS rowb and colb must be provided for MAT_REUSE_MATRIX");
    isrowb = *rowb;
    iscolb = *colb;
    PetscCall(PetscMalloc1(1, &bseq));
    bseq[0] = *B_seq;
  }
  PetscCall(MatCreateSubMatrices(B, 1, &isrowb, &iscolb, scall, &bseq));
  *B_seq = bseq[0];
  PetscCall(PetscFree(bseq));
  /* Either hand the index sets back to the caller (for reuse) or destroy them */
  if (!rowb) {
    PetscCall(ISDestroy(&isrowb));
  } else {
    *rowb = isrowb;
  }
  if (!colb) {
    PetscCall(ISDestroy(&iscolb));
  } else {
    *colb = iscolb;
  }
  PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAcols, A, B, 0, 0));
  PetscFunctionReturn(0);
}

/*
    MatGetBrowsOfAoCols_MPIAIJ - Creates a SeqAIJ matrix by taking rows of B that equal to nonzero columns
    of the OFF-DIAGONAL portion of local A

    Collective on Mat

   Input Parameters:
+    A,B - the matrices in mpiaij format
-    scall - either MAT_INITIAL_MATRIX or MAT_REUSE_MATRIX

   Output Parameter:
+    startsj_s - starting point in B's sending j-arrays, saved for MAT_REUSE (or NULL)
.    startsj_r - starting point in B's receiving j-arrays, saved for MAT_REUSE (or NULL)
.    bufa_ptr - array for sending matrix values, saved for MAT_REUSE (or NULL)
-    B_oth - the sequential matrix generated with size aBn=a->B->cmap->n by B->cmap->N

    Developer Notes: This directly accesses information inside the VecScatter associated with the matrix-vector product
     for this matrix. This is not desirable..
5693 5694 Level: developer 5695 5696 */ 5697 PetscErrorCode MatGetBrowsOfAoCols_MPIAIJ(Mat A, Mat B, MatReuse scall, PetscInt **startsj_s, PetscInt **startsj_r, MatScalar **bufa_ptr, Mat *B_oth) { 5698 Mat_MPIAIJ *a = (Mat_MPIAIJ *)A->data; 5699 Mat_SeqAIJ *b_oth; 5700 VecScatter ctx; 5701 MPI_Comm comm; 5702 const PetscMPIInt *rprocs, *sprocs; 5703 const PetscInt *srow, *rstarts, *sstarts; 5704 PetscInt *rowlen, *bufj, *bufJ, ncols = 0, aBn = a->B->cmap->n, row, *b_othi, *b_othj, *rvalues = NULL, *svalues = NULL, *cols, sbs, rbs; 5705 PetscInt i, j, k = 0, l, ll, nrecvs, nsends, nrows, *rstartsj = NULL, *sstartsj, len; 5706 PetscScalar *b_otha, *bufa, *bufA, *vals = NULL; 5707 MPI_Request *reqs = NULL, *rwaits = NULL, *swaits = NULL; 5708 PetscMPIInt size, tag, rank, nreqs; 5709 5710 PetscFunctionBegin; 5711 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 5712 PetscCallMPI(MPI_Comm_size(comm, &size)); 5713 5714 if (PetscUnlikely(A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)) { 5715 SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend); 5716 } 5717 PetscCall(PetscLogEventBegin(MAT_GetBrowsOfAocols, A, B, 0, 0)); 5718 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 5719 5720 if (size == 1) { 5721 startsj_s = NULL; 5722 bufa_ptr = NULL; 5723 *B_oth = NULL; 5724 PetscFunctionReturn(0); 5725 } 5726 5727 ctx = a->Mvctx; 5728 tag = ((PetscObject)ctx)->tag; 5729 5730 PetscCall(VecScatterGetRemote_Private(ctx, PETSC_TRUE /*send*/, &nsends, &sstarts, &srow, &sprocs, &sbs)); 5731 /* rprocs[] must be ordered so that indices received from them are ordered in rvalues[], which is key to algorithms used in this subroutine */ 5732 PetscCall(VecScatterGetRemoteOrdered_Private(ctx, PETSC_FALSE /*recv*/, &nrecvs, &rstarts, NULL /*indices not needed*/, &rprocs, &rbs)); 5733 
PetscCall(PetscMPIIntCast(nsends + nrecvs, &nreqs)); 5734 PetscCall(PetscMalloc1(nreqs, &reqs)); 5735 rwaits = reqs; 5736 swaits = reqs + nrecvs; 5737 5738 if (!startsj_s || !bufa_ptr) scall = MAT_INITIAL_MATRIX; 5739 if (scall == MAT_INITIAL_MATRIX) { 5740 /* i-array */ 5741 /*---------*/ 5742 /* post receives */ 5743 if (nrecvs) PetscCall(PetscMalloc1(rbs * (rstarts[nrecvs] - rstarts[0]), &rvalues)); /* rstarts can be NULL when nrecvs=0 */ 5744 for (i = 0; i < nrecvs; i++) { 5745 rowlen = rvalues + rstarts[i] * rbs; 5746 nrows = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of indices to be received */ 5747 PetscCallMPI(MPI_Irecv(rowlen, nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i)); 5748 } 5749 5750 /* pack the outgoing message */ 5751 PetscCall(PetscMalloc2(nsends + 1, &sstartsj, nrecvs + 1, &rstartsj)); 5752 5753 sstartsj[0] = 0; 5754 rstartsj[0] = 0; 5755 len = 0; /* total length of j or a array to be sent */ 5756 if (nsends) { 5757 k = sstarts[0]; /* ATTENTION: sstarts[0] and rstarts[0] are not necessarily zero */ 5758 PetscCall(PetscMalloc1(sbs * (sstarts[nsends] - sstarts[0]), &svalues)); 5759 } 5760 for (i = 0; i < nsends; i++) { 5761 rowlen = svalues + (sstarts[i] - sstarts[0]) * sbs; 5762 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5763 for (j = 0; j < nrows; j++) { 5764 row = srow[k] + B->rmap->range[rank]; /* global row idx */ 5765 for (l = 0; l < sbs; l++) { 5766 PetscCall(MatGetRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); /* rowlength */ 5767 5768 rowlen[j * sbs + l] = ncols; 5769 5770 len += ncols; 5771 PetscCall(MatRestoreRow_MPIAIJ(B, row + l, &ncols, NULL, NULL)); 5772 } 5773 k++; 5774 } 5775 PetscCallMPI(MPI_Isend(rowlen, nrows * sbs, MPIU_INT, sprocs[i], tag, comm, swaits + i)); 5776 5777 sstartsj[i + 1] = len; /* starting point of (i+1)-th outgoing msg in bufj and bufa */ 5778 } 5779 /* recvs and sends of i-array are completed */ 5780 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5781 
PetscCall(PetscFree(svalues)); 5782 5783 /* allocate buffers for sending j and a arrays */ 5784 PetscCall(PetscMalloc1(len + 1, &bufj)); 5785 PetscCall(PetscMalloc1(len + 1, &bufa)); 5786 5787 /* create i-array of B_oth */ 5788 PetscCall(PetscMalloc1(aBn + 2, &b_othi)); 5789 5790 b_othi[0] = 0; 5791 len = 0; /* total length of j or a array to be received */ 5792 k = 0; 5793 for (i = 0; i < nrecvs; i++) { 5794 rowlen = rvalues + (rstarts[i] - rstarts[0]) * rbs; 5795 nrows = (rstarts[i + 1] - rstarts[i]) * rbs; /* num of rows to be received */ 5796 for (j = 0; j < nrows; j++) { 5797 b_othi[k + 1] = b_othi[k] + rowlen[j]; 5798 PetscCall(PetscIntSumError(rowlen[j], len, &len)); 5799 k++; 5800 } 5801 rstartsj[i + 1] = len; /* starting point of (i+1)-th incoming msg in bufj and bufa */ 5802 } 5803 PetscCall(PetscFree(rvalues)); 5804 5805 /* allocate space for j and a arrays of B_oth */ 5806 PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_othj)); 5807 PetscCall(PetscMalloc1(b_othi[aBn] + 1, &b_otha)); 5808 5809 /* j-array */ 5810 /*---------*/ 5811 /* post receives of j-array */ 5812 for (i = 0; i < nrecvs; i++) { 5813 nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */ 5814 PetscCallMPI(MPI_Irecv(b_othj + rstartsj[i], nrows, MPIU_INT, rprocs[i], tag, comm, rwaits + i)); 5815 } 5816 5817 /* pack the outgoing message j-array */ 5818 if (nsends) k = sstarts[0]; 5819 for (i = 0; i < nsends; i++) { 5820 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5821 bufJ = bufj + sstartsj[i]; 5822 for (j = 0; j < nrows; j++) { 5823 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5824 for (ll = 0; ll < sbs; ll++) { 5825 PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL)); 5826 for (l = 0; l < ncols; l++) { *bufJ++ = cols[l]; } 5827 PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, &cols, NULL)); 5828 } 5829 } 5830 PetscCallMPI(MPI_Isend(bufj + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_INT, sprocs[i], tag, comm, swaits + i)); 
5831 } 5832 5833 /* recvs and sends of j-array are completed */ 5834 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5835 } else if (scall == MAT_REUSE_MATRIX) { 5836 sstartsj = *startsj_s; 5837 rstartsj = *startsj_r; 5838 bufa = *bufa_ptr; 5839 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 5840 PetscCall(MatSeqAIJGetArrayWrite(*B_oth, &b_otha)); 5841 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix P does not possess an object container"); 5842 5843 /* a-array */ 5844 /*---------*/ 5845 /* post receives of a-array */ 5846 for (i = 0; i < nrecvs; i++) { 5847 nrows = rstartsj[i + 1] - rstartsj[i]; /* length of the msg received */ 5848 PetscCallMPI(MPI_Irecv(b_otha + rstartsj[i], nrows, MPIU_SCALAR, rprocs[i], tag, comm, rwaits + i)); 5849 } 5850 5851 /* pack the outgoing message a-array */ 5852 if (nsends) k = sstarts[0]; 5853 for (i = 0; i < nsends; i++) { 5854 nrows = sstarts[i + 1] - sstarts[i]; /* num of block rows */ 5855 bufA = bufa + sstartsj[i]; 5856 for (j = 0; j < nrows; j++) { 5857 row = srow[k++] + B->rmap->range[rank]; /* global row idx */ 5858 for (ll = 0; ll < sbs; ll++) { 5859 PetscCall(MatGetRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals)); 5860 for (l = 0; l < ncols; l++) { *bufA++ = vals[l]; } 5861 PetscCall(MatRestoreRow_MPIAIJ(B, row + ll, &ncols, NULL, &vals)); 5862 } 5863 } 5864 PetscCallMPI(MPI_Isend(bufa + sstartsj[i], sstartsj[i + 1] - sstartsj[i], MPIU_SCALAR, sprocs[i], tag, comm, swaits + i)); 5865 } 5866 /* recvs and sends of a-array are completed */ 5867 if (nreqs) PetscCallMPI(MPI_Waitall(nreqs, reqs, MPI_STATUSES_IGNORE)); 5868 PetscCall(PetscFree(reqs)); 5869 5870 if (scall == MAT_INITIAL_MATRIX) { 5871 /* put together the new matrix */ 5872 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, aBn, B->cmap->N, b_othi, b_othj, b_otha, B_oth)); 5873 5874 /* MatCreateSeqAIJWithArrays flags matrix so PETSc doesn't free the user's arrays. 
*/ 5875 /* Since these are PETSc arrays, change flags to free them as necessary. */ 5876 b_oth = (Mat_SeqAIJ *)(*B_oth)->data; 5877 b_oth->free_a = PETSC_TRUE; 5878 b_oth->free_ij = PETSC_TRUE; 5879 b_oth->nonew = 0; 5880 5881 PetscCall(PetscFree(bufj)); 5882 if (!startsj_s || !bufa_ptr) { 5883 PetscCall(PetscFree2(sstartsj, rstartsj)); 5884 PetscCall(PetscFree(bufa_ptr)); 5885 } else { 5886 *startsj_s = sstartsj; 5887 *startsj_r = rstartsj; 5888 *bufa_ptr = bufa; 5889 } 5890 } else if (scall == MAT_REUSE_MATRIX) { 5891 PetscCall(MatSeqAIJRestoreArrayWrite(*B_oth, &b_otha)); 5892 } 5893 5894 PetscCall(VecScatterRestoreRemote_Private(ctx, PETSC_TRUE, &nsends, &sstarts, &srow, &sprocs, &sbs)); 5895 PetscCall(VecScatterRestoreRemoteOrdered_Private(ctx, PETSC_FALSE, &nrecvs, &rstarts, NULL, &rprocs, &rbs)); 5896 PetscCall(PetscLogEventEnd(MAT_GetBrowsOfAocols, A, B, 0, 0)); 5897 PetscFunctionReturn(0); 5898 } 5899 5900 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCRL(Mat, MatType, MatReuse, Mat *); 5901 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJPERM(Mat, MatType, MatReuse, Mat *); 5902 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJSELL(Mat, MatType, MatReuse, Mat *); 5903 #if defined(PETSC_HAVE_MKL_SPARSE) 5904 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJMKL(Mat, MatType, MatReuse, Mat *); 5905 #endif 5906 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIBAIJ(Mat, MatType, MatReuse, Mat *); 5907 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *); 5908 #if defined(PETSC_HAVE_ELEMENTAL) 5909 PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_Elemental(Mat, MatType, MatReuse, Mat *); 5910 #endif 5911 #if defined(PETSC_HAVE_SCALAPACK) 5912 PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat, MatType, MatReuse, Mat *); 5913 #endif 5914 #if defined(PETSC_HAVE_HYPRE) 5915 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *); 5916 #endif 5917 #if defined(PETSC_HAVE_CUDA) 5918 
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJCUSPARSE(Mat, MatType, MatReuse, Mat *);
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPIAIJKokkos(Mat, MatType, MatReuse, Mat *);
#endif
PETSC_INTERN PetscErrorCode MatConvert_MPIAIJ_MPISELL(Mat, MatType, MatReuse, Mat *);
PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);
PETSC_INTERN PetscErrorCode MatProductSetFromOptions_IS_XAIJ(Mat);

/*
    Computes (B'*A')' since computing B*A directly is untenable

               n                       p
        [          ]       [          ]         [          ]
      m [    A     ]  *  n [    B     ]   =   m [    C     ]
        [          ]       [          ]         [          ]

*/
static PetscErrorCode MatMatMultNumeric_MPIDense_MPIAIJ(Mat A, Mat B, Mat C) {
  Mat At, Bt, Ct;

  PetscFunctionBegin;
  /* Form explicit transposes, multiply them in the supported (AIJ*Dense) order, then transpose back */
  PetscCall(MatTranspose(A, MAT_INITIAL_MATRIX, &At));
  PetscCall(MatTranspose(B, MAT_INITIAL_MATRIX, &Bt));
  PetscCall(MatMatMult(Bt, At, MAT_INITIAL_MATRIX, PETSC_DEFAULT, &Ct));
  PetscCall(MatDestroy(&At));
  PetscCall(MatDestroy(&Bt));
  /* Reuse C's existing symbolic structure when transposing Ct into it */
  PetscCall(MatTransposeSetPrecursor(Ct, C));
  PetscCall(MatTranspose(Ct, MAT_REUSE_MATRIX, &C));
  PetscCall(MatDestroy(&Ct));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatMatMultSymbolic_MPIDense_MPIAIJ(Mat A, Mat B, PetscReal fill, Mat C) {
  PetscBool cisdense;

  PetscFunctionBegin;
  PetscCheck(A->cmap->n == B->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "A->cmap->n %" PetscInt_FMT " != B->rmap->n %" PetscInt_FMT, A->cmap->n, B->rmap->n);
  PetscCall(MatSetSizes(C, A->rmap->n, B->cmap->n, A->rmap->N, B->cmap->N));
  PetscCall(MatSetBlockSizesFromMats(C, A, B));
  /* C inherits A's (dense) type unless it is already a dense type */
  PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &cisdense, MATMPIDENSE, MATMPIDENSECUDA, ""));
  if (!cisdense) { PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); }
  PetscCall(MatSetUp(C));

  C->ops->matmultnumeric = MatMatMultNumeric_MPIDense_MPIAIJ;
  PetscFunctionReturn(0);
}

/* ----------------------------------------------------------------*/
static PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ_AB(Mat C) {
  Mat_Product *product = C->product;
  Mat          A = product->A, B = product->B;

  PetscFunctionBegin;
  if (A->cmap->rstart != B->rmap->rstart || A->cmap->rend != B->rmap->rend)
    SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Matrix local dimensions are incompatible, (%" PetscInt_FMT ", %" PetscInt_FMT ") != (%" PetscInt_FMT ",%" PetscInt_FMT ")", A->cmap->rstart, A->cmap->rend, B->rmap->rstart, B->rmap->rend);

  C->ops->matmultsymbolic = MatMatMultSymbolic_MPIDense_MPIAIJ;
  C->ops->productsymbolic = MatProductSymbolic_AB;
  PetscFunctionReturn(0);
}

PETSC_INTERN PetscErrorCode MatProductSetFromOptions_MPIDense_MPIAIJ(Mat C) {
  Mat_Product *product = C->product;

  PetscFunctionBegin;
  /* Only the AB product is supported for MPIDense*MPIAIJ; other product types are left unset */
  if (product->type == MATPRODUCT_AB) { PetscCall(MatProductSetFromOptions_MPIDense_MPIAIJ_AB(C)); }
  PetscFunctionReturn(0);
}

/* Merge two sets of sorted nonzeros and return a CSR for the merged (sequential) matrix

  Input Parameters:

    j1,rowBegin1,rowEnd1,perm1,jmap1: describe the first set of nonzeros (Set1)
    j2,rowBegin2,rowEnd2,perm2,jmap2: describe the second set of nonzeros (Set2)

    mat: both sets' nonzeros are on m rows, where m is the number of local rows of the matrix mat

    For Set1, j1[] contains column indices of the nonzeros.
    For the k-th row (0<=k<m), [rowBegin1[k],rowEnd1[k]) index into j1[] and point to the begin/end nonzero in row k
    respectively (note rowEnd1[k] is not necessarily equal to rowBegin1[k+1]). Indices in this range of j1[] are sorted,
    but might have repeats. jmap1[t+1] - jmap1[t] is the number of repeats for the t-th unique nonzero in Set1.

    Similar for Set2.

    This routine merges the two sets of nonzeros row by row and removes repeats.

  Output Parameters: (memory is allocated by the caller)

    i[],j[]: the CSR of the merged matrix, which has m rows.
    imap1[]: the k-th unique nonzero in Set1 (k=0,1,...) corresponds to imap1[k]-th unique nonzero in the merged matrix.
    imap2[]: similar to imap1[], but for Set2.
    Note we order nonzeros row-by-row and from left to right.
*/
static PetscErrorCode MatMergeEntries_Internal(Mat mat, const PetscInt j1[], const PetscInt j2[], const PetscCount rowBegin1[], const PetscCount rowEnd1[], const PetscCount rowBegin2[], const PetscCount rowEnd2[], const PetscCount jmap1[], const PetscCount jmap2[], PetscCount imap1[], PetscCount imap2[], PetscInt i[], PetscInt j[]) {
  PetscInt   r, m; /* Row index of mat */
  PetscCount t, t1, t2, b1, e1, b2, e2;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(mat, &m, NULL));
  t1 = t2 = t = 0; /* Count unique nonzeros in Set1, Set2 and the merged set respectively */
  i[0]        = 0;
  for (r = 0; r < m; r++) { /* Do row by row merging */
    b1 = rowBegin1[r];
    e1 = rowEnd1[r];
    b2 = rowBegin2[r];
    e2 = rowEnd2[r];
    /* Classic two-way merge of the sorted column lists of row r; advancing by the repeat
       count from jmap skips all duplicates of the current unique nonzero at once */
    while (b1 < e1 && b2 < e2) {
      if (j1[b1] == j2[b2]) { /* Same column index and hence same nonzero */
        j[t]      = j1[b1];
        imap1[t1] = t;
        imap2[t2] = t;
        b1 += jmap1[t1 + 1] - jmap1[t1]; /* Jump to next unique local nonzero */
        b2 += jmap2[t2 + 1] - jmap2[t2]; /* Jump to next unique remote nonzero */
        t1++;
        t2++;
        t++;
      } else if (j1[b1] < j2[b2]) {
        j[t]      = j1[b1];
        imap1[t1] = t;
        b1 += jmap1[t1 + 1] - jmap1[t1];
        t1++;
        t++;
      } else {
        j[t]      = j2[b2];
        imap2[t2] = t;
        b2 += jmap2[t2 + 1] - jmap2[t2];
        t2++;
        t++;
      }
    }
    /* Merge the remaining in either j1[] or j2[] */
    while (b1 < e1) {
      j[t]      = j1[b1];
      imap1[t1] = t;
      b1 += jmap1[t1 + 1] - jmap1[t1];
      t1++;
      t++;
    }
    while (b2 < e2) {
      j[t]      = j2[b2];
      imap2[t2] = t;
      b2 += jmap2[t2 + 1] - jmap2[t2];
      t2++;
      t++;
    }
    i[r + 1] = t; /* CSR row pointer for the merged matrix */
  }
  PetscFunctionReturn(0);
}

/* Split nonzeros in a block of local rows into two subsets: those in the diagonal block and those in the off-diagonal block

  Input Parameters:
    mat: an MPI matrix that provides row and column layout information for splitting. Let's say its number of local rows is m.
    n,i[],j[],perm[]: there are n input entries, belonging to m rows. Row/col indices of the entries are stored in i[] and j[]
      respectively, along with a permutation array perm[]. Length of the i[],j[],perm[] arrays is n.

      i[] is already sorted, but within a row, j[] is not sorted and might have repeats.
      i[] might contain negative indices at the beginning, which means the corresponding entries should be ignored in the splitting.

  Output Parameters:
    j[],perm[]: the routine needs to sort j[] within each row along with perm[].
    rowBegin[],rowMid[],rowEnd[]: of length m, and the memory is preallocated and zeroed by the caller.
      They contain indices pointing to j[]. For 0<=r<m, [rowBegin[r],rowMid[r]) point to begin/end entries of row r of the diagonal block,
      and [rowMid[r],rowEnd[r]) point to begin/end entries of row r of the off-diagonal block.

    Aperm[],Ajmap[],Atot,Annz: Arrays are allocated by this routine.
      Atot: number of entries belonging to the diagonal block.
      Annz: number of unique nonzeros belonging to the diagonal block.
      Aperm[Atot] stores values from perm[] for entries belonging to the diagonal block. Length of Aperm[] is Atot, though it may also count
        repeats (i.e., same 'i,j' pair).
      Ajmap[Annz+1] stores the number of repeats of each unique entry belonging to the diagonal block.
More precisely, Ajmap[t+1] - Ajmap[t] 6092 is the number of repeats for the t-th unique entry in the diagonal block. Ajmap[0] is always 0. 6093 6094 Atot: number of entries belonging to the diagonal block 6095 Annz: number of unique nonzeros belonging to the diagonal block. 6096 6097 Bperm[], Bjmap[], Btot, Bnnz are similar but for the off-diagonal block. 6098 6099 Aperm[],Bperm[],Ajmap[] and Bjmap[] are allocated separately by this routine with PetscMalloc1(). 6100 */ 6101 static PetscErrorCode MatSplitEntries_Internal(Mat mat, PetscCount n, const PetscInt i[], PetscInt j[], PetscCount perm[], PetscCount rowBegin[], PetscCount rowMid[], PetscCount rowEnd[], PetscCount *Atot_, PetscCount **Aperm_, PetscCount *Annz_, PetscCount **Ajmap_, PetscCount *Btot_, PetscCount **Bperm_, PetscCount *Bnnz_, PetscCount **Bjmap_) { 6102 PetscInt cstart, cend, rstart, rend, row, col; 6103 PetscCount Atot = 0, Btot = 0; /* Total number of nonzeros in the diagonal and off-diagonal blocks */ 6104 PetscCount Annz = 0, Bnnz = 0; /* Number of unique nonzeros in the diagonal and off-diagonal blocks */ 6105 PetscCount k, m, p, q, r, s, mid; 6106 PetscCount *Aperm, *Bperm, *Ajmap, *Bjmap; 6107 6108 PetscFunctionBegin; 6109 PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend)); 6110 PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend)); 6111 m = rend - rstart; 6112 6113 for (k = 0; k < n; k++) { 6114 if (i[k] >= 0) break; 6115 } /* Skip negative rows */ 6116 6117 /* Process [k,n): sort and partition each local row into diag and offdiag portions, 6118 fill rowBegin[], rowMid[], rowEnd[], and count Atot, Btot, Annz, Bnnz. 6119 */ 6120 while (k < n) { 6121 row = i[k]; 6122 /* Entries in [k,s) are in one row. 
Shift diagonal block col indices so that diag is ahead of offdiag after sorting the row */ 6123 for (s = k; s < n; s++) 6124 if (i[s] != row) break; 6125 for (p = k; p < s; p++) { 6126 if (j[p] >= cstart && j[p] < cend) j[p] -= PETSC_MAX_INT; /* Shift diag columns to range of [-PETSC_MAX_INT, -1] */ 6127 else PetscAssert((j[p] >= 0) && (j[p] <= mat->cmap->N), PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column index %" PetscInt_FMT " is out of range", j[p]); 6128 } 6129 PetscCall(PetscSortIntWithCountArray(s - k, j + k, perm + k)); 6130 PetscCall(PetscSortedIntUpperBound(j, k, s, -1, &mid)); /* Separate [k,s) into [k,mid) for diag and [mid,s) for offdiag */ 6131 rowBegin[row - rstart] = k; 6132 rowMid[row - rstart] = mid; 6133 rowEnd[row - rstart] = s; 6134 6135 /* Count nonzeros of this diag/offdiag row, which might have repeats */ 6136 Atot += mid - k; 6137 Btot += s - mid; 6138 6139 /* Count unique nonzeros of this diag/offdiag row */ 6140 for (p = k; p < mid;) { 6141 col = j[p]; 6142 do { 6143 j[p] += PETSC_MAX_INT; 6144 p++; 6145 } while (p < mid && j[p] == col); /* Revert the modified diagonal indices */ 6146 Annz++; 6147 } 6148 6149 for (p = mid; p < s;) { 6150 col = j[p]; 6151 do { p++; } while (p < s && j[p] == col); 6152 Bnnz++; 6153 } 6154 k = s; 6155 } 6156 6157 /* Allocation according to Atot, Btot, Annz, Bnnz */ 6158 PetscCall(PetscMalloc1(Atot, &Aperm)); 6159 PetscCall(PetscMalloc1(Btot, &Bperm)); 6160 PetscCall(PetscMalloc1(Annz + 1, &Ajmap)); 6161 PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap)); 6162 6163 /* Re-scan indices and copy diag/offdiag permutation indices to Aperm, Bperm and also fill Ajmap and Bjmap */ 6164 Ajmap[0] = Bjmap[0] = Atot = Btot = Annz = Bnnz = 0; 6165 for (r = 0; r < m; r++) { 6166 k = rowBegin[r]; 6167 mid = rowMid[r]; 6168 s = rowEnd[r]; 6169 PetscCall(PetscArraycpy(Aperm + Atot, perm + k, mid - k)); 6170 PetscCall(PetscArraycpy(Bperm + Btot, perm + mid, s - mid)); 6171 Atot += mid - k; 6172 Btot += s - mid; 6173 6174 /* Scan 
column indices in this row and find out how many repeats each unique nonzero has */ 6175 for (p = k; p < mid;) { 6176 col = j[p]; 6177 q = p; 6178 do { p++; } while (p < mid && j[p] == col); 6179 Ajmap[Annz + 1] = Ajmap[Annz] + (p - q); 6180 Annz++; 6181 } 6182 6183 for (p = mid; p < s;) { 6184 col = j[p]; 6185 q = p; 6186 do { p++; } while (p < s && j[p] == col); 6187 Bjmap[Bnnz + 1] = Bjmap[Bnnz] + (p - q); 6188 Bnnz++; 6189 } 6190 } 6191 /* Output */ 6192 *Aperm_ = Aperm; 6193 *Annz_ = Annz; 6194 *Atot_ = Atot; 6195 *Ajmap_ = Ajmap; 6196 *Bperm_ = Bperm; 6197 *Bnnz_ = Bnnz; 6198 *Btot_ = Btot; 6199 *Bjmap_ = Bjmap; 6200 PetscFunctionReturn(0); 6201 } 6202 6203 /* Expand the jmap[] array to make a new one in view of nonzeros in the merged matrix 6204 6205 Input Parameters: 6206 nnz1: number of unique nonzeros in a set that was used to produce imap[], jmap[] 6207 nnz: number of unique nonzeros in the merged matrix 6208 imap[nnz1]: i-th nonzero in the set is the imap[i]-th nonzero in the merged matrix 6209 jmap[nnz1+1]: i-th nonzeron in the set has jmap[i+1] - jmap[i] repeats in the set 6210 6211 Output Parameter: (memory is allocated by the caller) 6212 jmap_new[nnz+1]: i-th nonzero in the merged matrix has jmap_new[i+1] - jmap_new[i] repeats in the set 6213 6214 Example: 6215 nnz1 = 4 6216 nnz = 6 6217 imap = [1,3,4,5] 6218 jmap = [0,3,5,6,7] 6219 then, 6220 jmap_new = [0,0,3,3,5,6,7] 6221 */ 6222 static PetscErrorCode ExpandJmap_Internal(PetscCount nnz1, PetscCount nnz, const PetscCount imap[], const PetscCount jmap[], PetscCount jmap_new[]) { 6223 PetscCount k, p; 6224 6225 PetscFunctionBegin; 6226 jmap_new[0] = 0; 6227 p = nnz; /* p loops over jmap_new[] backwards */ 6228 for (k = nnz1 - 1; k >= 0; k--) { /* k loops over imap[] */ 6229 for (; p > imap[k]; p--) jmap_new[p] = jmap[k + 1]; 6230 } 6231 for (; p >= 0; p--) jmap_new[p] = jmap[0]; 6232 PetscFunctionReturn(0); 6233 } 6234 6235 PetscErrorCode MatSetPreallocationCOO_MPIAIJ(Mat mat, PetscCount coo_n, 
PetscInt coo_i[], PetscInt coo_j[]) {
  MPI_Comm    comm;
  PetscMPIInt rank, size;
  PetscInt    m, n, M, N, rstart, rend, cstart, cend; /* Sizes, indices of row/col, therefore with type PetscInt */
  PetscCount  k, p, q, rem;                           /* Loop variables over coo arrays */
  Mat_MPIAIJ *mpiaij = (Mat_MPIAIJ *)mat->data;

  PetscFunctionBegin;
  /* Wipe any existing assembled state: garray/lvec/colmap/Mvctx all describe the old nonzero pattern */
  PetscCall(PetscFree(mpiaij->garray));
  PetscCall(VecDestroy(&mpiaij->lvec));
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscTableDestroy(&mpiaij->colmap));
#else
  PetscCall(PetscFree(mpiaij->colmap));
#endif
  PetscCall(VecScatterDestroy(&mpiaij->Mvctx));
  mat->assembled     = PETSC_FALSE;
  mat->was_assembled = PETSC_FALSE;
  PetscCall(MatResetPreallocationCOO_MPIAIJ(mat));

  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));
  PetscCall(PetscLayoutGetRange(mat->rmap, &rstart, &rend));
  PetscCall(PetscLayoutGetRange(mat->cmap, &cstart, &cend));
  PetscCall(MatGetLocalSize(mat, &m, &n));
  PetscCall(MatGetSize(mat, &M, &N));

  /* ---------------------------------------------------------------------------*/
  /* Sort (i,j) by row along with a permutation array, so that the to-be-ignored */
  /* entries come first, then local rows, then remote rows.                     */
  /* ---------------------------------------------------------------------------*/
  PetscCount n1 = coo_n, *perm1;
  PetscInt  *i1 = coo_i, *j1 = coo_j;

  PetscCall(PetscMalloc1(n1, &perm1));
  for (k = 0; k < n1; k++) perm1[k] = k;

  /* Manipulate indices so that entries with negative row or col indices will have smallest
     row indices, local entries will have greater but negative row indices, and remote entries
     will have positive row indices.
  */
  for (k = 0; k < n1; k++) {
    if (i1[k] < 0 || j1[k] < 0) i1[k] = PETSC_MIN_INT;                /* e.g., -2^31, minimal to move them ahead */
    else if (i1[k] >= rstart && i1[k] < rend) i1[k] -= PETSC_MAX_INT; /* e.g., minus 2^31-1 to shift local rows to range of [-PETSC_MAX_INT, -1] */
    else {
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_USER_INPUT, "MAT_NO_OFF_PROC_ENTRIES is set but insert to remote rows");
      if (mpiaij->donotstash) i1[k] = PETSC_MIN_INT; /* Ignore offproc entries as if they had negative indices */
    }
  }

  /* Sort by row; after that, [0,k) have ignored entries, [k,rem) have local rows and [rem,n1) have remote rows */
  PetscCall(PetscSortIntWithIntCountArrayPair(n1, i1, j1, perm1));
  for (k = 0; k < n1; k++) {
    if (i1[k] > PETSC_MIN_INT) break;
  } /* Advance k to the first entry we need to take care of */
  PetscCall(PetscSortedIntUpperBound(i1, k, n1, rend - 1 - PETSC_MAX_INT, &rem)); /* rem is upper bound of the last local row */
  for (; k < rem; k++) i1[k] += PETSC_MAX_INT;                                    /* Revert row indices of local rows*/

  /* ---------------------------------------------------------------------------*/
  /* Split local rows into diag/offdiag portions                                */
  /* ---------------------------------------------------------------------------*/
  PetscCount *rowBegin1, *rowMid1, *rowEnd1;
  PetscCount *Ajmap1, *Aperm1, *Bjmap1, *Bperm1, *Cperm1;
  PetscCount  Annz1, Bnnz1, Atot1, Btot1;

  PetscCall(PetscCalloc3(m, &rowBegin1, m, &rowMid1, m, &rowEnd1));
  PetscCall(PetscMalloc1(n1 - rem, &Cperm1));
  PetscCall(MatSplitEntries_Internal(mat, rem, i1, j1, perm1, rowBegin1, rowMid1, rowEnd1, &Atot1, &Aperm1, &Annz1, &Ajmap1, &Btot1, &Bperm1, &Bnnz1, &Bjmap1));

  /* ---------------------------------------------------------------------------*/
  /* Send remote rows to their owner                                            */
  /* ---------------------------------------------------------------------------*/
  /* Find which rows should be sent to which remote ranks*/
  PetscInt        nsend = 0; /* Number of MPI ranks to send data to */
  PetscMPIInt    *sendto;    /* [nsend], storing remote ranks */
  PetscInt       *nentries;  /* [nsend], storing number of entries sent to remote ranks; Assume PetscInt is big enough for this count, and error if not */
  const PetscInt *ranges;
  PetscInt        maxNsend = size >= 128 ? 128 : size; /* Assume max 128 neighbors; realloc when needed */

  PetscCall(PetscLayoutGetRanges(mat->rmap, &ranges));
  PetscCall(PetscMalloc2(maxNsend, &sendto, maxNsend, &nentries));
  for (k = rem; k < n1;) {
    PetscMPIInt owner;
    PetscInt    firstRow, lastRow;

    /* Locate a row range */
    firstRow = i1[k]; /* first row of this owner */
    PetscCall(PetscLayoutFindOwner(mat->rmap, firstRow, &owner));
    lastRow = ranges[owner + 1] - 1; /* last row of this owner */

    /* Find the first index 'p' in [k,n) with i[p] belonging to next owner */
    PetscCall(PetscSortedIntUpperBound(i1, k, n1, lastRow, &p));

    /* All entries in [k,p) belong to this remote owner */
    if (nsend >= maxNsend) { /* Double the remote ranks arrays if not long enough */
      PetscMPIInt *sendto2;
      PetscInt    *nentries2;
      PetscInt     maxNsend2 = (maxNsend <= size / 2) ? maxNsend * 2 : size;

      PetscCall(PetscMalloc2(maxNsend2, &sendto2, maxNsend2, &nentries2));
      PetscCall(PetscArraycpy(sendto2, sendto, maxNsend));
      /* NOTE(review): BUG — the next line copies nentries2 onto itself (and reads one element past
         its end with maxNsend + 1) instead of copying the old array; it should be
         PetscCall(PetscArraycpy(nentries2, nentries, maxNsend));
         and the following free releases the NEW array (leaving 'nentries = nentries2' dangling and
         leaking the old one) instead of the old pair; it should be
         PetscCall(PetscFree2(sendto, nentries)); */
      PetscCall(PetscArraycpy(nentries2, nentries2, maxNsend + 1));
      PetscCall(PetscFree2(sendto, nentries2));
      sendto   = sendto2;
      nentries = nentries2;
      maxNsend = maxNsend2;
    }
    sendto[nsend]   = owner;
    nentries[nsend] = p - k;
    PetscCall(PetscCountCast(p - k, &nentries[nsend]));
    nsend++;
    k = p;
  }

  /* Build 1st SF to know offsets on remote to send data */
  PetscSF      sf1;
  PetscInt     nroots = 1, nroots2 = 0;
  PetscInt     nleaves = nsend, nleaves2 = 0;
  PetscInt    *offsets;
  PetscSFNode *iremote;

  PetscCall(PetscSFCreate(comm, &sf1));
  PetscCall(PetscMalloc1(nsend, &iremote));
  PetscCall(PetscMalloc1(nsend, &offsets));
  for (k = 0; k < nsend; k++) {
    iremote[k].rank  = sendto[k];
    iremote[k].index = 0;
    nleaves2 += nentries[k];
    PetscCheck(nleaves2 >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF leaves is too large for PetscInt");
  }
  PetscCall(PetscSFSetGraph(sf1, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));
  PetscCall(PetscSFFetchAndOpWithMemTypeBegin(sf1, MPIU_INT, PETSC_MEMTYPE_HOST, &nroots2 /*rootdata*/, PETSC_MEMTYPE_HOST, nentries /*leafdata*/, PETSC_MEMTYPE_HOST, offsets /*leafupdate*/, MPI_SUM));
  PetscCall(PetscSFFetchAndOpEnd(sf1, MPIU_INT, &nroots2, nentries, offsets, MPI_SUM)); /* Would nroots2 overflow, we check offsets[] below */
  PetscCall(PetscSFDestroy(&sf1));
  PetscAssert(nleaves2 == n1 - rem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "nleaves2 %" PetscInt_FMT " != number of remote entries %" PetscCount_FMT "", nleaves2, n1 - rem);

  /* Build 2nd SF to send remote COOs to their owner */
  PetscSF sf2;
  nroots  = nroots2;
  nleaves = nleaves2;
  PetscCall(PetscSFCreate(comm, &sf2));
  PetscCall(PetscSFSetFromOptions(sf2));
  PetscCall(PetscMalloc1(nleaves, &iremote));
  p = 0;
  for (k = 0; k < nsend; k++) {
    PetscCheck(offsets[k] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Number of SF roots is too large for PetscInt");
    for (q = 0; q < nentries[k]; q++, p++) {
      iremote[p].rank  = sendto[k];
      iremote[p].index = offsets[k] + q;
    }
  }
  PetscCall(PetscSFSetGraph(sf2, nroots, nleaves, NULL, PETSC_OWN_POINTER, iremote, PETSC_OWN_POINTER));

  /* sf2 only sends contiguous leafdata to contiguous rootdata. We record the permutation which will be used to fill leafdata */
  PetscCall(PetscArraycpy(Cperm1, perm1 + rem, n1 - rem));

  /* Send the remote COOs to their owner */
  PetscInt    n2 = nroots, *i2, *j2; /* Buffers for received COOs from other ranks, along with a permutation array */
  PetscCount *perm2;                 /* Though PetscInt is enough for remote entries, we use PetscCount here as we want to reuse MatSplitEntries_Internal() */
  PetscCall(PetscMalloc3(n2, &i2, n2, &j2, n2, &perm2));
  PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, i1 + rem, PETSC_MEMTYPE_HOST, i2, MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, i1 + rem, i2, MPI_REPLACE));
  PetscCall(PetscSFReduceWithMemTypeBegin(sf2, MPIU_INT, PETSC_MEMTYPE_HOST, j1 + rem, PETSC_MEMTYPE_HOST, j2, MPI_REPLACE));
  PetscCall(PetscSFReduceEnd(sf2, MPIU_INT, j1 + rem, j2, MPI_REPLACE));

  PetscCall(PetscFree(offsets));
  PetscCall(PetscFree2(sendto, nentries));

  /* ---------------------------------------------------------------*/
  /* Sort received COOs by row along with the permutation array     */
  /* ---------------------------------------------------------------*/
  for (k = 0; k < n2; k++) perm2[k] = k;
  PetscCall(PetscSortIntWithIntCountArrayPair(n2, i2, j2, perm2));

  /* ---------------------------------------------------------------*/
  /* Split received COOs into diag/offdiag portions                 */
  /* ---------------------------------------------------------------*/
  PetscCount *rowBegin2, *rowMid2, *rowEnd2;
  PetscCount *Ajmap2, *Aperm2, *Bjmap2, *Bperm2;
  PetscCount  Annz2, Bnnz2, Atot2, Btot2;

  PetscCall(PetscCalloc3(m, &rowBegin2, m, &rowMid2, m, &rowEnd2));
  PetscCall(MatSplitEntries_Internal(mat, n2, i2, j2, perm2, rowBegin2, rowMid2, rowEnd2, &Atot2, &Aperm2, &Annz2, &Ajmap2, &Btot2, &Bperm2, &Bnnz2, &Bjmap2));

  /* --------------------------------------------------------------------------*/
  /* Merge local COOs with received COOs: diag with diag, offdiag with offdiag */
  /* --------------------------------------------------------------------------*/
  PetscInt *Ai, *Bi;
  PetscInt *Aj, *Bj;

  PetscCall(PetscMalloc1(m + 1, &Ai));
  PetscCall(PetscMalloc1(m + 1, &Bi));
  PetscCall(PetscMalloc1(Annz1 + Annz2, &Aj)); /* Since local and remote entries might have dups, we might allocate excess memory */
  PetscCall(PetscMalloc1(Bnnz1 + Bnnz2, &Bj));

  PetscCount *Aimap1, *Bimap1, *Aimap2, *Bimap2;
  PetscCall(PetscMalloc1(Annz1, &Aimap1));
  PetscCall(PetscMalloc1(Bnnz1, &Bimap1));
  PetscCall(PetscMalloc1(Annz2, &Aimap2));
  PetscCall(PetscMalloc1(Bnnz2, &Bimap2));

  PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowBegin1, rowMid1, rowBegin2, rowMid2, Ajmap1, Ajmap2, Aimap1, Aimap2, Ai, Aj));
  PetscCall(MatMergeEntries_Internal(mat, j1, j2, rowMid1, rowEnd1, rowMid2, rowEnd2, Bjmap1, Bjmap2, Bimap1, Bimap2, Bi, Bj));

  /* --------------------------------------------------------------------------*/
  /* Expand Ajmap1/Bjmap1 to make them based off nonzeros in A/B, since we     */
  /* expect nonzeros in A/B most likely have local contributing entries        */
  /* --------------------------------------------------------------------------*/
  PetscInt Annz = Ai[m];
  PetscInt Bnnz = Bi[m];
PetscCount *Ajmap1_new, *Bjmap1_new; 6449 6450 PetscCall(PetscMalloc1(Annz + 1, &Ajmap1_new)); 6451 PetscCall(PetscMalloc1(Bnnz + 1, &Bjmap1_new)); 6452 6453 PetscCall(ExpandJmap_Internal(Annz1, Annz, Aimap1, Ajmap1, Ajmap1_new)); 6454 PetscCall(ExpandJmap_Internal(Bnnz1, Bnnz, Bimap1, Bjmap1, Bjmap1_new)); 6455 6456 PetscCall(PetscFree(Aimap1)); 6457 PetscCall(PetscFree(Ajmap1)); 6458 PetscCall(PetscFree(Bimap1)); 6459 PetscCall(PetscFree(Bjmap1)); 6460 PetscCall(PetscFree3(rowBegin1, rowMid1, rowEnd1)); 6461 PetscCall(PetscFree3(rowBegin2, rowMid2, rowEnd2)); 6462 PetscCall(PetscFree(perm1)); 6463 PetscCall(PetscFree3(i2, j2, perm2)); 6464 6465 Ajmap1 = Ajmap1_new; 6466 Bjmap1 = Bjmap1_new; 6467 6468 /* Reallocate Aj, Bj once we know actual numbers of unique nonzeros in A and B */ 6469 if (Annz < Annz1 + Annz2) { 6470 PetscInt *Aj_new; 6471 PetscCall(PetscMalloc1(Annz, &Aj_new)); 6472 PetscCall(PetscArraycpy(Aj_new, Aj, Annz)); 6473 PetscCall(PetscFree(Aj)); 6474 Aj = Aj_new; 6475 } 6476 6477 if (Bnnz < Bnnz1 + Bnnz2) { 6478 PetscInt *Bj_new; 6479 PetscCall(PetscMalloc1(Bnnz, &Bj_new)); 6480 PetscCall(PetscArraycpy(Bj_new, Bj, Bnnz)); 6481 PetscCall(PetscFree(Bj)); 6482 Bj = Bj_new; 6483 } 6484 6485 /* --------------------------------------------------------------------------------*/ 6486 /* Create new submatrices for on-process and off-process coupling */ 6487 /* --------------------------------------------------------------------------------*/ 6488 PetscScalar *Aa, *Ba; 6489 MatType rtype; 6490 Mat_SeqAIJ *a, *b; 6491 PetscCall(PetscCalloc1(Annz, &Aa)); /* Zero matrix on device */ 6492 PetscCall(PetscCalloc1(Bnnz, &Ba)); 6493 /* make Aj[] local, i.e, based off the start column of the diagonal portion */ 6494 if (cstart) { 6495 for (k = 0; k < Annz; k++) Aj[k] -= cstart; 6496 } 6497 PetscCall(MatDestroy(&mpiaij->A)); 6498 PetscCall(MatDestroy(&mpiaij->B)); 6499 PetscCall(MatGetRootType_Private(mat, &rtype)); 6500 
PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, Ai, Aj, Aa, &mpiaij->A)); 6501 PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, mat->cmap->N, Bi, Bj, Ba, &mpiaij->B)); 6502 PetscCall(MatSetUpMultiply_MPIAIJ(mat)); 6503 6504 a = (Mat_SeqAIJ *)mpiaij->A->data; 6505 b = (Mat_SeqAIJ *)mpiaij->B->data; 6506 a->singlemalloc = b->singlemalloc = PETSC_FALSE; /* Let newmat own Ai,Aj,Aa,Bi,Bj,Ba */ 6507 a->free_a = b->free_a = PETSC_TRUE; 6508 a->free_ij = b->free_ij = PETSC_TRUE; 6509 6510 /* conversion must happen AFTER multiply setup */ 6511 PetscCall(MatConvert(mpiaij->A, rtype, MAT_INPLACE_MATRIX, &mpiaij->A)); 6512 PetscCall(MatConvert(mpiaij->B, rtype, MAT_INPLACE_MATRIX, &mpiaij->B)); 6513 PetscCall(VecDestroy(&mpiaij->lvec)); 6514 PetscCall(MatCreateVecs(mpiaij->B, &mpiaij->lvec, NULL)); 6515 PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)mpiaij->lvec)); 6516 6517 mpiaij->coo_n = coo_n; 6518 mpiaij->coo_sf = sf2; 6519 mpiaij->sendlen = nleaves; 6520 mpiaij->recvlen = nroots; 6521 6522 mpiaij->Annz = Annz; 6523 mpiaij->Bnnz = Bnnz; 6524 6525 mpiaij->Annz2 = Annz2; 6526 mpiaij->Bnnz2 = Bnnz2; 6527 6528 mpiaij->Atot1 = Atot1; 6529 mpiaij->Atot2 = Atot2; 6530 mpiaij->Btot1 = Btot1; 6531 mpiaij->Btot2 = Btot2; 6532 6533 mpiaij->Ajmap1 = Ajmap1; 6534 mpiaij->Aperm1 = Aperm1; 6535 6536 mpiaij->Bjmap1 = Bjmap1; 6537 mpiaij->Bperm1 = Bperm1; 6538 6539 mpiaij->Aimap2 = Aimap2; 6540 mpiaij->Ajmap2 = Ajmap2; 6541 mpiaij->Aperm2 = Aperm2; 6542 6543 mpiaij->Bimap2 = Bimap2; 6544 mpiaij->Bjmap2 = Bjmap2; 6545 mpiaij->Bperm2 = Bperm2; 6546 6547 mpiaij->Cperm1 = Cperm1; 6548 6549 /* Allocate in preallocation. 
If not used, it has zero cost on host */
  PetscCall(PetscMalloc2(mpiaij->sendlen, &mpiaij->sendbuf, mpiaij->recvlen, &mpiaij->recvbuf));
  PetscFunctionReturn(0);
}

/* Insert/add the COO values v[] into the matrix.

   v[] is ordered exactly as the (coo_i[], coo_j[]) arrays that were given to MatSetPreallocationCOO().
   The preallocation phase stored, in Mat_MPIAIJ, permutations (Aperm1/Bperm1 for local entries,
   Aperm2/Bperm2 for received remote entries, Cperm1 for entries to send away) and jmaps
   (Ajmap1[i]..Ajmap1[i+1] delimits the run of v-entries contributing to the i-th nonzero of the
   diagonal block A; similarly for B and for the "2" remote variants). Remote entries are shipped
   with the star forest mpiaij->coo_sf while local entries are accumulated, overlapping
   communication with computation.

   imode == INSERT_VALUES discards previous matrix values; ADD_VALUES accumulates into them. */
static PetscErrorCode MatSetValuesCOO_MPIAIJ(Mat mat, const PetscScalar v[], InsertMode imode) {
  Mat_MPIAIJ       *mpiaij = (Mat_MPIAIJ *)mat->data;
  Mat               A = mpiaij->A, B = mpiaij->B; /* diagonal and off-diagonal blocks */
  PetscCount        Annz = mpiaij->Annz, Annz2 = mpiaij->Annz2, Bnnz = mpiaij->Bnnz, Bnnz2 = mpiaij->Bnnz2;
  PetscScalar      *Aa, *Ba;
  PetscScalar      *sendbuf = mpiaij->sendbuf;
  PetscScalar      *recvbuf = mpiaij->recvbuf;
  const PetscCount *Ajmap1 = mpiaij->Ajmap1, *Ajmap2 = mpiaij->Ajmap2, *Aimap2 = mpiaij->Aimap2;
  const PetscCount *Bjmap1 = mpiaij->Bjmap1, *Bjmap2 = mpiaij->Bjmap2, *Bimap2 = mpiaij->Bimap2;
  const PetscCount *Aperm1 = mpiaij->Aperm1, *Aperm2 = mpiaij->Aperm2, *Bperm1 = mpiaij->Bperm1, *Bperm2 = mpiaij->Bperm2;
  const PetscCount *Cperm1 = mpiaij->Cperm1;

  PetscFunctionBegin;
  PetscCall(MatSeqAIJGetArray(A, &Aa)); /* Might read and write matrix values */
  PetscCall(MatSeqAIJGetArray(B, &Ba));

  /* Pack entries to be sent to remote */
  for (PetscCount i = 0; i < mpiaij->sendlen; i++) sendbuf[i] = v[Cperm1[i]];

  /* Send remote entries to their owner and overlap the communication with local computation */
  PetscCall(PetscSFReduceWithMemTypeBegin(mpiaij->coo_sf, MPIU_SCALAR, PETSC_MEMTYPE_HOST, sendbuf, PETSC_MEMTYPE_HOST, recvbuf, MPI_REPLACE));
  /* Add local entries to A and B */
  for (PetscCount i = 0; i < Annz; i++) { /* All nonzeros in A are either zero'ed or added with a value (i.e., initialized) */
    PetscScalar sum = 0.0;                /* Do partial summation first to improve numerical stability */
    for (PetscCount k = Ajmap1[i]; k < Ajmap1[i + 1]; k++) sum += v[Aperm1[k]];
    Aa[i] = (imode == INSERT_VALUES ? 0.0 : Aa[i]) + sum;
  }
  for (PetscCount i = 0; i < Bnnz; i++) {
    PetscScalar sum = 0.0;
    for (PetscCount k = Bjmap1[i]; k < Bjmap1[i + 1]; k++) sum += v[Bperm1[k]];
    Ba[i] = (imode == INSERT_VALUES ? 0.0 : Ba[i]) + sum;
  }
  PetscCall(PetscSFReduceEnd(mpiaij->coo_sf, MPIU_SCALAR, sendbuf, recvbuf, MPI_REPLACE));

  /* Add received remote entries to A and B; remote contributions are always ADDed, since the
     INSERT_VALUES zeroing already happened in the local loops above */
  for (PetscCount i = 0; i < Annz2; i++) {
    for (PetscCount k = Ajmap2[i]; k < Ajmap2[i + 1]; k++) Aa[Aimap2[i]] += recvbuf[Aperm2[k]];
  }
  for (PetscCount i = 0; i < Bnnz2; i++) {
    for (PetscCount k = Bjmap2[i]; k < Bjmap2[i + 1]; k++) Ba[Bimap2[i]] += recvbuf[Bperm2[k]];
  }
  PetscCall(MatSeqAIJRestoreArray(A, &Aa));
  PetscCall(MatSeqAIJRestoreArray(B, &Ba));
  PetscFunctionReturn(0);
}

/* ----------------------------------------------------------------*/

/*MC
   MATMPIAIJ - MATMPIAIJ = "mpiaij" - A matrix type to be used for parallel sparse matrices.

   Options Database Keys:
. -mat_type mpiaij - sets the matrix type to "mpiaij" during a call to MatSetFromOptions()

   Level: beginner

   Notes:
    MatSetValues() may be called for this matrix type with a NULL argument for the numerical values,
    in this case the values associated with the rows and columns one passes in are set to zero
    in the matrix

    MatSetOptions(,MAT_STRUCTURE_ONLY,PETSC_TRUE) may be called for this matrix type.
In this case, no
    space is allocated for the nonzero entries and any entries passed with MatSetValues() are ignored

.seealso: `MatCreateAIJ()`
M*/

/* Constructor for MATMPIAIJ: allocates the Mat_MPIAIJ implementation struct, installs the
   MATMPIAIJ ops table, creates the stash used to cache off-process entries set with
   MatSetValues(), and composes the type-specific methods (preallocation, conversions to other
   types, MatProduct hooks, COO assembly) looked up by name elsewhere in PETSc. */
PETSC_EXTERN PetscErrorCode MatCreate_MPIAIJ(Mat B) {
  Mat_MPIAIJ *b;
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));

  PetscCall(PetscNewLog(B, &b));
  B->data = (void *)b;
  PetscCall(PetscMemcpy(B->ops, &MatOps_Values, sizeof(struct _MatOps)));
  B->assembled = PETSC_FALSE;
  B->insertmode = NOT_SET_VALUES;
  b->size = size;

  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));

  /* build cache for off array entries formed */
  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));

  b->donotstash = PETSC_FALSE;
  b->colmap = NULL;
  b->garray = NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used for matrix vector multiply */
  b->lvec = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices = NULL;
  b->rowvalues = NULL;
  b->getrowactive = PETSC_FALSE;

  /* flexible pointer used in CUSPARSE classes */
  b->spptr = NULL;

  /* Compose type-specific methods; the "_C" names are queried via PetscObjectQueryFunction() */
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetUseScalableIncreaseOverlap_C", MatMPIAIJSetUseScalableIncreaseOverlap_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatIsTranspose_C", MatIsTranspose_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocation_C", MatMPIAIJSetPreallocation_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatResetPreallocation_C", MatResetPreallocation_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIAIJSetPreallocationCSR_C", MatMPIAIJSetPreallocationCSR_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIAIJ));
  /* Conversions to sibling AIJ formats; device/external-package conversions are compiled in
     only when the corresponding package is configured */
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijperm_C", MatConvert_MPIAIJ_MPIAIJPERM));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijsell_C", MatConvert_MPIAIJ_MPIAIJSELL));
#if defined(PETSC_HAVE_CUDA)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcusparse_C", MatConvert_MPIAIJ_MPIAIJCUSPARSE));
#endif
#if defined(PETSC_HAVE_KOKKOS_KERNELS)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijkokkos_C", MatConvert_MPIAIJ_MPIAIJKokkos));
#endif
#if defined(PETSC_HAVE_MKL_SPARSE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijmkl_C", MatConvert_MPIAIJ_MPIAIJMKL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpiaijcrl_C", MatConvert_MPIAIJ_MPIAIJCRL));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpibaij_C", MatConvert_MPIAIJ_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisbaij_C", MatConvert_MPIAIJ_MPISBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpidense_C", MatConvert_MPIAIJ_MPIDense));
#if defined(PETSC_HAVE_ELEMENTAL)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_elemental_C", MatConvert_MPIAIJ_Elemental));
#endif
#if defined(PETSC_HAVE_SCALAPACK)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_scalapack_C", MatConvert_AIJ_ScaLAPACK));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_is_C", MatConvert_XAIJ_IS));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_mpisell_C", MatConvert_MPIAIJ_MPISELL));
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpiaij_hypre_C", MatConvert_AIJ_HYPRE));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_transpose_mpiaij_mpiaij_C", MatProductSetFromOptions_Transpose_AIJ_AIJ));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_is_mpiaij_C", MatProductSetFromOptions_IS_XAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatProductSetFromOptions_mpiaij_mpiaij_C", MatProductSetFromOptions_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetPreallocationCOO_C", MatSetPreallocationCOO_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetValuesCOO_C", MatSetValuesCOO_MPIAIJ));
  PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIAIJ));
  PetscFunctionReturn(0);
}

/*@C
     MatCreateMPIAIJWithSplitArrays - creates a MPI AIJ matrix using arrays that contain the "diagonal"
         and "off-diagonal" part of the matrix in CSR format.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
       calculated if N is given) For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices for "diagonal" portion of matrix; that is i[0] = 0, i[row] = i[row-1] + number of elements in that row of the matrix
.
j - column indices, which must be local, i.e., based off the start column of the diagonal portion
.  a - matrix values
.  oi - row indices for "off-diagonal" portion of matrix; that is oi[0] = 0, oi[row] = oi[row-1] + number of elements in that row of the matrix
.  oj - column indices, which must be global, representing global columns in the MPIAIJ matrix
-  oa - matrix values

   Output Parameter:
.  mat - the matrix

   Level: advanced

   Notes:
       The i, j, and a arrays ARE NOT copied by this routine into the internal format used by PETSc. The user
       must free the arrays once the matrix has been destroyed and not before.

       The i and j indices are 0 based

       See MatCreateAIJ() for the definition of "diagonal" and "off-diagonal" portion of the matrix

       This sets local rows and cannot be used to set off-processor values.

       Use of this routine is discouraged because it is inflexible and cumbersome to use. It is extremely rare that a
       legacy application natively assembles into exactly this split format. The code to do so is nontrivial and does
       not easily support in-place reassembly. It is recommended to use MatSetValues() (or a variant thereof) because
       the resulting assembly is easier to implement, will work with any matrix format, and the user does not have to
       keep track of the underlying array. Use MatSetOption(A,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) to disable all
       communication if it is known that only local entries will be set.

.seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
          `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithArrays()`
@*/
PetscErrorCode MatCreateMPIAIJWithSplitArrays(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt i[], PetscInt j[], PetscScalar a[], PetscInt oi[], PetscInt oj[], PetscScalar oa[], Mat *mat) {
  Mat_MPIAIJ *maij;

  PetscFunctionBegin;
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCheck(i[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
  PetscCheck(oi[0] == 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "oi (row indices) must start with 0");
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatSetSizes(*mat, m, n, M, N));
  PetscCall(MatSetType(*mat, MATMPIAIJ));
  maij = (Mat_MPIAIJ *)(*mat)->data;

  (*mat)->preallocated = PETSC_TRUE;

  PetscCall(PetscLayoutSetUp((*mat)->rmap));
  PetscCall(PetscLayoutSetUp((*mat)->cmap));

  /* Wrap the user's arrays directly (no copy); the diagonal block uses local column indices
     of width n, the off-diagonal block uses global column indices of width cmap->N */
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, n, i, j, a, &maij->A));
  PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, m, (*mat)->cmap->N, oi, oj, oa, &maij->B));

  /* Assemble with off-process communication disabled (everything is local by construction) */
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  PetscCall(MatAssemblyBegin(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatSetOption(*mat, MAT_NO_OFF_PROC_ENTRIES, PETSC_FALSE));
  PetscCall(MatSetOption(*mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(0);
}

/* Per-product state for the MPIAIJ "backend" MatProduct implementation (A*P, Pt*A, Pt*A*P) */
typedef struct {
  Mat       *mp;    /* intermediate products */
  PetscBool *mptmp; /* is the intermediate product temporary ?
*/
  PetscInt cp; /* number of intermediate products */

  /* support for MatGetBrowsOfAoCols_MPIAIJ for P_oth */
  PetscInt    *startsj_s, *startsj_r;
  PetscScalar *bufa;
  Mat          P_oth;

  /* may take advantage of merging product->B */
  Mat Bloc; /* B-local by merging diag and off-diag */

  /* cusparse does not have support to split between symbolic and numeric phases.
     When api_user is true, we don't need to update the numerical values
     of the temporary storage */
  PetscBool reusesym;

  /* support for COO values insertion */
  PetscScalar *coo_v, *coo_w;     /* store on-process and off-process COO scalars, and used as MPI recv/send buffers respectively */
  PetscInt   **own;               /* own[i] points to address of on-process COO indices for Mat mp[i] */
  PetscInt   **off;               /* off[i] points to address of off-process COO indices for Mat mp[i] */
  PetscBool    hasoffproc;        /* if true, have off-process values insertion (i.e. AtB or PtAP) */
  PetscSF      sf;                /* used for non-local values insertion and memory malloc */
  PetscMemType mtype;

  /* customization */
  PetscBool abmerge;
  PetscBool P_oth_bind;
} MatMatMPIAIJBACKEND;

/* Destructor for the product data attached to a backend MatProduct; releases every buffer,
   intermediate matrix, and SF created during the symbolic phase */
PetscErrorCode MatDestroy_MatMatMPIAIJBACKEND(void *data) {
  MatMatMPIAIJBACKEND *mmdata = (MatMatMPIAIJBACKEND *)data;
  PetscInt             i;

  PetscFunctionBegin;
  PetscCall(PetscFree2(mmdata->startsj_s, mmdata->startsj_r));
  PetscCall(PetscFree(mmdata->bufa));
  /* coo_v/coo_w were allocated with the SF's memory type, so free them through the SF */
  PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_v));
  PetscCall(PetscSFFree(mmdata->sf, mmdata->mtype, mmdata->coo_w));
  PetscCall(MatDestroy(&mmdata->P_oth));
  PetscCall(MatDestroy(&mmdata->Bloc));
  PetscCall(PetscSFDestroy(&mmdata->sf));
  for (i = 0; i < mmdata->cp; i++) { PetscCall(MatDestroy(&mmdata->mp[i])); }
  PetscCall(PetscFree2(mmdata->mp, mmdata->mptmp));
  /* own[]/off[] are arrays of pointers into single allocations anchored at index 0 */
  PetscCall(PetscFree(mmdata->own[0]));
  PetscCall(PetscFree(mmdata->own));
  PetscCall(PetscFree(mmdata->off[0]));
  PetscCall(PetscFree(mmdata->off));
  PetscCall(PetscFree(mmdata));
  PetscFunctionReturn(0);
}

/* Copy selected n entries with indices in idx[] of A to v[].
   If idx is NULL, copy the whole data array of A to v[]
 */
static PetscErrorCode MatSeqAIJCopySubArray(Mat A, PetscInt n, const PetscInt idx[], PetscScalar v[]) {
  PetscErrorCode (*f)(Mat, PetscInt, const PetscInt[], PetscScalar[]);

  PetscFunctionBegin;
  /* Prefer a type-specific (e.g. device-aware) implementation when one is composed on A */
  PetscCall(PetscObjectQueryFunction((PetscObject)A, "MatSeqAIJCopySubArray_C", &f));
  if (f) {
    PetscCall((*f)(A, n, idx, v));
  } else {
    const PetscScalar *vv;

    PetscCall(MatSeqAIJGetArrayRead(A, &vv));
    if (n && idx) {
      PetscScalar    *w  = v;
      const PetscInt *oi = idx;
      PetscInt        j;

      for (j = 0; j < n; j++) *w++ = vv[*oi++];
    } else {
      /* NOTE(review): when idx is NULL the caller is expected to pass n == nnz(A) so the
         whole array is copied — confirm against call sites */
      PetscCall(PetscArraycpy(v, vv, n));
    }
    PetscCall(MatSeqAIJRestoreArrayRead(A, &vv));
  }
  PetscFunctionReturn(0);
}

/* Numeric phase of the backend MatProduct: recompute the intermediate products, gather their
   values into the COO buffers laid out by the symbolic phase (coo_w for off-process entries,
   coo_v for on-process ones), communicate off-process values if needed, and insert into C */
static PetscErrorCode MatProductNumeric_MPIAIJBACKEND(Mat C) {
  MatMatMPIAIJBACKEND *mmdata;
  PetscInt             i, n_d, n_o;

  PetscFunctionBegin;
  MatCheckProduct(C, 1);
  PetscCheck(C->product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data empty");
  mmdata = (MatMatMPIAIJBACKEND *)C->product->data;
  if (!mmdata->reusesym) { /* update temporary matrices */
    if (mmdata->P_oth) { PetscCall(MatGetBrowsOfAoCols_MPIAIJ(C->product->A, C->product->B, MAT_REUSE_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); }
    if (mmdata->Bloc) { PetscCall(MatMPIAIJGetLocalMatMerge(C->product->B, MAT_REUSE_MATRIX, NULL, &mmdata->Bloc)); }
  }
  mmdata->reusesym = PETSC_FALSE; /* reuse only applies to the first numeric call after symbolic */

  for (i = 0; i < mmdata->cp; i++) {
    PetscCheck(mmdata->mp[i]->ops->productnumeric, PetscObjectComm((PetscObject)mmdata->mp[i]), PETSC_ERR_PLIB, "Missing numeric op for %s", MatProductTypes[mmdata->mp[i]->product->type]);
    PetscCall((*mmdata->mp[i]->ops->productnumeric)(mmdata->mp[i]));
  }
  /* n_d / n_o track the running offsets into coo_v / coo_w across intermediate products */
  for (i = 0, n_d = 0, n_o = 0; i < mmdata->cp; i++) {
    PetscInt noff = mmdata->off[i + 1] - mmdata->off[i];

    if (mmdata->mptmp[i]) continue; /* temporary products feed later products, not C directly */
    if (noff) {
      PetscInt nown = mmdata->own[i + 1] - mmdata->own[i];

      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], noff, mmdata->off[i], mmdata->coo_w + n_o));
      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], nown, mmdata->own[i], mmdata->coo_v + n_d));
      n_o += noff;
      n_d += nown;
    } else {
      Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mmdata->mp[i]->data;

      PetscCall(MatSeqAIJCopySubArray(mmdata->mp[i], mm->nz, NULL, mmdata->coo_v + n_d));
      n_d += mm->nz;
    }
  }
  if (mmdata->hasoffproc) { /* offprocess insertion */
    PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
    PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_SCALAR, mmdata->coo_w, mmdata->coo_v + n_d));
  }
  PetscCall(MatSetValuesCOO(C, mmdata->coo_v, INSERT_VALUES));
  PetscFunctionReturn(0);
}

/* Support for Pt * A, A * P, or Pt * A * P */
#define MAX_NUMBER_INTERMEDIATE 4
PetscErrorCode MatProductSymbolic_MPIAIJBACKEND(Mat C) {
  Mat_Product            *product = C->product;
  Mat                     A, P, mp[MAX_NUMBER_INTERMEDIATE]; /* A, P and a series of intermediate matrices */
  Mat_MPIAIJ             *a, *p;
  MatMatMPIAIJBACKEND    *mmdata;
  ISLocalToGlobalMapping  P_oth_l2g = NULL;
  IS                      glob      = NULL;
  const char             *prefix;
  char                    pprefix[256];
  const PetscInt         *globidx, *P_oth_idx;
  PetscInt                i, j, cp, m, n, M, N, *coo_i, *coo_j;
  PetscCount              ncoo, ncoo_d, ncoo_o, ncoo_oown;
  PetscInt                cmapt[MAX_NUMBER_INTERMEDIATE], rmapt[MAX_NUMBER_INTERMEDIATE]; /* col/row map type for each Mat in mp[].
*/ 6914 /* type-0: consecutive, start from 0; type-1: consecutive with */ 6915 /* a base offset; type-2: sparse with a local to global map table */ 6916 const PetscInt *cmapa[MAX_NUMBER_INTERMEDIATE], *rmapa[MAX_NUMBER_INTERMEDIATE]; /* col/row local to global map array (table) for type-2 map type */ 6917 6918 MatProductType ptype; 6919 PetscBool mptmp[MAX_NUMBER_INTERMEDIATE], hasoffproc = PETSC_FALSE, iscuda, iskokk; 6920 PetscMPIInt size; 6921 6922 PetscFunctionBegin; 6923 MatCheckProduct(C, 1); 6924 PetscCheck(!product->data, PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Product data not empty"); 6925 ptype = product->type; 6926 if (product->A->symmetric == PETSC_BOOL3_TRUE && ptype == MATPRODUCT_AtB) { 6927 ptype = MATPRODUCT_AB; 6928 product->symbolic_used_the_fact_A_is_symmetric = PETSC_TRUE; 6929 } 6930 switch (ptype) { 6931 case MATPRODUCT_AB: 6932 A = product->A; 6933 P = product->B; 6934 m = A->rmap->n; 6935 n = P->cmap->n; 6936 M = A->rmap->N; 6937 N = P->cmap->N; 6938 hasoffproc = PETSC_FALSE; /* will not scatter mat product values to other processes */ 6939 break; 6940 case MATPRODUCT_AtB: 6941 P = product->A; 6942 A = product->B; 6943 m = P->cmap->n; 6944 n = A->cmap->n; 6945 M = P->cmap->N; 6946 N = A->cmap->N; 6947 hasoffproc = PETSC_TRUE; 6948 break; 6949 case MATPRODUCT_PtAP: 6950 A = product->A; 6951 P = product->B; 6952 m = P->cmap->n; 6953 n = P->cmap->n; 6954 M = P->cmap->N; 6955 N = P->cmap->N; 6956 hasoffproc = PETSC_TRUE; 6957 break; 6958 default: SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]); 6959 } 6960 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)C), &size)); 6961 if (size == 1) hasoffproc = PETSC_FALSE; 6962 6963 /* defaults */ 6964 for (i = 0; i < MAX_NUMBER_INTERMEDIATE; i++) { 6965 mp[i] = NULL; 6966 mptmp[i] = PETSC_FALSE; 6967 rmapt[i] = -1; 6968 cmapt[i] = -1; 6969 rmapa[i] = NULL; 6970 cmapa[i] = NULL; 6971 } 6972 6973 /* customization */ 6974 
PetscCall(PetscNew(&mmdata)); 6975 mmdata->reusesym = product->api_user; 6976 if (ptype == MATPRODUCT_AB) { 6977 if (product->api_user) { 6978 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatMatMult", "Mat"); 6979 PetscCall(PetscOptionsBool("-matmatmult_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL)); 6980 PetscCall(PetscOptionsBool("-matmatmult_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 6981 PetscOptionsEnd(); 6982 } else { 6983 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_AB", "Mat"); 6984 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_mergeB", "Merge product->B local matrices", "MatMatMult", mmdata->abmerge, &mmdata->abmerge, NULL)); 6985 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 6986 PetscOptionsEnd(); 6987 } 6988 } else if (ptype == MATPRODUCT_PtAP) { 6989 if (product->api_user) { 6990 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatPtAP", "Mat"); 6991 PetscCall(PetscOptionsBool("-matptap_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 6992 PetscOptionsEnd(); 6993 } else { 6994 PetscOptionsBegin(PetscObjectComm((PetscObject)C), ((PetscObject)C)->prefix, "MatProduct_PtAP", "Mat"); 6995 PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_pothbind", "Bind P_oth to CPU", "MatBindToCPU", mmdata->P_oth_bind, &mmdata->P_oth_bind, NULL)); 6996 PetscOptionsEnd(); 6997 } 6998 } 6999 a = (Mat_MPIAIJ *)A->data; 7000 p = (Mat_MPIAIJ *)P->data; 7001 PetscCall(MatSetSizes(C, m, n, M, N)); 7002 PetscCall(PetscLayoutSetUp(C->rmap)); 7003 PetscCall(PetscLayoutSetUp(C->cmap)); 7004 PetscCall(MatSetType(C, ((PetscObject)A)->type_name)); 7005 
PetscCall(MatGetOptionsPrefix(C, &prefix)); 7006 7007 cp = 0; 7008 switch (ptype) { 7009 case MATPRODUCT_AB: /* A * P */ 7010 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7011 7012 /* A_diag * P_local (merged or not) */ 7013 if (mmdata->abmerge) { /* P's diagonal and off-diag blocks are merged to one matrix, then multiplied by A_diag */ 7014 /* P is product->B */ 7015 PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7016 PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp])); 7017 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7018 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7019 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7020 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7021 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7022 mp[cp]->product->api_user = product->api_user; 7023 PetscCall(MatProductSetFromOptions(mp[cp])); 7024 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7025 PetscCall(ISGetIndices(glob, &globidx)); 7026 rmapt[cp] = 1; 7027 cmapt[cp] = 2; 7028 cmapa[cp] = globidx; 7029 mptmp[cp] = PETSC_FALSE; 7030 cp++; 7031 } else { /* A_diag * P_diag and A_diag * P_off */ 7032 PetscCall(MatProductCreate(a->A, p->A, NULL, &mp[cp])); 7033 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7034 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7035 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7036 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7037 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7038 mp[cp]->product->api_user = product->api_user; 7039 PetscCall(MatProductSetFromOptions(mp[cp])); 7040 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7041 rmapt[cp] = 1; 7042 cmapt[cp] = 1; 7043 mptmp[cp] = PETSC_FALSE; 7044 cp++; 7045 PetscCall(MatProductCreate(a->A, p->B, NULL, &mp[cp])); 7046 
PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7047 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7048 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7049 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7050 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7051 mp[cp]->product->api_user = product->api_user; 7052 PetscCall(MatProductSetFromOptions(mp[cp])); 7053 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7054 rmapt[cp] = 1; 7055 cmapt[cp] = 2; 7056 cmapa[cp] = p->garray; 7057 mptmp[cp] = PETSC_FALSE; 7058 cp++; 7059 } 7060 7061 /* A_off * P_other */ 7062 if (mmdata->P_oth) { 7063 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); /* make P_oth use local col ids */ 7064 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx)); 7065 PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)(a->B))->type_name)); 7066 PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind)); 7067 PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp])); 7068 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7069 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7070 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7071 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7072 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7073 mp[cp]->product->api_user = product->api_user; 7074 PetscCall(MatProductSetFromOptions(mp[cp])); 7075 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7076 rmapt[cp] = 1; 7077 cmapt[cp] = 2; 7078 cmapa[cp] = P_oth_idx; 7079 mptmp[cp] = PETSC_FALSE; 7080 cp++; 7081 } 7082 break; 7083 7084 case MATPRODUCT_AtB: /* (P^t * A): P_diag * A_loc + P_off * A_loc */ 7085 /* A is product->B */ 7086 PetscCall(MatMPIAIJGetLocalMatMerge(A, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7087 if (A == P) { /* when A==P, we can take advantage of the already merged mmdata->Bloc */ 7088 PetscCall(MatProductCreate(mmdata->Bloc, 
mmdata->Bloc, NULL, &mp[cp])); 7089 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7090 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7091 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7092 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7093 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7094 mp[cp]->product->api_user = product->api_user; 7095 PetscCall(MatProductSetFromOptions(mp[cp])); 7096 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7097 PetscCall(ISGetIndices(glob, &globidx)); 7098 rmapt[cp] = 2; 7099 rmapa[cp] = globidx; 7100 cmapt[cp] = 2; 7101 cmapa[cp] = globidx; 7102 mptmp[cp] = PETSC_FALSE; 7103 cp++; 7104 } else { 7105 PetscCall(MatProductCreate(p->A, mmdata->Bloc, NULL, &mp[cp])); 7106 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7107 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7108 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7109 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7110 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7111 mp[cp]->product->api_user = product->api_user; 7112 PetscCall(MatProductSetFromOptions(mp[cp])); 7113 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7114 PetscCall(ISGetIndices(glob, &globidx)); 7115 rmapt[cp] = 1; 7116 cmapt[cp] = 2; 7117 cmapa[cp] = globidx; 7118 mptmp[cp] = PETSC_FALSE; 7119 cp++; 7120 PetscCall(MatProductCreate(p->B, mmdata->Bloc, NULL, &mp[cp])); 7121 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7122 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7123 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7124 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7125 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7126 mp[cp]->product->api_user = product->api_user; 7127 PetscCall(MatProductSetFromOptions(mp[cp])); 7128 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7129 rmapt[cp] = 2; 7130 rmapa[cp] = p->garray; 
7131 cmapt[cp] = 2; 7132 cmapa[cp] = globidx; 7133 mptmp[cp] = PETSC_FALSE; 7134 cp++; 7135 } 7136 break; 7137 case MATPRODUCT_PtAP: 7138 PetscCall(MatGetBrowsOfAoCols_MPIAIJ(A, P, MAT_INITIAL_MATRIX, &mmdata->startsj_s, &mmdata->startsj_r, &mmdata->bufa, &mmdata->P_oth)); 7139 /* P is product->B */ 7140 PetscCall(MatMPIAIJGetLocalMatMerge(P, MAT_INITIAL_MATRIX, &glob, &mmdata->Bloc)); 7141 PetscCall(MatProductCreate(a->A, mmdata->Bloc, NULL, &mp[cp])); 7142 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_PtAP)); 7143 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7144 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7145 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7146 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7147 mp[cp]->product->api_user = product->api_user; 7148 PetscCall(MatProductSetFromOptions(mp[cp])); 7149 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7150 PetscCall(ISGetIndices(glob, &globidx)); 7151 rmapt[cp] = 2; 7152 rmapa[cp] = globidx; 7153 cmapt[cp] = 2; 7154 cmapa[cp] = globidx; 7155 mptmp[cp] = PETSC_FALSE; 7156 cp++; 7157 if (mmdata->P_oth) { 7158 PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(mmdata->P_oth, &P_oth_l2g)); 7159 PetscCall(ISLocalToGlobalMappingGetIndices(P_oth_l2g, &P_oth_idx)); 7160 PetscCall(MatSetType(mmdata->P_oth, ((PetscObject)(a->B))->type_name)); 7161 PetscCall(MatBindToCPU(mmdata->P_oth, mmdata->P_oth_bind)); 7162 PetscCall(MatProductCreate(a->B, mmdata->P_oth, NULL, &mp[cp])); 7163 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AB)); 7164 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7165 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7166 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7167 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7168 mp[cp]->product->api_user = product->api_user; 7169 PetscCall(MatProductSetFromOptions(mp[cp])); 7170 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7171 
mptmp[cp] = PETSC_TRUE; 7172 cp++; 7173 PetscCall(MatProductCreate(mmdata->Bloc, mp[1], NULL, &mp[cp])); 7174 PetscCall(MatProductSetType(mp[cp], MATPRODUCT_AtB)); 7175 PetscCall(MatProductSetFill(mp[cp], product->fill)); 7176 PetscCall(PetscSNPrintf(pprefix, sizeof(pprefix), "backend_p%" PetscInt_FMT "_", cp)); 7177 PetscCall(MatSetOptionsPrefix(mp[cp], prefix)); 7178 PetscCall(MatAppendOptionsPrefix(mp[cp], pprefix)); 7179 mp[cp]->product->api_user = product->api_user; 7180 PetscCall(MatProductSetFromOptions(mp[cp])); 7181 PetscCall((*mp[cp]->ops->productsymbolic)(mp[cp])); 7182 rmapt[cp] = 2; 7183 rmapa[cp] = globidx; 7184 cmapt[cp] = 2; 7185 cmapa[cp] = P_oth_idx; 7186 mptmp[cp] = PETSC_FALSE; 7187 cp++; 7188 } 7189 break; 7190 default: SETERRQ(PetscObjectComm((PetscObject)C), PETSC_ERR_PLIB, "Not for product type %s", MatProductTypes[ptype]); 7191 } 7192 /* sanity check */ 7193 if (size > 1) 7194 for (i = 0; i < cp; i++) PetscCheck(rmapt[i] != 2 || hasoffproc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected offproc map type for product %" PetscInt_FMT, i); 7195 7196 PetscCall(PetscMalloc2(cp, &mmdata->mp, cp, &mmdata->mptmp)); 7197 for (i = 0; i < cp; i++) { 7198 mmdata->mp[i] = mp[i]; 7199 mmdata->mptmp[i] = mptmp[i]; 7200 } 7201 mmdata->cp = cp; 7202 C->product->data = mmdata; 7203 C->product->destroy = MatDestroy_MatMatMPIAIJBACKEND; 7204 C->ops->productnumeric = MatProductNumeric_MPIAIJBACKEND; 7205 7206 /* memory type */ 7207 mmdata->mtype = PETSC_MEMTYPE_HOST; 7208 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iscuda, MATSEQAIJCUSPARSE, MATMPIAIJCUSPARSE, "")); 7209 PetscCall(PetscObjectTypeCompareAny((PetscObject)C, &iskokk, MATSEQAIJKOKKOS, MATMPIAIJKOKKOS, "")); 7210 if (iscuda) mmdata->mtype = PETSC_MEMTYPE_CUDA; 7211 else if (iskokk) mmdata->mtype = PETSC_MEMTYPE_KOKKOS; 7212 7213 /* prepare coo coordinates for values insertion */ 7214 7215 /* count total nonzeros of those intermediate seqaij Mats 7216 ncoo_d: # of nonzeros of matrices that do 
not have offproc entries 7217 ncoo_o: # of nonzeros (of matrices that might have offproc entries) that will be inserted to remote procs 7218 ncoo_oown: # of nonzeros (of matrices that might have offproc entries) that will be inserted locally 7219 */ 7220 for (cp = 0, ncoo_d = 0, ncoo_o = 0, ncoo_oown = 0; cp < mmdata->cp; cp++) { 7221 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7222 if (mptmp[cp]) continue; 7223 if (rmapt[cp] == 2 && hasoffproc) { /* the rows need to be scatter to all processes (might include self) */ 7224 const PetscInt *rmap = rmapa[cp]; 7225 const PetscInt mr = mp[cp]->rmap->n; 7226 const PetscInt rs = C->rmap->rstart; 7227 const PetscInt re = C->rmap->rend; 7228 const PetscInt *ii = mm->i; 7229 for (i = 0; i < mr; i++) { 7230 const PetscInt gr = rmap[i]; 7231 const PetscInt nz = ii[i + 1] - ii[i]; 7232 if (gr < rs || gr >= re) ncoo_o += nz; /* this row is offproc */ 7233 else ncoo_oown += nz; /* this row is local */ 7234 } 7235 } else ncoo_d += mm->nz; 7236 } 7237 7238 /* 7239 ncoo: total number of nonzeros (including those inserted by remote procs) belonging to this proc 7240 7241 ncoo = ncoo_d + ncoo_oown + ncoo2, which ncoo2 is number of nonzeros inserted to me by other procs. 7242 7243 off[0] points to a big index array, which is shared by off[1,2,...]. Similarily, for own[0]. 7244 7245 off[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert to others 7246 own[p]: points to the segment for matrix mp[p], storing location of nonzeros that mp[p] will insert locally 7247 so, off[p+1]-off[p] is the number of nonzeros that mp[p] will send to others. 7248 7249 coo_i/j/v[]: [ncoo] row/col/val of nonzeros belonging to this proc. 7250 Ex. coo_i[]: the beginning part (of size ncoo_d + ncoo_oown) stores i of local nonzeros, and the remaing part stores i of nonzeros I will receive. 
7251 */ 7252 PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->off)); /* +1 to make a csr-like data structure */ 7253 PetscCall(PetscCalloc1(mmdata->cp + 1, &mmdata->own)); 7254 7255 /* gather (i,j) of nonzeros inserted by remote procs */ 7256 if (hasoffproc) { 7257 PetscSF msf; 7258 PetscInt ncoo2, *coo_i2, *coo_j2; 7259 7260 PetscCall(PetscMalloc1(ncoo_o, &mmdata->off[0])); 7261 PetscCall(PetscMalloc1(ncoo_oown, &mmdata->own[0])); 7262 PetscCall(PetscMalloc2(ncoo_o, &coo_i, ncoo_o, &coo_j)); /* to collect (i,j) of entries to be sent to others */ 7263 7264 for (cp = 0, ncoo_o = 0; cp < mmdata->cp; cp++) { 7265 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7266 PetscInt *idxoff = mmdata->off[cp]; 7267 PetscInt *idxown = mmdata->own[cp]; 7268 if (!mptmp[cp] && rmapt[cp] == 2) { /* row map is sparse */ 7269 const PetscInt *rmap = rmapa[cp]; 7270 const PetscInt *cmap = cmapa[cp]; 7271 const PetscInt *ii = mm->i; 7272 PetscInt *coi = coo_i + ncoo_o; 7273 PetscInt *coj = coo_j + ncoo_o; 7274 const PetscInt mr = mp[cp]->rmap->n; 7275 const PetscInt rs = C->rmap->rstart; 7276 const PetscInt re = C->rmap->rend; 7277 const PetscInt cs = C->cmap->rstart; 7278 for (i = 0; i < mr; i++) { 7279 const PetscInt *jj = mm->j + ii[i]; 7280 const PetscInt gr = rmap[i]; 7281 const PetscInt nz = ii[i + 1] - ii[i]; 7282 if (gr < rs || gr >= re) { /* this is an offproc row */ 7283 for (j = ii[i]; j < ii[i + 1]; j++) { 7284 *coi++ = gr; 7285 *idxoff++ = j; 7286 } 7287 if (!cmapt[cp]) { /* already global */ 7288 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7289 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7290 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7291 } else { /* offdiag */ 7292 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7293 } 7294 ncoo_o += nz; 7295 } else { /* this is a local row */ 7296 for (j = ii[i]; j < ii[i + 1]; j++) *idxown++ = j; 7297 } 7298 } 7299 } 7300 mmdata->off[cp + 1] = idxoff; 7301 mmdata->own[cp + 1] = idxown; 7302 } 7303 7304 
PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf)); 7305 PetscCall(PetscSFSetGraphLayout(mmdata->sf, C->rmap, ncoo_o /*nleaves*/, NULL /*ilocal*/, PETSC_OWN_POINTER, coo_i)); 7306 PetscCall(PetscSFGetMultiSF(mmdata->sf, &msf)); 7307 PetscCall(PetscSFGetGraph(msf, &ncoo2 /*nroots*/, NULL, NULL, NULL)); 7308 ncoo = ncoo_d + ncoo_oown + ncoo2; 7309 PetscCall(PetscMalloc2(ncoo, &coo_i2, ncoo, &coo_j2)); 7310 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); /* put (i,j) of remote nonzeros at back */ 7311 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_i, coo_i2 + ncoo_d + ncoo_oown)); 7312 PetscCall(PetscSFGatherBegin(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown)); 7313 PetscCall(PetscSFGatherEnd(mmdata->sf, MPIU_INT, coo_j, coo_j2 + ncoo_d + ncoo_oown)); 7314 PetscCall(PetscFree2(coo_i, coo_j)); 7315 /* allocate MPI send buffer to collect nonzero values to be sent to remote procs */ 7316 PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo_o * sizeof(PetscScalar), (void **)&mmdata->coo_w)); 7317 coo_i = coo_i2; 7318 coo_j = coo_j2; 7319 } else { /* no offproc values insertion */ 7320 ncoo = ncoo_d; 7321 PetscCall(PetscMalloc2(ncoo, &coo_i, ncoo, &coo_j)); 7322 7323 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)C), &mmdata->sf)); 7324 PetscCall(PetscSFSetGraph(mmdata->sf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER)); 7325 PetscCall(PetscSFSetUp(mmdata->sf)); 7326 } 7327 mmdata->hasoffproc = hasoffproc; 7328 7329 /* gather (i,j) of nonzeros inserted locally */ 7330 for (cp = 0, ncoo_d = 0; cp < mmdata->cp; cp++) { 7331 Mat_SeqAIJ *mm = (Mat_SeqAIJ *)mp[cp]->data; 7332 PetscInt *coi = coo_i + ncoo_d; 7333 PetscInt *coj = coo_j + ncoo_d; 7334 const PetscInt *jj = mm->j; 7335 const PetscInt *ii = mm->i; 7336 const PetscInt *cmap = cmapa[cp]; 7337 const PetscInt *rmap = rmapa[cp]; 7338 const PetscInt mr = mp[cp]->rmap->n; 7339 const PetscInt rs = C->rmap->rstart; 7340 const 
PetscInt re = C->rmap->rend; 7341 const PetscInt cs = C->cmap->rstart; 7342 7343 if (mptmp[cp]) continue; 7344 if (rmapt[cp] == 1) { /* consecutive rows */ 7345 /* fill coo_i */ 7346 for (i = 0; i < mr; i++) { 7347 const PetscInt gr = i + rs; 7348 for (j = ii[i]; j < ii[i + 1]; j++) coi[j] = gr; 7349 } 7350 /* fill coo_j */ 7351 if (!cmapt[cp]) { /* type-0, already global */ 7352 PetscCall(PetscArraycpy(coj, jj, mm->nz)); 7353 } else if (cmapt[cp] == 1) { /* type-1, local to global for consecutive columns of C */ 7354 for (j = 0; j < mm->nz; j++) coj[j] = jj[j] + cs; /* lid + col start */ 7355 } else { /* type-2, local to global for sparse columns */ 7356 for (j = 0; j < mm->nz; j++) coj[j] = cmap[jj[j]]; 7357 } 7358 ncoo_d += mm->nz; 7359 } else if (rmapt[cp] == 2) { /* sparse rows */ 7360 for (i = 0; i < mr; i++) { 7361 const PetscInt *jj = mm->j + ii[i]; 7362 const PetscInt gr = rmap[i]; 7363 const PetscInt nz = ii[i + 1] - ii[i]; 7364 if (gr >= rs && gr < re) { /* local rows */ 7365 for (j = ii[i]; j < ii[i + 1]; j++) *coi++ = gr; 7366 if (!cmapt[cp]) { /* type-0, already global */ 7367 for (j = 0; j < nz; j++) *coj++ = jj[j]; 7368 } else if (cmapt[cp] == 1) { /* local to global for owned columns of C */ 7369 for (j = 0; j < nz; j++) *coj++ = jj[j] + cs; 7370 } else { /* type-2, local to global for sparse columns */ 7371 for (j = 0; j < nz; j++) *coj++ = cmap[jj[j]]; 7372 } 7373 ncoo_d += nz; 7374 } 7375 } 7376 } 7377 } 7378 if (glob) { PetscCall(ISRestoreIndices(glob, &globidx)); } 7379 PetscCall(ISDestroy(&glob)); 7380 if (P_oth_l2g) { PetscCall(ISLocalToGlobalMappingRestoreIndices(P_oth_l2g, &P_oth_idx)); } 7381 PetscCall(ISLocalToGlobalMappingDestroy(&P_oth_l2g)); 7382 /* allocate an array to store all nonzeros (inserted locally or remotely) belonging to this proc */ 7383 PetscCall(PetscSFMalloc(mmdata->sf, mmdata->mtype, ncoo * sizeof(PetscScalar), (void **)&mmdata->coo_v)); 7384 7385 /* preallocate with COO data */ 7386 PetscCall(MatSetPreallocationCOO(C, 
  ncoo, coo_i, coo_j));
  PetscCall(PetscFree2(coo_i, coo_j));
  PetscFunctionReturn(0);
}

/* Selects the "backend" implementation of the matrix products AB, AtB and PtAP for MPIAIJ
   (device-capable path built on local seqaij products + COO assembly), or falls back to the
   plain MPIAIJ product kernels. Installed as the matproduct setfromoptions hook. */
PetscErrorCode MatProductSetFromOptions_MPIAIJBACKEND(Mat mat) {
  Mat_Product *product = mat->product;
#if defined(PETSC_HAVE_DEVICE)
  PetscBool match  = PETSC_FALSE; /* set below: operands must share the same (device) type */
  PetscBool usecpu = PETSC_FALSE; /* user override: force the CPU implementation */
#else
  PetscBool match = PETSC_TRUE; /* no device support compiled in: backend path always eligible */
#endif

  PetscFunctionBegin;
  MatCheckProduct(mat, 1);
#if defined(PETSC_HAVE_DEVICE)
  /* use the backend only when neither operand is bound to the CPU and both have the same type */
  if (!product->A->boundtocpu && !product->B->boundtocpu) { PetscCall(PetscObjectTypeCompare((PetscObject)product->B, ((PetscObject)product->A)->type_name, &match)); }
  if (match) { /* we can always fallback to the CPU if requested */
    /* the option name depends on whether the user entered through the legacy API
       (MatMatMult/MatTransposeMatMult/MatPtAP) or through the MatProduct API */
    switch (product->type) {
    case MATPRODUCT_AB:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatMatMult", "Mat");
        PetscCall(PetscOptionsBool("-matmatmult_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AB", "Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      }
      break;
    case MATPRODUCT_AtB:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatTransposeMatMult", "Mat");
        PetscCall(PetscOptionsBool("-mattransposematmult_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_AtB", "Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatTransposeMatMult", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      }
      break;
    case MATPRODUCT_PtAP:
      if (product->api_user) {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatPtAP", "Mat");
        PetscCall(PetscOptionsBool("-matptap_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      } else {
        PetscOptionsBegin(PetscObjectComm((PetscObject)mat), ((PetscObject)mat)->prefix, "MatProduct_PtAP", "Mat");
        PetscCall(PetscOptionsBool("-mat_product_algorithm_backend_cpu", "Use CPU code", "MatPtAP", usecpu, &usecpu, NULL));
        PetscOptionsEnd();
      }
      break;
    default: break;
    }
    match = (PetscBool)!usecpu; /* honor an explicit request for the CPU implementation */
  }
#endif
  if (match) {
    switch (product->type) {
    case MATPRODUCT_AB:
    case MATPRODUCT_AtB:
    case MATPRODUCT_PtAP: mat->ops->productsymbolic = MatProductSymbolic_MPIAIJBACKEND; break;
    default: break;
    }
  }
  /* fallback to MPIAIJ ops */
  if (!mat->ops->productsymbolic) PetscCall(MatProductSetFromOptions_MPIAIJ(mat));
  PetscFunctionReturn(0);
}

/*
  Produces a set of block column indices of the matrix row, one for each block represented in the original row

  n - the number of block indices in cc[]
  cc - the block indices (must be large enough to contain the indices)
*/
static inline PetscErrorCode MatCollapseRow(Mat Amat, PetscInt row, PetscInt bs, PetscInt *n, PetscInt *cc) {
  PetscInt        cnt = -1, nidx, j;
  const PetscInt *idx;

  PetscFunctionBegin;
  PetscCall(MatGetRow(Amat, row, &nidx, &idx, NULL));
  if (nidx) {
    cnt     = 0;
    cc[cnt] = idx[0] / bs;
    /* relies on MatGetRow() returning sorted column indices, so entries of the same
       block are consecutive and each block index is recorded exactly once */
    for (j = 1; j < nidx; j++) {
      if (cc[cnt] < idx[j] / bs) cc[++cnt] = idx[j] / bs;
    }
  }
  PetscCall(MatRestoreRow(Amat, row, &nidx, &idx, NULL));
  *n = cnt + 1; /* 0 when the row is empty (cnt stays -1) */
  PetscFunctionReturn(0);
}

/*
  Produces a set of block column indices of the matrix block row, one for each block represented in the original set of rows

  ncollapsed - the number of block indices
  collapsed - the block indices (must be large enough to contain the indices)

  w0, w1, w2 - caller-provided work arrays, each large enough to hold a merged block row
*/
static inline PetscErrorCode MatCollapseRows(Mat Amat, PetscInt start, PetscInt bs, PetscInt *w0, PetscInt *w1, PetscInt *w2, PetscInt *ncollapsed, PetscInt **collapsed) {
  PetscInt i, nprev, *cprev = w0, ncur = 0, *ccur = w1, *merged = w2, *cprevtmp;

  PetscFunctionBegin;
  /* collapse the first row of the block row, then fold each following row in by merging
     its block indices with the accumulated set */
  PetscCall(MatCollapseRow(Amat, start, bs, &nprev, cprev));
  for (i = start + 1; i < start + bs; i++) {
    PetscCall(MatCollapseRow(Amat, i, bs, &ncur, ccur));
    /* merged is non-NULL (a work array), so PetscMergeIntArray() writes in place
       instead of allocating a fresh array */
    PetscCall(PetscMergeIntArray(nprev, cprev, ncur, ccur, &nprev, &merged));
    /* swap pointers so the accumulated result is always in cprev */
    cprevtmp = cprev;
    cprev    = merged;
    merged   = cprevtmp;
  }
  *ncollapsed = nprev;
  if (collapsed) *collapsed = cprev; /* points into one of the caller's work arrays */
  PetscFunctionReturn(0);
}

/* -------------------------------------------------------------------------- */
/*
   MatCreateGraph_Simple_AIJ - create simple scalar matrix (graph) from potentially blocked matrix

   Input Parameter:
+  Amat - matrix
.  symmetrize - make the result symmetric
-  scale - scale with diagonal

   Output Parameter:
.
   a_Gmat - output scalar graph >= 0

 */
PETSC_INTERN PetscErrorCode MatCreateGraph_Simple_AIJ(Mat Amat, PetscBool symmetrize, PetscBool scale, Mat *a_Gmat) {
  PetscInt  Istart, Iend, Ii, jj, kk, ncols, nloc, NN, MM, bs;
  MPI_Comm  comm;
  Mat       Gmat;
  PetscBool ismpiaij, isseqaij;
  Mat       a, b, c; /* a = diagonal block, b = off-diagonal block (NULL for seq), c = loop cursor */
  MatType   jtype;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)Amat, &comm));
  PetscCall(MatGetOwnershipRange(Amat, &Istart, &Iend));
  PetscCall(MatGetSize(Amat, &MM, &NN));
  PetscCall(MatGetBlockSize(Amat, &bs));
  nloc = (Iend - Istart) / bs; /* number of local block rows = local rows of the graph */

  PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATSEQAIJ, &isseqaij));
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)Amat, MATMPIAIJ, &ismpiaij));
  PetscCheck(isseqaij || ismpiaij, comm, PETSC_ERR_USER, "Require (MPI)AIJ matrix type");

  /* TODO GPU: these calls are potentially expensive if matrices are large and we want to use the GPU */
  /* A solution consists in providing a new API, MatAIJGetCollapsedAIJ, and each class can provide a fast
     implementation */
  if (bs > 1) {
    /* blocked case: each bs x bs block of Amat collapses to one scalar entry of Gmat */
    PetscCall(MatGetType(Amat, &jtype));
    PetscCall(MatCreate(comm, &Gmat));
    PetscCall(MatSetType(Gmat, jtype));
    PetscCall(MatSetSizes(Gmat, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE));
    PetscCall(MatSetBlockSizes(Gmat, 1, 1));
    if (isseqaij || ((Mat_MPIAIJ *)Amat->data)->garray) {
      /* fast path: assumes every nonzero appears as a full dense bs x bs block; verified
         row-by-row below, with a fallback (goto old_bs) when the assumption fails */
      PetscInt  *d_nnz, *o_nnz;
      MatScalar *aa, val, AA[4096]; /* per-block-row value buffer, capacity checked below */
      PetscInt  *aj, *ai, AJ[4096], nc;
      if (isseqaij) {
        a = Amat;
        b = NULL;
      } else {
        Mat_MPIAIJ *d = (Mat_MPIAIJ *)Amat->data;
        a = d->A;
        b = d->B;
      }
      PetscCall(PetscInfo(Amat, "New bs>1 Graph. nloc=%" PetscInt_FMT "\n", nloc));
      PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 0 : nloc, &o_nnz));
      /* count block nonzeros per block row, and verify the dense-block assumption */
      for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
        PetscInt       *nnz = (c == a) ? d_nnz : o_nnz, nmax = 0;
        const PetscInt *cols;
        for (PetscInt brow = 0, jj, ok = 1, j0; brow < nloc * bs; brow += bs) { // block rows
          PetscCall(MatGetRow(c, brow, &jj, &cols, NULL));
          nnz[brow / bs] = jj / bs;
          if (jj % bs) ok = 0; /* row length not a multiple of bs -> blocks are not dense */
          if (cols) j0 = cols[0];
          else j0 = -1;
          PetscCall(MatRestoreRow(c, brow, &jj, &cols, NULL));
          if (nnz[brow / bs] > nmax) nmax = nnz[brow / bs];
          for (PetscInt ii = 1; ii < bs && nnz[brow / bs]; ii++) { // check for non-dense blocks
            PetscCall(MatGetRow(c, brow + ii, &jj, &cols, NULL));
            if (jj % bs) ok = 0;
            if ((cols && j0 != cols[0]) || (!cols && j0 != -1)) ok = 0; /* rows of the block must start at the same column */
            if (nnz[brow / bs] != jj / bs) ok = 0;                     /* and have the same number of blocks */
            PetscCall(MatRestoreRow(c, brow + ii, &jj, &cols, NULL));
          }
          if (!ok) {
            PetscCall(PetscFree2(d_nnz, o_nnz));
            goto old_bs; /* fall back to the general (slower) algorithm below */
          }
        }
        PetscCheck(nmax < 4096, PETSC_COMM_SELF, PETSC_ERR_USER, "Buffer %" PetscInt_FMT " too small 4096.", nmax);
      }
      /* only one of these two takes effect, depending on Gmat's type */
      PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz));
      PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz));
      PetscCall(PetscFree2(d_nnz, o_nnz));
      // diag
      for (PetscInt brow = 0, n, grow; brow < nloc * bs; brow += bs) { // block rows
        Mat_SeqAIJ *aseq = (Mat_SeqAIJ *)a->data;
        ai = aseq->i;
        n  = ai[brow + 1] - ai[brow];
        aj = aseq->j + ai[brow];
        for (int k = 0; k < n; k += bs) {        // block columns
          AJ[k / bs] = aj[k] / bs + Istart / bs; // diag starts at (Istart,Istart)
          val        = 0;
          for (int ii = 0; ii < bs; ii++) { // rows in block
            aa = aseq->a + ai[brow + ii] + k;
            for (int jj = 0; jj < bs; jj++) {          // columns in block
              val += PetscAbs(PetscRealPart(aa[jj])); // a sort of norm
            }
          }
          AA[k / bs] = val;
        }
        grow = Istart / bs + brow / bs; /* global block-row index */
        PetscCall(MatSetValues(Gmat, 1, &grow, n / bs, AJ, AA, INSERT_VALUES));
      }
      // off-diag
      if (ismpiaij) {
        Mat_MPIAIJ        *aij = (Mat_MPIAIJ *)Amat->data;
        const PetscScalar *vals;
        const PetscInt    *cols, *garray = aij->garray; /* local-to-global map for off-diag columns */
        PetscCheck(garray, PETSC_COMM_SELF, PETSC_ERR_USER, "No garray ?");
        for (PetscInt brow = 0, grow; brow < nloc * bs; brow += bs) { // block rows
          /* first pass: zero the accumulators and record global block-column indices */
          PetscCall(MatGetRow(b, brow, &ncols, &cols, NULL));
          for (int k = 0, cidx = 0; k < ncols; k += bs, cidx++) {
            AA[k / bs] = 0;
            AJ[cidx]   = garray[cols[k]] / bs;
          }
          nc = ncols / bs;
          PetscCall(MatRestoreRow(b, brow, &ncols, &cols, NULL));
          /* second pass: accumulate |Re(a_ij)| over each bs x bs block */
          for (int ii = 0; ii < bs; ii++) { // rows in block
            PetscCall(MatGetRow(b, brow + ii, &ncols, &cols, &vals));
            for (int k = 0; k < ncols; k += bs) {
              for (int jj = 0; jj < bs; jj++) { // cols in block
                AA[k / bs] += PetscAbs(PetscRealPart(vals[k + jj]));
              }
            }
            PetscCall(MatRestoreRow(b, brow + ii, &ncols, &cols, &vals));
          }
          grow = Istart / bs + brow / bs;
          PetscCall(MatSetValues(Gmat, 1, &grow, nc, AJ, AA, INSERT_VALUES));
        }
      }
      PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY));
      PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY));
    } else {
      const PetscScalar *vals;
      const PetscInt    *idx;
      PetscInt          *d_nnz, *o_nnz, *w0, *w1, *w2;
    old_bs:
      /*
       Determine the preallocation needed for the scalar matrix derived from the vector matrix.
      */
      PetscCall(PetscInfo(Amat, "OLD bs>1 CreateGraph\n"));
      PetscCall(PetscMalloc2(nloc, &d_nnz, isseqaij ? 0 : nloc, &o_nnz));
      if (isseqaij) {
        PetscInt max_d_nnz;
        /*
         Determine exact preallocation count for (sequential) scalar matrix
        */
        PetscCall(MatSeqAIJGetMaxRowNonzeros(Amat, &max_d_nnz));
        max_d_nnz = PetscMin(nloc, bs * max_d_nnz); /* upper bound on a collapsed row's length */
        PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2));
        for (Ii = 0, jj = 0; Ii < Iend; Ii += bs, jj++) { PetscCall(MatCollapseRows(Amat, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL)); }
        PetscCall(PetscFree3(w0, w1, w2));
      } else if (ismpiaij) {
        Mat             Daij, Oaij;
        const PetscInt *garray;
        PetscInt        max_d_nnz;
        PetscCall(MatMPIAIJGetSeqAIJ(Amat, &Daij, &Oaij, &garray));
        /*
         Determine exact preallocation count for diagonal block portion of scalar matrix
        */
        PetscCall(MatSeqAIJGetMaxRowNonzeros(Daij, &max_d_nnz));
        max_d_nnz = PetscMin(nloc, bs * max_d_nnz);
        PetscCall(PetscMalloc3(max_d_nnz, &w0, max_d_nnz, &w1, max_d_nnz, &w2));
        for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) { PetscCall(MatCollapseRows(Daij, Ii, bs, w0, w1, w2, &d_nnz[jj], NULL)); }
        PetscCall(PetscFree3(w0, w1, w2));
        /*
         Over estimate (usually grossly over), preallocation count for off-diagonal portion of scalar matrix
        */
        for (Ii = 0, jj = 0; Ii < Iend - Istart; Ii += bs, jj++) {
          o_nnz[jj] = 0;
          for (kk = 0; kk < bs; kk++) { /* rows that get collapsed to a single row */
            PetscCall(MatGetRow(Oaij, Ii + kk, &ncols, NULL, NULL));
            o_nnz[jj] += ncols;
            PetscCall(MatRestoreRow(Oaij, Ii + kk, &ncols, NULL, NULL));
          }
          if (o_nnz[jj] > (NN / bs - nloc)) o_nnz[jj] = NN / bs - nloc; /* clamp to number of non-owned graph columns */
        }
      } else SETERRQ(comm, PETSC_ERR_USER, "Require AIJ matrix type");
      /* get scalar copy (norms) of matrix */
      PetscCall(MatSeqAIJSetPreallocation(Gmat, 0, d_nnz));
      PetscCall(MatMPIAIJSetPreallocation(Gmat, 0, d_nnz, 0, o_nnz));
      PetscCall(PetscFree2(d_nnz, o_nnz));
      /* accumulate |Re(a_ij)| of each scalar entry into its collapsed (block) position */
      for (Ii = Istart; Ii < Iend; Ii++) {
        PetscInt dest_row = Ii / bs;
        PetscCall(MatGetRow(Amat, Ii, &ncols, &idx, &vals));
        for (jj = 0; jj < ncols; jj++) {
          PetscInt    dest_col = idx[jj] / bs;
          PetscScalar sv       = PetscAbs(PetscRealPart(vals[jj]));
          PetscCall(MatSetValues(Gmat, 1, &dest_row, 1, &dest_col, &sv, ADD_VALUES));
        }
        PetscCall(MatRestoreRow(Amat, Ii, &ncols, &idx, &vals));
      }
      PetscCall(MatAssemblyBegin(Gmat, MAT_FINAL_ASSEMBLY));
      PetscCall(MatAssemblyEnd(Gmat, MAT_FINAL_ASSEMBLY));
    }
  } else {
    /* TODO GPU: optimization proposal, each class provides fast implementation of this
       procedure via MatAbs API */
    /* just copy scalar matrix & abs() */
    PetscCall(MatDuplicate(Amat, MAT_COPY_VALUES, &Gmat));
    if (isseqaij) {
      a = Gmat;
      b = NULL;
    } else {
      Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data;
      a = d->A;
      b = d->B;
    }
    /* abs */
    for (c = a, kk = 0; c && kk < 2; c = b, kk++) {
      MatInfo      info;
      PetscScalar *avals;
      PetscCall(MatGetInfo(c, MAT_LOCAL, &info));
      PetscCall(MatSeqAIJGetArray(c, &avals));
      for (int jj = 0; jj < info.nz_used; jj++) avals[jj] = PetscAbsScalar(avals[jj]);
      PetscCall(MatSeqAIJRestoreArray(c, &avals));
    }
  }
  if (symmetrize) {
    PetscBool isset, issym;
    PetscCall(MatIsSymmetricKnown(Amat, &isset, &issym));
    if (!isset || !issym) {
      /* G <- G + G^T to force symmetry */
      Mat matTrans;
      PetscCall(MatTranspose(Gmat, MAT_INITIAL_MATRIX, &matTrans));
      PetscCall(MatAXPY(Gmat, 1.0, matTrans, Gmat->structurally_symmetric ? SAME_NONZERO_PATTERN : DIFFERENT_NONZERO_PATTERN));
      PetscCall(MatDestroy(&matTrans));
    }
    PetscCall(MatSetOption(Gmat, MAT_SYMMETRIC, PETSC_TRUE));
  } else {
    PetscCall(MatPropagateSymmetryOptions(Amat, Gmat));
  }
  if (scale) {
    /* scale c for all diagonal values = 1 or -1 */
    Vec diag;
    PetscCall(MatCreateVecs(Gmat, &diag, NULL));
    PetscCall(MatGetDiagonal(Gmat, diag));
    PetscCall(VecReciprocal(diag));
    PetscCall(VecSqrtAbs(diag));
    PetscCall(MatDiagonalScale(Gmat, diag, diag)); /* G <- D^{-1/2} G D^{-1/2} */
    PetscCall(VecDestroy(&diag));
  }
  PetscCall(MatViewFromOptions(Gmat, NULL, "-mat_graph_view"));
  *a_Gmat = Gmat;
  PetscFunctionReturn(0);
}

/* Drop entries of graph Gmat whose magnitude (of the real part) is <= vfilter,
   returning the compressed graph in *filteredG. */
PETSC_INTERN PetscErrorCode MatFilter_AIJ(Mat Gmat, PetscReal vfilter, Mat *filteredG) {
  PetscInt           Istart, Iend, ncols, nnz0, nnz1, NN, MM, nloc; /* nnz0/nnz1: nnz before/after filtering */
  Mat                tGmat;
  MPI_Comm           comm;
  const PetscScalar *vals;
  const PetscInt    *idx;
  PetscInt          *d_nnz, *o_nnz, kk, *garray = NULL, *AJ, maxcols = 0;
  MatScalar         *AA; // this is checked in graph
  PetscBool          isseqaij;
  Mat                a, b, c;
  MatType            jtype;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)Gmat, &comm));
  PetscCall(PetscObjectBaseTypeCompare((PetscObject)Gmat, MATSEQAIJ, &isseqaij));
  PetscCall(MatGetType(Gmat, &jtype));
  PetscCall(MatCreate(comm, &tGmat));
  PetscCall(MatSetType(tGmat, jtype));

  /* TODO GPU: this can be called when filter = 0 -> Probably provide MatAIJThresholdCompress that compresses the entries below a threshold?
     Also, if the matrix is symmetric, can we skip this operation? It can be very expensive on large matrices.
*/ 7772 7773 // global sizes 7774 PetscCall(MatGetSize(Gmat, &MM, &NN)); 7775 PetscCall(MatGetOwnershipRange(Gmat, &Istart, &Iend)); 7776 nloc = Iend - Istart; 7777 PetscCall(PetscMalloc2(nloc, &d_nnz, nloc, &o_nnz)); 7778 if (isseqaij) { 7779 a = Gmat; 7780 b = NULL; 7781 } else { 7782 Mat_MPIAIJ *d = (Mat_MPIAIJ *)Gmat->data; 7783 a = d->A; 7784 b = d->B; 7785 garray = d->garray; 7786 } 7787 /* Determine upper bound on non-zeros needed in new filtered matrix */ 7788 for (PetscInt row = 0; row < nloc; row++) { 7789 PetscCall(MatGetRow(a, row, &ncols, NULL, NULL)); 7790 d_nnz[row] = ncols; 7791 if (ncols > maxcols) maxcols = ncols; 7792 PetscCall(MatRestoreRow(a, row, &ncols, NULL, NULL)); 7793 } 7794 if (b) { 7795 for (PetscInt row = 0; row < nloc; row++) { 7796 PetscCall(MatGetRow(b, row, &ncols, NULL, NULL)); 7797 o_nnz[row] = ncols; 7798 if (ncols > maxcols) maxcols = ncols; 7799 PetscCall(MatRestoreRow(b, row, &ncols, NULL, NULL)); 7800 } 7801 } 7802 PetscCall(MatSetSizes(tGmat, nloc, nloc, MM, MM)); 7803 PetscCall(MatSetBlockSizes(tGmat, 1, 1)); 7804 PetscCall(MatSeqAIJSetPreallocation(tGmat, 0, d_nnz)); 7805 PetscCall(MatMPIAIJSetPreallocation(tGmat, 0, d_nnz, 0, o_nnz)); 7806 PetscCall(MatSetOption(tGmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 7807 PetscCall(PetscFree2(d_nnz, o_nnz)); 7808 // 7809 PetscCall(PetscMalloc2(maxcols, &AA, maxcols, &AJ)); 7810 nnz0 = nnz1 = 0; 7811 for (c = a, kk = 0; c && kk < 2; c = b, kk++) { 7812 for (PetscInt row = 0, grow = Istart, ncol_row, jj; row < nloc; row++, grow++) { 7813 PetscCall(MatGetRow(c, row, &ncols, &idx, &vals)); 7814 for (ncol_row = jj = 0; jj < ncols; jj++, nnz0++) { 7815 PetscScalar sv = PetscAbs(PetscRealPart(vals[jj])); 7816 if (PetscRealPart(sv) > vfilter) { 7817 nnz1++; 7818 PetscInt cid = idx[jj] + Istart; //diag 7819 if (c != a) cid = garray[idx[jj]]; 7820 AA[ncol_row] = vals[jj]; 7821 AJ[ncol_row] = cid; 7822 ncol_row++; 7823 } 7824 } 7825 PetscCall(MatRestoreRow(c, row, &ncols, &idx, &vals)); 7826 
      PetscCall(MatSetValues(tGmat, 1, &grow, ncol_row, AJ, AA, INSERT_VALUES));
    }
  }
  PetscCall(PetscFree2(AA, AJ));
  /* Finish assembly of the filtered graph and carry over symmetry information from the original */
  PetscCall(MatAssemblyBegin(tGmat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(tGmat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatPropagateSymmetryOptions(Gmat, tGmat)); /* Normal Mat options are not relevant ? */

  /* Report what fraction of the nonzeros survived the threshold filter */
  PetscCall(PetscInfo(tGmat, "\t %g%% nnz after filtering, with threshold %g, %g nnz ave. (N=%" PetscInt_FMT ", max row size %d)\n", (!nnz0) ? 1. : 100. * (double)nnz1 / (double)nnz0, (double)vfilter, (!nloc) ? 1. : (double)nnz0 / (double)nloc, MM, (int)maxcols));

  *filteredG = tGmat;
  PetscCall(MatViewFromOptions(tGmat, NULL, "-mat_filter_graph_view"));
  PetscFunctionReturn(0);
}

/*
    Special version of MatSetValues() for MATMPIAIJ, for direct calls from Fortran
    (bypasses the usual generated Fortran stub layer)
*/
#include <petsc/private/fortranimpl.h>

/* Change these macros so they can be used in a void function */
/* Identical to PetscCallVoid, except it assigns to *_ierr */
#undef PetscCall
#define PetscCall(...) \
  do { \
    PetscErrorCode ierr_msv_mpiaij = __VA_ARGS__; \
    if (PetscUnlikely(ierr_msv_mpiaij)) { \
      *_ierr = PetscError(PETSC_COMM_SELF, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr_msv_mpiaij, PETSC_ERROR_REPEAT, " "); \
      return; \
    } \
  } while (0)

#undef SETERRQ
#define SETERRQ(comm, ierr, ...) \
  do { \
    *_ierr = PetscError(comm, __LINE__, PETSC_FUNCTION_NAME, __FILE__, ierr, PETSC_ERROR_INITIAL, __VA_ARGS__); \
    return; \
  } while (0)

/* Map the C symbol to whatever name the Fortran compiler expects */
#if defined(PETSC_HAVE_FORTRAN_CAPS)
  #define matsetvaluesmpiaij_ MATSETVALUESMPIAIJ
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
  #define matsetvaluesmpiaij_ matsetvaluesmpiaij
#else
#endif

/*
  matsetvaluesmpiaij_ - Fortran-callable MatSetValues() for MATMPIAIJ matrices.

  All arguments arrive as pointers (Fortran pass-by-reference); the error code is
  returned through *_ierr rather than as a function result, which is why PetscCall()
  and SETERRQ() are redefined above to assign *_ierr and return. Entries whose
  global column falls in this process's diagonal block go through
  MatSetValues_SeqAIJ_A_Private(), local off-diagonal entries through
  MatSetValues_SeqAIJ_B_Private(), and rows owned by other processes are stashed
  for communication at assembly time.
*/
PETSC_EXTERN void matsetvaluesmpiaij_(Mat *mmat, PetscInt *mm, const PetscInt im[], PetscInt *mn, const PetscInt in[], const PetscScalar v[], InsertMode *maddv, PetscErrorCode *_ierr) {
  Mat         mat  = *mmat;
  PetscInt    m    = *mm, n = *mn;
  InsertMode  addv = *maddv;
  Mat_MPIAIJ *aij  = (Mat_MPIAIJ *)mat->data;
  PetscScalar value;

  MatCheckPreallocated(mat, 1);
  /* The first call fixes the insert mode; mixing ADD_VALUES and INSERT_VALUES is an error */
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
  else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values");
  {
    PetscInt  i, j, rstart = mat->rmap->rstart, rend = mat->rmap->rend;
    PetscInt  cstart = mat->cmap->rstart, cend = mat->cmap->rend, row, col;
    PetscBool roworiented = aij->roworiented;

    /* These exact variable names are required by the MatSetValues_SeqAIJ_{A,B}_Private() macros used below */
    Mat         A     = aij->A;
    Mat_SeqAIJ *a     = (Mat_SeqAIJ *)A->data;
    PetscInt   *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
    MatScalar  *aa;
    PetscBool   ignorezeroentries = (((a->ignorezeroentries) && (addv == ADD_VALUES)) ? PETSC_TRUE : PETSC_FALSE);
    Mat         B     = aij->B;
    Mat_SeqAIJ *b     = (Mat_SeqAIJ *)B->data;
    PetscInt   *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j, bm = aij->B->rmap->n, am = aij->A->rmap->n;
    MatScalar  *ba;
    /* This variable below is only for the PETSC_HAVE_VIENNACL or PETSC_HAVE_CUDA cases, but we define it in all cases because we
     * cannot use "#if defined" inside a macro. */
    PETSC_UNUSED PetscBool inserted = PETSC_FALSE;

    PetscInt  *rp1, *rp2, ii, nrow1, nrow2, _i, rmax1, rmax2, N, low1, high1, low2, high2, t, lastcol1, lastcol2;
    PetscInt   nonew = a->nonew;
    MatScalar *ap1, *ap2;

    PetscFunctionBegin;
    PetscCall(MatSeqAIJGetArray(A, &aa));
    PetscCall(MatSeqAIJGetArray(B, &ba));
    for (i = 0; i < m; i++) {
      if (im[i] < 0) continue; /* negative row indices are silently ignored, matching MatSetValues() */
      PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
      if (im[i] >= rstart && im[i] < rend) {
        /* Locally owned row: initialize the per-row search state (row pointers and low/high bounds)
           for the diagonal block (suffix 1) and the off-diagonal block (suffix 2) */
        row      = im[i] - rstart;
        lastcol1 = -1;
        rp1      = aj + ai[row];
        ap1      = aa + ai[row];
        rmax1    = aimax[row];
        nrow1    = ailen[row];
        low1     = 0;
        high1    = nrow1;
        lastcol2 = -1;
        rp2      = bj + bi[row];
        ap2      = ba + bi[row];
        rmax2    = bimax[row];
        nrow2    = bilen[row];
        low2     = 0;
        high2    = nrow2;

        for (j = 0; j < n; j++) {
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          if (ignorezeroentries && value == 0.0 && (addv == ADD_VALUES) && im[i] != in[j]) continue;
          if (in[j] >= cstart && in[j] < cend) {
            /* Column lies in this process's diagonal block */
            col = in[j] - cstart;
            MatSetValues_SeqAIJ_A_Private(row, col, value, addv, im[i], in[j]);
          } else if (in[j] < 0) continue;
          else if (PetscUnlikelyDebug(in[j] >= mat->cmap->N)) {
            /* extra brace on SETERRQ() is required for --with-errorchecking=0 - due to the next 'else' clause */
            SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          } else {
            /* Off-diagonal block: translate the global column to a local index via the column map */
            if (mat->was_assembled) {
              if (!aij->colmap) { PetscCall(MatCreateColmap_MPIAIJ_Private(mat)); }
#if defined(PETSC_USE_CTABLE)
              PetscCall(PetscTableFind(aij->colmap, in[j] + 1, &col));
              col--;
#else
              col = aij->colmap[in[j]] - 1; /* colmap entries are shifted by one; col < 0 means the column is not in the pattern */
#endif
              if (col < 0 && !((Mat_SeqAIJ *)(aij->A->data))->nonew) {
                /* Column not in the current off-diagonal pattern and new nonzeros are allowed:
                   disassemble the matrix back to global column numbering */
                PetscCall(MatDisAssemble_MPIAIJ(mat));
                col = in[j];
                /* Reinitialize the variables required by MatSetValues_SeqAIJ_B_Private() */
                B        = aij->B;
                b        = (Mat_SeqAIJ *)B->data;
                bimax    = b->imax;
                bi       = b->i;
                bilen    = b->ilen;
                bj       = b->j;
                rp2      = bj + bi[row];
                ap2      = ba + bi[row];
                rmax2    = bimax[row];
                nrow2    = bilen[row];
                low2     = 0;
                high2    = nrow2;
                bm       = aij->B->rmap->n;
                ba       = b->a; /* NOTE(review): reads b->a directly instead of MatSeqAIJGetArray() — confirm this is intended for device backends */
                inserted = PETSC_FALSE;
              }
            } else col = in[j];
            MatSetValues_SeqAIJ_B_Private(row, col, value, addv, im[i], in[j]);
          }
        }
      } else if (!aij->donotstash) {
        /* Row owned by another process: stash the values for communication during assembly */
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, (PetscBool)(ignorezeroentries && (addv == ADD_VALUES))));
        }
      }
    }
    PetscCall(MatSeqAIJRestoreArray(A, &aa));
    PetscCall(MatSeqAIJRestoreArray(B, &ba));
  }
  PetscFunctionReturnVoid();
}

/* Undefining these here since they were redefined from their original definition above! No
 * other PETSc functions should be defined past this point, as it is impossible to recover the
 * original definitions */
#undef PetscCall
#undef SETERRQ