1 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/ 2 3 #include <petsc/private/hashseti.h> 4 #include <petscblaslapack.h> 5 #include <petscsf.h> 6 7 static PetscErrorCode MatDestroy_MPIBAIJ(Mat mat) 8 { 9 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 10 11 PetscFunctionBegin; 12 PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N)); 13 PetscCall(MatStashDestroy_Private(&mat->stash)); 14 PetscCall(MatStashDestroy_Private(&mat->bstash)); 15 PetscCall(MatDestroy(&baij->A)); 16 PetscCall(MatDestroy(&baij->B)); 17 #if defined(PETSC_USE_CTABLE) 18 PetscCall(PetscHMapIDestroy(&baij->colmap)); 19 #else 20 PetscCall(PetscFree(baij->colmap)); 21 #endif 22 PetscCall(PetscFree(baij->garray)); 23 PetscCall(VecDestroy(&baij->lvec)); 24 PetscCall(VecScatterDestroy(&baij->Mvctx)); 25 PetscCall(PetscFree2(baij->rowvalues, baij->rowindices)); 26 PetscCall(PetscFree(baij->barray)); 27 PetscCall(PetscFree2(baij->hd, baij->ht)); 28 PetscCall(PetscFree(baij->rangebs)); 29 PetscCall(PetscFree(mat->data)); 30 31 PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL)); 32 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL)); 33 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL)); 34 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocation_C", NULL)); 35 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocationCSR_C", NULL)); 36 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL)); 37 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetHashTableFactor_C", NULL)); 38 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpisbaij_C", NULL)); 39 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiadj_C", NULL)); 40 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiaij_C", NULL)); 41 #if defined(PETSC_HAVE_HYPRE) 42 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_hypre_C", NULL)); 43 #endif 44 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_is_C", NULL)); 45 PetscFunctionReturn(PETSC_SUCCESS); 46 } 47 48 /* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and MatAssemblyEnd_MPI_Hash() */ 49 #define TYPE BAIJ 50 #include "../src/mat/impls/aij/mpi/mpihashmat.h" 51 #undef TYPE 52 53 #if defined(PETSC_HAVE_HYPRE) 54 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *); 55 #endif 56 57 static PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A, Vec v, PetscInt idx[]) 58 { 59 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 60 PetscInt i, *idxb = NULL, m = A->rmap->n, bs = A->cmap->bs; 61 PetscScalar *va, *vv; 62 Vec vB, vA; 63 const PetscScalar *vb; 64 65 PetscFunctionBegin; 66 PetscCall(MatCreateVecs(a->A, NULL, &vA)); 67 PetscCall(MatGetRowMaxAbs(a->A, vA, idx)); 68 69 PetscCall(VecGetArrayWrite(vA, &va)); 70 if (idx) { 71 for (i = 0; i < m; i++) { 72 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 73 } 74 } 75 76 PetscCall(MatCreateVecs(a->B, NULL, &vB)); 77 PetscCall(PetscMalloc1(m, &idxb)); 78 PetscCall(MatGetRowMaxAbs(a->B, vB, idxb)); 79 80 PetscCall(VecGetArrayWrite(v, &vv)); 81 PetscCall(VecGetArrayRead(vB, &vb)); 82 for (i = 0; i < m; i++) { 83 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 84 vv[i] = vb[i]; 85 if (idx) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs); 86 } else { 87 vv[i] = va[i]; 
      if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > bs * a->garray[idxb[i] / bs] + (idxb[i] % bs)) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs);
    }
  }
  PetscCall(VecRestoreArrayWrite(v, &vv));
  PetscCall(VecRestoreArrayWrite(vA, &va));
  PetscCall(VecRestoreArrayRead(vB, &vb));
  PetscCall(PetscFree(idxb));
  PetscCall(VecDestroy(&vA));
  PetscCall(VecDestroy(&vB));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatGetRowSumAbs_MPIBAIJ(Mat A, Vec v)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
  Vec          vB, vA;

  PetscFunctionBegin;
  PetscCall(MatCreateVecs(a->A, NULL, &vA));
  PetscCall(MatGetRowSumAbs(a->A, vA));
  PetscCall(MatCreateVecs(a->B, NULL, &vB));
  PetscCall(MatGetRowSumAbs(a->B, vB));
  PetscCall(VecAXPY(vA, 1.0, vB));
  PetscCall(VecDestroy(&vB));
  PetscCall(VecCopy(vA, v));
  PetscCall(VecDestroy(&vA));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatStoreValues(aij->A));
  PetscCall(MatStoreValues(aij->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatRetrieveValues(aij->A));
  PetscCall(MatRetrieveValues(aij->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix. This is done in a non-scalable way since the
   length of colmap equals the number of global block columns.
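
   As a rough illustration (the numbers here are made up for this comment, they
   do not come from any particular run): with bs = 2 and garray = {3, 7, 11},
   the off-diagonal block B has three block columns corresponding to global
   block columns 3, 7 and 11, and the loop below produces

      colmap[3] = 0*bs + 1 = 1,   colmap[7] = 1*bs + 1 = 3,   colmap[11] = 2*bs + 1 = 5

   (shifted by one so that 0 can mean "this global block column is not present
   on this process").  MatSetValues_MPIBAIJ() then recovers the local point
   column of a global column gcol as colmap[gcol/bs] - 1 + gcol%bs.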
142 */ 143 PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat) 144 { 145 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 146 Mat_SeqBAIJ *B = (Mat_SeqBAIJ *)baij->B->data; 147 PetscInt nbs = B->nbs, i, bs = mat->rmap->bs; 148 149 PetscFunctionBegin; 150 #if defined(PETSC_USE_CTABLE) 151 PetscCall(PetscHMapICreateWithSize(baij->nbs, &baij->colmap)); 152 for (i = 0; i < nbs; i++) PetscCall(PetscHMapISet(baij->colmap, baij->garray[i] + 1, i * bs + 1)); 153 #else 154 PetscCall(PetscCalloc1(baij->Nbs + 1, &baij->colmap)); 155 for (i = 0; i < nbs; i++) baij->colmap[baij->garray[i]] = i * bs + 1; 156 #endif 157 PetscFunctionReturn(PETSC_SUCCESS); 158 } 159 160 #define MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, orow, ocol) \ 161 do { \ 162 brow = row / bs; \ 163 rp = PetscSafePointerPlusOffset(aj, ai[brow]); \ 164 ap = PetscSafePointerPlusOffset(aa, bs2 * ai[brow]); \ 165 rmax = aimax[brow]; \ 166 nrow = ailen[brow]; \ 167 bcol = col / bs; \ 168 ridx = row % bs; \ 169 cidx = col % bs; \ 170 low = 0; \ 171 high = nrow; \ 172 while (high - low > 3) { \ 173 t = (low + high) / 2; \ 174 if (rp[t] > bcol) high = t; \ 175 else low = t; \ 176 } \ 177 for (_i = low; _i < high; _i++) { \ 178 if (rp[_i] > bcol) break; \ 179 if (rp[_i] == bcol) { \ 180 bap = ap + bs2 * _i + bs * cidx + ridx; \ 181 if (addv == ADD_VALUES) *bap += value; \ 182 else *bap = value; \ 183 goto a_noinsert; \ 184 } \ 185 } \ 186 if (a->nonew == 1) goto a_noinsert; \ 187 PetscCheck(a->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 188 MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, aimax, a->nonew, MatScalar); \ 189 N = nrow++ - 1; \ 190 /* shift up all the later entries in this row */ \ 191 PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \ 192 PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \ 193 PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \ 194 rp[_i] = bcol; \ 195 ap[bs2 * _i + bs * cidx + ridx] = value; \ 196 a_noinsert:; \ 197 ailen[brow] = nrow; \ 198 } while (0) 199 200 #define MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, orow, ocol) \ 201 do { \ 202 brow = row / bs; \ 203 rp = PetscSafePointerPlusOffset(bj, bi[brow]); \ 204 ap = PetscSafePointerPlusOffset(ba, bs2 * bi[brow]); \ 205 rmax = bimax[brow]; \ 206 nrow = bilen[brow]; \ 207 bcol = col / bs; \ 208 ridx = row % bs; \ 209 cidx = col % bs; \ 210 low = 0; \ 211 high = nrow; \ 212 while (high - low > 3) { \ 213 t = (low + high) / 2; \ 214 if (rp[t] > bcol) high = t; \ 215 else low = t; \ 216 } \ 217 for (_i = low; _i < high; _i++) { \ 218 if (rp[_i] > bcol) break; \ 219 if (rp[_i] == bcol) { \ 220 bap = ap + bs2 * _i + bs * cidx + ridx; \ 221 if (addv == ADD_VALUES) *bap += value; \ 222 else *bap = value; \ 223 goto b_noinsert; \ 224 } \ 225 } \ 226 if (b->nonew == 1) goto b_noinsert; \ 227 PetscCheck(b->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 228 MatSeqXAIJReallocateAIJ(B, b->mbs, bs2, nrow, brow, bcol, rmax, ba, bi, bj, rp, ap, bimax, b->nonew, MatScalar); \ 229 N = nrow++ - 1; \ 230 /* shift up all the later entries in this row */ \ 231 PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \ 232 PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \ 233 
PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \ 234 rp[_i] = bcol; \ 235 ap[bs2 * _i + bs * cidx + ridx] = value; \ 236 b_noinsert:; \ 237 bilen[brow] = nrow; \ 238 } while (0) 239 240 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) 241 { 242 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 243 MatScalar value; 244 PetscBool roworiented = baij->roworiented; 245 PetscInt i, j, row, col; 246 PetscInt rstart_orig = mat->rmap->rstart; 247 PetscInt rend_orig = mat->rmap->rend, cstart_orig = mat->cmap->rstart; 248 PetscInt cend_orig = mat->cmap->rend, bs = mat->rmap->bs; 249 250 /* Some Variables required in the macro */ 251 Mat A = baij->A; 252 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 253 PetscInt *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j; 254 MatScalar *aa = a->a; 255 256 Mat B = baij->B; 257 Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)B->data; 258 PetscInt *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j; 259 MatScalar *ba = b->a; 260 261 PetscInt *rp, ii, nrow, _i, rmax, N, brow, bcol; 262 PetscInt low, high, t, ridx, cidx, bs2 = a->bs2; 263 MatScalar *ap, *bap; 264 265 PetscFunctionBegin; 266 for (i = 0; i < m; i++) { 267 if (im[i] < 0) continue; 268 PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1); 269 if (im[i] >= rstart_orig && im[i] < rend_orig) { 270 row = im[i] - rstart_orig; 271 for (j = 0; j < n; j++) { 272 if (in[j] >= cstart_orig && in[j] < cend_orig) { 273 col = in[j] - cstart_orig; 274 if (roworiented) value = v[i * n + j]; 275 else value = v[i + j * m]; 276 MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, im[i], in[j]); 277 } else if (in[j] < 0) { 278 continue; 279 } else { 280 PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1); 281 if (mat->was_assembled) { 282 if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 283 #if defined(PETSC_USE_CTABLE) 284 PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] / bs + 1, 0, &col)); 285 col = col - 1; 286 #else 287 col = baij->colmap[in[j] / bs] - 1; 288 #endif 289 if (col < 0 && !((Mat_SeqBAIJ *)baij->B->data)->nonew) { 290 PetscCall(MatDisAssemble_MPIBAIJ(mat)); 291 col = in[j]; 292 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 293 B = baij->B; 294 b = (Mat_SeqBAIJ *)B->data; 295 bimax = b->imax; 296 bi = b->i; 297 bilen = b->ilen; 298 bj = b->j; 299 ba = b->a; 300 } else { 301 PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]); 302 col += in[j] % bs; 303 } 304 } else col = in[j]; 305 if (roworiented) value = v[i * n + j]; 306 else value = v[i + j * m]; 307 MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, im[i], in[j]); 308 /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */ 309 } 310 } 311 } else { 312 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]); 313 if (!baij->donotstash) { 314 mat->assembled = PETSC_FALSE; 315 if (roworiented) { 316 PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE)); 317 } else { 318 
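          /* column-oriented input: the values of row im[i] are v[i], v[i + m], v[i + 2*m], ...,
             so the stash is handed the base pointer v + i and the stride m between consecutive entries of this row */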
PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE)); 319 } 320 } 321 } 322 } 323 PetscFunctionReturn(PETSC_SUCCESS); 324 } 325 326 static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol) 327 { 328 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 329 PetscInt *rp, low, high, t, ii, jj, nrow, i, rmax, N; 330 PetscInt *imax = a->imax, *ai = a->i, *ailen = a->ilen; 331 PetscInt *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs; 332 PetscBool roworiented = a->roworiented; 333 const PetscScalar *value = v; 334 MatScalar *ap, *aa = a->a, *bap; 335 336 PetscFunctionBegin; 337 rp = aj + ai[row]; 338 ap = aa + bs2 * ai[row]; 339 rmax = imax[row]; 340 nrow = ailen[row]; 341 value = v; 342 low = 0; 343 high = nrow; 344 while (high - low > 7) { 345 t = (low + high) / 2; 346 if (rp[t] > col) high = t; 347 else low = t; 348 } 349 for (i = low; i < high; i++) { 350 if (rp[i] > col) break; 351 if (rp[i] == col) { 352 bap = ap + bs2 * i; 353 if (roworiented) { 354 if (is == ADD_VALUES) { 355 for (ii = 0; ii < bs; ii++) { 356 for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++; 357 } 358 } else { 359 for (ii = 0; ii < bs; ii++) { 360 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 361 } 362 } 363 } else { 364 if (is == ADD_VALUES) { 365 for (ii = 0; ii < bs; ii++, value += bs) { 366 for (jj = 0; jj < bs; jj++) bap[jj] += value[jj]; 367 bap += bs; 368 } 369 } else { 370 for (ii = 0; ii < bs; ii++, value += bs) { 371 for (jj = 0; jj < bs; jj++) bap[jj] = value[jj]; 372 bap += bs; 373 } 374 } 375 } 376 goto noinsert2; 377 } 378 } 379 if (nonew == 1) goto noinsert2; 380 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol); 381 MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar); 382 N = nrow++ - 1; 383 high++; 384 /* shift up all the later entries in this row */ 385 PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1)); 386 PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1))); 387 rp[i] = col; 388 bap = ap + bs2 * i; 389 if (roworiented) { 390 for (ii = 0; ii < bs; ii++) { 391 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 392 } 393 } else { 394 for (ii = 0; ii < bs; ii++) { 395 for (jj = 0; jj < bs; jj++) *bap++ = *value++; 396 } 397 } 398 noinsert2:; 399 ailen[row] = nrow; 400 PetscFunctionReturn(PETSC_SUCCESS); 401 } 402 403 /* 404 This routine should be optimized so that the block copy at ** Here a copy is required ** below is not needed 405 by passing additional stride information into the MatSetValuesBlocked_SeqBAIJ_Inlined() routine 406 */ 407 static PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) 408 { 409 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 410 const PetscScalar *value; 411 MatScalar *barray = baij->barray; 412 PetscBool roworiented = baij->roworiented; 413 PetscInt i, j, ii, jj, row, col, rstart = baij->rstartbs; 414 PetscInt rend = baij->rendbs, cstart = baij->cstartbs, stepval; 415 PetscInt cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2; 416 417 PetscFunctionBegin; 418 if (!barray) { 419 PetscCall(PetscMalloc1(bs2, &barray)); 420 baij->barray = barray; 421 } 422 423 if (roworiented) stepval = (n - 
1) * bs; 424 else stepval = (m - 1) * bs; 425 426 for (i = 0; i < m; i++) { 427 if (im[i] < 0) continue; 428 PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1); 429 if (im[i] >= rstart && im[i] < rend) { 430 row = im[i] - rstart; 431 for (j = 0; j < n; j++) { 432 /* If NumCol = 1 then a copy is not required */ 433 if ((roworiented) && (n == 1)) { 434 barray = (MatScalar *)v + i * bs2; 435 } else if ((!roworiented) && (m == 1)) { 436 barray = (MatScalar *)v + j * bs2; 437 } else { /* Here a copy is required */ 438 if (roworiented) { 439 value = v + (i * (stepval + bs) + j) * bs; 440 } else { 441 value = v + (j * (stepval + bs) + i) * bs; 442 } 443 for (ii = 0; ii < bs; ii++, value += bs + stepval) { 444 for (jj = 0; jj < bs; jj++) barray[jj] = value[jj]; 445 barray += bs; 446 } 447 barray -= bs2; 448 } 449 450 if (in[j] >= cstart && in[j] < cend) { 451 col = in[j] - cstart; 452 PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j])); 453 } else if (in[j] < 0) { 454 continue; 455 } else { 456 PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1); 457 if (mat->was_assembled) { 458 if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 459 460 #if defined(PETSC_USE_CTABLE) 461 PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col)); 462 col = col < 1 ? -1 : (col - 1) / bs; 463 #else 464 col = baij->colmap[in[j]] < 1 ? -1 : (baij->colmap[in[j]] - 1) / bs; 465 #endif 466 if (col < 0 && !((Mat_SeqBAIJ *)baij->B->data)->nonew) { 467 PetscCall(MatDisAssemble_MPIBAIJ(mat)); 468 col = in[j]; 469 } else PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new blocked indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]); 470 } else col = in[j]; 471 PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j])); 472 } 473 } 474 } else { 475 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]); 476 if (!baij->donotstash) { 477 if (roworiented) { 478 PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 479 } else { 480 PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 481 } 482 } 483 } 484 } 485 PetscFunctionReturn(PETSC_SUCCESS); 486 } 487 488 #define HASH_KEY 0.6180339887 489 #define HASH(size, key, tmp) (tmp = (key) * HASH_KEY, (PetscInt)((size) * (tmp - (PetscInt)tmp))) 490 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 491 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 492 static PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) 493 { 494 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 495 PetscBool roworiented = baij->roworiented; 496 PetscInt i, j, row, col; 497 PetscInt rstart_orig = mat->rmap->rstart; 498 PetscInt rend_orig = mat->rmap->rend, Nbs = baij->Nbs; 499 PetscInt h1, key, size = baij->ht_size, bs = mat->rmap->bs, *HT = baij->ht, idx; 500 PetscReal tmp; 501 MatScalar **HD = baij->hd, value; 502 PetscInt total_ct = baij->ht_total_ct, 
insert_ct = baij->ht_insert_ct;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (PetscDefined(USE_DEBUG)) {
      PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row");
      PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    }
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j = 0; j < n; j++) {
        col = in[j];
        if (roworiented) value = v[i * n + j];
        else value = v[i + j * m];
        /* Look up into the hash table */
        key = (row / bs) * Nbs + (col / bs) + 1;
        h1  = HASH(size, key, tmp);

        idx = h1;
        if (PetscDefined(USE_DEBUG)) {
          insert_ct++;
          total_ct++;
          if (HT[idx] != key) {
            for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++);
            if (idx == size) {
              for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++);
              PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
            }
          }
        } else if (HT[idx] != key) {
          for (idx = h1; (idx < size) && (HT[idx] != key); idx++);
          if (idx == size) {
            for (idx = 0; (idx < h1) && (HT[idx] != key); idx++);
            PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
          }
        }
        /* A hash table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx] + (col % bs) * bs + (row % bs)) += value;
        else *(HD[idx] + (col % bs) * bs + (row % bs)) = value;
      }
    } else if (!baij->donotstash) {
      if (roworiented) {
        PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE));
      } else {
        PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE));
      }
    }
  }
  if (PetscDefined(USE_DEBUG)) {
    baij->ht_total_ct  += total_ct;
    baij->ht_insert_ct += insert_ct;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIBAIJ       *baij        = (Mat_MPIBAIJ *)mat->data;
  PetscBool          roworiented = baij->roworiented;
  PetscInt           i, j, ii, jj, row, col;
  PetscInt           rstart = baij->rstartbs;
  PetscInt           rend = baij->rendbs, stepval, bs = mat->rmap->bs, bs2 = baij->bs2, nbs2 = n * bs2;
  PetscInt           h1, key, size = baij->ht_size, idx, *HT = baij->ht, Nbs = baij->Nbs;
  PetscReal          tmp;
  MatScalar        **HD = baij->hd, *baij_a;
  const PetscScalar *v_t, *value;
  PetscInt           total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct;

  PetscFunctionBegin;
  if (roworiented) stepval = (n - 1) * bs;
  else stepval = (m - 1) * bs;

  for (i = 0; i < m; i++) {
    if (PetscDefined(USE_DEBUG)) {
      PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row: %" PetscInt_FMT, im[i]);
      PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
    }
    row = im[i];
    v_t = v + i * nbs2;
    if (row >= rstart && row < rend) {
      for (j = 0; j < n; j++) {
        col = in[j];

        /* Look up into the hash table */
        key = row * Nbs + col + 1;
        h1  = HASH(size, key, tmp);

        idx = h1;
        if
(PetscDefined(USE_DEBUG)) { 591 total_ct++; 592 insert_ct++; 593 if (HT[idx] != key) { 594 for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++); 595 if (idx == size) { 596 for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++); 597 PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col); 598 } 599 } 600 } else if (HT[idx] != key) { 601 for (idx = h1; (idx < size) && (HT[idx] != key); idx++); 602 if (idx == size) { 603 for (idx = 0; (idx < h1) && (HT[idx] != key); idx++); 604 PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col); 605 } 606 } 607 baij_a = HD[idx]; 608 if (roworiented) { 609 /*value = v + i*(stepval+bs)*bs + j*bs;*/ 610 /* value = v + (i*(stepval+bs)+j)*bs; */ 611 value = v_t; 612 v_t += bs; 613 if (addv == ADD_VALUES) { 614 for (ii = 0; ii < bs; ii++, value += stepval) { 615 for (jj = ii; jj < bs2; jj += bs) baij_a[jj] += *value++; 616 } 617 } else { 618 for (ii = 0; ii < bs; ii++, value += stepval) { 619 for (jj = ii; jj < bs2; jj += bs) baij_a[jj] = *value++; 620 } 621 } 622 } else { 623 value = v + j * (stepval + bs) * bs + i * bs; 624 if (addv == ADD_VALUES) { 625 for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) { 626 for (jj = 0; jj < bs; jj++) baij_a[jj] += *value++; 627 } 628 } else { 629 for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) { 630 for (jj = 0; jj < bs; jj++) baij_a[jj] = *value++; 631 } 632 } 633 } 634 } 635 } else { 636 if (!baij->donotstash) { 637 if (roworiented) { 638 PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 639 } else { 640 PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 641 } 642 } 643 } 644 } 645 if (PetscDefined(USE_DEBUG)) { 646 baij->ht_total_ct += total_ct; 647 baij->ht_insert_ct += insert_ct; 648 } 649 PetscFunctionReturn(PETSC_SUCCESS); 650 } 651 652 static PetscErrorCode MatGetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[]) 653 { 654 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 655 PetscInt bs = mat->rmap->bs, i, j, bsrstart = mat->rmap->rstart, bsrend = mat->rmap->rend; 656 PetscInt bscstart = mat->cmap->rstart, bscend = mat->cmap->rend, row, col, data; 657 658 PetscFunctionBegin; 659 for (i = 0; i < m; i++) { 660 if (idxm[i] < 0) continue; /* negative row */ 661 PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1); 662 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 663 row = idxm[i] - bsrstart; 664 for (j = 0; j < n; j++) { 665 if (idxn[j] < 0) continue; /* negative column */ 666 PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1); 667 if (idxn[j] >= bscstart && idxn[j] < bscend) { 668 col = idxn[j] - bscstart; 669 PetscCall(MatGetValues_SeqBAIJ(baij->A, 1, &row, 1, &col, v + i * n + j)); 670 } else { 671 if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 672 #if defined(PETSC_USE_CTABLE) 673 PetscCall(PetscHMapIGetWithDefault(baij->colmap, idxn[j] / bs + 1, 0, &data)); 674 data--; 675 #else 676 data = baij->colmap[idxn[j] / bs] - 1; 677 #endif 678 if ((data < 0) || (baij->garray[data / bs] != idxn[j] / bs)) *(v + i * n + j) = 0.0; 
          else {
            col = data + idxn[j] % bs;
            PetscCall(MatGetValues_SeqBAIJ(baij->B, 1, &row, 1, &col, v + i * n + j));
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatNorm_MPIBAIJ(Mat mat, NormType type, PetscReal *nrm)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ *amat = (Mat_SeqBAIJ *)baij->A->data, *bmat = (Mat_SeqBAIJ *)baij->B->data;
  PetscInt     i, j, bs2 = baij->bs2, bs = baij->A->rmap->bs, nz, row, col;
  PetscReal    sum = 0.0;
  MatScalar   *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    PetscCall(MatNorm(baij->A, type, nrm));
  } else {
    if (type == NORM_FROBENIUS) {
      v  = amat->a;
      nz = amat->nz * bs2;
      for (i = 0; i < nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      v  = bmat->a;
      nz = bmat->nz * bs2;
      for (i = 0; i < nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      PetscCallMPI(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      *nrm = PetscSqrtReal(*nrm);
    } else if (type == NORM_1) { /* max column sum */
      PetscReal *tmp;
      PetscInt  *jj, *garray = baij->garray, cstart = baij->rstartbs;

      PetscCall(PetscCalloc1(mat->cmap->N, &tmp));
      v  = amat->a;
      jj = amat->j;
      for (i = 0; i < amat->nz; i++) {
        for (j = 0; j < bs; j++) {
          col = bs * (cstart + *jj) + j; /* column index */
          for (row = 0; row < bs; row++) {
            tmp[col] += PetscAbsScalar(*v);
            v++;
          }
        }
        jj++;
      }
      v  = bmat->a;
      jj = bmat->j;
      for (i = 0; i < bmat->nz; i++) {
        for (j = 0; j < bs; j++) {
          col = bs * garray[*jj] + j;
          for (row = 0; row < bs; row++) {
            tmp[col] += PetscAbsScalar(*v);
            v++;
          }
        }
        jj++;
      }
      PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, tmp, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      *nrm = 0.0;
      for (j = 0; j < mat->cmap->N; j++) {
        if (tmp[j] > *nrm) *nrm = tmp[j];
      }
      PetscCall(PetscFree(tmp));
    } else if (type == NORM_INFINITY) { /* max row sum */
      PetscReal *sums;
      PetscCall(PetscMalloc1(bs, &sums));
      sum = 0.0;
      for (j = 0; j < amat->mbs; j++) {
        for (row = 0; row < bs; row++) sums[row] = 0.0;
        v  = amat->a + bs2 * amat->i[j];
        nz = amat->i[j + 1] - amat->i[j];
        for (i = 0; i < nz; i++) {
          for (col = 0; col < bs; col++) {
            for (row = 0; row < bs; row++) {
              sums[row] += PetscAbsScalar(*v);
              v++;
            }
          }
        }
        v  = bmat->a + bs2 * bmat->i[j];
        nz = bmat->i[j + 1] - bmat->i[j];
        for (i = 0; i < nz; i++) {
          for (col = 0; col < bs; col++) {
            for (row = 0; row < bs; row++) {
              sums[row] += PetscAbsScalar(*v);
              v++;
            }
          }
        }
        for (row = 0; row < bs; row++) {
          if (sums[row] > sum) sum = sums[row];
        }
      }
      PetscCallMPI(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat)));
      PetscCall(PetscFree(sums));
    } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for this norm yet");
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   Creates the hash table and fills in its entries.
   This table is created only once.
   If new entries need to be added to the matrix
   then the hash table has to be destroyed and
   recreated.
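
   A sketch of how a lookup works (the sizes and keys below are illustrative
   only): each stored block (brow, bcol) is identified by the key

      key = brow*Nbs + bcol + 1

   and placed at h1 = HASH(size,key,tmp), i.e. floor(size * frac(key * 0.6180339887)).
   For example, with size = 11 and key = 25, key*0.6180339887 = 15.4508..., the
   fractional part is 0.4508..., so h1 = floor(11 * 0.4508...) = 4.  If slot 4 is
   already occupied the entry goes into slot 5, 6, ... wrapping around at size
   (linear probing); MatSetValues_MPIBAIJ_HT() searches in the same order.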
795 */ 796 static PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat, PetscReal factor) 797 { 798 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 799 Mat A = baij->A, B = baij->B; 800 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data; 801 PetscInt i, j, k, nz = a->nz + b->nz, h1, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j; 802 PetscInt ht_size, bs2 = baij->bs2, rstart = baij->rstartbs; 803 PetscInt cstart = baij->cstartbs, *garray = baij->garray, row, col, Nbs = baij->Nbs; 804 PetscInt *HT, key; 805 MatScalar **HD; 806 PetscReal tmp; 807 #if defined(PETSC_USE_INFO) 808 PetscInt ct = 0, max = 0; 809 #endif 810 811 PetscFunctionBegin; 812 if (baij->ht) PetscFunctionReturn(PETSC_SUCCESS); 813 814 baij->ht_size = (PetscInt)(factor * nz); 815 ht_size = baij->ht_size; 816 817 /* Allocate Memory for Hash Table */ 818 PetscCall(PetscCalloc2(ht_size, &baij->hd, ht_size, &baij->ht)); 819 HD = baij->hd; 820 HT = baij->ht; 821 822 /* Loop Over A */ 823 for (i = 0; i < a->mbs; i++) { 824 for (j = ai[i]; j < ai[i + 1]; j++) { 825 row = i + rstart; 826 col = aj[j] + cstart; 827 828 key = row * Nbs + col + 1; 829 h1 = HASH(ht_size, key, tmp); 830 for (k = 0; k < ht_size; k++) { 831 if (!HT[(h1 + k) % ht_size]) { 832 HT[(h1 + k) % ht_size] = key; 833 HD[(h1 + k) % ht_size] = a->a + j * bs2; 834 break; 835 #if defined(PETSC_USE_INFO) 836 } else { 837 ct++; 838 #endif 839 } 840 } 841 #if defined(PETSC_USE_INFO) 842 if (k > max) max = k; 843 #endif 844 } 845 } 846 /* Loop Over B */ 847 for (i = 0; i < b->mbs; i++) { 848 for (j = bi[i]; j < bi[i + 1]; j++) { 849 row = i + rstart; 850 col = garray[bj[j]]; 851 key = row * Nbs + col + 1; 852 h1 = HASH(ht_size, key, tmp); 853 for (k = 0; k < ht_size; k++) { 854 if (!HT[(h1 + k) % ht_size]) { 855 HT[(h1 + k) % ht_size] = key; 856 HD[(h1 + k) % ht_size] = b->a + j * bs2; 857 break; 858 #if defined(PETSC_USE_INFO) 859 } else { 860 ct++; 861 #endif 862 } 863 } 864 #if defined(PETSC_USE_INFO) 865 if (k > max) max = k; 866 #endif 867 } 868 } 869 870 /* Print Summary */ 871 #if defined(PETSC_USE_INFO) 872 for (i = 0, j = 0; i < ht_size; i++) { 873 if (HT[i]) j++; 874 } 875 PetscCall(PetscInfo(mat, "Average Search = %5.2g,max search = %" PetscInt_FMT "\n", (!j) ? 
0.0 : (double)(((PetscReal)(ct + j)) / j), max)); 876 #endif 877 PetscFunctionReturn(PETSC_SUCCESS); 878 } 879 880 static PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat, MatAssemblyType mode) 881 { 882 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 883 PetscInt nstash, reallocs; 884 885 PetscFunctionBegin; 886 if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS); 887 888 PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range)); 889 PetscCall(MatStashScatterBegin_Private(mat, &mat->bstash, baij->rangebs)); 890 PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs)); 891 PetscCall(PetscInfo(mat, "Stash has %" PetscInt_FMT " entries,uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs)); 892 PetscCall(MatStashGetInfo_Private(&mat->bstash, &nstash, &reallocs)); 893 PetscCall(PetscInfo(mat, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs)); 894 PetscFunctionReturn(PETSC_SUCCESS); 895 } 896 897 static PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat, MatAssemblyType mode) 898 { 899 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 900 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)baij->A->data; 901 PetscInt i, j, rstart, ncols, flg, bs2 = baij->bs2; 902 PetscInt *row, *col; 903 PetscBool r1, r2, r3, other_disassembled; 904 MatScalar *val; 905 PetscMPIInt n; 906 907 PetscFunctionBegin; 908 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 909 if (!baij->donotstash && !mat->nooffprocentries) { 910 while (1) { 911 PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg)); 912 if (!flg) break; 913 914 for (i = 0; i < n;) { 915 /* Now identify the consecutive vals belonging to the same row */ 916 for (j = i, rstart = row[j]; j < n; j++) { 917 if (row[j] != rstart) break; 918 } 919 if (j < n) ncols = j - i; 920 else ncols = n - i; 921 /* Now assemble all these values with a single function call */ 922 PetscCall(MatSetValues_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode)); 923 i = j; 924 } 925 } 926 PetscCall(MatStashScatterEnd_Private(&mat->stash)); 927 /* Now process the block-stash. Since the values are stashed column-oriented, 928 set the row-oriented flag to column-oriented, and after MatSetValues() 929 restore the original flags */ 930 r1 = baij->roworiented; 931 r2 = a->roworiented; 932 r3 = ((Mat_SeqBAIJ *)baij->B->data)->roworiented; 933 934 baij->roworiented = PETSC_FALSE; 935 a->roworiented = PETSC_FALSE; 936 ((Mat_SeqBAIJ *)baij->B->data)->roworiented = PETSC_FALSE; 937 while (1) { 938 PetscCall(MatStashScatterGetMesg_Private(&mat->bstash, &n, &row, &col, &val, &flg)); 939 if (!flg) break; 940 941 for (i = 0; i < n;) { 942 /* Now identify the consecutive vals belonging to the same row */ 943 for (j = i, rstart = row[j]; j < n; j++) { 944 if (row[j] != rstart) break; 945 } 946 if (j < n) ncols = j - i; 947 else ncols = n - i; 948 PetscCall(MatSetValuesBlocked_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i * bs2, mat->insertmode)); 949 i = j; 950 } 951 } 952 PetscCall(MatStashScatterEnd_Private(&mat->bstash)); 953 954 baij->roworiented = r1; 955 a->roworiented = r2; 956 ((Mat_SeqBAIJ *)baij->B->data)->roworiented = r3; 957 } 958 959 PetscCall(MatAssemblyBegin(baij->A, mode)); 960 PetscCall(MatAssemblyEnd(baij->A, mode)); 961 962 /* determine if any processor has disassembled, if so we must 963 also disassemble ourselves, in order that we may reassemble. 
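     (A process "disassembles" when MatSetValues() had to insert a value into an
     off-process block column that was not part of the previous nonzero pattern of B;
     MatDisAssemble_MPIBAIJ() then temporarily switches B back to global block column
     indices so that the communication pattern can be rebuilt.)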
*/ 964 /* 965 if nonzero structure of submatrix B cannot change then we know that 966 no processor disassembled thus we can skip this stuff 967 */ 968 if (!((Mat_SeqBAIJ *)baij->B->data)->nonew) { 969 PetscCallMPI(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 970 if (mat->was_assembled && !other_disassembled) PetscCall(MatDisAssemble_MPIBAIJ(mat)); 971 } 972 973 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIBAIJ(mat)); 974 PetscCall(MatAssemblyBegin(baij->B, mode)); 975 PetscCall(MatAssemblyEnd(baij->B, mode)); 976 977 #if defined(PETSC_USE_INFO) 978 if (baij->ht && mode == MAT_FINAL_ASSEMBLY) { 979 PetscCall(PetscInfo(mat, "Average Hash Table Search in MatSetValues = %5.2f\n", (double)((PetscReal)baij->ht_total_ct) / baij->ht_insert_ct)); 980 981 baij->ht_total_ct = 0; 982 baij->ht_insert_ct = 0; 983 } 984 #endif 985 if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 986 PetscCall(MatCreateHashTable_MPIBAIJ_Private(mat, baij->ht_fact)); 987 988 mat->ops->setvalues = MatSetValues_MPIBAIJ_HT; 989 mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 990 } 991 992 PetscCall(PetscFree2(baij->rowvalues, baij->rowindices)); 993 994 baij->rowvalues = NULL; 995 996 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 997 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ *)baij->A->data)->nonew) { 998 PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate; 999 PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat))); 1000 } 1001 PetscFunctionReturn(PETSC_SUCCESS); 1002 } 1003 1004 extern PetscErrorCode MatView_SeqBAIJ(Mat, PetscViewer); 1005 #include <petscdraw.h> 1006 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer) 1007 { 1008 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 1009 PetscMPIInt rank = baij->rank; 1010 PetscInt bs = mat->rmap->bs; 1011 PetscBool iascii, isdraw; 1012 PetscViewer sviewer; 1013 PetscViewerFormat format; 1014 1015 PetscFunctionBegin; 1016 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 1017 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1018 if (iascii) { 1019 PetscCall(PetscViewerGetFormat(viewer, &format)); 1020 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1021 MatInfo info; 1022 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 1023 PetscCall(MatGetInfo(mat, MAT_LOCAL, &info)); 1024 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 1025 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, 1026 mat->rmap->bs, info.memory)); 1027 PetscCall(MatGetInfo(baij->A, MAT_LOCAL, &info)); 1028 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used)); 1029 PetscCall(MatGetInfo(baij->B, MAT_LOCAL, &info)); 1030 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used)); 1031 PetscCall(PetscViewerFlush(viewer)); 1032 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 1033 PetscCall(PetscViewerASCIIPrintf(viewer, "Information on 
VecScatter used in matrix-vector product: \n")); 1034 PetscCall(VecScatterView(baij->Mvctx, viewer)); 1035 PetscFunctionReturn(PETSC_SUCCESS); 1036 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1037 PetscCall(PetscViewerASCIIPrintf(viewer, " block size is %" PetscInt_FMT "\n", bs)); 1038 PetscFunctionReturn(PETSC_SUCCESS); 1039 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1040 PetscFunctionReturn(PETSC_SUCCESS); 1041 } 1042 } 1043 1044 if (isdraw) { 1045 PetscDraw draw; 1046 PetscBool isnull; 1047 PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); 1048 PetscCall(PetscDrawIsNull(draw, &isnull)); 1049 if (isnull) PetscFunctionReturn(PETSC_SUCCESS); 1050 } 1051 1052 { 1053 /* assemble the entire matrix onto first processor. */ 1054 Mat A; 1055 Mat_SeqBAIJ *Aloc; 1056 PetscInt M = mat->rmap->N, N = mat->cmap->N, *ai, *aj, col, i, j, k, *rvals, mbs = baij->mbs; 1057 MatScalar *a; 1058 const char *matname; 1059 1060 /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */ 1061 /* Perhaps this should be the type of mat? */ 1062 PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A)); 1063 if (rank == 0) { 1064 PetscCall(MatSetSizes(A, M, N, M, N)); 1065 } else { 1066 PetscCall(MatSetSizes(A, 0, 0, M, N)); 1067 } 1068 PetscCall(MatSetType(A, MATMPIBAIJ)); 1069 PetscCall(MatMPIBAIJSetPreallocation(A, mat->rmap->bs, 0, NULL, 0, NULL)); 1070 PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE)); 1071 1072 /* copy over the A part */ 1073 Aloc = (Mat_SeqBAIJ *)baij->A->data; 1074 ai = Aloc->i; 1075 aj = Aloc->j; 1076 a = Aloc->a; 1077 PetscCall(PetscMalloc1(bs, &rvals)); 1078 1079 for (i = 0; i < mbs; i++) { 1080 rvals[0] = bs * (baij->rstartbs + i); 1081 for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1; 1082 for (j = ai[i]; j < ai[i + 1]; j++) { 1083 col = (baij->cstartbs + aj[j]) * bs; 1084 for (k = 0; k < bs; k++) { 1085 PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES)); 1086 col++; 1087 a += bs; 1088 } 1089 } 1090 } 1091 /* copy over the B part */ 1092 Aloc = (Mat_SeqBAIJ *)baij->B->data; 1093 ai = Aloc->i; 1094 aj = Aloc->j; 1095 a = Aloc->a; 1096 for (i = 0; i < mbs; i++) { 1097 rvals[0] = bs * (baij->rstartbs + i); 1098 for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1; 1099 for (j = ai[i]; j < ai[i + 1]; j++) { 1100 col = baij->garray[aj[j]] * bs; 1101 for (k = 0; k < bs; k++) { 1102 PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES)); 1103 col++; 1104 a += bs; 1105 } 1106 } 1107 } 1108 PetscCall(PetscFree(rvals)); 1109 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 1110 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 1111 /* 1112 Everyone has to call to draw the matrix since the graphics waits are 1113 synchronized across all processors that share the PetscDraw object 1114 */ 1115 PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 1116 if (((PetscObject)mat)->name) PetscCall(PetscObjectGetName((PetscObject)mat, &matname)); 1117 if (rank == 0) { 1118 if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIBAIJ *)A->data)->A, matname)); 1119 PetscCall(MatView_SeqBAIJ(((Mat_MPIBAIJ *)A->data)->A, sviewer)); 1120 } 1121 PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 1122 PetscCall(MatDestroy(&A)); 1123 } 1124 PetscFunctionReturn(PETSC_SUCCESS); 1125 } 1126 1127 /* Used for both MPIBAIJ and MPISBAIJ matrices */ 1128 PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat, PetscViewer viewer) 1129 { 1130 Mat_MPIBAIJ *aij 
= (Mat_MPIBAIJ *)mat->data; 1131 Mat_SeqBAIJ *A = (Mat_SeqBAIJ *)aij->A->data; 1132 Mat_SeqBAIJ *B = (Mat_SeqBAIJ *)aij->B->data; 1133 const PetscInt *garray = aij->garray; 1134 PetscInt header[4], M, N, m, rs, cs, bs, cnt, i, j, ja, jb, k, l; 1135 PetscCount nz, hnz; 1136 PetscInt *rowlens, *colidxs; 1137 PetscScalar *matvals; 1138 PetscMPIInt rank; 1139 1140 PetscFunctionBegin; 1141 PetscCall(PetscViewerSetUp(viewer)); 1142 1143 M = mat->rmap->N; 1144 N = mat->cmap->N; 1145 m = mat->rmap->n; 1146 rs = mat->rmap->rstart; 1147 cs = mat->cmap->rstart; 1148 bs = mat->rmap->bs; 1149 nz = bs * bs * (A->nz + B->nz); 1150 1151 /* write matrix header */ 1152 header[0] = MAT_FILE_CLASSID; 1153 header[1] = M; 1154 header[2] = N; 1155 PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_COUNT, MPI_SUM, 0, PetscObjectComm((PetscObject)mat))); 1156 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 1157 if (rank == 0) PetscCall(PetscIntCast(hnz, &header[3])); 1158 PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT)); 1159 1160 /* fill in and store row lengths */ 1161 PetscCall(PetscMalloc1(m, &rowlens)); 1162 for (cnt = 0, i = 0; i < A->mbs; i++) 1163 for (j = 0; j < bs; j++) rowlens[cnt++] = bs * (A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i]); 1164 PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT)); 1165 PetscCall(PetscFree(rowlens)); 1166 1167 /* fill in and store column indices */ 1168 PetscCall(PetscMalloc1(nz, &colidxs)); 1169 for (cnt = 0, i = 0; i < A->mbs; i++) { 1170 for (k = 0; k < bs; k++) { 1171 for (jb = B->i[i]; jb < B->i[i + 1]; jb++) { 1172 if (garray[B->j[jb]] > cs / bs) break; 1173 for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l; 1174 } 1175 for (ja = A->i[i]; ja < A->i[i + 1]; ja++) 1176 for (l = 0; l < bs; l++) colidxs[cnt++] = bs * A->j[ja] + l + cs; 1177 for (; jb < B->i[i + 1]; jb++) 1178 for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l; 1179 } 1180 } 1181 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscCount_FMT, cnt, nz); 1182 PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_INT)); 1183 PetscCall(PetscFree(colidxs)); 1184 1185 /* fill in and store nonzero values */ 1186 PetscCall(PetscMalloc1(nz, &matvals)); 1187 for (cnt = 0, i = 0; i < A->mbs; i++) { 1188 for (k = 0; k < bs; k++) { 1189 for (jb = B->i[i]; jb < B->i[i + 1]; jb++) { 1190 if (garray[B->j[jb]] > cs / bs) break; 1191 for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k]; 1192 } 1193 for (ja = A->i[i]; ja < A->i[i + 1]; ja++) 1194 for (l = 0; l < bs; l++) matvals[cnt++] = A->a[bs * (bs * ja + l) + k]; 1195 for (; jb < B->i[i + 1]; jb++) 1196 for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k]; 1197 } 1198 } 1199 PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_SCALAR)); 1200 PetscCall(PetscFree(matvals)); 1201 1202 /* write block size option to the viewer's .info file */ 1203 PetscCall(MatView_Binary_BlockSizes(mat, viewer)); 1204 PetscFunctionReturn(PETSC_SUCCESS); 1205 } 1206 1207 PetscErrorCode MatView_MPIBAIJ(Mat mat, PetscViewer viewer) 1208 { 1209 PetscBool iascii, isdraw, issocket, isbinary; 1210 1211 PetscFunctionBegin; 1212 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 1213 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1214 
PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket)); 1215 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1216 if (iascii || isdraw || issocket) { 1217 PetscCall(MatView_MPIBAIJ_ASCIIorDraworSocket(mat, viewer)); 1218 } else if (isbinary) PetscCall(MatView_MPIBAIJ_Binary(mat, viewer)); 1219 PetscFunctionReturn(PETSC_SUCCESS); 1220 } 1221 1222 static PetscErrorCode MatMult_MPIBAIJ(Mat A, Vec xx, Vec yy) 1223 { 1224 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1225 PetscInt nt; 1226 1227 PetscFunctionBegin; 1228 PetscCall(VecGetLocalSize(xx, &nt)); 1229 PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A and xx"); 1230 PetscCall(VecGetLocalSize(yy, &nt)); 1231 PetscCheck(nt == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A and yy"); 1232 PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1233 PetscCall((*a->A->ops->mult)(a->A, xx, yy)); 1234 PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1235 PetscCall((*a->B->ops->multadd)(a->B, a->lvec, yy, yy)); 1236 PetscFunctionReturn(PETSC_SUCCESS); 1237 } 1238 1239 static PetscErrorCode MatMultAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz) 1240 { 1241 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1242 1243 PetscFunctionBegin; 1244 PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1245 PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz)); 1246 PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1247 PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz)); 1248 PetscFunctionReturn(PETSC_SUCCESS); 1249 } 1250 1251 static PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A, Vec xx, Vec yy) 1252 { 1253 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1254 1255 PetscFunctionBegin; 1256 /* do nondiagonal part */ 1257 PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec)); 1258 /* do local part */ 1259 PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy)); 1260 /* add partial results together */ 1261 PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE)); 1262 PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE)); 1263 PetscFunctionReturn(PETSC_SUCCESS); 1264 } 1265 1266 static PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz) 1267 { 1268 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1269 1270 PetscFunctionBegin; 1271 /* do nondiagonal part */ 1272 PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec)); 1273 /* do local part */ 1274 PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz)); 1275 /* add partial results together */ 1276 PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE)); 1277 PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE)); 1278 PetscFunctionReturn(PETSC_SUCCESS); 1279 } 1280 1281 /* 1282 This only works correctly for square matrices where the subblock A->A is the 1283 diagonal block 1284 */ 1285 static PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A, Vec v) 1286 { 1287 PetscFunctionBegin; 1288 PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block"); 1289 PetscCall(MatGetDiagonal(((Mat_MPIBAIJ *)A->data)->A, v)); 1290 PetscFunctionReturn(PETSC_SUCCESS); 1291 } 1292 1293 static PetscErrorCode MatScale_MPIBAIJ(Mat A, PetscScalar aa) 1294 { 1295 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1296 1297 PetscFunctionBegin; 
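  /* scaling is purely local: the diagonal block A and the off-diagonal block B are each scaled by the same factor */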
1298 PetscCall(MatScale(a->A, aa)); 1299 PetscCall(MatScale(a->B, aa)); 1300 PetscFunctionReturn(PETSC_SUCCESS); 1301 } 1302 1303 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1304 { 1305 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data; 1306 PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p; 1307 PetscInt bs = matin->rmap->bs, bs2 = mat->bs2, i, *cworkA, *cworkB, **pcA, **pcB; 1308 PetscInt nztot, nzA, nzB, lrow, brstart = matin->rmap->rstart, brend = matin->rmap->rend; 1309 PetscInt *cmap, *idx_p, cstart = mat->cstartbs; 1310 1311 PetscFunctionBegin; 1312 PetscCheck(row >= brstart && row < brend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local rows"); 1313 PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active"); 1314 mat->getrowactive = PETSC_TRUE; 1315 1316 if (!mat->rowvalues && (idx || v)) { 1317 /* 1318 allocate enough space to hold information from the longest row. 1319 */ 1320 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *)mat->A->data, *Ba = (Mat_SeqBAIJ *)mat->B->data; 1321 PetscInt max = 1, mbs = mat->mbs, tmp; 1322 for (i = 0; i < mbs; i++) { 1323 tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i]; 1324 if (max < tmp) max = tmp; 1325 } 1326 PetscCall(PetscMalloc2(max * bs2, &mat->rowvalues, max * bs2, &mat->rowindices)); 1327 } 1328 lrow = row - brstart; 1329 1330 pvA = &vworkA; 1331 pcA = &cworkA; 1332 pvB = &vworkB; 1333 pcB = &cworkB; 1334 if (!v) { 1335 pvA = NULL; 1336 pvB = NULL; 1337 } 1338 if (!idx) { 1339 pcA = NULL; 1340 if (!v) pcB = NULL; 1341 } 1342 PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA)); 1343 PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB)); 1344 nztot = nzA + nzB; 1345 1346 cmap = mat->garray; 1347 if (v || idx) { 1348 if (nztot) { 1349 /* Sort by increasing column numbers, assuming A and B already sorted */ 1350 PetscInt imark = -1; 1351 if (v) { 1352 *v = v_p = mat->rowvalues; 1353 for (i = 0; i < nzB; i++) { 1354 if (cmap[cworkB[i] / bs] < cstart) v_p[i] = vworkB[i]; 1355 else break; 1356 } 1357 imark = i; 1358 for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i]; 1359 for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i]; 1360 } 1361 if (idx) { 1362 *idx = idx_p = mat->rowindices; 1363 if (imark > -1) { 1364 for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs; 1365 } else { 1366 for (i = 0; i < nzB; i++) { 1367 if (cmap[cworkB[i] / bs] < cstart) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs; 1368 else break; 1369 } 1370 imark = i; 1371 } 1372 for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart * bs + cworkA[i]; 1373 for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs; 1374 } 1375 } else { 1376 if (idx) *idx = NULL; 1377 if (v) *v = NULL; 1378 } 1379 } 1380 *nz = nztot; 1381 PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA)); 1382 PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB)); 1383 PetscFunctionReturn(PETSC_SUCCESS); 1384 } 1385 1386 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1387 { 1388 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 1389 1390 PetscFunctionBegin; 1391 PetscCheck(baij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow not called"); 1392 baij->getrowactive = PETSC_FALSE; 1393 PetscFunctionReturn(PETSC_SUCCESS); 1394 } 1395 1396 static PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A) 1397 { 1398 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data; 1399 
1400 PetscFunctionBegin; 1401 PetscCall(MatZeroEntries(l->A)); 1402 PetscCall(MatZeroEntries(l->B)); 1403 PetscFunctionReturn(PETSC_SUCCESS); 1404 } 1405 1406 static PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin, MatInfoType flag, MatInfo *info) 1407 { 1408 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)matin->data; 1409 Mat A = a->A, B = a->B; 1410 PetscLogDouble isend[5], irecv[5]; 1411 1412 PetscFunctionBegin; 1413 info->block_size = (PetscReal)matin->rmap->bs; 1414 1415 PetscCall(MatGetInfo(A, MAT_LOCAL, info)); 1416 1417 isend[0] = info->nz_used; 1418 isend[1] = info->nz_allocated; 1419 isend[2] = info->nz_unneeded; 1420 isend[3] = info->memory; 1421 isend[4] = info->mallocs; 1422 1423 PetscCall(MatGetInfo(B, MAT_LOCAL, info)); 1424 1425 isend[0] += info->nz_used; 1426 isend[1] += info->nz_allocated; 1427 isend[2] += info->nz_unneeded; 1428 isend[3] += info->memory; 1429 isend[4] += info->mallocs; 1430 1431 if (flag == MAT_LOCAL) { 1432 info->nz_used = isend[0]; 1433 info->nz_allocated = isend[1]; 1434 info->nz_unneeded = isend[2]; 1435 info->memory = isend[3]; 1436 info->mallocs = isend[4]; 1437 } else if (flag == MAT_GLOBAL_MAX) { 1438 PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin))); 1439 1440 info->nz_used = irecv[0]; 1441 info->nz_allocated = irecv[1]; 1442 info->nz_unneeded = irecv[2]; 1443 info->memory = irecv[3]; 1444 info->mallocs = irecv[4]; 1445 } else if (flag == MAT_GLOBAL_SUM) { 1446 PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin))); 1447 1448 info->nz_used = irecv[0]; 1449 info->nz_allocated = irecv[1]; 1450 info->nz_unneeded = irecv[2]; 1451 info->memory = irecv[3]; 1452 info->mallocs = irecv[4]; 1453 } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_ARG_WRONG, "Unknown MatInfoType argument %d", (int)flag); 1454 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1455 info->fill_ratio_needed = 0; 1456 info->factor_mallocs = 0; 1457 PetscFunctionReturn(PETSC_SUCCESS); 1458 } 1459 1460 static PetscErrorCode MatSetOption_MPIBAIJ(Mat A, MatOption op, PetscBool flg) 1461 { 1462 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1463 1464 PetscFunctionBegin; 1465 switch (op) { 1466 case MAT_NEW_NONZERO_LOCATIONS: 1467 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1468 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1469 case MAT_KEEP_NONZERO_PATTERN: 1470 case MAT_NEW_NONZERO_LOCATION_ERR: 1471 MatCheckPreallocated(A, 1); 1472 PetscCall(MatSetOption(a->A, op, flg)); 1473 PetscCall(MatSetOption(a->B, op, flg)); 1474 break; 1475 case MAT_ROW_ORIENTED: 1476 MatCheckPreallocated(A, 1); 1477 a->roworiented = flg; 1478 1479 PetscCall(MatSetOption(a->A, op, flg)); 1480 PetscCall(MatSetOption(a->B, op, flg)); 1481 break; 1482 case MAT_FORCE_DIAGONAL_ENTRIES: 1483 case MAT_SORTED_FULL: 1484 PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); 1485 break; 1486 case MAT_IGNORE_OFF_PROC_ENTRIES: 1487 a->donotstash = flg; 1488 break; 1489 case MAT_USE_HASH_TABLE: 1490 a->ht_flag = flg; 1491 a->ht_fact = 1.39; 1492 break; 1493 case MAT_SYMMETRIC: 1494 case MAT_STRUCTURALLY_SYMMETRIC: 1495 case MAT_HERMITIAN: 1496 case MAT_SUBMAT_SINGLEIS: 1497 case MAT_SYMMETRY_ETERNAL: 1498 case MAT_STRUCTURAL_SYMMETRY_ETERNAL: 1499 case MAT_SPD_ETERNAL: 1500 /* if the diagonal matrix is square it inherits some of the properties above */ 1501 break; 1502 default: 1503 SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "unknown option %d", op); 1504 } 1505 
PetscFunctionReturn(PETSC_SUCCESS); 1506 } 1507 1508 static PetscErrorCode MatTranspose_MPIBAIJ(Mat A, MatReuse reuse, Mat *matout) 1509 { 1510 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)A->data; 1511 Mat_SeqBAIJ *Aloc; 1512 Mat B; 1513 PetscInt M = A->rmap->N, N = A->cmap->N, *ai, *aj, i, *rvals, j, k, col; 1514 PetscInt bs = A->rmap->bs, mbs = baij->mbs; 1515 MatScalar *a; 1516 1517 PetscFunctionBegin; 1518 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout)); 1519 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 1520 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 1521 PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M)); 1522 PetscCall(MatSetType(B, ((PetscObject)A)->type_name)); 1523 /* Do not know preallocation information, but must set block size */ 1524 PetscCall(MatMPIBAIJSetPreallocation(B, A->rmap->bs, PETSC_DECIDE, NULL, PETSC_DECIDE, NULL)); 1525 } else { 1526 B = *matout; 1527 } 1528 1529 /* copy over the A part */ 1530 Aloc = (Mat_SeqBAIJ *)baij->A->data; 1531 ai = Aloc->i; 1532 aj = Aloc->j; 1533 a = Aloc->a; 1534 PetscCall(PetscMalloc1(bs, &rvals)); 1535 1536 for (i = 0; i < mbs; i++) { 1537 rvals[0] = bs * (baij->rstartbs + i); 1538 for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1; 1539 for (j = ai[i]; j < ai[i + 1]; j++) { 1540 col = (baij->cstartbs + aj[j]) * bs; 1541 for (k = 0; k < bs; k++) { 1542 PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES)); 1543 1544 col++; 1545 a += bs; 1546 } 1547 } 1548 } 1549 /* copy over the B part */ 1550 Aloc = (Mat_SeqBAIJ *)baij->B->data; 1551 ai = Aloc->i; 1552 aj = Aloc->j; 1553 a = Aloc->a; 1554 for (i = 0; i < mbs; i++) { 1555 rvals[0] = bs * (baij->rstartbs + i); 1556 for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1; 1557 for (j = ai[i]; j < ai[i + 1]; j++) { 1558 col = baij->garray[aj[j]] * bs; 1559 for (k = 0; k < bs; k++) { 1560 PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES)); 1561 col++; 1562 a += bs; 1563 } 1564 } 1565 } 1566 PetscCall(PetscFree(rvals)); 1567 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 1568 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 1569 1570 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) *matout = B; 1571 else PetscCall(MatHeaderMerge(A, &B)); 1572 PetscFunctionReturn(PETSC_SUCCESS); 1573 } 1574 1575 static PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat, Vec ll, Vec rr) 1576 { 1577 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 1578 Mat a = baij->A, b = baij->B; 1579 PetscInt s1, s2, s3; 1580 1581 PetscFunctionBegin; 1582 PetscCall(MatGetLocalSize(mat, &s2, &s3)); 1583 if (rr) { 1584 PetscCall(VecGetLocalSize(rr, &s1)); 1585 PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size"); 1586 /* Overlap communication with computation. 
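The forward scatter of rr into baij->lvec begun below is completed only after the diagonal block has been scaled, so the communication overlaps the local computation.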
*/ 1587 PetscCall(VecScatterBegin(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1588 } 1589 if (ll) { 1590 PetscCall(VecGetLocalSize(ll, &s1)); 1591 PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size"); 1592 PetscUseTypeMethod(b, diagonalscale, ll, NULL); 1593 } 1594 /* scale the diagonal block */ 1595 PetscUseTypeMethod(a, diagonalscale, ll, rr); 1596 1597 if (rr) { 1598 /* Do a scatter end and then right scale the off-diagonal block */ 1599 PetscCall(VecScatterEnd(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1600 PetscUseTypeMethod(b, diagonalscale, NULL, baij->lvec); 1601 } 1602 PetscFunctionReturn(PETSC_SUCCESS); 1603 } 1604 1605 static PetscErrorCode MatZeroRows_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 1606 { 1607 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data; 1608 PetscInt *lrows; 1609 PetscInt r, len; 1610 PetscBool cong; 1611 1612 PetscFunctionBegin; 1613 /* get locally owned rows */ 1614 PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows)); 1615 /* fix right-hand side if needed */ 1616 if (x && b) { 1617 const PetscScalar *xx; 1618 PetscScalar *bb; 1619 1620 PetscCall(VecGetArrayRead(x, &xx)); 1621 PetscCall(VecGetArray(b, &bb)); 1622 for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]]; 1623 PetscCall(VecRestoreArrayRead(x, &xx)); 1624 PetscCall(VecRestoreArray(b, &bb)); 1625 } 1626 1627 /* actually zap the local rows */ 1628 /* 1629 Zero the required rows. If the "diagonal block" of the matrix 1630 is square and the user wishes to set the diagonal we use separate 1631 code so that MatSetValues() is not called for each diagonal allocating 1632 new memory, thus calling lots of mallocs and slowing things down. 
1633 1634 */ 1635 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1636 PetscCall(MatZeroRows_SeqBAIJ(l->B, len, lrows, 0.0, NULL, NULL)); 1637 PetscCall(MatHasCongruentLayouts(A, &cong)); 1638 if ((diag != 0.0) && cong) { 1639 PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, diag, NULL, NULL)); 1640 } else if (diag != 0.0) { 1641 PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL)); 1642 PetscCheck(!((Mat_SeqBAIJ *)l->A->data)->nonew, PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options MAT_NEW_NONZERO_LOCATIONS, MAT_NEW_NONZERO_LOCATION_ERR, and MAT_NEW_NONZERO_ALLOCATION_ERR"); 1643 for (r = 0; r < len; ++r) { 1644 const PetscInt row = lrows[r] + A->rmap->rstart; 1645 PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES)); 1646 } 1647 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 1648 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 1649 } else { 1650 PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL)); 1651 } 1652 PetscCall(PetscFree(lrows)); 1653 1654 /* only change matrix nonzero state if pattern was allowed to be changed */ 1655 if (!((Mat_SeqBAIJ *)l->A->data)->keepnonzeropattern || !((Mat_SeqBAIJ *)l->A->data)->nonew) { 1656 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1657 PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A))); 1658 } 1659 PetscFunctionReturn(PETSC_SUCCESS); 1660 } 1661 1662 static PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 1663 { 1664 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data; 1665 PetscMPIInt n, p = 0; 1666 PetscInt i, j, k, r, len = 0, row, col, count; 1667 PetscInt *lrows, *owners = A->rmap->range; 1668 PetscSFNode *rrows; 1669 PetscSF sf; 1670 const PetscScalar *xx; 1671 PetscScalar *bb, *mask; 1672 Vec xmask, lmask; 1673 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)l->B->data; 1674 PetscInt bs = A->rmap->bs, bs2 = baij->bs2; 1675 PetscScalar *aa; 1676 1677 PetscFunctionBegin; 1678 PetscCall(PetscMPIIntCast(A->rmap->n, &n)); 1679 /* Create SF where leaves are input rows and roots are owned rows */ 1680 PetscCall(PetscMalloc1(n, &lrows)); 1681 for (r = 0; r < n; ++r) lrows[r] = -1; 1682 PetscCall(PetscMalloc1(N, &rrows)); 1683 for (r = 0; r < N; ++r) { 1684 const PetscInt idx = rows[r]; 1685 PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N); 1686 if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */ 1687 PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p)); 1688 } 1689 rrows[r].rank = p; 1690 rrows[r].index = rows[r] - owners[p]; 1691 } 1692 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1693 PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER)); 1694 /* Collect flags for rows to be zeroed */ 1695 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR)); 1696 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR)); 1697 PetscCall(PetscSFDestroy(&sf)); 1698 /* Compress and put in row numbers */ 1699 for (r = 0; r < n; ++r) 1700 if (lrows[r] >= 0) lrows[len++] = r; 1701 /* zero diagonal part of matrix */ 1702 PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b)); 1703 /* handle off-diagonal part of matrix */ 1704 
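  /* Build a 0/1 mask over the locally owned rows/columns being eliminated and scatter it into the ghost
     layout (lmask); each rank then knows which ghost columns of its off-diagonal block B correspond to
     eliminated rows on other ranks, so it can zero those columns (and, when x and b are supplied, fold
     the eliminated coefficients into the right-hand side) without further communication. */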
PetscCall(MatCreateVecs(A, &xmask, NULL)); 1705 PetscCall(VecDuplicate(l->lvec, &lmask)); 1706 PetscCall(VecGetArray(xmask, &bb)); 1707 for (i = 0; i < len; i++) bb[lrows[i]] = 1; 1708 PetscCall(VecRestoreArray(xmask, &bb)); 1709 PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD)); 1710 PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD)); 1711 PetscCall(VecDestroy(&xmask)); 1712 if (x) { 1713 PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1714 PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1715 PetscCall(VecGetArrayRead(l->lvec, &xx)); 1716 PetscCall(VecGetArray(b, &bb)); 1717 } 1718 PetscCall(VecGetArray(lmask, &mask)); 1719 /* remove zeroed rows of off-diagonal matrix */ 1720 for (i = 0; i < len; ++i) { 1721 row = lrows[i]; 1722 count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs; 1723 aa = baij->a + baij->i[row / bs] * bs2 + (row % bs); 1724 for (k = 0; k < count; ++k) { 1725 aa[0] = 0.0; 1726 aa += bs; 1727 } 1728 } 1729 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1730 for (i = 0; i < l->B->rmap->N; ++i) { 1731 row = i / bs; 1732 for (j = baij->i[row]; j < baij->i[row + 1]; ++j) { 1733 for (k = 0; k < bs; ++k) { 1734 col = bs * baij->j[j] + k; 1735 if (PetscAbsScalar(mask[col])) { 1736 aa = baij->a + j * bs2 + (i % bs) + bs * k; 1737 if (x) bb[i] -= aa[0] * xx[col]; 1738 aa[0] = 0.0; 1739 } 1740 } 1741 } 1742 } 1743 if (x) { 1744 PetscCall(VecRestoreArray(b, &bb)); 1745 PetscCall(VecRestoreArrayRead(l->lvec, &xx)); 1746 } 1747 PetscCall(VecRestoreArray(lmask, &mask)); 1748 PetscCall(VecDestroy(&lmask)); 1749 PetscCall(PetscFree(lrows)); 1750 1751 /* only change matrix nonzero state if pattern was allowed to be changed */ 1752 if (!((Mat_SeqBAIJ *)l->A->data)->nonew) { 1753 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1754 PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A))); 1755 } 1756 PetscFunctionReturn(PETSC_SUCCESS); 1757 } 1758 1759 static PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1760 { 1761 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1762 1763 PetscFunctionBegin; 1764 PetscCall(MatSetUnfactored(a->A)); 1765 PetscFunctionReturn(PETSC_SUCCESS); 1766 } 1767 1768 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat, MatDuplicateOption, Mat *); 1769 1770 static PetscErrorCode MatEqual_MPIBAIJ(Mat A, Mat B, PetscBool *flag) 1771 { 1772 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ *)B->data, *matA = (Mat_MPIBAIJ *)A->data; 1773 Mat a, b, c, d; 1774 PetscBool flg; 1775 1776 PetscFunctionBegin; 1777 a = matA->A; 1778 b = matA->B; 1779 c = matB->A; 1780 d = matB->B; 1781 1782 PetscCall(MatEqual(a, c, &flg)); 1783 if (flg) PetscCall(MatEqual(b, d, &flg)); 1784 PetscCallMPI(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A))); 1785 PetscFunctionReturn(PETSC_SUCCESS); 1786 } 1787 1788 static PetscErrorCode MatCopy_MPIBAIJ(Mat A, Mat B, MatStructure str) 1789 { 1790 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1791 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 1792 1793 PetscFunctionBegin; 1794 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
*/ 1795 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1796 PetscCall(MatCopy_Basic(A, B, str)); 1797 } else { 1798 PetscCall(MatCopy(a->A, b->A, str)); 1799 PetscCall(MatCopy(a->B, b->B, str)); 1800 } 1801 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 1802 PetscFunctionReturn(PETSC_SUCCESS); 1803 } 1804 1805 PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz) 1806 { 1807 PetscInt bs = Y->rmap->bs, m = Y->rmap->N / bs; 1808 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data; 1809 Mat_SeqBAIJ *y = (Mat_SeqBAIJ *)Y->data; 1810 1811 PetscFunctionBegin; 1812 PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz)); 1813 PetscFunctionReturn(PETSC_SUCCESS); 1814 } 1815 1816 static PetscErrorCode MatAXPY_MPIBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str) 1817 { 1818 Mat_MPIBAIJ *xx = (Mat_MPIBAIJ *)X->data, *yy = (Mat_MPIBAIJ *)Y->data; 1819 PetscBLASInt bnz, one = 1; 1820 Mat_SeqBAIJ *x, *y; 1821 PetscInt bs2 = Y->rmap->bs * Y->rmap->bs; 1822 1823 PetscFunctionBegin; 1824 if (str == SAME_NONZERO_PATTERN) { 1825 PetscScalar alpha = a; 1826 x = (Mat_SeqBAIJ *)xx->A->data; 1827 y = (Mat_SeqBAIJ *)yy->A->data; 1828 PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz)); 1829 PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one)); 1830 x = (Mat_SeqBAIJ *)xx->B->data; 1831 y = (Mat_SeqBAIJ *)yy->B->data; 1832 PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz)); 1833 PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one)); 1834 PetscCall(PetscObjectStateIncrease((PetscObject)Y)); 1835 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1836 PetscCall(MatAXPY_Basic(Y, a, X, str)); 1837 } else { 1838 Mat B; 1839 PetscInt *nnz_d, *nnz_o, bs = Y->rmap->bs; 1840 PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d)); 1841 PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o)); 1842 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B)); 1843 PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name)); 1844 PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N)); 1845 PetscCall(MatSetBlockSizesFromMats(B, Y, Y)); 1846 PetscCall(MatSetType(B, MATMPIBAIJ)); 1847 PetscCall(MatAXPYGetPreallocation_SeqBAIJ(yy->A, xx->A, nnz_d)); 1848 PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o)); 1849 PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, nnz_d, 0, nnz_o)); 1850 /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! 
*/ 1851 PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str)); 1852 PetscCall(MatHeaderMerge(Y, &B)); 1853 PetscCall(PetscFree(nnz_d)); 1854 PetscCall(PetscFree(nnz_o)); 1855 } 1856 PetscFunctionReturn(PETSC_SUCCESS); 1857 } 1858 1859 static PetscErrorCode MatConjugate_MPIBAIJ(Mat mat) 1860 { 1861 PetscFunctionBegin; 1862 if (PetscDefined(USE_COMPLEX)) { 1863 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)mat->data; 1864 1865 PetscCall(MatConjugate_SeqBAIJ(a->A)); 1866 PetscCall(MatConjugate_SeqBAIJ(a->B)); 1867 } 1868 PetscFunctionReturn(PETSC_SUCCESS); 1869 } 1870 1871 static PetscErrorCode MatRealPart_MPIBAIJ(Mat A) 1872 { 1873 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1874 1875 PetscFunctionBegin; 1876 PetscCall(MatRealPart(a->A)); 1877 PetscCall(MatRealPart(a->B)); 1878 PetscFunctionReturn(PETSC_SUCCESS); 1879 } 1880 1881 static PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A) 1882 { 1883 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1884 1885 PetscFunctionBegin; 1886 PetscCall(MatImaginaryPart(a->A)); 1887 PetscCall(MatImaginaryPart(a->B)); 1888 PetscFunctionReturn(PETSC_SUCCESS); 1889 } 1890 1891 static PetscErrorCode MatCreateSubMatrix_MPIBAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat) 1892 { 1893 IS iscol_local; 1894 PetscInt csize; 1895 1896 PetscFunctionBegin; 1897 PetscCall(ISGetLocalSize(iscol, &csize)); 1898 if (call == MAT_REUSE_MATRIX) { 1899 PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local)); 1900 PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 1901 } else { 1902 PetscCall(ISAllGather(iscol, &iscol_local)); 1903 } 1904 PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, isrow, iscol_local, csize, call, newmat, PETSC_FALSE)); 1905 if (call == MAT_INITIAL_MATRIX) { 1906 PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local)); 1907 PetscCall(ISDestroy(&iscol_local)); 1908 } 1909 PetscFunctionReturn(PETSC_SUCCESS); 1910 } 1911 1912 /* 1913 Not great since it makes two copies of the submatrix, first an SeqBAIJ 1914 in local and then by concatenating the local matrices the end result. 1915 Writing it directly would be much like MatCreateSubMatrices_MPIBAIJ(). 1916 This routine is used for BAIJ and SBAIJ matrices (unfortunate dependency). 1917 */ 1918 PetscErrorCode MatCreateSubMatrix_MPIBAIJ_Private(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat, PetscBool sym) 1919 { 1920 PetscMPIInt rank, size; 1921 PetscInt i, m, n, rstart, row, rend, nz, *cwork, j, bs; 1922 PetscInt *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal; 1923 Mat M, Mreuse; 1924 MatScalar *vwork, *aa; 1925 MPI_Comm comm; 1926 IS isrow_new, iscol_new; 1927 Mat_SeqBAIJ *aij; 1928 1929 PetscFunctionBegin; 1930 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 1931 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 1932 PetscCallMPI(MPI_Comm_size(comm, &size)); 1933 /* The compression and expansion should be avoided. 
Doesn't point 1934 out errors, might change the indices, hence buggey */ 1935 PetscCall(ISCompressIndicesGeneral(mat->rmap->N, mat->rmap->n, mat->rmap->bs, 1, &isrow, &isrow_new)); 1936 if (isrow == iscol) { 1937 iscol_new = isrow_new; 1938 PetscCall(PetscObjectReference((PetscObject)iscol_new)); 1939 } else PetscCall(ISCompressIndicesGeneral(mat->cmap->N, mat->cmap->n, mat->cmap->bs, 1, &iscol, &iscol_new)); 1940 1941 if (call == MAT_REUSE_MATRIX) { 1942 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse)); 1943 PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 1944 PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_REUSE_MATRIX, &Mreuse, sym)); 1945 } else { 1946 PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_INITIAL_MATRIX, &Mreuse, sym)); 1947 } 1948 PetscCall(ISDestroy(&isrow_new)); 1949 PetscCall(ISDestroy(&iscol_new)); 1950 /* 1951 m - number of local rows 1952 n - number of columns (same on all processors) 1953 rstart - first row in new global matrix generated 1954 */ 1955 PetscCall(MatGetBlockSize(mat, &bs)); 1956 PetscCall(MatGetSize(Mreuse, &m, &n)); 1957 m = m / bs; 1958 n = n / bs; 1959 1960 if (call == MAT_INITIAL_MATRIX) { 1961 aij = (Mat_SeqBAIJ *)Mreuse->data; 1962 ii = aij->i; 1963 jj = aij->j; 1964 1965 /* 1966 Determine the number of non-zeros in the diagonal and off-diagonal 1967 portions of the matrix in order to do correct preallocation 1968 */ 1969 1970 /* first get start and end of "diagonal" columns */ 1971 if (csize == PETSC_DECIDE) { 1972 PetscCall(ISGetSize(isrow, &mglobal)); 1973 if (mglobal == n * bs) { /* square matrix */ 1974 nlocal = m; 1975 } else { 1976 nlocal = n / size + ((n % size) > rank); 1977 } 1978 } else { 1979 nlocal = csize / bs; 1980 } 1981 PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm)); 1982 rstart = rend - nlocal; 1983 PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n); 1984 1985 /* next, compute all the lengths */ 1986 PetscCall(PetscMalloc2(m + 1, &dlens, m + 1, &olens)); 1987 for (i = 0; i < m; i++) { 1988 jend = ii[i + 1] - ii[i]; 1989 olen = 0; 1990 dlen = 0; 1991 for (j = 0; j < jend; j++) { 1992 if (*jj < rstart || *jj >= rend) olen++; 1993 else dlen++; 1994 jj++; 1995 } 1996 olens[i] = olen; 1997 dlens[i] = dlen; 1998 } 1999 PetscCall(MatCreate(comm, &M)); 2000 PetscCall(MatSetSizes(M, bs * m, bs * nlocal, PETSC_DECIDE, bs * n)); 2001 PetscCall(MatSetType(M, sym ? ((PetscObject)mat)->type_name : MATMPIBAIJ)); 2002 PetscCall(MatMPIBAIJSetPreallocation(M, bs, 0, dlens, 0, olens)); 2003 PetscCall(MatMPISBAIJSetPreallocation(M, bs, 0, dlens, 0, olens)); 2004 PetscCall(PetscFree2(dlens, olens)); 2005 } else { 2006 PetscInt ml, nl; 2007 2008 M = *newmat; 2009 PetscCall(MatGetLocalSize(M, &ml, &nl)); 2010 PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request"); 2011 PetscCall(MatZeroEntries(M)); 2012 /* 2013 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2014 rather than the slower MatSetValues(). 
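(In this BAIJ variant the direct call used below is MatSetValuesBlocked_MPIBAIJ().)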
2015 */ 2016 M->was_assembled = PETSC_TRUE; 2017 M->assembled = PETSC_FALSE; 2018 } 2019 PetscCall(MatSetOption(M, MAT_ROW_ORIENTED, PETSC_FALSE)); 2020 PetscCall(MatGetOwnershipRange(M, &rstart, &rend)); 2021 aij = (Mat_SeqBAIJ *)Mreuse->data; 2022 ii = aij->i; 2023 jj = aij->j; 2024 aa = aij->a; 2025 for (i = 0; i < m; i++) { 2026 row = rstart / bs + i; 2027 nz = ii[i + 1] - ii[i]; 2028 cwork = jj; 2029 jj = PetscSafePointerPlusOffset(jj, nz); 2030 vwork = aa; 2031 aa = PetscSafePointerPlusOffset(aa, nz * bs * bs); 2032 PetscCall(MatSetValuesBlocked_MPIBAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES)); 2033 } 2034 2035 PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY)); 2036 PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY)); 2037 *newmat = M; 2038 2039 /* save submatrix used in processor for next request */ 2040 if (call == MAT_INITIAL_MATRIX) { 2041 PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse)); 2042 PetscCall(PetscObjectDereference((PetscObject)Mreuse)); 2043 } 2044 PetscFunctionReturn(PETSC_SUCCESS); 2045 } 2046 2047 static PetscErrorCode MatPermute_MPIBAIJ(Mat A, IS rowp, IS colp, Mat *B) 2048 { 2049 MPI_Comm comm, pcomm; 2050 PetscInt clocal_size, nrows; 2051 const PetscInt *rows; 2052 PetscMPIInt size; 2053 IS crowp, lcolp; 2054 2055 PetscFunctionBegin; 2056 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 2057 /* make a collective version of 'rowp' */ 2058 PetscCall(PetscObjectGetComm((PetscObject)rowp, &pcomm)); 2059 if (pcomm == comm) { 2060 crowp = rowp; 2061 } else { 2062 PetscCall(ISGetSize(rowp, &nrows)); 2063 PetscCall(ISGetIndices(rowp, &rows)); 2064 PetscCall(ISCreateGeneral(comm, nrows, rows, PETSC_COPY_VALUES, &crowp)); 2065 PetscCall(ISRestoreIndices(rowp, &rows)); 2066 } 2067 PetscCall(ISSetPermutation(crowp)); 2068 /* make a local version of 'colp' */ 2069 PetscCall(PetscObjectGetComm((PetscObject)colp, &pcomm)); 2070 PetscCallMPI(MPI_Comm_size(pcomm, &size)); 2071 if (size == 1) { 2072 lcolp = colp; 2073 } else { 2074 PetscCall(ISAllGather(colp, &lcolp)); 2075 } 2076 PetscCall(ISSetPermutation(lcolp)); 2077 /* now we just get the submatrix */ 2078 PetscCall(MatGetLocalSize(A, NULL, &clocal_size)); 2079 PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(A, crowp, lcolp, clocal_size, MAT_INITIAL_MATRIX, B, PETSC_FALSE)); 2080 /* clean up */ 2081 if (pcomm != comm) PetscCall(ISDestroy(&crowp)); 2082 if (size > 1) PetscCall(ISDestroy(&lcolp)); 2083 PetscFunctionReturn(PETSC_SUCCESS); 2084 } 2085 2086 static PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[]) 2087 { 2088 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 2089 Mat_SeqBAIJ *B = (Mat_SeqBAIJ *)baij->B->data; 2090 2091 PetscFunctionBegin; 2092 if (nghosts) *nghosts = B->nbs; 2093 if (ghosts) *ghosts = baij->garray; 2094 PetscFunctionReturn(PETSC_SUCCESS); 2095 } 2096 2097 static PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A, Mat *newmat) 2098 { 2099 Mat B; 2100 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2101 Mat_SeqBAIJ *ad = (Mat_SeqBAIJ *)a->A->data, *bd = (Mat_SeqBAIJ *)a->B->data; 2102 Mat_SeqAIJ *b; 2103 PetscMPIInt size, rank, *recvcounts = NULL, *displs = NULL; 2104 PetscInt sendcount, i, *rstarts = A->rmap->range, n, cnt, j, bs = A->rmap->bs; 2105 PetscInt m, *garray = a->garray, *lens, *jsendbuf, *a_jsendbuf, *b_jsendbuf; 2106 2107 PetscFunctionBegin; 2108 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 2109 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank)); 2110 2111 /* Tell every 
processor the number of nonzeros per row */ 2112 PetscCall(PetscMalloc1(A->rmap->N / bs, &lens)); 2113 for (i = A->rmap->rstart / bs; i < A->rmap->rend / bs; i++) lens[i] = ad->i[i - A->rmap->rstart / bs + 1] - ad->i[i - A->rmap->rstart / bs] + bd->i[i - A->rmap->rstart / bs + 1] - bd->i[i - A->rmap->rstart / bs]; 2114 PetscCall(PetscMalloc1(2 * size, &recvcounts)); 2115 displs = recvcounts + size; 2116 for (i = 0; i < size; i++) { 2117 PetscCall(PetscMPIIntCast(A->rmap->range[i + 1] / bs - A->rmap->range[i] / bs, &recvcounts[i])); 2118 PetscCall(PetscMPIIntCast(A->rmap->range[i] / bs, &displs[i])); 2119 } 2120 PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, lens, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A))); 2121 /* Create the sequential matrix of the same type as the local block diagonal */ 2122 PetscCall(MatCreate(PETSC_COMM_SELF, &B)); 2123 PetscCall(MatSetSizes(B, A->rmap->N / bs, A->cmap->N / bs, PETSC_DETERMINE, PETSC_DETERMINE)); 2124 PetscCall(MatSetType(B, MATSEQAIJ)); 2125 PetscCall(MatSeqAIJSetPreallocation(B, 0, lens)); 2126 b = (Mat_SeqAIJ *)B->data; 2127 2128 /* Copy my part of matrix column indices over */ 2129 sendcount = ad->nz + bd->nz; 2130 jsendbuf = b->j + b->i[rstarts[rank] / bs]; 2131 a_jsendbuf = ad->j; 2132 b_jsendbuf = bd->j; 2133 n = A->rmap->rend / bs - A->rmap->rstart / bs; 2134 cnt = 0; 2135 for (i = 0; i < n; i++) { 2136 /* put in lower diagonal portion */ 2137 m = bd->i[i + 1] - bd->i[i]; 2138 while (m > 0) { 2139 /* is it above diagonal (in bd (compressed) numbering) */ 2140 if (garray[*b_jsendbuf] > A->rmap->rstart / bs + i) break; 2141 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2142 m--; 2143 } 2144 2145 /* put in diagonal portion */ 2146 for (j = ad->i[i]; j < ad->i[i + 1]; j++) jsendbuf[cnt++] = A->rmap->rstart / bs + *a_jsendbuf++; 2147 2148 /* put in upper diagonal portion */ 2149 while (m-- > 0) jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2150 } 2151 PetscCheck(cnt == sendcount, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Corrupted PETSc matrix: nz given %" PetscInt_FMT " actual nz %" PetscInt_FMT, sendcount, cnt); 2152 2153 /* Gather all column indices to all processors */ 2154 for (i = 0; i < size; i++) { 2155 recvcounts[i] = 0; 2156 for (j = A->rmap->range[i] / bs; j < A->rmap->range[i + 1] / bs; j++) recvcounts[i] += lens[j]; 2157 } 2158 displs[0] = 0; 2159 for (i = 1; i < size; i++) displs[i] = displs[i - 1] + recvcounts[i - 1]; 2160 PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, b->j, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A))); 2161 /* Assemble the matrix into usable form (note numerical values not yet set) */ 2162 /* set the b->ilen (length of each row) values */ 2163 PetscCall(PetscArraycpy(b->ilen, lens, A->rmap->N / bs)); 2164 /* set the b->i indices */ 2165 b->i[0] = 0; 2166 for (i = 1; i <= A->rmap->N / bs; i++) b->i[i] = b->i[i - 1] + lens[i - 1]; 2167 PetscCall(PetscFree(lens)); 2168 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 2169 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 2170 PetscCall(PetscFree(recvcounts)); 2171 2172 PetscCall(MatPropagateSymmetryOptions(A, B)); 2173 *newmat = B; 2174 PetscFunctionReturn(PETSC_SUCCESS); 2175 } 2176 2177 static PetscErrorCode MatSOR_MPIBAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) 2178 { 2179 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data; 2180 Vec bb1 = NULL; 2181 2182 PetscFunctionBegin; 2183 if (flag == SOR_APPLY_UPPER) { 2184 
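    /* SOR_APPLY_UPPER is handled entirely by the sequential SOR of the diagonal block; the off-process coupling in B plays no role for this variant */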
PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2185 PetscFunctionReturn(PETSC_SUCCESS); 2186 } 2187 2188 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) PetscCall(VecDuplicate(bb, &bb1)); 2189 2190 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2191 if (flag & SOR_ZERO_INITIAL_GUESS) { 2192 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2193 its--; 2194 } 2195 2196 while (its--) { 2197 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2198 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2199 2200 /* update rhs: bb1 = bb - B*x */ 2201 PetscCall(VecScale(mat->lvec, -1.0)); 2202 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 2203 2204 /* local sweep */ 2205 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx)); 2206 } 2207 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 2208 if (flag & SOR_ZERO_INITIAL_GUESS) { 2209 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2210 its--; 2211 } 2212 while (its--) { 2213 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2214 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2215 2216 /* update rhs: bb1 = bb - B*x */ 2217 PetscCall(VecScale(mat->lvec, -1.0)); 2218 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 2219 2220 /* local sweep */ 2221 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx)); 2222 } 2223 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 2224 if (flag & SOR_ZERO_INITIAL_GUESS) { 2225 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2226 its--; 2227 } 2228 while (its--) { 2229 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2230 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2231 2232 /* update rhs: bb1 = bb - B*x */ 2233 PetscCall(VecScale(mat->lvec, -1.0)); 2234 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 2235 2236 /* local sweep */ 2237 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx)); 2238 } 2239 } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel version of SOR requested not supported"); 2240 2241 PetscCall(VecDestroy(&bb1)); 2242 PetscFunctionReturn(PETSC_SUCCESS); 2243 } 2244 2245 static PetscErrorCode MatGetColumnReductions_MPIBAIJ(Mat A, PetscInt type, PetscReal *reductions) 2246 { 2247 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)A->data; 2248 PetscInt m, N, i, *garray = aij->garray; 2249 PetscInt ib, jb, bs = A->rmap->bs; 2250 Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ *)aij->A->data; 2251 MatScalar *a_val = a_aij->a; 2252 Mat_SeqBAIJ *b_aij = (Mat_SeqBAIJ *)aij->B->data; 2253 MatScalar *b_val = b_aij->a; 2254 PetscReal *work; 2255 2256 PetscFunctionBegin; 2257 PetscCall(MatGetSize(A, &m, &N)); 2258 PetscCall(PetscCalloc1(N, &work)); 2259 if (type == NORM_2) { 2260 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2261 for (jb = 0; jb < bs; jb++) { 2262 for (ib = 0; ib < bs; ib++) { 2263 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val); 2264 a_val++; 2265 } 2266 } 2267 } 2268 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2269 for (jb = 0; jb < bs; jb++) { 2270 for (ib = 0; ib < bs; ib++) { 2271 work[garray[b_aij->j[i]] * bs + jb] += 
PetscAbsScalar(*b_val * *b_val); 2272 b_val++; 2273 } 2274 } 2275 } 2276 } else if (type == NORM_1) { 2277 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2278 for (jb = 0; jb < bs; jb++) { 2279 for (ib = 0; ib < bs; ib++) { 2280 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val); 2281 a_val++; 2282 } 2283 } 2284 } 2285 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2286 for (jb = 0; jb < bs; jb++) { 2287 for (ib = 0; ib < bs; ib++) { 2288 work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val); 2289 b_val++; 2290 } 2291 } 2292 } 2293 } else if (type == NORM_INFINITY) { 2294 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2295 for (jb = 0; jb < bs; jb++) { 2296 for (ib = 0; ib < bs; ib++) { 2297 PetscInt col = A->cmap->rstart + a_aij->j[i] * bs + jb; 2298 work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]); 2299 a_val++; 2300 } 2301 } 2302 } 2303 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2304 for (jb = 0; jb < bs; jb++) { 2305 for (ib = 0; ib < bs; ib++) { 2306 PetscInt col = garray[b_aij->j[i]] * bs + jb; 2307 work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]); 2308 b_val++; 2309 } 2310 } 2311 } 2312 } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) { 2313 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2314 for (jb = 0; jb < bs; jb++) { 2315 for (ib = 0; ib < bs; ib++) { 2316 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val); 2317 a_val++; 2318 } 2319 } 2320 } 2321 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2322 for (jb = 0; jb < bs; jb++) { 2323 for (ib = 0; ib < bs; ib++) { 2324 work[garray[b_aij->j[i]] * bs + jb] += PetscRealPart(*b_val); 2325 b_val++; 2326 } 2327 } 2328 } 2329 } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) { 2330 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2331 for (jb = 0; jb < bs; jb++) { 2332 for (ib = 0; ib < bs; ib++) { 2333 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val); 2334 a_val++; 2335 } 2336 } 2337 } 2338 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2339 for (jb = 0; jb < bs; jb++) { 2340 for (ib = 0; ib < bs; ib++) { 2341 work[garray[b_aij->j[i]] * bs + jb] += PetscImaginaryPart(*b_val); 2342 b_val++; 2343 } 2344 } 2345 } 2346 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type"); 2347 if (type == NORM_INFINITY) { 2348 PetscCallMPI(MPIU_Allreduce(work, reductions, N, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A))); 2349 } else { 2350 PetscCallMPI(MPIU_Allreduce(work, reductions, N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A))); 2351 } 2352 PetscCall(PetscFree(work)); 2353 if (type == NORM_2) { 2354 for (i = 0; i < N; i++) reductions[i] = PetscSqrtReal(reductions[i]); 2355 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 2356 for (i = 0; i < N; i++) reductions[i] /= m; 2357 } 2358 PetscFunctionReturn(PETSC_SUCCESS); 2359 } 2360 2361 static PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A, const PetscScalar **values) 2362 { 2363 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2364 2365 PetscFunctionBegin; 2366 PetscCall(MatInvertBlockDiagonal(a->A, values)); 2367 A->factorerrortype = a->A->factorerrortype; 2368 A->factorerror_zeropivot_value = a->A->factorerror_zeropivot_value; 2369 A->factorerror_zeropivot_row = a->A->factorerror_zeropivot_row; 2370 
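  /* the fields copied above allow MatFactorGetError() and MatFactorGetErrorZeroPivot(), queried on the
     parallel matrix, to report any zero pivot encountered while inverting the local diagonal blocks */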
PetscFunctionReturn(PETSC_SUCCESS); 2371 } 2372 2373 static PetscErrorCode MatShift_MPIBAIJ(Mat Y, PetscScalar a) 2374 { 2375 Mat_MPIBAIJ *maij = (Mat_MPIBAIJ *)Y->data; 2376 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)maij->A->data; 2377 2378 PetscFunctionBegin; 2379 if (!Y->preallocated) { 2380 PetscCall(MatMPIBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL, 0, NULL)); 2381 } else if (!aij->nz) { 2382 PetscInt nonew = aij->nonew; 2383 PetscCall(MatSeqBAIJSetPreallocation(maij->A, Y->rmap->bs, 1, NULL)); 2384 aij->nonew = nonew; 2385 } 2386 PetscCall(MatShift_Basic(Y, a)); 2387 PetscFunctionReturn(PETSC_SUCCESS); 2388 } 2389 2390 static PetscErrorCode MatMissingDiagonal_MPIBAIJ(Mat A, PetscBool *missing, PetscInt *d) 2391 { 2392 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2393 2394 PetscFunctionBegin; 2395 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices"); 2396 PetscCall(MatMissingDiagonal(a->A, missing, d)); 2397 if (d) { 2398 PetscInt rstart; 2399 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 2400 *d += rstart / A->rmap->bs; 2401 } 2402 PetscFunctionReturn(PETSC_SUCCESS); 2403 } 2404 2405 static PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A, Mat *a) 2406 { 2407 PetscFunctionBegin; 2408 *a = ((Mat_MPIBAIJ *)A->data)->A; 2409 PetscFunctionReturn(PETSC_SUCCESS); 2410 } 2411 2412 static PetscErrorCode MatEliminateZeros_MPIBAIJ(Mat A, PetscBool keep) 2413 { 2414 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2415 2416 PetscFunctionBegin; 2417 PetscCall(MatEliminateZeros_SeqBAIJ(a->A, keep)); // possibly keep zero diagonal coefficients 2418 PetscCall(MatEliminateZeros_SeqBAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients 2419 PetscFunctionReturn(PETSC_SUCCESS); 2420 } 2421 2422 static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ, 2423 MatGetRow_MPIBAIJ, 2424 MatRestoreRow_MPIBAIJ, 2425 MatMult_MPIBAIJ, 2426 /* 4*/ MatMultAdd_MPIBAIJ, 2427 MatMultTranspose_MPIBAIJ, 2428 MatMultTransposeAdd_MPIBAIJ, 2429 NULL, 2430 NULL, 2431 NULL, 2432 /*10*/ NULL, 2433 NULL, 2434 NULL, 2435 MatSOR_MPIBAIJ, 2436 MatTranspose_MPIBAIJ, 2437 /*15*/ MatGetInfo_MPIBAIJ, 2438 MatEqual_MPIBAIJ, 2439 MatGetDiagonal_MPIBAIJ, 2440 MatDiagonalScale_MPIBAIJ, 2441 MatNorm_MPIBAIJ, 2442 /*20*/ MatAssemblyBegin_MPIBAIJ, 2443 MatAssemblyEnd_MPIBAIJ, 2444 MatSetOption_MPIBAIJ, 2445 MatZeroEntries_MPIBAIJ, 2446 /*24*/ MatZeroRows_MPIBAIJ, 2447 NULL, 2448 NULL, 2449 NULL, 2450 NULL, 2451 /*29*/ MatSetUp_MPI_Hash, 2452 NULL, 2453 NULL, 2454 MatGetDiagonalBlock_MPIBAIJ, 2455 NULL, 2456 /*34*/ MatDuplicate_MPIBAIJ, 2457 NULL, 2458 NULL, 2459 NULL, 2460 NULL, 2461 /*39*/ MatAXPY_MPIBAIJ, 2462 MatCreateSubMatrices_MPIBAIJ, 2463 MatIncreaseOverlap_MPIBAIJ, 2464 MatGetValues_MPIBAIJ, 2465 MatCopy_MPIBAIJ, 2466 /*44*/ NULL, 2467 MatScale_MPIBAIJ, 2468 MatShift_MPIBAIJ, 2469 NULL, 2470 MatZeroRowsColumns_MPIBAIJ, 2471 /*49*/ NULL, 2472 NULL, 2473 NULL, 2474 NULL, 2475 NULL, 2476 /*54*/ MatFDColoringCreate_MPIXAIJ, 2477 NULL, 2478 MatSetUnfactored_MPIBAIJ, 2479 MatPermute_MPIBAIJ, 2480 MatSetValuesBlocked_MPIBAIJ, 2481 /*59*/ MatCreateSubMatrix_MPIBAIJ, 2482 MatDestroy_MPIBAIJ, 2483 MatView_MPIBAIJ, 2484 NULL, 2485 NULL, 2486 /*64*/ NULL, 2487 NULL, 2488 NULL, 2489 NULL, 2490 NULL, 2491 /*69*/ MatGetRowMaxAbs_MPIBAIJ, 2492 NULL, 2493 NULL, 2494 NULL, 2495 NULL, 2496 /*74*/ NULL, 2497 MatFDColoringApply_BAIJ, 2498 NULL, 2499 NULL, 2500 NULL, 2501 /*79*/ NULL, 2502 NULL, 2503 NULL, 2504 NULL, 2505 MatLoad_MPIBAIJ, 2506 /*84*/ NULL, 2507 NULL, 2508 NULL, 2509 NULL, 2510 NULL, 
2511 /*89*/ NULL, 2512 NULL, 2513 NULL, 2514 NULL, 2515 NULL, 2516 /*94*/ NULL, 2517 NULL, 2518 NULL, 2519 NULL, 2520 NULL, 2521 /*99*/ NULL, 2522 NULL, 2523 NULL, 2524 MatConjugate_MPIBAIJ, 2525 NULL, 2526 /*104*/ NULL, 2527 MatRealPart_MPIBAIJ, 2528 MatImaginaryPart_MPIBAIJ, 2529 NULL, 2530 NULL, 2531 /*109*/ NULL, 2532 NULL, 2533 NULL, 2534 NULL, 2535 MatMissingDiagonal_MPIBAIJ, 2536 /*114*/ MatGetSeqNonzeroStructure_MPIBAIJ, 2537 NULL, 2538 MatGetGhosts_MPIBAIJ, 2539 NULL, 2540 NULL, 2541 /*119*/ NULL, 2542 NULL, 2543 NULL, 2544 NULL, 2545 MatGetMultiProcBlock_MPIBAIJ, 2546 /*124*/ NULL, 2547 MatGetColumnReductions_MPIBAIJ, 2548 MatInvertBlockDiagonal_MPIBAIJ, 2549 NULL, 2550 NULL, 2551 /*129*/ NULL, 2552 NULL, 2553 NULL, 2554 NULL, 2555 NULL, 2556 /*134*/ NULL, 2557 NULL, 2558 NULL, 2559 NULL, 2560 NULL, 2561 /*139*/ MatSetBlockSizes_Default, 2562 NULL, 2563 NULL, 2564 MatFDColoringSetUp_MPIXAIJ, 2565 NULL, 2566 /*144*/ MatCreateMPIMatConcatenateSeqMat_MPIBAIJ, 2567 NULL, 2568 NULL, 2569 NULL, 2570 NULL, 2571 NULL, 2572 /*150*/ NULL, 2573 MatEliminateZeros_MPIBAIJ, 2574 MatGetRowSumAbs_MPIBAIJ, 2575 NULL, 2576 NULL, 2577 /*155*/ NULL, 2578 MatCopyHashToXAIJ_MPI_Hash}; 2579 2580 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *); 2581 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *); 2582 2583 static PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[]) 2584 { 2585 PetscInt m, rstart, cstart, cend; 2586 PetscInt i, j, dlen, olen, nz, nz_max = 0, *d_nnz = NULL, *o_nnz = NULL; 2587 const PetscInt *JJ = NULL; 2588 PetscScalar *values = NULL; 2589 PetscBool roworiented = ((Mat_MPIBAIJ *)B->data)->roworiented; 2590 PetscBool nooffprocentries; 2591 2592 PetscFunctionBegin; 2593 PetscCall(PetscLayoutSetBlockSize(B->rmap, bs)); 2594 PetscCall(PetscLayoutSetBlockSize(B->cmap, bs)); 2595 PetscCall(PetscLayoutSetUp(B->rmap)); 2596 PetscCall(PetscLayoutSetUp(B->cmap)); 2597 PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 2598 m = B->rmap->n / bs; 2599 rstart = B->rmap->rstart / bs; 2600 cstart = B->cmap->rstart / bs; 2601 cend = B->cmap->rend / bs; 2602 2603 PetscCheck(!ii[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]); 2604 PetscCall(PetscMalloc2(m, &d_nnz, m, &o_nnz)); 2605 for (i = 0; i < m; i++) { 2606 nz = ii[i + 1] - ii[i]; 2607 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz); 2608 nz_max = PetscMax(nz_max, nz); 2609 dlen = 0; 2610 olen = 0; 2611 JJ = jj + ii[i]; 2612 for (j = 0; j < nz; j++) { 2613 if (*JJ < cstart || *JJ >= cend) olen++; 2614 else dlen++; 2615 JJ++; 2616 } 2617 d_nnz[i] = dlen; 2618 o_nnz[i] = olen; 2619 } 2620 PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, d_nnz, 0, o_nnz)); 2621 PetscCall(PetscFree2(d_nnz, o_nnz)); 2622 2623 values = (PetscScalar *)V; 2624 if (!values) PetscCall(PetscCalloc1(bs * bs * nz_max, &values)); 2625 for (i = 0; i < m; i++) { 2626 PetscInt row = i + rstart; 2627 PetscInt ncols = ii[i + 1] - ii[i]; 2628 const PetscInt *icols = jj + ii[i]; 2629 if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */ 2630 const PetscScalar *svals = values + (V ? 
(bs * bs * ii[i]) : 0); 2631 PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, ncols, icols, svals, INSERT_VALUES)); 2632 } else { /* block ordering does not match so we can only insert one block at a time. */ 2633 PetscInt j; 2634 for (j = 0; j < ncols; j++) { 2635 const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0); 2636 PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, 1, &icols[j], svals, INSERT_VALUES)); 2637 } 2638 } 2639 } 2640 2641 if (!V) PetscCall(PetscFree(values)); 2642 nooffprocentries = B->nooffprocentries; 2643 B->nooffprocentries = PETSC_TRUE; 2644 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 2645 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 2646 B->nooffprocentries = nooffprocentries; 2647 2648 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 2649 PetscFunctionReturn(PETSC_SUCCESS); 2650 } 2651 2652 /*@C 2653 MatMPIBAIJSetPreallocationCSR - Creates a sparse parallel matrix in `MATBAIJ` format using the given nonzero structure and (optional) numerical values 2654 2655 Collective 2656 2657 Input Parameters: 2658 + B - the matrix 2659 . bs - the block size 2660 . i - the indices into `j` for the start of each local row (starts with zero) 2661 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2662 - v - optional values in the matrix, use `NULL` if not provided 2663 2664 Level: advanced 2665 2666 Notes: 2667 The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc; 2668 thus you CANNOT change the matrix entries by changing the values of `v` after you have 2669 called this routine. 2670 2671 The order of the entries in values is specified by the `MatOption` `MAT_ROW_ORIENTED`. For example, C programs 2672 may want to use the default `MAT_ROW_ORIENTED` with value `PETSC_TRUE` and use an array v[nnz][bs][bs] where the second index is 2673 over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 2674 `MAT_ROW_ORIENTED` with value `PETSC_FALSE` and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 2675 block column and the second index is over columns within a block. 
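   A minimal illustrative sketch (the sizes and arrays below are made up, not taken from a PETSc example); each rank passes the CSR
   description of its own block rows, with block (not point) indices in `i` and `j`
.vb
   Mat      B;
   PetscInt bs   = 2;
   PetscInt ii[] = {0, 2, 3};  // two local block rows with 2 and 1 block nonzeros
   PetscInt jj[] = {0, 5, 1};  // global block column indices, sorted within each row

   PetscCall(MatCreate(PETSC_COMM_WORLD, &B));
   PetscCall(MatSetSizes(B, 2 * bs, PETSC_DECIDE, PETSC_DETERMINE, 12 * bs));
   PetscCall(MatSetType(B, MATMPIBAIJ));
   PetscCall(MatMPIBAIJSetPreallocationCSR(B, bs, ii, jj, NULL));  // pass an array of bs*bs blocks instead of NULL to also set values
.ve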
2676 2677 Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well 2678 2679 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MATMPIAIJ`, `MatCreateMPIBAIJWithArrays()`, `MATMPIBAIJ` 2680 @*/ 2681 PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[]) 2682 { 2683 PetscFunctionBegin; 2684 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 2685 PetscValidType(B, 1); 2686 PetscValidLogicalCollectiveInt(B, bs, 2); 2687 PetscTryMethod(B, "MatMPIBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v)); 2688 PetscFunctionReturn(PETSC_SUCCESS); 2689 } 2690 2691 PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz) 2692 { 2693 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 2694 PetscInt i; 2695 PetscMPIInt size; 2696 2697 PetscFunctionBegin; 2698 if (B->hash_active) { 2699 B->ops[0] = b->cops; 2700 B->hash_active = PETSC_FALSE; 2701 } 2702 if (!B->preallocated) PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), bs, &B->bstash)); 2703 PetscCall(MatSetBlockSize(B, PetscAbs(bs))); 2704 PetscCall(PetscLayoutSetUp(B->rmap)); 2705 PetscCall(PetscLayoutSetUp(B->cmap)); 2706 PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 2707 2708 if (d_nnz) { 2709 for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(d_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "d_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, d_nnz[i]); 2710 } 2711 if (o_nnz) { 2712 for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(o_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "o_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, o_nnz[i]); 2713 } 2714 2715 b->bs2 = bs * bs; 2716 b->mbs = B->rmap->n / bs; 2717 b->nbs = B->cmap->n / bs; 2718 b->Mbs = B->rmap->N / bs; 2719 b->Nbs = B->cmap->N / bs; 2720 2721 for (i = 0; i <= b->size; i++) b->rangebs[i] = B->rmap->range[i] / bs; 2722 b->rstartbs = B->rmap->rstart / bs; 2723 b->rendbs = B->rmap->rend / bs; 2724 b->cstartbs = B->cmap->rstart / bs; 2725 b->cendbs = B->cmap->rend / bs; 2726 2727 #if defined(PETSC_USE_CTABLE) 2728 PetscCall(PetscHMapIDestroy(&b->colmap)); 2729 #else 2730 PetscCall(PetscFree(b->colmap)); 2731 #endif 2732 PetscCall(PetscFree(b->garray)); 2733 PetscCall(VecDestroy(&b->lvec)); 2734 PetscCall(VecScatterDestroy(&b->Mvctx)); 2735 2736 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 2737 2738 MatSeqXAIJGetOptions_Private(b->B); 2739 PetscCall(MatDestroy(&b->B)); 2740 PetscCall(MatCreate(PETSC_COMM_SELF, &b->B)); 2741 PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? 
B->cmap->N : 0)); 2742 PetscCall(MatSetType(b->B, MATSEQBAIJ)); 2743 MatSeqXAIJRestoreOptions_Private(b->B); 2744 2745 MatSeqXAIJGetOptions_Private(b->A); 2746 PetscCall(MatDestroy(&b->A)); 2747 PetscCall(MatCreate(PETSC_COMM_SELF, &b->A)); 2748 PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n)); 2749 PetscCall(MatSetType(b->A, MATSEQBAIJ)); 2750 MatSeqXAIJRestoreOptions_Private(b->A); 2751 2752 PetscCall(MatSeqBAIJSetPreallocation(b->A, bs, d_nz, d_nnz)); 2753 PetscCall(MatSeqBAIJSetPreallocation(b->B, bs, o_nz, o_nnz)); 2754 B->preallocated = PETSC_TRUE; 2755 B->was_assembled = PETSC_FALSE; 2756 B->assembled = PETSC_FALSE; 2757 PetscFunctionReturn(PETSC_SUCCESS); 2758 } 2759 2760 extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat, Vec); 2761 extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat, PetscReal); 2762 2763 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype, MatReuse reuse, Mat *adj) 2764 { 2765 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 2766 Mat_SeqBAIJ *d = (Mat_SeqBAIJ *)b->A->data, *o = (Mat_SeqBAIJ *)b->B->data; 2767 PetscInt M = B->rmap->n / B->rmap->bs, i, *ii, *jj, cnt, j, k, rstart = B->rmap->rstart / B->rmap->bs; 2768 const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray; 2769 2770 PetscFunctionBegin; 2771 PetscCall(PetscMalloc1(M + 1, &ii)); 2772 ii[0] = 0; 2773 for (i = 0; i < M; i++) { 2774 PetscCheck((id[i + 1] - id[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, id[i], id[i + 1]); 2775 PetscCheck((io[i + 1] - io[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, io[i], io[i + 1]); 2776 ii[i + 1] = ii[i] + id[i + 1] - id[i] + io[i + 1] - io[i]; 2777 /* remove one from count of matrix has diagonal */ 2778 for (j = id[i]; j < id[i + 1]; j++) { 2779 if (jd[j] == i) { 2780 ii[i + 1]--; 2781 break; 2782 } 2783 } 2784 } 2785 PetscCall(PetscMalloc1(ii[M], &jj)); 2786 cnt = 0; 2787 for (i = 0; i < M; i++) { 2788 for (j = io[i]; j < io[i + 1]; j++) { 2789 if (garray[jo[j]] > rstart) break; 2790 jj[cnt++] = garray[jo[j]]; 2791 } 2792 for (k = id[i]; k < id[i + 1]; k++) { 2793 if (jd[k] != i) jj[cnt++] = rstart + jd[k]; 2794 } 2795 for (; j < io[i + 1]; j++) jj[cnt++] = garray[jo[j]]; 2796 } 2797 PetscCall(MatCreateMPIAdj(PetscObjectComm((PetscObject)B), M, B->cmap->N / B->rmap->bs, ii, jj, NULL, adj)); 2798 PetscFunctionReturn(PETSC_SUCCESS); 2799 } 2800 2801 #include <../src/mat/impls/aij/mpi/mpiaij.h> 2802 2803 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType, MatReuse, Mat *); 2804 2805 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 2806 { 2807 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2808 Mat_MPIAIJ *b; 2809 Mat B; 2810 2811 PetscFunctionBegin; 2812 PetscCheck(A->assembled, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Matrix must be assembled"); 2813 2814 if (reuse == MAT_REUSE_MATRIX) { 2815 B = *newmat; 2816 } else { 2817 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 2818 PetscCall(MatSetType(B, MATMPIAIJ)); 2819 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 2820 PetscCall(MatSetBlockSizes(B, A->rmap->bs, A->cmap->bs)); 2821 PetscCall(MatSeqAIJSetPreallocation(B, 0, NULL)); 2822 PetscCall(MatMPIAIJSetPreallocation(B, 0, NULL, 0, NULL)); 2823 } 2824 b = (Mat_MPIAIJ *)B->data; 2825 2826 if (reuse == MAT_REUSE_MATRIX) { 2827 
PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_REUSE_MATRIX, &b->A)); 2828 PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_REUSE_MATRIX, &b->B)); 2829 } else { 2830 PetscInt *garray = a->garray; 2831 Mat_SeqAIJ *bB; 2832 PetscInt bs, nnz; 2833 PetscCall(MatDestroy(&b->A)); 2834 PetscCall(MatDestroy(&b->B)); 2835 /* just clear out the data structure */ 2836 PetscCall(MatDisAssemble_MPIAIJ(B, PETSC_FALSE)); 2837 PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A)); 2838 PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B)); 2839 2840 /* Global numbering for b->B columns */ 2841 bB = (Mat_SeqAIJ *)b->B->data; 2842 bs = A->rmap->bs; 2843 nnz = bB->i[A->rmap->n]; 2844 for (PetscInt k = 0; k < nnz; k++) { 2845 PetscInt bj = bB->j[k] / bs; 2846 PetscInt br = bB->j[k] % bs; 2847 bB->j[k] = garray[bj] * bs + br; 2848 } 2849 } 2850 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 2851 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 2852 2853 if (reuse == MAT_INPLACE_MATRIX) { 2854 PetscCall(MatHeaderReplace(A, &B)); 2855 } else { 2856 *newmat = B; 2857 } 2858 PetscFunctionReturn(PETSC_SUCCESS); 2859 } 2860 2861 /*MC 2862 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 2863 2864 Options Database Keys: 2865 + -mat_type mpibaij - sets the matrix type to `MATMPIBAIJ` during a call to `MatSetFromOptions()` 2866 . -mat_block_size <bs> - set the blocksize used to store the matrix 2867 . -mat_baij_mult_version version - indicate the version of the matrix-vector product to use (0 often indicates using BLAS) 2868 - -mat_use_hash_table <fact> - set hash table factor 2869 2870 Level: beginner 2871 2872 Note: 2873 `MatSetOption(A, MAT_STRUCTURE_ONLY, PETSC_TRUE)` may be called for this matrix type. 
In this no 2874 space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored 2875 2876 .seealso: `Mat`, `MATBAIJ`, `MATSEQBAIJ`, `MatCreateBAIJ` 2877 M*/ 2878 2879 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat, MatType, MatReuse, Mat *); 2880 2881 PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B) 2882 { 2883 Mat_MPIBAIJ *b; 2884 PetscBool flg = PETSC_FALSE; 2885 2886 PetscFunctionBegin; 2887 PetscCall(PetscNew(&b)); 2888 B->data = (void *)b; 2889 B->ops[0] = MatOps_Values; 2890 B->assembled = PETSC_FALSE; 2891 2892 B->insertmode = NOT_SET_VALUES; 2893 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank)); 2894 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &b->size)); 2895 2896 /* build local table of row and column ownerships */ 2897 PetscCall(PetscMalloc1(b->size + 1, &b->rangebs)); 2898 2899 /* build cache for off array entries formed */ 2900 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash)); 2901 2902 b->donotstash = PETSC_FALSE; 2903 b->colmap = NULL; 2904 b->garray = NULL; 2905 b->roworiented = PETSC_TRUE; 2906 2907 /* stuff used in block assembly */ 2908 b->barray = NULL; 2909 2910 /* stuff used for matrix vector multiply */ 2911 b->lvec = NULL; 2912 b->Mvctx = NULL; 2913 2914 /* stuff for MatGetRow() */ 2915 b->rowindices = NULL; 2916 b->rowvalues = NULL; 2917 b->getrowactive = PETSC_FALSE; 2918 2919 /* hash table stuff */ 2920 b->ht = NULL; 2921 b->hd = NULL; 2922 b->ht_size = 0; 2923 b->ht_flag = PETSC_FALSE; 2924 b->ht_fact = 0; 2925 b->ht_total_ct = 0; 2926 b->ht_insert_ct = 0; 2927 2928 /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */ 2929 b->ijonly = PETSC_FALSE; 2930 2931 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiadj_C", MatConvert_MPIBAIJ_MPIAdj)); 2932 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiaij_C", MatConvert_MPIBAIJ_MPIAIJ)); 2933 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpisbaij_C", MatConvert_MPIBAIJ_MPISBAIJ)); 2934 #if defined(PETSC_HAVE_HYPRE) 2935 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_hypre_C", MatConvert_AIJ_HYPRE)); 2936 #endif 2937 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIBAIJ)); 2938 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIBAIJ)); 2939 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocation_C", MatMPIBAIJSetPreallocation_MPIBAIJ)); 2940 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocationCSR_C", MatMPIBAIJSetPreallocationCSR_MPIBAIJ)); 2941 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIBAIJ)); 2942 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetHashTableFactor_C", MatSetHashTableFactor_MPIBAIJ)); 2943 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_is_C", MatConvert_XAIJ_IS)); 2944 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIBAIJ)); 2945 2946 PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Options for loading MPIBAIJ matrix 1", "Mat"); 2947 PetscCall(PetscOptionsName("-mat_use_hash_table", "Use hash table to save time in constructing matrix", "MatSetOption", &flg)); 2948 if (flg) { 2949 PetscReal fact = 1.39; 2950 PetscCall(MatSetOption(B, MAT_USE_HASH_TABLE, PETSC_TRUE)); 2951 PetscCall(PetscOptionsReal("-mat_use_hash_table", "Use 
hash table factor", "MatMPIBAIJSetHashTableFactor", fact, &fact, NULL)); 2952 if (fact <= 1.0) fact = 1.39; 2953 PetscCall(MatMPIBAIJSetHashTableFactor(B, fact)); 2954 PetscCall(PetscInfo(B, "Hash table Factor used %5.2g\n", (double)fact)); 2955 } 2956 PetscOptionsEnd(); 2957 PetscFunctionReturn(PETSC_SUCCESS); 2958 } 2959 2960 // PetscClangLinter pragma disable: -fdoc-section-header-unknown 2961 /*MC 2962 MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 2963 2964 This matrix type is identical to `MATSEQBAIJ` when constructed with a single process communicator, 2965 and `MATMPIBAIJ` otherwise. 2966 2967 Options Database Keys: 2968 . -mat_type baij - sets the matrix type to `MATBAIJ` during a call to `MatSetFromOptions()` 2969 2970 Level: beginner 2971 2972 .seealso: `Mat`, `MatCreateBAIJ()`, `MATSEQBAIJ`, `MATMPIBAIJ`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()` 2973 M*/ 2974 2975 /*@ 2976 MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in `MATMPIBAIJ` format 2977 (block compressed row). 2978 2979 Collective 2980 2981 Input Parameters: 2982 + B - the matrix 2983 . bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row 2984 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()` 2985 . d_nz - number of block nonzeros per block row in diagonal portion of local 2986 submatrix (same for all local rows) 2987 . d_nnz - array containing the number of block nonzeros in the various block rows 2988 of the in diagonal portion of the local (possibly different for each block 2989 row) or `NULL`. If you plan to factor the matrix you must leave room for the diagonal entry and 2990 set it even if it is zero. 2991 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2992 submatrix (same for all local rows). 2993 - o_nnz - array containing the number of nonzeros in the various block rows of the 2994 off-diagonal portion of the local submatrix (possibly different for 2995 each block row) or `NULL`. 2996 2997 If the *_nnz parameter is given then the *_nz parameter is ignored 2998 2999 Options Database Keys: 3000 + -mat_block_size - size of the blocks to use 3001 - -mat_use_hash_table <fact> - set hash table factor 3002 3003 Level: intermediate 3004 3005 Notes: 3006 For good matrix assembly performance 3007 the user should preallocate the matrix storage by setting the parameters 3008 `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). By setting these parameters accurately, 3009 performance can be increased by more than a factor of 50. 3010 3011 If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor 3012 than it must be used on all processors that share the object for that argument. 3013 3014 Storage Information: 3015 For a square global matrix we define each processor's diagonal portion 3016 to be its local rows and the corresponding columns (a square submatrix); 3017 each processor's off-diagonal portion encompasses the remainder of the 3018 local matrix (a rectangular submatrix). 3019 3020 The user can specify preallocated storage for the diagonal part of 3021 the local submatrix with either `d_nz` or `d_nnz` (not both). Set 3022 `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic 3023 memory allocation. 
Likewise, specify preallocated storage for the 3024 off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both). 3025 3026 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3027 the figure below we depict these three local rows and all columns (0-11). 3028 3029 .vb 3030 0 1 2 3 4 5 6 7 8 9 10 11 3031 -------------------------- 3032 row 3 |o o o d d d o o o o o o 3033 row 4 |o o o d d d o o o o o o 3034 row 5 |o o o d d d o o o o o o 3035 -------------------------- 3036 .ve 3037 3038 Thus, any entries in the d locations are stored in the d (diagonal) 3039 submatrix, and any entries in the o locations are stored in the 3040 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3041 stored simply in the `MATSEQBAIJ` format for compressed row storage. 3042 3043 Now `d_nz` should indicate the number of block nonzeros per row in the d matrix, 3044 and `o_nz` should indicate the number of block nonzeros per row in the o matrix. 3045 In general, for PDE problems in which most nonzeros are near the diagonal, 3046 one expects `d_nz` >> `o_nz`. 3047 3048 You can call `MatGetInfo()` to get information on how effective the preallocation was; 3049 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3050 You can also run with the option `-info` and look for messages with the string 3051 malloc in them to see if additional memory allocation was needed. 3052 3053 .seealso: `Mat`, `MATMPIBAIJ`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatMPIBAIJSetPreallocationCSR()`, `PetscSplitOwnership()` 3054 @*/ 3055 PetscErrorCode MatMPIBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[]) 3056 { 3057 PetscFunctionBegin; 3058 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 3059 PetscValidType(B, 1); 3060 PetscValidLogicalCollectiveInt(B, bs, 2); 3061 PetscTryMethod(B, "MatMPIBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, bs, d_nz, d_nnz, o_nz, o_nnz)); 3062 PetscFunctionReturn(PETSC_SUCCESS); 3063 } 3064 3065 // PetscClangLinter pragma disable: -fdoc-section-header-unknown 3066 /*@ 3067 MatCreateBAIJ - Creates a sparse parallel matrix in `MATBAIJ` format 3068 (block compressed row). 3069 3070 Collective 3071 3072 Input Parameters: 3073 + comm - MPI communicator 3074 . bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row 3075 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()` 3076 . m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given) 3077 This value should be the same as the local size used in creating the 3078 y vector for the matrix-vector product y = Ax. 3079 . n - number of local columns (or `PETSC_DECIDE` to have calculated if N is given) 3080 This value should be the same as the local size used in creating the 3081 x vector for the matrix-vector product y = Ax. 3082 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 3083 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 3084 . d_nz - number of nonzero blocks per block row in diagonal portion of local 3085 submatrix (same for all local rows) 3086 . 
d_nnz - array containing the number of nonzero blocks in the various block rows
3087 in the diagonal portion of the local submatrix (possibly different for each block
3088 row) or NULL. If you plan to factor the matrix you must leave room for the diagonal entry
3089 and set it even if it is zero.
3090 . o_nz - number of nonzero blocks per block row in the off-diagonal portion of local
3091 submatrix (same for all local rows).
3092 - o_nnz - array containing the number of nonzero blocks in the various block rows of the
3093 off-diagonal portion of the local submatrix (possibly different for
3094 each block row) or NULL.
3095
3096 Output Parameter:
3097 . A - the matrix
3098
3099 Options Database Keys:
3100 + -mat_block_size - size of the blocks to use
3101 - -mat_use_hash_table <fact> - set hash table factor
3102
3103 Level: intermediate
3104
3105 Notes:
3106 It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
3107 MatXXXXSetPreallocation() paradigm instead of this routine directly.
3108 [MatXXXXSetPreallocation() is, for example, `MatSeqBAIJSetPreallocation()`]
3109
3110 For good matrix assembly performance
3111 the user should preallocate the matrix storage by setting the parameters
3112 `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). By setting these parameters accurately,
3113 performance can be increased by more than a factor of 50.
3114
3115 If the *_nnz parameter is given then the *_nz parameter is ignored.
3116
3117 A nonzero block is any block that has 1 or more nonzeros in it.
3118
3119 The user MUST specify either the local or global matrix dimensions
3120 (possibly both).
3121
3122 If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor
3123 then it must be used on all processors that share the object for that argument.
3124
3125 If `m` and `n` are not `PETSC_DECIDE`, then the values determine the `PetscLayout` of the matrix and the ranges returned by
3126 `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, and `MatGetOwnershipRangesColumn()`.
3127
3128 Storage Information:
3129 For a square global matrix we define each processor's diagonal portion
3130 to be its local rows and the corresponding columns (a square submatrix);
3131 each processor's off-diagonal portion encompasses the remainder of the
3132 local matrix (a rectangular submatrix).
3133
3134 The user can specify preallocated storage for the diagonal part of
3135 the local submatrix with either d_nz or d_nnz (not both). Set
3136 `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic
3137 memory allocation. Likewise, specify preallocated storage for the
3138 off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both).
3139
3140 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
3141 the figure below we depict these three local rows and all columns (0-11).
3142
3143 .vb
3144 0 1 2 3 4 5 6 7 8 9 10 11
3145 --------------------------
3146 row 3 |o o o d d d o o o o o o
3147 row 4 |o o o d d d o o o o o o
3148 row 5 |o o o d d d o o o o o o
3149 --------------------------
3150 .ve
3151
3152 Thus, any entries in the d locations are stored in the d (diagonal)
3153 submatrix, and any entries in the o locations are stored in the
3154 o (off-diagonal) submatrix. Note that the d and the o submatrices are
3155 stored simply in the `MATSEQBAIJ` format for compressed row storage.
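
   As a concrete illustration (a hedged sketch with made-up counts; the real counts must match the assembled nonzero pattern): if `bs` is 1
   and each of the three rows depicted above had, say, 3 nonzero entries in the d region and 2 in the o region, the owning process
   (here with 3 local rows and 3 local columns) could preallocate with either the array or the scalar form. The variables `comm` and `A`
   are assumed to be declared by the caller.

.vb
   PetscInt d_nnz[] = {3, 3, 3}, o_nnz[] = {2, 2, 2}; /* illustrative per-row block counts */
   PetscCall(MatCreateBAIJ(comm, 1, 3, 3, PETSC_DETERMINE, PETSC_DETERMINE, 0, d_nnz, 0, o_nnz, &A));
   /* or, since this pattern is uniform: pass d_nz = 3, o_nz = 2 with d_nnz = o_nnz = NULL */
.ve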
3156 3157 Now `d_nz` should indicate the number of block nonzeros per row in the d matrix, 3158 and `o_nz` should indicate the number of block nonzeros per row in the o matrix. 3159 In general, for PDE problems in which most nonzeros are near the diagonal, 3160 one expects `d_nz` >> `o_nz`. 3161 3162 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()`, 3163 `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, `MatGetOwnershipRangesColumn()`, `PetscLayout` 3164 @*/ 3165 PetscErrorCode MatCreateBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A) 3166 { 3167 PetscMPIInt size; 3168 3169 PetscFunctionBegin; 3170 PetscCall(MatCreate(comm, A)); 3171 PetscCall(MatSetSizes(*A, m, n, M, N)); 3172 PetscCallMPI(MPI_Comm_size(comm, &size)); 3173 if (size > 1) { 3174 PetscCall(MatSetType(*A, MATMPIBAIJ)); 3175 PetscCall(MatMPIBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz)); 3176 } else { 3177 PetscCall(MatSetType(*A, MATSEQBAIJ)); 3178 PetscCall(MatSeqBAIJSetPreallocation(*A, bs, d_nz, d_nnz)); 3179 } 3180 PetscFunctionReturn(PETSC_SUCCESS); 3181 } 3182 3183 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat) 3184 { 3185 Mat mat; 3186 Mat_MPIBAIJ *a, *oldmat = (Mat_MPIBAIJ *)matin->data; 3187 PetscInt len = 0; 3188 3189 PetscFunctionBegin; 3190 *newmat = NULL; 3191 PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat)); 3192 PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N)); 3193 PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name)); 3194 3195 PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap)); 3196 PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap)); 3197 if (matin->hash_active) { 3198 PetscCall(MatSetUp(mat)); 3199 } else { 3200 mat->factortype = matin->factortype; 3201 mat->preallocated = PETSC_TRUE; 3202 mat->assembled = PETSC_TRUE; 3203 mat->insertmode = NOT_SET_VALUES; 3204 3205 a = (Mat_MPIBAIJ *)mat->data; 3206 mat->rmap->bs = matin->rmap->bs; 3207 a->bs2 = oldmat->bs2; 3208 a->mbs = oldmat->mbs; 3209 a->nbs = oldmat->nbs; 3210 a->Mbs = oldmat->Mbs; 3211 a->Nbs = oldmat->Nbs; 3212 3213 a->size = oldmat->size; 3214 a->rank = oldmat->rank; 3215 a->donotstash = oldmat->donotstash; 3216 a->roworiented = oldmat->roworiented; 3217 a->rowindices = NULL; 3218 a->rowvalues = NULL; 3219 a->getrowactive = PETSC_FALSE; 3220 a->barray = NULL; 3221 a->rstartbs = oldmat->rstartbs; 3222 a->rendbs = oldmat->rendbs; 3223 a->cstartbs = oldmat->cstartbs; 3224 a->cendbs = oldmat->cendbs; 3225 3226 /* hash table stuff */ 3227 a->ht = NULL; 3228 a->hd = NULL; 3229 a->ht_size = 0; 3230 a->ht_flag = oldmat->ht_flag; 3231 a->ht_fact = oldmat->ht_fact; 3232 a->ht_total_ct = 0; 3233 a->ht_insert_ct = 0; 3234 3235 PetscCall(PetscArraycpy(a->rangebs, oldmat->rangebs, a->size + 1)); 3236 if (oldmat->colmap) { 3237 #if defined(PETSC_USE_CTABLE) 3238 PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap)); 3239 #else 3240 PetscCall(PetscMalloc1(a->Nbs, &a->colmap)); 3241 PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, a->Nbs)); 3242 #endif 3243 } else a->colmap = NULL; 3244 3245 if (oldmat->garray && (len = ((Mat_SeqBAIJ *)oldmat->B->data)->nbs)) { 3246 PetscCall(PetscMalloc1(len, &a->garray)); 3247 PetscCall(PetscArraycpy(a->garray, oldmat->garray, len)); 3248 } else 
a->garray = NULL; 3249 3250 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin), matin->rmap->bs, &mat->bstash)); 3251 PetscCall(VecDuplicate(oldmat->lvec, &a->lvec)); 3252 PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx)); 3253 3254 PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A)); 3255 PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B)); 3256 } 3257 PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist)); 3258 *newmat = mat; 3259 PetscFunctionReturn(PETSC_SUCCESS); 3260 } 3261 3262 /* Used for both MPIBAIJ and MPISBAIJ matrices */ 3263 PetscErrorCode MatLoad_MPIBAIJ_Binary(Mat mat, PetscViewer viewer) 3264 { 3265 PetscInt header[4], M, N, nz, bs, m, n, mbs, nbs, rows, cols, sum, i, j, k; 3266 PetscInt *rowidxs, *colidxs, rs, cs, ce; 3267 PetscScalar *matvals; 3268 3269 PetscFunctionBegin; 3270 PetscCall(PetscViewerSetUp(viewer)); 3271 3272 /* read in matrix header */ 3273 PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT)); 3274 PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file"); 3275 M = header[1]; 3276 N = header[2]; 3277 nz = header[3]; 3278 PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M); 3279 PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N); 3280 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIBAIJ"); 3281 3282 /* set block sizes from the viewer's .info file */ 3283 PetscCall(MatLoad_Binary_BlockSizes(mat, viewer)); 3284 /* set local sizes if not set already */ 3285 if (mat->rmap->n < 0 && M == N) mat->rmap->n = mat->cmap->n; 3286 if (mat->cmap->n < 0 && M == N) mat->cmap->n = mat->rmap->n; 3287 /* set global sizes if not set already */ 3288 if (mat->rmap->N < 0) mat->rmap->N = M; 3289 if (mat->cmap->N < 0) mat->cmap->N = N; 3290 PetscCall(PetscLayoutSetUp(mat->rmap)); 3291 PetscCall(PetscLayoutSetUp(mat->cmap)); 3292 3293 /* check if the matrix sizes are correct */ 3294 PetscCall(MatGetSize(mat, &rows, &cols)); 3295 PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols); 3296 PetscCall(MatGetBlockSize(mat, &bs)); 3297 PetscCall(MatGetLocalSize(mat, &m, &n)); 3298 PetscCall(PetscLayoutGetRange(mat->rmap, &rs, NULL)); 3299 PetscCall(PetscLayoutGetRange(mat->cmap, &cs, &ce)); 3300 mbs = m / bs; 3301 nbs = n / bs; 3302 3303 /* read in row lengths and build row indices */ 3304 PetscCall(PetscMalloc1(m + 1, &rowidxs)); 3305 PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT)); 3306 rowidxs[0] = 0; 3307 for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i]; 3308 PetscCallMPI(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer))); 3309 PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum); 3310 3311 /* read in column indices and matrix values */ 3312 PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals)); 3313 PetscCall(PetscViewerBinaryReadAll(viewer, 
colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 3314 PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 3315 3316 { /* preallocate matrix storage */ 3317 PetscBT bt; /* helper bit set to count diagonal nonzeros */ 3318 PetscHSetI ht; /* helper hash set to count off-diagonal nonzeros */ 3319 PetscBool sbaij, done; 3320 PetscInt *d_nnz, *o_nnz; 3321 3322 PetscCall(PetscBTCreate(nbs, &bt)); 3323 PetscCall(PetscHSetICreate(&ht)); 3324 PetscCall(PetscCalloc2(mbs, &d_nnz, mbs, &o_nnz)); 3325 PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPISBAIJ, &sbaij)); 3326 for (i = 0; i < mbs; i++) { 3327 PetscCall(PetscBTMemzero(nbs, bt)); 3328 PetscCall(PetscHSetIClear(ht)); 3329 for (k = 0; k < bs; k++) { 3330 PetscInt row = bs * i + k; 3331 for (j = rowidxs[row]; j < rowidxs[row + 1]; j++) { 3332 PetscInt col = colidxs[j]; 3333 if (!sbaij || col >= row) { 3334 if (col >= cs && col < ce) { 3335 if (!PetscBTLookupSet(bt, (col - cs) / bs)) d_nnz[i]++; 3336 } else { 3337 PetscCall(PetscHSetIQueryAdd(ht, col / bs, &done)); 3338 if (done) o_nnz[i]++; 3339 } 3340 } 3341 } 3342 } 3343 } 3344 PetscCall(PetscBTDestroy(&bt)); 3345 PetscCall(PetscHSetIDestroy(&ht)); 3346 PetscCall(MatMPIBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz)); 3347 PetscCall(MatMPISBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz)); 3348 PetscCall(PetscFree2(d_nnz, o_nnz)); 3349 } 3350 3351 /* store matrix values */ 3352 for (i = 0; i < m; i++) { 3353 PetscInt row = rs + i, s = rowidxs[i], e = rowidxs[i + 1]; 3354 PetscUseTypeMethod(mat, setvalues, 1, &row, e - s, colidxs + s, matvals + s, INSERT_VALUES); 3355 } 3356 3357 PetscCall(PetscFree(rowidxs)); 3358 PetscCall(PetscFree2(colidxs, matvals)); 3359 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 3360 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 3361 PetscFunctionReturn(PETSC_SUCCESS); 3362 } 3363 3364 PetscErrorCode MatLoad_MPIBAIJ(Mat mat, PetscViewer viewer) 3365 { 3366 PetscBool isbinary; 3367 3368 PetscFunctionBegin; 3369 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 3370 PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name); 3371 PetscCall(MatLoad_MPIBAIJ_Binary(mat, viewer)); 3372 PetscFunctionReturn(PETSC_SUCCESS); 3373 } 3374 3375 /*@ 3376 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the matrices hash table 3377 3378 Input Parameters: 3379 + mat - the matrix 3380 - fact - factor 3381 3382 Options Database Key: 3383 . 
-mat_use_hash_table <fact> - provide the factor
3384
3385 Level: advanced
3386
3387 .seealso: `Mat`, `MATMPIBAIJ`, `MatSetOption()`
3388 @*/
3389 PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat, PetscReal fact)
3390 {
3391 PetscFunctionBegin;
3392 PetscTryMethod(mat, "MatSetHashTableFactor_C", (Mat, PetscReal), (mat, fact));
3393 PetscFunctionReturn(PETSC_SUCCESS);
3394 }
3395
3396 PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat, PetscReal fact)
3397 {
3398 Mat_MPIBAIJ *baij;
3399
3400 PetscFunctionBegin;
3401 baij = (Mat_MPIBAIJ *)mat->data;
3402 baij->ht_fact = fact;
3403 PetscFunctionReturn(PETSC_SUCCESS);
3404 }
3405
3406 PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[])
3407 {
3408 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
3409 PetscBool flg;
3410
3411 PetscFunctionBegin;
3412 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIBAIJ, &flg));
3413 PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIBAIJ matrix as input");
3414 if (Ad) *Ad = a->A;
3415 if (Ao) *Ao = a->B;
3416 if (colmap) *colmap = a->garray;
3417 PetscFunctionReturn(PETSC_SUCCESS);
3418 }
3419
3420 /*
3421 Special version for direct calls from Fortran (to eliminate two function call overheads)
3422 */
3423 #if defined(PETSC_HAVE_FORTRAN_CAPS)
3424 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
3425 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
3426 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
3427 #endif
3428
3429 // PetscClangLinter pragma disable: -fdoc-synopsis-matching-symbol-name
3430 /*@C
3431 MatMPIBAIJSetValuesBlocked - Direct Fortran call that replaces a call to `MatSetValuesBlocked()`
3432
3433 Collective
3434
3435 Input Parameters:
3436 + matin - the matrix
3437 . min - number of input rows
3438 . im - input rows
3439 . nin - number of input columns
3440 . in - input columns
3441 . v - numerical values input
3442 - addvin - `INSERT_VALUES` or `ADD_VALUES`
3443
3444 Level: advanced
3445
3446 Developer Notes:
3447 This has a complete copy of `MatSetValuesBlocked_MPIBAIJ()` which is terrible code un-reuse.
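
   For orientation, the public entry point this routine shadows is `MatSetValuesBlocked()`; a minimal illustrative sketch of the
   equivalent C usage (here `mat` is assumed to be a preallocated `MATMPIBAIJ` matrix with block size 2, and the values are made up):

.vb
   PetscInt    brow = 0, bcol = 0;             /* block row and block column indices      */
   PetscScalar vals[4] = {1.0, 2.0, 3.0, 4.0}; /* one 2x2 block, row-oriented layout      */
   PetscCall(MatSetValuesBlocked(mat, 1, &brow, 1, &bcol, vals, INSERT_VALUES));
.ve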
3448 3449 .seealso: `Mat`, `MatSetValuesBlocked()` 3450 @*/ 3451 PETSC_EXTERN PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin, PetscInt *min, const PetscInt im[], PetscInt *nin, const PetscInt in[], const MatScalar v[], InsertMode *addvin) 3452 { 3453 /* convert input arguments to C version */ 3454 Mat mat = *matin; 3455 PetscInt m = *min, n = *nin; 3456 InsertMode addv = *addvin; 3457 3458 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 3459 const MatScalar *value; 3460 MatScalar *barray = baij->barray; 3461 PetscBool roworiented = baij->roworiented; 3462 PetscInt i, j, ii, jj, row, col, rstart = baij->rstartbs; 3463 PetscInt rend = baij->rendbs, cstart = baij->cstartbs, stepval; 3464 PetscInt cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2; 3465 3466 PetscFunctionBegin; 3467 /* tasks normally handled by MatSetValuesBlocked() */ 3468 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 3469 else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values"); 3470 PetscCheck(!mat->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix"); 3471 if (mat->assembled) { 3472 mat->was_assembled = PETSC_TRUE; 3473 mat->assembled = PETSC_FALSE; 3474 } 3475 PetscCall(PetscLogEventBegin(MAT_SetValues, mat, 0, 0, 0)); 3476 3477 if (!barray) { 3478 PetscCall(PetscMalloc1(bs2, &barray)); 3479 baij->barray = barray; 3480 } 3481 3482 if (roworiented) stepval = (n - 1) * bs; 3483 else stepval = (m - 1) * bs; 3484 3485 for (i = 0; i < m; i++) { 3486 if (im[i] < 0) continue; 3487 PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large, row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1); 3488 if (im[i] >= rstart && im[i] < rend) { 3489 row = im[i] - rstart; 3490 for (j = 0; j < n; j++) { 3491 /* If NumCol = 1 then a copy is not required */ 3492 if ((roworiented) && (n == 1)) { 3493 barray = (MatScalar *)v + i * bs2; 3494 } else if ((!roworiented) && (m == 1)) { 3495 barray = (MatScalar *)v + j * bs2; 3496 } else { /* Here a copy is required */ 3497 if (roworiented) { 3498 value = v + i * (stepval + bs) * bs + j * bs; 3499 } else { 3500 value = v + j * (stepval + bs) * bs + i * bs; 3501 } 3502 for (ii = 0; ii < bs; ii++, value += stepval) { 3503 for (jj = 0; jj < bs; jj++) *barray++ = *value++; 3504 } 3505 barray -= bs2; 3506 } 3507 3508 if (in[j] >= cstart && in[j] < cend) { 3509 col = in[j] - cstart; 3510 PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j])); 3511 } else if (in[j] < 0) { 3512 continue; 3513 } else { 3514 PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large, col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1); 3515 if (mat->was_assembled) { 3516 if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 3517 3518 #if defined(PETSC_USE_DEBUG) 3519 #if defined(PETSC_USE_CTABLE) 3520 { 3521 PetscInt data; 3522 PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &data)); 3523 PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap"); 3524 } 3525 #else 3526 PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap"); 3527 #endif 3528 #endif 3529 #if defined(PETSC_USE_CTABLE) 3530 PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col)); 3531 col = (col - 1) / bs; 3532 #else 3533 col = (baij->colmap[in[j]] - 1) / bs; 3534 #endif 3535 if (col < 0 && 
!((Mat_SeqBAIJ *)baij->A->data)->nonew) {
3536 PetscCall(MatDisAssemble_MPIBAIJ(mat));
3537 col = in[j];
3538 }
3539 } else col = in[j];
3540 PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
3541 }
3542 }
3543 } else {
3544 if (!baij->donotstash) {
3545 if (roworiented) {
3546 PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
3547 } else {
3548 PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
3549 }
3550 }
3551 }
3552 }
3553
3554 /* task normally handled by MatSetValuesBlocked() */
3555 PetscCall(PetscLogEventEnd(MAT_SetValues, mat, 0, 0, 0));
3556 PetscFunctionReturn(PETSC_SUCCESS);
3557 }
3558
3559 /*@
3560 MatCreateMPIBAIJWithArrays - creates a `MATMPIBAIJ` matrix using arrays that contain the local rows in standard block CSR format.
3561
3562 Collective
3563
3564 Input Parameters:
3565 + comm - MPI communicator
3566 . bs - the block size, only a block size of 1 is supported
3567 . m - number of local rows (Cannot be `PETSC_DECIDE`)
3568 . n - This value should be the same as the local size used in creating the
3569 x vector for the matrix-vector product $ y = Ax $. (or `PETSC_DECIDE` to have
3570 calculated if `N` is given) For square matrices `n` is almost always `m`.
3571 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
3572 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
3573 . i - row indices; that is, i[0] = 0 and i[row] = i[row-1] + number of blocks in block row `row-1` of the matrix
3574 . j - column indices
3575 - a - matrix values
3576
3577 Output Parameter:
3578 . mat - the matrix
3579
3580 Level: intermediate
3581
3582 Notes:
3583 The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc;
3584 thus you CANNOT change the matrix entries by changing the values of a[] after you have
3585 called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
3586
3587 The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
3588 the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
3589 block, followed by the second column of the first block and so on. That is, the blocks are contiguous in memory
3590 with column-major ordering within blocks.
3591
3592 The `i` and `j` indices are 0 based, and the `i` indices are offsets into the local `j` array.
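
   Example usage (a minimal illustrative sketch with made-up values, assuming a single-process run; with several processes each rank
   passes only its own local rows, while the column indices remain global):

.vb
   PetscInt    i[] = {0, 2, 3};       /* row 0 has 2 entries, row 1 has 1 entry */
   PetscInt    j[] = {0, 1, 1};       /* 0-based global column indices          */
   PetscScalar a[] = {1.0, 2.0, 3.0}; /* one value per block since bs = 1       */
   Mat         A;
   PetscCall(MatCreateMPIBAIJWithArrays(PETSC_COMM_WORLD, 1, 2, 2, PETSC_DETERMINE, PETSC_DETERMINE, i, j, a, &A));
.ve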
3593
3594 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
3595 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`
3596 @*/
3597 PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
3598 {
3599 PetscFunctionBegin;
3600 PetscCheck(!i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
3601 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
3602 PetscCall(MatCreate(comm, mat));
3603 PetscCall(MatSetSizes(*mat, m, n, M, N));
3604 PetscCall(MatSetType(*mat, MATMPIBAIJ));
3605 PetscCall(MatSetBlockSize(*mat, bs));
3606 PetscCall(MatSetUp(*mat));
3607 PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_FALSE));
3608 PetscCall(MatMPIBAIJSetPreallocationCSR(*mat, bs, i, j, a));
3609 PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_TRUE));
3610 PetscFunctionReturn(PETSC_SUCCESS);
3611 }
3612
3613 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
3614 {
3615 PetscInt m, N, i, rstart, nnz, Ii, bs, cbs;
3616 PetscInt *indx;
3617 PetscScalar *values;
3618
3619 PetscFunctionBegin;
3620 PetscCall(MatGetSize(inmat, &m, &N));
3621 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
3622 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)inmat->data;
3623 PetscInt *dnz, *onz, mbs, Nbs, nbs;
3624 PetscInt *bindx, rmax = a->rmax, j;
3625 PetscMPIInt rank, size;
3626
3627 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
3628 mbs = m / bs;
3629 Nbs = N / cbs;
3630 if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnershipBlock(comm, cbs, &n, &N));
3631 nbs = n / cbs;
3632
3633 PetscCall(PetscMalloc1(rmax, &bindx));
3634 MatPreallocateBegin(comm, mbs, nbs, dnz, onz); /* inline function, output __end and __rstart are used below */
3635
3636 PetscCallMPI(MPI_Comm_rank(comm, &rank));
3637 PetscCallMPI(MPI_Comm_size(comm, &size));
3638 if (rank == size - 1) {
3639 /* Check sum(nbs) = Nbs */
3640 PetscCheck(__end == Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT, __end, Nbs);
3641 }
3642
3643 rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */
3644 for (i = 0; i < mbs; i++) {
3645 PetscCall(MatGetRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL)); /* non-blocked nnz and indx */
3646 nnz = nnz / bs;
3647 for (j = 0; j < nnz; j++) bindx[j] = indx[j * bs] / bs;
3648 PetscCall(MatPreallocateSet(i + rstart, nnz, bindx, dnz, onz));
3649 PetscCall(MatRestoreRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL));
3650 }
3651 PetscCall(PetscFree(bindx));
3652
3653 PetscCall(MatCreate(comm, outmat));
3654 PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
3655 PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
3656 PetscCall(MatSetType(*outmat, MATBAIJ));
3657 PetscCall(MatSeqBAIJSetPreallocation(*outmat, bs, 0, dnz));
3658 PetscCall(MatMPIBAIJSetPreallocation(*outmat, bs, 0, dnz, 0, onz));
3659 MatPreallocateEnd(dnz, onz);
3660 PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
3661 }
3662
3663 /* numeric phase */
3664 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
3665 PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));
3666
3667 for (i = 0; i < m; i++) {
3668
PetscCall(MatGetRow_SeqBAIJ(inmat, i, &nnz, &indx, &values)); 3669 Ii = i + rstart; 3670 PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES)); 3671 PetscCall(MatRestoreRow_SeqBAIJ(inmat, i, &nnz, &indx, &values)); 3672 } 3673 PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY)); 3674 PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY)); 3675 PetscFunctionReturn(PETSC_SUCCESS); 3676 } 3677