#include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/

#include <petsc/private/hashseti.h>
#include <petscblaslapack.h>
#include <petscsf.h>

/*
  Destroys an MPIBAIJ matrix: releases the diagonal (A) and off-diagonal (B)
  sequential blocks, the communication machinery (lvec/Mvctx), the column map,
  the stashes, the hash-table arrays (hd/ht), and finally removes all composed
  query functions so the object can be retyped cleanly.
*/
static PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
  PetscCall(MatStashDestroy_Private(&mat->stash));
  PetscCall(MatStashDestroy_Private(&mat->bstash));
  PetscCall(MatDestroy(&baij->A));
  PetscCall(MatDestroy(&baij->B));
  /* colmap has two representations depending on configuration: hash map vs dense array */
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&baij->colmap));
#else
  PetscCall(PetscFree(baij->colmap));
#endif
  PetscCall(PetscFree(baij->garray));
  PetscCall(VecDestroy(&baij->lvec));
  PetscCall(VecScatterDestroy(&baij->Mvctx));
  PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));
  PetscCall(PetscFree(baij->barray));
  PetscCall(PetscFree2(baij->hd, baij->ht));
  PetscCall(PetscFree(baij->rangebs));
  PetscCall(PetscFree(mat->data));

  /* drop the type name and all composed methods registered at creation time */
  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocationCSR_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetHashTableFactor_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpisbaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiadj_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiaij_C", NULL));
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_hypre_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_is_C", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and MatAssemblyEnd_MPI_Hash() */
#define TYPE BAIJ
#include "../src/mat/impls/aij/mpi/mpihashmat.h"
#undef TYPE

#if defined(PETSC_HAVE_HYPRE)
PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
#endif

/*
  Computes, for each local row, the entry of maximum absolute value (into v)
  and optionally its global column index (into idx), by combining the results
  from the diagonal block A and the off-diagonal block B.  Ties between A and B
  are broken in favor of the smaller global column index.
*/
static PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIBAIJ       *a = (Mat_MPIBAIJ *)A->data;
  PetscInt           i, *idxb = NULL, m = A->rmap->n, bs = A->cmap->bs;
  PetscScalar       *vv;
  Vec                vB, vA;
  const PetscScalar *va, *vb;

  PetscFunctionBegin;
  PetscCall(MatCreateVecs(a->A, NULL, &vA));
  PetscCall(MatGetRowMaxAbs(a->A, vA, idx));

  PetscCall(VecGetArrayRead(vA, &va));
  if (idx) {
    /* shift A's local column indices to global numbering; untouched (zero) rows keep idx as-is */
    for (i = 0; i < m; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  PetscCall(MatCreateVecs(a->B, NULL, &vB));
  PetscCall(PetscMalloc1(m, &idxb));
  PetscCall(MatGetRowMaxAbs(a->B, vB, idxb));

  PetscCall(VecGetArrayWrite(v, &vv));
  PetscCall(VecGetArrayRead(vB, &vb));
  for (i = 0; i < m; i++) {
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
      vv[i] = vb[i];
      /* map B's compressed (local off-diagonal) index back to a global column via garray */
      if (idx) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs);
    } else {
      vv[i] = va[i];
      /* on an exact tie, prefer the smaller global column index */
      if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > bs * a->garray[idxb[i] / bs] + (idxb[i] % bs)) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs);
    }
  }
  PetscCall(VecRestoreArrayWrite(v, &vv));
  PetscCall(VecRestoreArrayRead(vA, &va));
  PetscCall(VecRestoreArrayRead(vB, &vb));
  PetscCall(PetscFree(idxb));
  PetscCall(VecDestroy(&vA));
  PetscCall(VecDestroy(&vB));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Sum of absolute values of each local row: sum over A plus sum over B */
static PetscErrorCode MatGetRowSumAbs_MPIBAIJ(Mat A, Vec v)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
  Vec          vB, vA;

  PetscFunctionBegin;
  PetscCall(MatCreateVecs(a->A, NULL, &vA));
  PetscCall(MatGetRowSumAbs(a->A, vA));
  PetscCall(MatCreateVecs(a->B, NULL, &vB));
  PetscCall(MatGetRowSumAbs(a->B, vB));
  PetscCall(VecAXPY(vA, 1.0, vB));
  PetscCall(VecDestroy(&vB));
  PetscCall(VecCopy(vA, v));
  PetscCall(VecDestroy(&vA));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Snapshot the numerical values of both sequential blocks (see MatStoreValues()) */
static PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatStoreValues(aij->A));
  PetscCall(MatStoreValues(aij->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Restore the numerical values previously saved with MatStoreValues_MPIBAIJ() */
static PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatRetrieveValues(aij->A));
  PetscCall(MatRetrieveValues(aij->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
    Local utility routine that creates a mapping from the global column
  number to the local number in the off-diagonal part of the local
  storage of the matrix. This is done in a non scalable way since the
  length of colmap equals the global matrix length.
142 */ 143 PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat) 144 { 145 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 146 Mat_SeqBAIJ *B = (Mat_SeqBAIJ *)baij->B->data; 147 PetscInt nbs = B->nbs, i, bs = mat->rmap->bs; 148 149 PetscFunctionBegin; 150 #if defined(PETSC_USE_CTABLE) 151 PetscCall(PetscHMapICreateWithSize(baij->nbs, &baij->colmap)); 152 for (i = 0; i < nbs; i++) PetscCall(PetscHMapISet(baij->colmap, baij->garray[i] + 1, i * bs + 1)); 153 #else 154 PetscCall(PetscCalloc1(baij->Nbs + 1, &baij->colmap)); 155 for (i = 0; i < nbs; i++) baij->colmap[baij->garray[i]] = i * bs + 1; 156 #endif 157 PetscFunctionReturn(PETSC_SUCCESS); 158 } 159 160 #define MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, orow, ocol) \ 161 do { \ 162 brow = row / bs; \ 163 rp = PetscSafePointerPlusOffset(aj, ai[brow]); \ 164 ap = PetscSafePointerPlusOffset(aa, bs2 * ai[brow]); \ 165 rmax = aimax[brow]; \ 166 nrow = ailen[brow]; \ 167 bcol = col / bs; \ 168 ridx = row % bs; \ 169 cidx = col % bs; \ 170 low = 0; \ 171 high = nrow; \ 172 while (high - low > 3) { \ 173 t = (low + high) / 2; \ 174 if (rp[t] > bcol) high = t; \ 175 else low = t; \ 176 } \ 177 for (_i = low; _i < high; _i++) { \ 178 if (rp[_i] > bcol) break; \ 179 if (rp[_i] == bcol) { \ 180 bap = ap + bs2 * _i + bs * cidx + ridx; \ 181 if (addv == ADD_VALUES) *bap += value; \ 182 else *bap = value; \ 183 goto a_noinsert; \ 184 } \ 185 } \ 186 if (a->nonew == 1) goto a_noinsert; \ 187 PetscCheck(a->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 188 MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, aimax, a->nonew, MatScalar); \ 189 N = nrow++ - 1; \ 190 /* shift up all the later entries in this row */ \ 191 PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \ 192 PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \ 
193 PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \ 194 rp[_i] = bcol; \ 195 ap[bs2 * _i + bs * cidx + ridx] = value; \ 196 a_noinsert:; \ 197 ailen[brow] = nrow; \ 198 } while (0) 199 200 #define MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, orow, ocol) \ 201 do { \ 202 brow = row / bs; \ 203 rp = PetscSafePointerPlusOffset(bj, bi[brow]); \ 204 ap = PetscSafePointerPlusOffset(ba, bs2 * bi[brow]); \ 205 rmax = bimax[brow]; \ 206 nrow = bilen[brow]; \ 207 bcol = col / bs; \ 208 ridx = row % bs; \ 209 cidx = col % bs; \ 210 low = 0; \ 211 high = nrow; \ 212 while (high - low > 3) { \ 213 t = (low + high) / 2; \ 214 if (rp[t] > bcol) high = t; \ 215 else low = t; \ 216 } \ 217 for (_i = low; _i < high; _i++) { \ 218 if (rp[_i] > bcol) break; \ 219 if (rp[_i] == bcol) { \ 220 bap = ap + bs2 * _i + bs * cidx + ridx; \ 221 if (addv == ADD_VALUES) *bap += value; \ 222 else *bap = value; \ 223 goto b_noinsert; \ 224 } \ 225 } \ 226 if (b->nonew == 1) goto b_noinsert; \ 227 PetscCheck(b->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 228 MatSeqXAIJReallocateAIJ(B, b->mbs, bs2, nrow, brow, bcol, rmax, ba, bi, bj, rp, ap, bimax, b->nonew, MatScalar); \ 229 N = nrow++ - 1; \ 230 /* shift up all the later entries in this row */ \ 231 PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \ 232 PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \ 233 PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \ 234 rp[_i] = bcol; \ 235 ap[bs2 * _i + bs * cidx + ridx] = value; \ 236 b_noinsert:; \ 237 bilen[brow] = nrow; \ 238 } while (0) 239 240 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) 241 { 242 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 243 MatScalar value; 244 PetscBool roworiented = baij->roworiented; 245 
PetscInt i, j, row, col; 246 PetscInt rstart_orig = mat->rmap->rstart; 247 PetscInt rend_orig = mat->rmap->rend, cstart_orig = mat->cmap->rstart; 248 PetscInt cend_orig = mat->cmap->rend, bs = mat->rmap->bs; 249 250 /* Some Variables required in the macro */ 251 Mat A = baij->A; 252 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 253 PetscInt *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j; 254 MatScalar *aa = a->a; 255 256 Mat B = baij->B; 257 Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)B->data; 258 PetscInt *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j; 259 MatScalar *ba = b->a; 260 261 PetscInt *rp, ii, nrow, _i, rmax, N, brow, bcol; 262 PetscInt low, high, t, ridx, cidx, bs2 = a->bs2; 263 MatScalar *ap, *bap; 264 265 PetscFunctionBegin; 266 for (i = 0; i < m; i++) { 267 if (im[i] < 0) continue; 268 PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1); 269 if (im[i] >= rstart_orig && im[i] < rend_orig) { 270 row = im[i] - rstart_orig; 271 for (j = 0; j < n; j++) { 272 if (in[j] >= cstart_orig && in[j] < cend_orig) { 273 col = in[j] - cstart_orig; 274 if (roworiented) value = v[i * n + j]; 275 else value = v[i + j * m]; 276 MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, im[i], in[j]); 277 } else if (in[j] < 0) { 278 continue; 279 } else { 280 PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1); 281 if (mat->was_assembled) { 282 if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 283 #if defined(PETSC_USE_CTABLE) 284 PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] / bs + 1, 0, &col)); 285 col = col - 1; 286 #else 287 col = baij->colmap[in[j] / bs] - 1; 288 #endif 289 if (col < 0 && !((Mat_SeqBAIJ *)baij->B->data)->nonew) { 290 PetscCall(MatDisAssemble_MPIBAIJ(mat)); 291 col = in[j]; 292 /* Reinitialize the variables 
required by MatSetValues_SeqBAIJ_B_Private() */ 293 B = baij->B; 294 b = (Mat_SeqBAIJ *)B->data; 295 bimax = b->imax; 296 bi = b->i; 297 bilen = b->ilen; 298 bj = b->j; 299 ba = b->a; 300 } else { 301 PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]); 302 col += in[j] % bs; 303 } 304 } else col = in[j]; 305 if (roworiented) value = v[i * n + j]; 306 else value = v[i + j * m]; 307 MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, im[i], in[j]); 308 /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */ 309 } 310 } 311 } else { 312 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]); 313 if (!baij->donotstash) { 314 mat->assembled = PETSC_FALSE; 315 if (roworiented) { 316 PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE)); 317 } else { 318 PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE)); 319 } 320 } 321 } 322 } 323 PetscFunctionReturn(PETSC_SUCCESS); 324 } 325 326 static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol) 327 { 328 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 329 PetscInt *rp, low, high, t, ii, jj, nrow, i, rmax, N; 330 PetscInt *imax = a->imax, *ai = a->i, *ailen = a->ilen; 331 PetscInt *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs; 332 PetscBool roworiented = a->roworiented; 333 const PetscScalar *value = v; 334 MatScalar *ap, *aa = a->a, *bap; 335 336 PetscFunctionBegin; 337 rp = aj + ai[row]; 338 ap = aa + bs2 * ai[row]; 339 rmax = imax[row]; 340 nrow = ailen[row]; 341 value = v; 342 low = 0; 343 high = nrow; 344 while (high - low > 7) { 345 t = (low + high) / 2; 346 if (rp[t] > col) 
high = t; 347 else low = t; 348 } 349 for (i = low; i < high; i++) { 350 if (rp[i] > col) break; 351 if (rp[i] == col) { 352 bap = ap + bs2 * i; 353 if (roworiented) { 354 if (is == ADD_VALUES) { 355 for (ii = 0; ii < bs; ii++) { 356 for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++; 357 } 358 } else { 359 for (ii = 0; ii < bs; ii++) { 360 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 361 } 362 } 363 } else { 364 if (is == ADD_VALUES) { 365 for (ii = 0; ii < bs; ii++, value += bs) { 366 for (jj = 0; jj < bs; jj++) bap[jj] += value[jj]; 367 bap += bs; 368 } 369 } else { 370 for (ii = 0; ii < bs; ii++, value += bs) { 371 for (jj = 0; jj < bs; jj++) bap[jj] = value[jj]; 372 bap += bs; 373 } 374 } 375 } 376 goto noinsert2; 377 } 378 } 379 if (nonew == 1) goto noinsert2; 380 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol); 381 MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar); 382 N = nrow++ - 1; 383 high++; 384 /* shift up all the later entries in this row */ 385 PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1)); 386 PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1))); 387 rp[i] = col; 388 bap = ap + bs2 * i; 389 if (roworiented) { 390 for (ii = 0; ii < bs; ii++) { 391 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 392 } 393 } else { 394 for (ii = 0; ii < bs; ii++) { 395 for (jj = 0; jj < bs; jj++) *bap++ = *value++; 396 } 397 } 398 noinsert2:; 399 ailen[row] = nrow; 400 PetscFunctionReturn(PETSC_SUCCESS); 401 } 402 403 /* 404 This routine should be optimized so that the block copy at ** Here a copy is required ** below is not needed 405 by passing additional stride information into the MatSetValuesBlocked_SeqBAIJ_Inlined() routine 406 */ 407 static PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat, PetscInt m, const PetscInt 
im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) 408 { 409 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 410 const PetscScalar *value; 411 MatScalar *barray = baij->barray; 412 PetscBool roworiented = baij->roworiented; 413 PetscInt i, j, ii, jj, row, col, rstart = baij->rstartbs; 414 PetscInt rend = baij->rendbs, cstart = baij->cstartbs, stepval; 415 PetscInt cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2; 416 417 PetscFunctionBegin; 418 if (!barray) { 419 PetscCall(PetscMalloc1(bs2, &barray)); 420 baij->barray = barray; 421 } 422 423 if (roworiented) stepval = (n - 1) * bs; 424 else stepval = (m - 1) * bs; 425 426 for (i = 0; i < m; i++) { 427 if (im[i] < 0) continue; 428 PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1); 429 if (im[i] >= rstart && im[i] < rend) { 430 row = im[i] - rstart; 431 for (j = 0; j < n; j++) { 432 /* If NumCol = 1 then a copy is not required */ 433 if ((roworiented) && (n == 1)) { 434 barray = (MatScalar *)v + i * bs2; 435 } else if ((!roworiented) && (m == 1)) { 436 barray = (MatScalar *)v + j * bs2; 437 } else { /* Here a copy is required */ 438 if (roworiented) { 439 value = v + (i * (stepval + bs) + j) * bs; 440 } else { 441 value = v + (j * (stepval + bs) + i) * bs; 442 } 443 for (ii = 0; ii < bs; ii++, value += bs + stepval) { 444 for (jj = 0; jj < bs; jj++) barray[jj] = value[jj]; 445 barray += bs; 446 } 447 barray -= bs2; 448 } 449 450 if (in[j] >= cstart && in[j] < cend) { 451 col = in[j] - cstart; 452 PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j])); 453 } else if (in[j] < 0) { 454 continue; 455 } else { 456 PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1); 457 if (mat->was_assembled) { 458 if (!baij->colmap) 
PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 459 460 #if defined(PETSC_USE_CTABLE) 461 PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col)); 462 col = col < 1 ? -1 : (col - 1) / bs; 463 #else 464 col = baij->colmap[in[j]] < 1 ? -1 : (baij->colmap[in[j]] - 1) / bs; 465 #endif 466 if (col < 0 && !((Mat_SeqBAIJ *)baij->B->data)->nonew) { 467 PetscCall(MatDisAssemble_MPIBAIJ(mat)); 468 col = in[j]; 469 } else PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new blocked indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]); 470 } else col = in[j]; 471 PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j])); 472 } 473 } 474 } else { 475 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]); 476 if (!baij->donotstash) { 477 if (roworiented) { 478 PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 479 } else { 480 PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 481 } 482 } 483 } 484 } 485 PetscFunctionReturn(PETSC_SUCCESS); 486 } 487 488 #define HASH_KEY 0.6180339887 489 #define HASH(size, key, tmp) (tmp = (key) * HASH_KEY, (PetscInt)((size) * (tmp - (PetscInt)tmp))) 490 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 491 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 492 static PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) 493 { 494 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 495 PetscBool roworiented = baij->roworiented; 496 PetscInt i, j, row, col; 497 PetscInt rstart_orig = mat->rmap->rstart; 498 PetscInt rend_orig = mat->rmap->rend, Nbs = baij->Nbs; 499 
PetscInt h1, key, size = baij->ht_size, bs = mat->rmap->bs, *HT = baij->ht, idx; 500 PetscReal tmp; 501 MatScalar **HD = baij->hd, value; 502 PetscInt total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct; 503 504 PetscFunctionBegin; 505 for (i = 0; i < m; i++) { 506 if (PetscDefined(USE_DEBUG)) { 507 PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row"); 508 PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1); 509 } 510 row = im[i]; 511 if (row >= rstart_orig && row < rend_orig) { 512 for (j = 0; j < n; j++) { 513 col = in[j]; 514 if (roworiented) value = v[i * n + j]; 515 else value = v[i + j * m]; 516 /* Look up PetscInto the Hash Table */ 517 key = (row / bs) * Nbs + (col / bs) + 1; 518 h1 = HASH(size, key, tmp); 519 520 idx = h1; 521 if (PetscDefined(USE_DEBUG)) { 522 insert_ct++; 523 total_ct++; 524 if (HT[idx] != key) { 525 for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++); 526 if (idx == size) { 527 for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++); 528 PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col); 529 } 530 } 531 } else if (HT[idx] != key) { 532 for (idx = h1; (idx < size) && (HT[idx] != key); idx++); 533 if (idx == size) { 534 for (idx = 0; (idx < h1) && (HT[idx] != key); idx++); 535 PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col); 536 } 537 } 538 /* A HASH table entry is found, so insert the values at the correct address */ 539 if (addv == ADD_VALUES) *(HD[idx] + (col % bs) * bs + (row % bs)) += value; 540 else *(HD[idx] + (col % bs) * bs + (row % bs)) = value; 541 } 542 } else if (!baij->donotstash) { 543 if (roworiented) { 544 PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], 
n, in, v + i * n, PETSC_FALSE)); 545 } else { 546 PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE)); 547 } 548 } 549 } 550 if (PetscDefined(USE_DEBUG)) { 551 baij->ht_total_ct += total_ct; 552 baij->ht_insert_ct += insert_ct; 553 } 554 PetscFunctionReturn(PETSC_SUCCESS); 555 } 556 557 static PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) 558 { 559 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 560 PetscBool roworiented = baij->roworiented; 561 PetscInt i, j, ii, jj, row, col; 562 PetscInt rstart = baij->rstartbs; 563 PetscInt rend = mat->rmap->rend, stepval, bs = mat->rmap->bs, bs2 = baij->bs2, nbs2 = n * bs2; 564 PetscInt h1, key, size = baij->ht_size, idx, *HT = baij->ht, Nbs = baij->Nbs; 565 PetscReal tmp; 566 MatScalar **HD = baij->hd, *baij_a; 567 const PetscScalar *v_t, *value; 568 PetscInt total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct; 569 570 PetscFunctionBegin; 571 if (roworiented) stepval = (n - 1) * bs; 572 else stepval = (m - 1) * bs; 573 574 for (i = 0; i < m; i++) { 575 if (PetscDefined(USE_DEBUG)) { 576 PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row: %" PetscInt_FMT, im[i]); 577 PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1); 578 } 579 row = im[i]; 580 v_t = v + i * nbs2; 581 if (row >= rstart && row < rend) { 582 for (j = 0; j < n; j++) { 583 col = in[j]; 584 585 /* Look up into the Hash Table */ 586 key = row * Nbs + col + 1; 587 h1 = HASH(size, key, tmp); 588 589 idx = h1; 590 if (PetscDefined(USE_DEBUG)) { 591 total_ct++; 592 insert_ct++; 593 if (HT[idx] != key) { 594 for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++); 595 if (idx == size) { 596 for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++); 597 PetscCheck(idx 
!= h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col); 598 } 599 } 600 } else if (HT[idx] != key) { 601 for (idx = h1; (idx < size) && (HT[idx] != key); idx++); 602 if (idx == size) { 603 for (idx = 0; (idx < h1) && (HT[idx] != key); idx++); 604 PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col); 605 } 606 } 607 baij_a = HD[idx]; 608 if (roworiented) { 609 /*value = v + i*(stepval+bs)*bs + j*bs;*/ 610 /* value = v + (i*(stepval+bs)+j)*bs; */ 611 value = v_t; 612 v_t += bs; 613 if (addv == ADD_VALUES) { 614 for (ii = 0; ii < bs; ii++, value += stepval) { 615 for (jj = ii; jj < bs2; jj += bs) baij_a[jj] += *value++; 616 } 617 } else { 618 for (ii = 0; ii < bs; ii++, value += stepval) { 619 for (jj = ii; jj < bs2; jj += bs) baij_a[jj] = *value++; 620 } 621 } 622 } else { 623 value = v + j * (stepval + bs) * bs + i * bs; 624 if (addv == ADD_VALUES) { 625 for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) { 626 for (jj = 0; jj < bs; jj++) baij_a[jj] += *value++; 627 } 628 } else { 629 for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) { 630 for (jj = 0; jj < bs; jj++) baij_a[jj] = *value++; 631 } 632 } 633 } 634 } 635 } else { 636 if (!baij->donotstash) { 637 if (roworiented) { 638 PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 639 } else { 640 PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 641 } 642 } 643 } 644 } 645 if (PetscDefined(USE_DEBUG)) { 646 baij->ht_total_ct += total_ct; 647 baij->ht_insert_ct += insert_ct; 648 } 649 PetscFunctionReturn(PETSC_SUCCESS); 650 } 651 652 static PetscErrorCode MatGetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[]) 653 { 654 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 655 PetscInt bs = mat->rmap->bs, i, j, 
bsrstart = mat->rmap->rstart, bsrend = mat->rmap->rend; 656 PetscInt bscstart = mat->cmap->rstart, bscend = mat->cmap->rend, row, col, data; 657 658 PetscFunctionBegin; 659 for (i = 0; i < m; i++) { 660 if (idxm[i] < 0) continue; /* negative row */ 661 PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1); 662 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 663 row = idxm[i] - bsrstart; 664 for (j = 0; j < n; j++) { 665 if (idxn[j] < 0) continue; /* negative column */ 666 PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1); 667 if (idxn[j] >= bscstart && idxn[j] < bscend) { 668 col = idxn[j] - bscstart; 669 PetscCall(MatGetValues_SeqBAIJ(baij->A, 1, &row, 1, &col, v + i * n + j)); 670 } else { 671 if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 672 #if defined(PETSC_USE_CTABLE) 673 PetscCall(PetscHMapIGetWithDefault(baij->colmap, idxn[j] / bs + 1, 0, &data)); 674 data--; 675 #else 676 data = baij->colmap[idxn[j] / bs] - 1; 677 #endif 678 if ((data < 0) || (baij->garray[data / bs] != idxn[j] / bs)) *(v + i * n + j) = 0.0; 679 else { 680 col = data + idxn[j] % bs; 681 PetscCall(MatGetValues_SeqBAIJ(baij->B, 1, &row, 1, &col, v + i * n + j)); 682 } 683 } 684 } 685 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported"); 686 } 687 PetscFunctionReturn(PETSC_SUCCESS); 688 } 689 690 static PetscErrorCode MatNorm_MPIBAIJ(Mat mat, NormType type, PetscReal *nrm) 691 { 692 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 693 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ *)baij->A->data, *bmat = (Mat_SeqBAIJ *)baij->B->data; 694 PetscInt i, j, bs2 = baij->bs2, bs = baij->A->rmap->bs, nz, row, col; 695 PetscReal sum = 0.0; 696 MatScalar *v; 697 698 PetscFunctionBegin; 699 if (baij->size == 1) { 700 
PetscCall(MatNorm(baij->A, type, nrm)); 701 } else { 702 if (type == NORM_FROBENIUS) { 703 v = amat->a; 704 nz = amat->nz * bs2; 705 for (i = 0; i < nz; i++) { 706 sum += PetscRealPart(PetscConj(*v) * (*v)); 707 v++; 708 } 709 v = bmat->a; 710 nz = bmat->nz * bs2; 711 for (i = 0; i < nz; i++) { 712 sum += PetscRealPart(PetscConj(*v) * (*v)); 713 v++; 714 } 715 PetscCallMPI(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 716 *nrm = PetscSqrtReal(*nrm); 717 } else if (type == NORM_1) { /* max column sum */ 718 PetscReal *tmp; 719 PetscInt *jj, *garray = baij->garray, cstart = baij->rstartbs; 720 721 PetscCall(PetscCalloc1(mat->cmap->N, &tmp)); 722 v = amat->a; 723 jj = amat->j; 724 for (i = 0; i < amat->nz; i++) { 725 for (j = 0; j < bs; j++) { 726 col = bs * (cstart + *jj) + j; /* column index */ 727 for (row = 0; row < bs; row++) { 728 tmp[col] += PetscAbsScalar(*v); 729 v++; 730 } 731 } 732 jj++; 733 } 734 v = bmat->a; 735 jj = bmat->j; 736 for (i = 0; i < bmat->nz; i++) { 737 for (j = 0; j < bs; j++) { 738 col = bs * garray[*jj] + j; 739 for (row = 0; row < bs; row++) { 740 tmp[col] += PetscAbsScalar(*v); 741 v++; 742 } 743 } 744 jj++; 745 } 746 PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, tmp, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 747 *nrm = 0.0; 748 for (j = 0; j < mat->cmap->N; j++) { 749 if (tmp[j] > *nrm) *nrm = tmp[j]; 750 } 751 PetscCall(PetscFree(tmp)); 752 } else if (type == NORM_INFINITY) { /* max row sum */ 753 PetscReal *sums; 754 PetscCall(PetscMalloc1(bs, &sums)); 755 sum = 0.0; 756 for (j = 0; j < amat->mbs; j++) { 757 for (row = 0; row < bs; row++) sums[row] = 0.0; 758 v = amat->a + bs2 * amat->i[j]; 759 nz = amat->i[j + 1] - amat->i[j]; 760 for (i = 0; i < nz; i++) { 761 for (col = 0; col < bs; col++) { 762 for (row = 0; row < bs; row++) { 763 sums[row] += PetscAbsScalar(*v); 764 v++; 765 } 766 } 767 } 768 v = bmat->a + bs2 * bmat->i[j]; 769 nz = bmat->i[j + 1] - bmat->i[j]; 
      /* NOTE(review): fragment of MatNorm — accumulate |entry| column sums of each bs x bs block
         into sums[] (one slot per row within the block), presumably the NORM_INFINITY branch;
         the loop head defining `sums`/`v` is above this view — confirm against full file */
      for (i = 0; i < nz; i++) {
        for (col = 0; col < bs; col++) {
          for (row = 0; row < bs; row++) {
            sums[row] += PetscAbsScalar(*v);
            v++;
          }
        }
      }
      /* keep the largest local row sum seen so far */
      for (row = 0; row < bs; row++) {
        if (sums[row] > sum) sum = sums[row];
      }
    }
    /* the norm is the maximum over all processes of the local row-sum maxima */
    PetscCallMPI(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat)));
    PetscCall(PetscFree(sums));
  } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for this norm yet");
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Creates the hash table, and sets the table
  This table is created only once.
  If new entries need to be added to the matrix
  then the hash table has to be destroyed and
  recreated.

  The table maps the global block index key = row*Nbs + col + 1 (the +1 keeps 0 free as the
  "empty slot" marker) to a pointer at the corresponding bs2-sized block of values, using
  open addressing with linear probing. `factor` (> 1) oversizes the table relative to the
  number of stored blocks to keep probe chains short.
*/
static PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat, PetscReal factor)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat          A = baij->A, B = baij->B;
  Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data;
  PetscInt     i, j, k, nz = a->nz + b->nz, h1, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
  PetscInt     ht_size, bs2 = baij->bs2, rstart = baij->rstartbs;
  PetscInt     cstart = baij->cstartbs, *garray = baij->garray, row, col, Nbs = baij->Nbs;
  PetscInt    *HT, key;
  MatScalar  **HD;
  PetscReal    tmp;
#if defined(PETSC_USE_INFO)
  PetscInt ct = 0, max = 0; /* total collisions and longest probe chain, for diagnostics only */
#endif

  PetscFunctionBegin;
  /* table already built: nothing to do */
  if (baij->ht) PetscFunctionReturn(PETSC_SUCCESS);

  baij->ht_size = (PetscInt)(factor * nz);
  ht_size       = baij->ht_size;

  /* Allocate Memory for Hash Table (calloc: all slots start as 0 == empty) */
  PetscCall(PetscCalloc2(ht_size, &baij->hd, ht_size, &baij->ht));
  HD = baij->hd;
  HT = baij->ht;

  /* Loop Over A (diagonal block; local block columns are offset by cstart) */
  for (i = 0; i < a->mbs; i++) {
    for (j = ai[i]; j < ai[i + 1]; j++) {
      row = i + rstart;
      col = aj[j] + cstart;

      key = row * Nbs + col + 1;
      h1  = HASH(ht_size, key, tmp);
      /* linear probe from h1 until an empty slot is found */
      for (k = 0; k < ht_size; k++) {
        if (!HT[(h1 + k) % ht_size]) {
          HT[(h1 + k) % ht_size] = key;
          HD[(h1 + k) % ht_size] = a->a + j * bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }
  /* Loop Over B (off-diagonal block; garray maps local to global block columns) */
  for (i = 0; i < b->mbs; i++) {
    for (j = bi[i]; j < bi[i + 1]; j++) {
      row = i + rstart;
      col = garray[bj[j]];
      key = row * Nbs + col + 1;
      h1  = HASH(ht_size, key, tmp);
      for (k = 0; k < ht_size; k++) {
        if (!HT[(h1 + k) % ht_size]) {
          HT[(h1 + k) % ht_size] = key;
          HD[(h1 + k) % ht_size] = b->a + j * bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }

  /* Print Summary */
#if defined(PETSC_USE_INFO)
  for (i = 0, j = 0; i < ht_size; i++) {
    if (HT[i]) j++;
  }
  PetscCall(PetscInfo(mat, "Average Search = %5.2g,max search = %" PetscInt_FMT "\n", (!j) ? 0.0 : (double)(((PetscReal)(ct + j)) / j), max));
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Starts communicating any stashed off-process (scalar and block) entries to their owning
  ranks; the matching receives are drained in MatAssemblyEnd_MPIBAIJ(). Skipped entirely
  when stashing is disabled or the user guaranteed no off-process entries.
*/
static PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  PetscInt     nstash, reallocs;

  PetscFunctionBegin;
  if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
  PetscCall(MatStashScatterBegin_Private(mat, &mat->bstash, baij->rangebs));
  PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
  PetscCall(PetscInfo(mat, "Stash has %" PetscInt_FMT " entries,uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscCall(MatStashGetInfo_Private(&mat->bstash, &nstash, &reallocs));
  PetscCall(PetscInfo(mat, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Completes assembly: receives stashed off-process entries, inserts them, assembles the
  local diagonal (A) and off-diagonal (B) blocks, handles disassembly/reassembly of B,
  and optionally installs the hash-table fast paths for MatSetValues().
  (Function body continues in the following lines.)
*/
static PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ *a    = (Mat_SeqBAIJ *)baij->A->data;
  PetscInt     i, j, rstart, ncols, flg, bs2 = baij->bs2;
  PetscInt    *row, *col;
  PetscBool    r1, r2, r3, other_disassembled;
  MatScalar   *val;
  PetscMPIInt  n;

  PetscFunctionBegin;
  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  if (!baij->donotstash && !mat->nooffprocentries) {
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the row-oriented flag to column-oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ *)baij->B->data)->roworiented;

    baij->roworiented                           = PETSC_FALSE;
    a->roworiented                              = PETSC_FALSE;
    ((Mat_SeqBAIJ *)baij->B->data)->roworiented = PETSC_FALSE;
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->bstash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* insert the run of ncols blocks for this block row in one call */
        PetscCall(MatSetValuesBlocked_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i * bs2, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->bstash));

    /* restore the row/column orientation flags saved above */
    baij->roworiented                           = r1;
    a->roworiented                              = r2;
    ((Mat_SeqBAIJ *)baij->B->data)->roworiented = r3;
  }

  PetscCall(MatAssemblyBegin(baij->A, mode));
  PetscCall(MatAssemblyEnd(baij->A, mode));

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble.
  */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ *)baij->B->data)->nonew) {
    /* collective: every rank must participate, even if it did not disassemble */
    PetscCallMPI(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !other_disassembled) PetscCall(MatDisAssemble_MPIBAIJ(mat));
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIBAIJ(mat));
  PetscCall(MatAssemblyBegin(baij->B, mode));
  PetscCall(MatAssemblyEnd(baij->B, mode));

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    PetscCall(PetscInfo(mat, "Average Hash Table Search in MatSetValues = %5.2f\n", (double)((PetscReal)baij->ht_total_ct) / baij->ht_insert_ct));

    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  /* build the hash table on first final assembly and switch to the hash-table
     MatSetValues()/MatSetValuesBlocked() implementations */
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    PetscCall(MatCreateHashTable_MPIBAIJ_Private(mat, baij->ht_fact));

    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  /* scratch space used by MatGetRow(); rebuilt on demand */
  PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));

  baij->rowvalues = NULL;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ *)baij->A->data)->nonew) {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

extern PetscErrorCode MatView_SeqBAIJ(Mat, PetscViewer);
#include <petscdraw.h>
/*
  Viewer for ASCII, draw, and socket viewers. For detail formats it prints per-rank
  summaries; otherwise it gathers the entire matrix onto rank 0 and views it there.
  (Function body continues in the following lines.)
*/
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
{
  Mat_MPIBAIJ *baij =
    (Mat_MPIBAIJ *)mat->data;
  PetscMPIInt       rank = baij->rank;
  PetscInt          bs   = mat->rmap->bs;
  PetscBool         iascii, isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  if (iascii) {
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* per-rank summary only; no global gather needed */
      MatInfo info;
      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                   mat->rmap->bs, info.memory));
      PetscCall(MatGetInfo(baij->A, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(MatGetInfo(baij->B, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(baij->Mvctx, viewer));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscCall(PetscViewerASCIIPrintf(viewer, "  block size is %" PetscInt_FMT "\n", bs));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(PETSC_SUCCESS);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    Mat_SeqBAIJ *Aloc;
    PetscInt     M = mat->rmap->N, N = mat->cmap->N, *ai, *aj, col, i, j, k, *rvals, mbs = baij->mbs;
    MatScalar   *a;
    const char  *matname;

    /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
    if (rank == 0) {
      PetscCall(MatSetSizes(A, M, N, M, N));
    } else {
      PetscCall(MatSetSizes(A, 0, 0, M, N));
    }
    PetscCall(MatSetType(A, MATMPIBAIJ));
    PetscCall(MatMPIBAIJSetPreallocation(A, mat->rmap->bs, 0, NULL, 0, NULL));
    PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ *)baij->A->data;
    ai   = Aloc->i;
    aj   = Aloc->j;
    a    = Aloc->a;
    PetscCall(PetscMalloc1(bs, &rvals)); /* the bs global row indices of one block row */

    for (i = 0; i < mbs; i++) {
      rvals[0] = bs * (baij->rstartbs + i);
      for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
      for (j = ai[i]; j < ai[i + 1]; j++) {
        /* insert the block one (column-oriented) column at a time */
        col = (baij->cstartbs + aj[j]) * bs;
        for (k = 0; k < bs; k++) {
          PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
          col++;
          a += bs;
        }
      }
    }
    /* copy over the B part (garray maps local to global block columns) */
    Aloc = (Mat_SeqBAIJ *)baij->B->data;
    ai   = Aloc->i;
    aj   = Aloc->j;
    a    = Aloc->a;
    for (i = 0; i < mbs; i++) {
      rvals[0] = bs * (baij->rstartbs + i);
      for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
      for (j = ai[i]; j < ai[i + 1]; j++) {
        col = baij->garray[aj[j]] * bs;
        for (k = 0; k < bs; k++) {
          PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
          col++;
          a += bs;
        }
      }
    }
    PetscCall(PetscFree(rvals));
    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    if (((PetscObject)mat)->name) PetscCall(PetscObjectGetName((PetscObject)mat, &matname));
    if (rank == 0) {
      /* rank 0 owns all rows of the gathered copy, so its local A block is the whole matrix */
      if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIBAIJ *)A->data)->A, matname));
      PetscCall(MatView_SeqBAIJ(((Mat_MPIBAIJ *)A->data)->A, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Used for both MPIBAIJ and MPISBAIJ matrices */
/*
  Writes the matrix in PETSc binary format: a 4-entry header, per-row lengths, column
  indices, then values. Per row the off-diagonal (B) columns left of the diagonal block
  are emitted first, then the diagonal (A) columns, then the remaining B columns, so
  column indices come out in ascending global order.
  (Function body continues in the following lines.)
*/
PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat, PetscViewer viewer)
{
  Mat_MPIBAIJ    *aij    = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ    *A      = (Mat_SeqBAIJ *)aij->A->data;
  Mat_SeqBAIJ    *B      = (Mat_SeqBAIJ *)aij->B->data;
  const PetscInt *garray = aij->garray;
  PetscInt        header[4], M, N, m, rs, cs, bs, cnt, i, j, ja, jb, k, l;
  PetscCount      nz, hnz;
  PetscInt       *rowlens, *colidxs;
  PetscScalar    *matvals;
  PetscMPIInt     rank;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  M  = mat->rmap->N;
  N  = mat->cmap->N;
  m  = mat->rmap->n;
  rs = mat->rmap->rstart;
  cs = mat->cmap->rstart;
  bs = mat->rmap->bs;
  nz = bs * bs * (A->nz + B->nz); /* local scalar (not block) nonzero count */

  /* write matrix header */
  header[0] = MAT_FILE_CLASSID;
  header[1] = M;
  header[2] = N;
  PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_COUNT, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
  if (rank == 0) PetscCall(PetscIntCast(hnz, &header[3])); /* only rank 0 writes the global nz */
  PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));

  /* fill in and store row lengths */
  PetscCall(PetscMalloc1(m, &rowlens));
  for (cnt = 0, i = 0; i < A->mbs; i++)
    for (j = 0; j < bs; j++) rowlens[cnt++] = bs * (A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i]);
  PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
  PetscCall(PetscFree(rowlens));

  /* fill in and store column indices: per row, B columns left of the diagonal block,
     then A columns, then the remaining B columns — ascending global order */
  PetscCall(PetscMalloc1(nz, &colidxs));
  for (cnt = 0, i = 0; i < A->mbs; i++) {
    for (k = 0; k < bs; k++) {
      for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
        if (garray[B->j[jb]] > cs / bs) break;
        for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l;
      }
      for (ja = A->i[i]; ja < A->i[i + 1]; ja++)
        for (l = 0; l < bs; l++) colidxs[cnt++] = bs * A->j[ja] + l + cs;
      for (; jb < B->i[i + 1]; jb++)
        for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l;
    }
  }
  PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscCount_FMT, cnt, nz);
  PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_INT));
  PetscCall(PetscFree(colidxs));

  /* fill in and store nonzero values, in the same order as the column indices above;
     blocks are stored column-oriented, hence the bs*(bs*j + l) + k indexing */
  PetscCall(PetscMalloc1(nz, &matvals));
  for (cnt = 0, i = 0; i < A->mbs; i++) {
    for (k = 0; k < bs; k++) {
      for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
        if (garray[B->j[jb]] > cs / bs) break;
        for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k];
      }
      for (ja = A->i[i]; ja < A->i[i + 1]; ja++)
        for (l = 0; l < bs; l++) matvals[cnt++] = A->a[bs * (bs * ja + l) + k];
      for (; jb < B->i[i + 1]; jb++)
        for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k];
    }
  }
  PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_SCALAR));
  PetscCall(PetscFree(matvals));

  /* write block size option to the viewer's .info file */
  PetscCall(MatView_Binary_BlockSizes(mat, viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Dispatches MatView() to the viewer-type-specific implementation. */
PetscErrorCode MatView_MPIBAIJ(Mat mat, PetscViewer viewer)
{
  PetscBool iascii, isdraw, issocket, isbinary;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  if (iascii || isdraw || issocket) {
    PetscCall(MatView_MPIBAIJ_ASCIIorDraworSocket(mat, viewer));
  } else if (isbinary) PetscCall(MatView_MPIBAIJ_Binary(mat, viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  y = A*x: overlaps the scatter of the needed off-process entries of x with the
  local diagonal-block product, then adds the off-diagonal contribution.
*/
static PetscErrorCode MatMult_MPIBAIJ(Mat A, Vec xx, Vec yy)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
  PetscInt     nt;

  PetscFunctionBegin;
  PetscCall(VecGetLocalSize(xx, &nt));
  PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A and xx");
  PetscCall(VecGetLocalSize(yy, &nt));
  PetscCheck(nt == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A and yy");
  PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*a->A->ops->mult)(a->A, xx, yy));
  PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*a->B->ops->multadd)(a->B, a->lvec, yy, yy));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* z = y + A*x, with the same communication/computation overlap as MatMult.
   (Function body continues in the following lines.) */
static PetscErrorCode MatMultAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIBAIJ *a =
    (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
  PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* y = A^T*x: local transpose products, then a reverse scatter to add the
   off-process partial results into y. */
static PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A, Vec xx, Vec yy)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  /* do nondiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
  /* do local part */
  PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
  /* add partial results together */
  PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
  PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* z = y + A^T*x, same structure as MatMultTranspose_MPIBAIJ. */
static PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  /* do nondiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
  /* do local part */
  PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
  /* add partial results together */
  PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
  PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
static PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A, Vec v)
{
  PetscFunctionBegin;
  PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
  PetscCall(MatGetDiagonal(((Mat_MPIBAIJ *)A->data)->A, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* A = aa*A: scale both the diagonal and off-diagonal local blocks. */
static PetscErrorCode MatScale_MPIBAIJ(Mat A, PetscScalar aa)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatScale(a->A, aa));
  PetscCall(MatScale(a->B, aa));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Returns one locally owned (scalar) row, merging the diagonal (A) and off-diagonal (B)
  parts so that column indices come out in ascending global order. The merged copies
  live in mat->rowvalues/rowindices scratch, released by MatRestoreRow_MPIBAIJ.
  (Function body continues in the following lines.)
*/
PetscErrorCode MatGetRow_MPIBAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data;
  PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
  PetscInt     bs = matin->rmap->bs, bs2 = mat->bs2, i, *cworkA, *cworkB, **pcA, **pcB;
  PetscInt     nztot, nzA, nzB, lrow, brstart = matin->rmap->rstart, brend = matin->rmap->rend;
  PetscInt    *cmap, *idx_p, cstart = mat->cstartbs;

  PetscFunctionBegin;
  PetscCheck(row >= brstart && row < brend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local rows");
  PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *)mat->A->data, *Ba = (Mat_SeqBAIJ *)mat->B->data;
    PetscInt     max = 1, mbs = mat->mbs, tmp;
    for (i = 0; i < mbs; i++) {
      tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    PetscCall(PetscMalloc2(max * bs2, &mat->rowvalues, max * bs2, &mat->rowindices));
  }
  lrow = row - brstart;

  /* only request from A/B the arrays the caller actually asked for */
  pvA = &vworkA;
  pcA = &cworkA;
  pvB = &vworkB;
  pcB = &cworkB;
  if (!v) {
    pvA = NULL;
    pvB = NULL;
  }
  if (!idx) {
    pcA = NULL;
    if (!v) pcB = NULL;
  }
  PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        /* B entries with global block column < cstart precede all A entries;
           imark is the count of such "left of diagonal block" B entries */
        *v = v_p = mat->rowvalues;
        for (i = 0; i < nzB; i++) {
          if (cmap[cworkB[i] / bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
        for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
        } else {
          /* v was not requested, so imark must be computed here */
          for (i = 0; i < nzB; i++) {
            if (cmap[cworkB[i] / bs] < cstart) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
            else break;
          }
          imark = i;
        }
        for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart * bs + cworkA[i];
        for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
      }
    } else {
      if (idx) *idx = NULL;
      if (v) *v = NULL;
    }
  }
  *nz = nztot;
  PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->restorerow)(mat->B, lrow,
    &nzB, pcB, pvB));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Marks the MatGetRow()/MatRestoreRow() pair as complete; scratch arrays are kept
   for reuse and released in MatAssemblyEnd/MatDestroy. */
PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCheck(baij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow not called");
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Zeros all stored values (nonzero pattern is kept) in both local blocks. */
static PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatZeroEntries(l->A));
  PetscCall(MatZeroEntries(l->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Gathers matrix statistics: sums the local A and B block statistics, then, for the
  global flavors, reduces them (MAX or SUM) over the communicator. Note MAT_GLOBAL_*
  variants are collective.
*/
static PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin, MatInfoType flag, MatInfo *info)
{
  Mat_MPIBAIJ   *a = (Mat_MPIBAIJ *)matin->data;
  Mat            A = a->A, B = a->B;
  PetscLogDouble isend[5], irecv[5];

  PetscFunctionBegin;
  info->block_size = (PetscReal)matin->rmap->bs;

  PetscCall(MatGetInfo(A, MAT_LOCAL, info));

  isend[0] = info->nz_used;
  isend[1] = info->nz_allocated;
  isend[2] = info->nz_unneeded;
  isend[3] = info->memory;
  isend[4] = info->mallocs;

  PetscCall(MatGetInfo(B, MAT_LOCAL, info));

  isend[0] += info->nz_used;
  isend[1] += info->nz_allocated;
  isend[2] += info->nz_unneeded;
  isend[3] += info->memory;
  isend[4] += info->mallocs;

  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_ARG_WRONG, "Unknown MatInfoType argument %d", (int)flag);
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Applies a MatOption, forwarding to the local A and/or B blocks where relevant. */
static PetscErrorCode MatSetOption_MPIBAIJ(Mat A, MatOption op, PetscBool flg)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A, 1);
    a->roworiented = flg;

    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_USE_HASH_TABLE:
    a->ht_flag = flg;
    a->ht_fact = 1.39; /* default oversizing factor for the hash table */
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
  case MAT_SPD_ETERNAL:
    /* if the diagonal matrix is square it inherits some of the properties above */
    if (a->A && A->rmap->n == A->cmap->n) PetscCall(MatSetOption(a->A, op, flg));
    break;
  default:
    break;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Forms the (out-of-place or in-place) transpose by inserting each local block,
  one block column at a time, into the transposed global positions of B.
  (Function body continues in the following lines.)
*/
static PetscErrorCode MatTranspose_MPIBAIJ(Mat A, MatReuse reuse, Mat *matout)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)A->data;
  Mat_SeqBAIJ *Aloc;
  Mat          B;
  PetscInt     M = A->rmap->N, N = A->cmap->N, *ai, *aj, i, *rvals, j, k, col;
  PetscInt     bs = A->rmap->bs, mbs = baij->mbs;
  MatScalar   *a;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
    PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
    /* Do not know preallocation information, but must set block size */
    PetscCall(MatMPIBAIJSetPreallocation(B, A->rmap->bs, PETSC_DECIDE, NULL, PETSC_DECIDE, NULL));
  } else {
    B = *matout;
  }

  /* copy over the A part */
  Aloc = (Mat_SeqBAIJ *)baij->A->data;
  ai   = Aloc->i;
  aj   = Aloc->j;
  a    = Aloc->a;
  PetscCall(PetscMalloc1(bs, &rvals)); /* the bs global row indices of one block row */

  for (i = 0; i < mbs; i++) {
    rvals[0] = bs * (baij->rstartbs + i);
    for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
    for (j = ai[i]; j < ai[i + 1]; j++) {
      col = (baij->cstartbs + aj[j]) * bs;
      for (k = 0; k < bs; k++) {
        /* note row/column arguments are swapped relative to a plain copy: this transposes */
        PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES));

        col++;
        a += bs;
      }
    }
  }
  /* copy over the B part */
  Aloc = (Mat_SeqBAIJ *)baij->B->data;
  ai   = Aloc->i;
  aj   = Aloc->j;
  a    = Aloc->a;
  for (i = 0; i < mbs; i++) {
    rvals[0] = bs * (baij->rstartbs + i);
    for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
    for (j = ai[i]; j < ai[i + 1]; j++) {
      col = baij->garray[aj[j]] * bs;
      for (k = 0; k < bs; k++) {
        PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES));
        col++;
        a += bs;
      }
    }
  }
  PetscCall(PetscFree(rvals));
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));

  /* for in-place transpose, replace A's internals with B's and destroy the shell */
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) *matout = B;
  else PetscCall(MatHeaderMerge(A, &B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  A = diag(ll)*A*diag(rr): left scaling is purely local; right scaling of the
  off-diagonal block needs the scattered (ghosted) entries of rr, so the scatter
  is overlapped with the other scaling work.
*/
static PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat, Vec ll, Vec rr)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat          a = baij->A, b = baij->B;
  PetscInt     s1, s2, s3;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(mat, &s2, &s3));
  if (rr) {
    PetscCall(VecGetLocalSize(rr, &s1));
    PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size");
    /* Overlap communication with computation. */
    PetscCall(VecScatterBegin(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
  }
  if (ll) {
    PetscCall(VecGetLocalSize(ll, &s1));
    PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size");
    PetscUseTypeMethod(b, diagonalscale, ll, NULL);
  }
  /* scale the diagonal block */
  PetscUseTypeMethod(a, diagonalscale, ll, rr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    PetscCall(VecScatterEnd(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscUseTypeMethod(b, diagonalscale, NULL, baij->lvec);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Zeros the locally owned rows listed in rows[] (global indices), optionally placing
  `diag` on the diagonal and fixing the right-hand side b for a known solution x.
*/
static PetscErrorCode MatZeroRows_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data;
  PetscInt    *lrows;
  PetscInt     r, len;
  PetscBool    cong;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows));
  /* fix right-hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArray(b, &bb));
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArray(b, &bb));
  }

  /* actually zap the local rows */
  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
  PetscCall(MatZeroRows_SeqBAIJ(l->B, len, lrows, 0.0, NULL, NULL));
  PetscCall(MatHasCongruentLayouts(A, &cong));
  if ((diag != 0.0) && cong) {
    PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, diag, NULL, NULL));
  } else if (diag != 0.0) {
    /* non-congruent layouts: the diagonal entries must be inserted one by one,
       which requires the matrix to allow new nonzero locations */
    PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL));
    PetscCheck(!((Mat_SeqBAIJ *)l->A->data)->nonew, PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options MAT_NEW_NONZERO_LOCATIONS, MAT_NEW_NONZERO_LOCATION_ERR, and MAT_NEW_NONZERO_ALLOCATION_ERR");
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
    }
    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
  } else {
    PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL));
  }
  PetscCall(PetscFree(lrows));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqBAIJ *)l->A->data)->keepnonzeropattern || !((Mat_SeqBAIJ *)l->A->data)->nonew) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Zeros both the rows and the columns listed in rows[] (global indices). Uses a PetscSF
  to flag locally owned target rows, then a scattered 0/1 mask vector to locate the
  corresponding columns in the off-diagonal block.
  (Function body continues in the following lines.)
*/
static PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIBAIJ       *l = (Mat_MPIBAIJ *)A->data;
  PetscMPIInt        n, p = 0;
  PetscInt           i, j, k, r, len = 0, row, col, count;
  PetscInt          *lrows, *owners = A->rmap->range;
  PetscSFNode       *rrows;
  PetscSF            sf;
  const PetscScalar *xx;
  PetscScalar       *bb, *mask;
  Vec                xmask, lmask;
  Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ *)l->B->data;
  PetscInt           bs = A->rmap->bs, bs2 = baij->bs2;
  PetscScalar       *aa;

  PetscFunctionBegin;
  PetscCall(PetscMPIIntCast(A->rmap->n, &n));
  /* Create SF where leaves are input rows and roots are owned rows */
  PetscCall(PetscMalloc1(n, &lrows));
  for (r = 0; r < n; ++r) lrows[r] = -1;
  PetscCall(PetscMalloc1(N, &rrows));
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N);
    if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */
      PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p));
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
  /* Collect flags for rows to be zeroed */
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFDestroy(&sf));
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r)
    if (lrows[r] >= 0) lrows[len++] = r;
  /* zero
diagonal part of matrix */ 1698 PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b)); 1699 /* handle off-diagonal part of matrix */ 1700 PetscCall(MatCreateVecs(A, &xmask, NULL)); 1701 PetscCall(VecDuplicate(l->lvec, &lmask)); 1702 PetscCall(VecGetArray(xmask, &bb)); 1703 for (i = 0; i < len; i++) bb[lrows[i]] = 1; 1704 PetscCall(VecRestoreArray(xmask, &bb)); 1705 PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD)); 1706 PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD)); 1707 PetscCall(VecDestroy(&xmask)); 1708 if (x) { 1709 PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1710 PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1711 PetscCall(VecGetArrayRead(l->lvec, &xx)); 1712 PetscCall(VecGetArray(b, &bb)); 1713 } 1714 PetscCall(VecGetArray(lmask, &mask)); 1715 /* remove zeroed rows of off-diagonal matrix */ 1716 for (i = 0; i < len; ++i) { 1717 row = lrows[i]; 1718 count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs; 1719 aa = baij->a + baij->i[row / bs] * bs2 + (row % bs); 1720 for (k = 0; k < count; ++k) { 1721 aa[0] = 0.0; 1722 aa += bs; 1723 } 1724 } 1725 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1726 for (i = 0; i < l->B->rmap->N; ++i) { 1727 row = i / bs; 1728 for (j = baij->i[row]; j < baij->i[row + 1]; ++j) { 1729 for (k = 0; k < bs; ++k) { 1730 col = bs * baij->j[j] + k; 1731 if (PetscAbsScalar(mask[col])) { 1732 aa = baij->a + j * bs2 + (i % bs) + bs * k; 1733 if (x) bb[i] -= aa[0] * xx[col]; 1734 aa[0] = 0.0; 1735 } 1736 } 1737 } 1738 } 1739 if (x) { 1740 PetscCall(VecRestoreArray(b, &bb)); 1741 PetscCall(VecRestoreArrayRead(l->lvec, &xx)); 1742 } 1743 PetscCall(VecRestoreArray(lmask, &mask)); 1744 PetscCall(VecDestroy(&lmask)); 1745 PetscCall(PetscFree(lrows)); 1746 1747 /* only change matrix nonzero state if pattern was allowed to be changed */ 1748 if (!((Mat_SeqBAIJ 
*)l->A->data)->nonew) { 1749 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1750 PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A))); 1751 } 1752 PetscFunctionReturn(PETSC_SUCCESS); 1753 } 1754 1755 static PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1756 { 1757 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1758 1759 PetscFunctionBegin; 1760 PetscCall(MatSetUnfactored(a->A)); 1761 PetscFunctionReturn(PETSC_SUCCESS); 1762 } 1763 1764 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat, MatDuplicateOption, Mat *); 1765 1766 static PetscErrorCode MatEqual_MPIBAIJ(Mat A, Mat B, PetscBool *flag) 1767 { 1768 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ *)B->data, *matA = (Mat_MPIBAIJ *)A->data; 1769 Mat a, b, c, d; 1770 PetscBool flg; 1771 1772 PetscFunctionBegin; 1773 a = matA->A; 1774 b = matA->B; 1775 c = matB->A; 1776 d = matB->B; 1777 1778 PetscCall(MatEqual(a, c, &flg)); 1779 if (flg) PetscCall(MatEqual(b, d, &flg)); 1780 PetscCallMPI(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A))); 1781 PetscFunctionReturn(PETSC_SUCCESS); 1782 } 1783 1784 static PetscErrorCode MatCopy_MPIBAIJ(Mat A, Mat B, MatStructure str) 1785 { 1786 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1787 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 1788 1789 PetscFunctionBegin; 1790 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
*/
  if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
    PetscCall(MatCopy_Basic(A, B, str)); /* generic element-wise copy path */
  } else {
    /* fast path: copy the diagonal and off-diagonal sequential blocks directly */
    PetscCall(MatCopy(a->A, b->A, str));
    PetscCall(MatCopy(a->B, b->B, str));
  }
  PetscCall(PetscObjectStateIncrease((PetscObject)B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Computes the per-block-row nonzero counts of Y + X for the off-diagonal part, mapping both
  matrices' compressed column indices through their local-to-global maps (yltog/xltog) so the
  union of the two patterns can be preallocated.
*/
PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz)
{
  PetscInt     bs = Y->rmap->bs, m = Y->rmap->N / bs;
  Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data;
  Mat_SeqBAIJ *y = (Mat_SeqBAIJ *)Y->data;

  PetscFunctionBegin;
  /* Set the number of nonzeros in the new matrix */
  PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Y = a*X + Y.  SAME_NONZERO_PATTERN uses a raw BLAS axpy over both stored-value arrays;
  SUBSET falls back to the basic implementation; otherwise a new matrix with the union
  pattern is preallocated, filled, and header-merged back into Y.
*/
static PetscErrorCode MatAXPY_MPIBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
{
  Mat_MPIBAIJ *xx = (Mat_MPIBAIJ *)X->data, *yy = (Mat_MPIBAIJ *)Y->data;
  PetscBLASInt bnz, one = 1;
  Mat_SeqBAIJ *x, *y;
  PetscInt     bs2 = Y->rmap->bs * Y->rmap->bs;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscScalar alpha = a;
    x = (Mat_SeqBAIJ *)xx->A->data;
    y = (Mat_SeqBAIJ *)yy->A->data;
    PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz)); /* nz counts blocks, so scale by bs2 scalars per block */
    PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one));
    x = (Mat_SeqBAIJ *)xx->B->data;
    y = (Mat_SeqBAIJ *)yy->B->data;
    PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz));
    PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one));
    PetscCall(PetscObjectStateIncrease((PetscObject)Y));
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    PetscCall(MatAXPY_Basic(Y, a, X, str));
  } else {
    Mat       B;
    PetscInt *nnz_d, *nnz_o, bs = Y->rmap->bs;
    PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
    PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
    PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
    PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
    PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N));
    PetscCall(MatSetBlockSizesFromMats(B, Y, Y));
    PetscCall(MatSetType(B, MATMPIBAIJ));
    PetscCall(MatAXPYGetPreallocation_SeqBAIJ(yy->A, xx->A, nnz_d));
    PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
    PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, nnz_d, 0, nnz_o));
    /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! */
    PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
    PetscCall(MatHeaderMerge(Y, &B)); /* B's guts replace Y's; callers keep the same Mat handle */
    PetscCall(PetscFree(nnz_d));
    PetscCall(PetscFree(nnz_o));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Complex conjugates all stored values (no-op in real builds) */
static PetscErrorCode MatConjugate_MPIBAIJ(Mat mat)
{
  PetscFunctionBegin;
  if (PetscDefined(USE_COMPLEX)) {
    Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)mat->data;

    PetscCall(MatConjugate_SeqBAIJ(a->A));
    PetscCall(MatConjugate_SeqBAIJ(a->B));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Replaces every stored value with its real part, block by block */
static PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatRealPart(a->A));
  PetscCall(MatRealPart(a->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Replaces every stored value with its imaginary part, block by block */
static PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatImaginaryPart(a->A));
  PetscCall(MatImaginaryPart(a->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Extracts the submatrix given by (isrow, iscol).  The column IS is allgathered so every
  rank knows the full column selection; on reuse the gathered IS is recovered from the
  composed "ISAllGather" attribute of the previously returned matrix.
*/
static PetscErrorCode MatCreateSubMatrix_MPIBAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
{
  IS       iscol_local;
  PetscInt csize;

  PetscFunctionBegin;
  PetscCall(ISGetLocalSize(iscol, &csize));
  if (call ==
MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
    PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
  } else {
    PetscCall(ISAllGather(iscol, &iscol_local));
  }
  PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, isrow, iscol_local, csize, call, newmat, PETSC_FALSE));
  if (call == MAT_INITIAL_MATRIX) {
    /* stash the gathered column IS on the result so MAT_REUSE_MATRIX can find it again */
    PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
    PetscCall(ISDestroy(&iscol_local));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Not great since it makes two copies of the submatrix, first an SeqBAIJ
  in local and then by concatenating the local matrices the end result.
  Writing it directly would be much like MatCreateSubMatrices_MPIBAIJ().
  This routine is used for BAIJ and SBAIJ matrices (unfortunate dependency).
*/
PetscErrorCode MatCreateSubMatrix_MPIBAIJ_Private(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat, PetscBool sym)
{
  PetscMPIInt  rank, size;
  PetscInt     i, m, n, rstart, row, rend, nz, *cwork, j, bs;
  PetscInt    *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
  Mat          M, Mreuse;
  MatScalar   *vwork, *aa;
  MPI_Comm     comm;
  IS           isrow_new, iscol_new;
  Mat_SeqBAIJ *aij;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  /* The compression and expansion should be avoided. Doesn't point
     out errors, might change the indices, hence buggy */
  /* convert scalar index sets into block index sets expected by the local extraction routine */
  PetscCall(ISCompressIndicesGeneral(mat->rmap->N, mat->rmap->n, mat->rmap->bs, 1, &isrow, &isrow_new));
  if (isrow == iscol) {
    iscol_new = isrow_new;
    PetscCall(PetscObjectReference((PetscObject)iscol_new));
  } else PetscCall(ISCompressIndicesGeneral(mat->cmap->N, mat->cmap->n, mat->cmap->bs, 1, &iscol, &iscol_new));

  if (call == MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse));
    PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
    PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_REUSE_MATRIX, &Mreuse, sym));
  } else {
    PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_INITIAL_MATRIX, &Mreuse, sym));
  }
  PetscCall(ISDestroy(&isrow_new));
  PetscCall(ISDestroy(&iscol_new));
  /*
    m - number of local rows
    n - number of columns (same on all processors)
    rstart - first row in new global matrix generated
  */
  PetscCall(MatGetBlockSize(mat, &bs));
  PetscCall(MatGetSize(Mreuse, &m, &n));
  m = m / bs; /* switch from scalar to block counts */
  n = n / bs;

  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqBAIJ *)Mreuse->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
      Determine the number of non-zeros in the diagonal and off-diagonal
      portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow, &mglobal));
      if (mglobal == n * bs) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = n / size + ((n % size) > rank); /* spread the remainder over the first n%size ranks */
      }
    } else {
      nlocal = csize / bs;
    }
    PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
    rstart = rend - nlocal;
    PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n);

    /* next, compute all the lengths */
    PetscCall(PetscMalloc2(m + 1, &dlens, m + 1, &olens));
    for (i = 0; i < m; i++) {
      jend = ii[i + 1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j = 0; j < jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    PetscCall(MatCreate(comm, &M));
    PetscCall(MatSetSizes(M, bs * m, bs * nlocal, PETSC_DECIDE, bs * n));
    PetscCall(MatSetType(M, sym ? ((PetscObject)mat)->type_name : MATMPIBAIJ));
    /* both preallocations are called because this routine serves BAIJ and SBAIJ; the one
       not matching M's type is a no-op */
    PetscCall(MatMPIBAIJSetPreallocation(M, bs, 0, dlens, 0, olens));
    PetscCall(MatMPISBAIJSetPreallocation(M, bs, 0, dlens, 0, olens));
    PetscCall(PetscFree2(dlens, olens));
  } else {
    PetscInt ml, nl;

    M = *newmat;
    PetscCall(MatGetLocalSize(M, &ml, &nl));
    PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
      The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
      rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  PetscCall(MatSetOption(M, MAT_ROW_ORIENTED, PETSC_FALSE));
  PetscCall(MatGetOwnershipRange(M, &rstart, &rend));
  aij = (Mat_SeqBAIJ *)Mreuse->data;
  ii  = aij->i;
  jj  = aij->j;
  aa  = aij->a;
  for (i = 0; i < m; i++) {
    row   = rstart / bs + i;
    nz    = ii[i + 1] - ii[i];
    cwork = jj;
    jj    = PetscSafePointerPlusOffset(jj, nz);
    vwork = aa;
    aa    = PetscSafePointerPlusOffset(aa, nz * bs * bs);
    PetscCall(MatSetValuesBlocked_MPIBAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES));
  }

  PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse));
    PetscCall(PetscObjectDereference((PetscObject)Mreuse));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Permutes rows and columns of A.  The row permutation is promoted to the matrix
  communicator and the column permutation is gathered locally, after which the
  permutation is performed as a submatrix extraction.
*/
static PetscErrorCode MatPermute_MPIBAIJ(Mat A, IS rowp, IS colp, Mat *B)
{
  MPI_Comm        comm, pcomm;
  PetscInt        clocal_size, nrows;
  const PetscInt *rows;
  PetscMPIInt     size;
  IS              crowp, lcolp;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  /* make a collective version of 'rowp' */
  PetscCall(PetscObjectGetComm((PetscObject)rowp, &pcomm));
  if (pcomm == comm) {
    crowp = rowp;
  } else {
    PetscCall(ISGetSize(rowp, &nrows));
    PetscCall(ISGetIndices(rowp, &rows));
    PetscCall(ISCreateGeneral(comm, nrows, rows, PETSC_COPY_VALUES, &crowp));
    PetscCall(ISRestoreIndices(rowp, &rows));
  }
  PetscCall(ISSetPermutation(crowp));
  /* make a local version of 'colp' */
  PetscCall(PetscObjectGetComm((PetscObject)colp, &pcomm));
  PetscCallMPI(MPI_Comm_size(pcomm, &size));
  if (size == 1) {
    lcolp = colp;
} else {
    PetscCall(ISAllGather(colp, &lcolp));
  }
  PetscCall(ISSetPermutation(lcolp));
  /* now we just get the submatrix */
  PetscCall(MatGetLocalSize(A, NULL, &clocal_size));
  PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(A, crowp, lcolp, clocal_size, MAT_INITIAL_MATRIX, B, PETSC_FALSE));
  /* clean up */
  if (pcomm != comm) PetscCall(ISDestroy(&crowp));
  if (size > 1) PetscCall(ISDestroy(&lcolp));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Returns the number of ghost block columns and the global indices they correspond to */
static PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[])
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ *B    = (Mat_SeqBAIJ *)baij->B->data;

  PetscFunctionBegin;
  if (nghosts) *nghosts = B->nbs;
  if (ghosts) *ghosts = baij->garray;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Builds, on every rank, a sequential AIJ matrix holding the full (block-level) nonzero
  structure of the parallel matrix.  Only the pattern is meaningful; numerical values are
  never set.  Column indices of each row are emitted in sorted order by interleaving the
  off-diagonal (below-diagonal, then above-diagonal) entries around the diagonal block.
*/
static PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A, Mat *newmat)
{
  Mat          B;
  Mat_MPIBAIJ *a  = (Mat_MPIBAIJ *)A->data;
  Mat_SeqBAIJ *ad = (Mat_SeqBAIJ *)a->A->data, *bd = (Mat_SeqBAIJ *)a->B->data;
  Mat_SeqAIJ  *b;
  PetscMPIInt  size, rank, *recvcounts = NULL, *displs = NULL;
  PetscInt     sendcount, i, *rstarts = A->rmap->range, n, cnt, j, bs = A->rmap->bs;
  PetscInt     m, *garray = a->garray, *lens, *jsendbuf, *a_jsendbuf, *b_jsendbuf;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));

  /* Tell every processor the number of nonzeros per row */
  PetscCall(PetscMalloc1(A->rmap->N / bs, &lens));
  for (i = A->rmap->rstart / bs; i < A->rmap->rend / bs; i++) lens[i] = ad->i[i - A->rmap->rstart / bs + 1] - ad->i[i - A->rmap->rstart / bs] + bd->i[i - A->rmap->rstart / bs + 1] - bd->i[i - A->rmap->rstart / bs];
  PetscCall(PetscMalloc1(2 * size, &recvcounts)); /* single allocation holds both recvcounts and displs */
  displs = recvcounts + size;
  for (i = 0; i < size; i++) {
    PetscCall(PetscMPIIntCast(A->rmap->range[i + 1] / bs - A->rmap->range[i] / bs, &recvcounts[i]));
    PetscCall(PetscMPIIntCast(A->rmap->range[i] / bs, &displs[i]));
  }
  PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, lens, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A)));
  /* Create the sequential matrix of the same type as the local block diagonal */
  PetscCall(MatCreate(PETSC_COMM_SELF, &B));
  PetscCall(MatSetSizes(B, A->rmap->N / bs, A->cmap->N / bs, PETSC_DETERMINE, PETSC_DETERMINE));
  PetscCall(MatSetType(B, MATSEQAIJ));
  PetscCall(MatSeqAIJSetPreallocation(B, 0, lens));
  b = (Mat_SeqAIJ *)B->data;

  /* Copy my part of matrix column indices over */
  sendcount  = ad->nz + bd->nz;
  jsendbuf   = b->j + b->i[rstarts[rank] / bs]; /* write directly into my slice of B's j array */
  a_jsendbuf = ad->j;
  b_jsendbuf = bd->j;
  n          = A->rmap->rend / bs - A->rmap->rstart / bs;
  cnt        = 0;
  for (i = 0; i < n; i++) {
    /* put in lower diagonal portion */
    m = bd->i[i + 1] - bd->i[i];
    while (m > 0) {
      /* is it above diagonal (in bd (compressed) numbering) */
      if (garray[*b_jsendbuf] > A->rmap->rstart / bs + i) break;
      jsendbuf[cnt++] = garray[*b_jsendbuf++];
      m--;
    }

    /* put in diagonal portion */
    for (j = ad->i[i]; j < ad->i[i + 1]; j++) jsendbuf[cnt++] = A->rmap->rstart / bs + *a_jsendbuf++;

    /* put in upper diagonal portion */
    while (m-- > 0) jsendbuf[cnt++] = garray[*b_jsendbuf++];
  }
  PetscCheck(cnt == sendcount, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Corrupted PETSc matrix: nz given %" PetscInt_FMT " actual nz %" PetscInt_FMT, sendcount, cnt);

  /* Gather all column indices to all processors */
  for (i = 0; i < size; i++) {
    recvcounts[i] = 0;
    for (j = A->rmap->range[i] / bs; j < A->rmap->range[i + 1] / bs; j++) recvcounts[i] += lens[j];
  }
  displs[0] = 0;
  for (i = 1; i < size; i++) displs[i] = displs[i - 1] + recvcounts[i - 1];
  PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, b->j, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A)));
  /* Assemble the matrix into usable form (note numerical values not yet set) */
  /* set the b->ilen (length of each row) values */
  PetscCall(PetscArraycpy(b->ilen, lens, A->rmap->N / bs));
  /* set the b->i indices */
  b->i[0] = 0;
  for (i = 1; i <= A->rmap->N / bs; i++) b->i[i] = b->i[i - 1] + lens[i - 1];
  PetscCall(PetscFree(lens));
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  PetscCall(PetscFree(recvcounts));

  PetscCall(MatPropagateSymmetryOptions(A, B));
  *newmat = B;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Local-block SOR: each sweep scatters the current x into the ghost vector, folds the
  off-diagonal contribution into a modified rhs (bb1 = bb - B*x), then runs the requested
  sweep on the local diagonal block only.  Truly parallel SOR variants are unsupported.
*/
static PetscErrorCode MatSOR_MPIBAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data;
  Vec          bb1 = NULL;

  PetscFunctionBegin;
  if (flag == SOR_APPLY_UPPER) {
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  /* bb1 only needed when some iteration uses a nonzero initial guess */
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) PetscCall(VecDuplicate(bb, &bb1));

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }

    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP,
fshift, lits, 1, xx));
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel version of SOR requested not supported");

  PetscCall(VecDestroy(&bb1));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Computes one reduction per global (scalar) column: 1/2/inf norms, or sum/mean of the
  real or imaginary parts.  Each rank accumulates its diagonal-block entries (columns
  offset by cmap->rstart) and off-diagonal entries (columns mapped through garray) into a
  dense work array of length N, then the arrays are MAX- or SUM-reduced across ranks.
  Note the value loops walk the bs*bs blocks linearly via the a_val/b_val cursors.
*/
static PetscErrorCode MatGetColumnReductions_MPIBAIJ(Mat A, PetscInt type, PetscReal *reductions)
{
  Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)A->data;
  PetscInt     m, N, i, *garray = aij->garray;
  PetscInt     ib, jb, bs = A->rmap->bs;
  Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ *)aij->A->data;
  MatScalar   *a_val = a_aij->a;
  Mat_SeqBAIJ *b_aij = (Mat_SeqBAIJ *)aij->B->data;
  MatScalar   *b_val = b_aij->a;
  PetscReal   *work;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A, &m, &N));
  PetscCall(PetscCalloc1(N, &work));
  if (type == NORM_2) {
    for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
      for (jb = 0; jb < bs; jb++) {
        for (ib = 0; ib < bs; ib++) {
          work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
          a_val++;
        }
      }
    }
    for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
      for (jb = 0; jb < bs; jb++) {
        for (ib = 0; ib < bs; ib++) {
          work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val);
          b_val++;
        }
      }
    }
  } else if (type == NORM_1) {
    for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
      for (jb = 0; jb < bs; jb++) {
        for (ib = 0; ib < bs; ib++) {
          work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
          a_val++;
        }
      }
    }
    for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
      for (jb = 0; jb < bs; jb++) {
        for (ib = 0; ib < bs; ib++) {
          work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val);
          b_val++;
        }
      }
    }
  } else if (type == NORM_INFINITY) {
    for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
      for (jb = 0; jb < bs; jb++) {
        for (ib = 0; ib < bs; ib++) {
          PetscInt col = A->cmap->rstart + a_aij->j[i] * bs + jb;
          work[col]    = PetscMax(PetscAbsScalar(*a_val), work[col]);
          a_val++;
        }
      }
    }
    for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
      for (jb = 0; jb < bs; jb++) {
        for (ib = 0; ib < bs; ib++) {
          PetscInt col = garray[b_aij->j[i]] * bs + jb;
          work[col]    = PetscMax(PetscAbsScalar(*b_val), work[col]);
          b_val++;
        }
      }
    }
  } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
    for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
      for (jb = 0; jb < bs; jb++) {
        for (ib = 0; ib < bs; ib++) {
          work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val);
          a_val++;
        }
      }
    }
    for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
      for (jb = 0; jb < bs; jb++) {
        for (ib = 0; ib < bs; ib++) {
          work[garray[b_aij->j[i]] * bs + jb] += PetscRealPart(*b_val);
          b_val++;
        }
      }
    }
  } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
      for (jb = 0; jb < bs; jb++) {
        for (ib = 0; ib < bs; ib++) {
          work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val);
          a_val++;
        }
      }
    }
    for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
      for (jb = 0; jb < bs; jb++) {
        for (ib = 0; ib < bs; ib++) {
          work[garray[b_aij->j[i]] * bs + jb] += PetscImaginaryPart(*b_val);
          b_val++;
        }
      }
    }
  } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
  if (type == NORM_INFINITY) {
    PetscCallMPI(MPIU_Allreduce(work, reductions, N, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
  } else {
    PetscCallMPI(MPIU_Allreduce(work, reductions, N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscCall(PetscFree(work));
  if (type == NORM_2) {
    for (i = 0; i < N; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < N; i++) reductions[i] /= m; /* divide column sums by global row count for the mean */
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Returns pointers to the inverses of the diagonal bs x bs blocks (computed on the local
  diagonal part) and propagates any zero-pivot factorization error info up to A.
*/
static PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A, const PetscScalar **values)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatInvertBlockDiagonal(a->A,
values));
  A->factorerrortype             = a->A->factorerrortype;
  A->factorerror_zeropivot_value = a->A->factorerror_zeropivot_value;
  A->factorerror_zeropivot_row   = a->A->factorerror_zeropivot_row;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Y = Y + a*I.  Ensures the diagonal block has at least one block per row preallocated
  before delegating to MatShift_Basic(); preserves the nonew flag around the
  repreallocation so the pattern-change policy is not altered.
*/
static PetscErrorCode MatShift_MPIBAIJ(Mat Y, PetscScalar a)
{
  Mat_MPIBAIJ *maij = (Mat_MPIBAIJ *)Y->data;
  Mat_SeqBAIJ *aij  = (Mat_SeqBAIJ *)maij->A->data;

  PetscFunctionBegin;
  if (!Y->preallocated) {
    PetscCall(MatMPIBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL, 0, NULL));
  } else if (!aij->nz) {
    PetscInt nonew = aij->nonew;
    PetscCall(MatSeqBAIJSetPreallocation(maij->A, Y->rmap->bs, 1, NULL));
    aij->nonew = nonew;
  }
  PetscCall(MatShift_Basic(Y, a));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Reports whether any diagonal entry is absent; *d is shifted to the global block row */
static PetscErrorCode MatMissingDiagonal_MPIBAIJ(Mat A, PetscBool *missing, PetscInt *d)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
  PetscCall(MatMissingDiagonal(a->A, missing, d));
  if (d) {
    PetscInt rstart;
    PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
    *d += rstart / A->rmap->bs; /* convert local block row to global block row */
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Hands back the sequential diagonal block (no reference taken) */
static PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A, Mat *a)
{
  PetscFunctionBegin;
  *a = ((Mat_MPIBAIJ *)A->data)->A;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Drops explicitly stored zeros from both local blocks */
static PetscErrorCode MatEliminateZeros_MPIBAIJ(Mat A, PetscBool keep)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatEliminateZeros_SeqBAIJ(a->A, keep));        // possibly keep zero diagonal coefficients
  PetscCall(MatEliminateZeros_SeqBAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Virtual function table for MATMPIBAIJ; slot indices follow the MatOps layout in petscmat.h */
static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ,
                                       MatGetRow_MPIBAIJ,
                                       MatRestoreRow_MPIBAIJ,
                                       MatMult_MPIBAIJ,
                                       /* 4*/ MatMultAdd_MPIBAIJ,
                                       MatMultTranspose_MPIBAIJ,
                                       MatMultTransposeAdd_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*10*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_MPIBAIJ,
                                       MatTranspose_MPIBAIJ,
                                       /*15*/ MatGetInfo_MPIBAIJ,
                                       MatEqual_MPIBAIJ,
                                       MatGetDiagonal_MPIBAIJ,
                                       MatDiagonalScale_MPIBAIJ,
                                       MatNorm_MPIBAIJ,
                                       /*20*/ MatAssemblyBegin_MPIBAIJ,
                                       MatAssemblyEnd_MPIBAIJ,
                                       MatSetOption_MPIBAIJ,
                                       MatZeroEntries_MPIBAIJ,
                                       /*24*/ MatZeroRows_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*29*/ MatSetUp_MPI_Hash,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIBAIJ,
                                       NULL,
                                       /*34*/ MatDuplicate_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*39*/ MatAXPY_MPIBAIJ,
                                       MatCreateSubMatrices_MPIBAIJ,
                                       MatIncreaseOverlap_MPIBAIJ,
                                       MatGetValues_MPIBAIJ,
                                       MatCopy_MPIBAIJ,
                                       /*44*/ NULL,
                                       MatScale_MPIBAIJ,
                                       MatShift_MPIBAIJ,
                                       NULL,
                                       MatZeroRowsColumns_MPIBAIJ,
                                       /*49*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*54*/ MatFDColoringCreate_MPIXAIJ,
                                       NULL,
                                       MatSetUnfactored_MPIBAIJ,
                                       MatPermute_MPIBAIJ,
                                       MatSetValuesBlocked_MPIBAIJ,
                                       /*59*/ MatCreateSubMatrix_MPIBAIJ,
                                       MatDestroy_MPIBAIJ,
                                       MatView_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       /*64*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*69*/ MatGetRowMaxAbs_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*74*/ NULL,
                                       MatFDColoringApply_BAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*79*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatLoad_MPIBAIJ,
                                       /*84*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*89*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*94*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*99*/ NULL,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIBAIJ,
                                       NULL,
                                       /*104*/ NULL,
                                       MatRealPart_MPIBAIJ,
                                       MatImaginaryPart_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       /*109*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatMissingDiagonal_MPIBAIJ,
                                       /*114*/ MatGetSeqNonzeroStructure_MPIBAIJ,
                                       NULL,
                                       MatGetGhosts_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       /*119*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatGetMultiProcBlock_MPIBAIJ,
                                       /*124*/ NULL,
                                       MatGetColumnReductions_MPIBAIJ,
                                       MatInvertBlockDiagonal_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       /*129*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*139*/ MatSetBlockSizes_Default,
                                       NULL,
                                       NULL,
                                       MatFDColoringSetUp_MPIXAIJ,
                                       NULL,
                                       /*144*/ MatCreateMPIMatConcatenateSeqMat_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*150*/ NULL,
                                       MatEliminateZeros_MPIBAIJ,
                                       MatGetRowSumAbs_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       /*155*/ NULL,
                                       MatCopyHashToXAIJ_MPI_Hash};

PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *);
PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);

/*
  Preallocates and fills the matrix from block-CSR input (ii/jj are block indices local
  to this rank; V optionally holds bs*bs values per block).  Counts diagonal vs
  off-diagonal blocks per row first so MatMPIBAIJSetPreallocation() is exact.
*/
static PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[])
{
  PetscInt        m, rstart, cstart, cend;
  PetscInt        i, j, dlen, olen, nz, nz_max = 0, *d_nnz = NULL, *o_nnz = NULL;
  const PetscInt *JJ = NULL;
  PetscScalar    *values = NULL;
  PetscBool       roworiented = ((Mat_MPIBAIJ *)B->data)->roworiented;
  PetscBool       nooffprocentries;

  PetscFunctionBegin;
  PetscCall(PetscLayoutSetBlockSize(B->rmap, bs));
  PetscCall(PetscLayoutSetBlockSize(B->cmap, bs));
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); /* layout may have adjusted bs */
  m      = B->rmap->n / bs;
  rstart = B->rmap->rstart / bs;
  cstart = B->cmap->rstart / bs;
  cend   = B->cmap->rend /
bs; 2598 2599 PetscCheck(!ii[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]); 2600 PetscCall(PetscMalloc2(m, &d_nnz, m, &o_nnz)); 2601 for (i = 0; i < m; i++) { 2602 nz = ii[i + 1] - ii[i]; 2603 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz); 2604 nz_max = PetscMax(nz_max, nz); 2605 dlen = 0; 2606 olen = 0; 2607 JJ = jj + ii[i]; 2608 for (j = 0; j < nz; j++) { 2609 if (*JJ < cstart || *JJ >= cend) olen++; 2610 else dlen++; 2611 JJ++; 2612 } 2613 d_nnz[i] = dlen; 2614 o_nnz[i] = olen; 2615 } 2616 PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, d_nnz, 0, o_nnz)); 2617 PetscCall(PetscFree2(d_nnz, o_nnz)); 2618 2619 values = (PetscScalar *)V; 2620 if (!values) PetscCall(PetscCalloc1(bs * bs * nz_max, &values)); 2621 for (i = 0; i < m; i++) { 2622 PetscInt row = i + rstart; 2623 PetscInt ncols = ii[i + 1] - ii[i]; 2624 const PetscInt *icols = jj + ii[i]; 2625 if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */ 2626 const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0); 2627 PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, ncols, icols, svals, INSERT_VALUES)); 2628 } else { /* block ordering does not match so we can only insert one block at a time. */ 2629 PetscInt j; 2630 for (j = 0; j < ncols; j++) { 2631 const PetscScalar *svals = values + (V ? 
(bs * bs * (ii[i] + j)) : 0); 2632 PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, 1, &icols[j], svals, INSERT_VALUES)); 2633 } 2634 } 2635 } 2636 2637 if (!V) PetscCall(PetscFree(values)); 2638 nooffprocentries = B->nooffprocentries; 2639 B->nooffprocentries = PETSC_TRUE; 2640 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 2641 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 2642 B->nooffprocentries = nooffprocentries; 2643 2644 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 2645 PetscFunctionReturn(PETSC_SUCCESS); 2646 } 2647 2648 /*@C 2649 MatMPIBAIJSetPreallocationCSR - Creates a sparse parallel matrix in `MATBAIJ` format using the given nonzero structure and (optional) numerical values 2650 2651 Collective 2652 2653 Input Parameters: 2654 + B - the matrix 2655 . bs - the block size 2656 . i - the indices into `j` for the start of each local row (starts with zero) 2657 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2658 - v - optional values in the matrix, use `NULL` if not provided 2659 2660 Level: advanced 2661 2662 Notes: 2663 The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc; 2664 thus you CANNOT change the matrix entries by changing the values of `v` after you have 2665 called this routine. 2666 2667 The order of the entries in values is specified by the `MatOption` `MAT_ROW_ORIENTED`. For example, C programs 2668 may want to use the default `MAT_ROW_ORIENTED` with value `PETSC_TRUE` and use an array v[nnz][bs][bs] where the second index is 2669 over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 2670 `MAT_ROW_ORIENTED` with value `PETSC_FALSE` and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 2671 block column and the second index is over columns within a block. 
2672 2673 Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well 2674 2675 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MATMPIAIJ`, `MatCreateMPIBAIJWithArrays()`, `MATMPIBAIJ` 2676 @*/ 2677 PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[]) 2678 { 2679 PetscFunctionBegin; 2680 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 2681 PetscValidType(B, 1); 2682 PetscValidLogicalCollectiveInt(B, bs, 2); 2683 PetscTryMethod(B, "MatMPIBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v)); 2684 PetscFunctionReturn(PETSC_SUCCESS); 2685 } 2686 2687 PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz) 2688 { 2689 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 2690 PetscInt i; 2691 PetscMPIInt size; 2692 2693 PetscFunctionBegin; 2694 if (B->hash_active) { 2695 B->ops[0] = b->cops; 2696 B->hash_active = PETSC_FALSE; 2697 } 2698 if (!B->preallocated) PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), bs, &B->bstash)); 2699 PetscCall(MatSetBlockSize(B, bs)); 2700 PetscCall(PetscLayoutSetUp(B->rmap)); 2701 PetscCall(PetscLayoutSetUp(B->cmap)); 2702 PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 2703 2704 if (d_nnz) { 2705 for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(d_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "d_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, d_nnz[i]); 2706 } 2707 if (o_nnz) { 2708 for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(o_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "o_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, o_nnz[i]); 2709 } 2710 2711 
b->bs2 = bs * bs; 2712 b->mbs = B->rmap->n / bs; 2713 b->nbs = B->cmap->n / bs; 2714 b->Mbs = B->rmap->N / bs; 2715 b->Nbs = B->cmap->N / bs; 2716 2717 for (i = 0; i <= b->size; i++) b->rangebs[i] = B->rmap->range[i] / bs; 2718 b->rstartbs = B->rmap->rstart / bs; 2719 b->rendbs = B->rmap->rend / bs; 2720 b->cstartbs = B->cmap->rstart / bs; 2721 b->cendbs = B->cmap->rend / bs; 2722 2723 #if defined(PETSC_USE_CTABLE) 2724 PetscCall(PetscHMapIDestroy(&b->colmap)); 2725 #else 2726 PetscCall(PetscFree(b->colmap)); 2727 #endif 2728 PetscCall(PetscFree(b->garray)); 2729 PetscCall(VecDestroy(&b->lvec)); 2730 PetscCall(VecScatterDestroy(&b->Mvctx)); 2731 2732 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 2733 2734 MatSeqXAIJGetOptions_Private(b->B); 2735 PetscCall(MatDestroy(&b->B)); 2736 PetscCall(MatCreate(PETSC_COMM_SELF, &b->B)); 2737 PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0)); 2738 PetscCall(MatSetType(b->B, MATSEQBAIJ)); 2739 MatSeqXAIJRestoreOptions_Private(b->B); 2740 2741 MatSeqXAIJGetOptions_Private(b->A); 2742 PetscCall(MatDestroy(&b->A)); 2743 PetscCall(MatCreate(PETSC_COMM_SELF, &b->A)); 2744 PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n)); 2745 PetscCall(MatSetType(b->A, MATSEQBAIJ)); 2746 MatSeqXAIJRestoreOptions_Private(b->A); 2747 2748 PetscCall(MatSeqBAIJSetPreallocation(b->A, bs, d_nz, d_nnz)); 2749 PetscCall(MatSeqBAIJSetPreallocation(b->B, bs, o_nz, o_nnz)); 2750 B->preallocated = PETSC_TRUE; 2751 B->was_assembled = PETSC_FALSE; 2752 B->assembled = PETSC_FALSE; 2753 PetscFunctionReturn(PETSC_SUCCESS); 2754 } 2755 2756 extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat, Vec); 2757 extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat, PetscReal); 2758 2759 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype, MatReuse reuse, Mat *adj) 2760 { 2761 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 2762 Mat_SeqBAIJ *d = 
(Mat_SeqBAIJ *)b->A->data, *o = (Mat_SeqBAIJ *)b->B->data; 2763 PetscInt M = B->rmap->n / B->rmap->bs, i, *ii, *jj, cnt, j, k, rstart = B->rmap->rstart / B->rmap->bs; 2764 const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray; 2765 2766 PetscFunctionBegin; 2767 PetscCall(PetscMalloc1(M + 1, &ii)); 2768 ii[0] = 0; 2769 for (i = 0; i < M; i++) { 2770 PetscCheck((id[i + 1] - id[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, id[i], id[i + 1]); 2771 PetscCheck((io[i + 1] - io[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, io[i], io[i + 1]); 2772 ii[i + 1] = ii[i] + id[i + 1] - id[i] + io[i + 1] - io[i]; 2773 /* remove one from count of matrix has diagonal */ 2774 for (j = id[i]; j < id[i + 1]; j++) { 2775 if (jd[j] == i) { 2776 ii[i + 1]--; 2777 break; 2778 } 2779 } 2780 } 2781 PetscCall(PetscMalloc1(ii[M], &jj)); 2782 cnt = 0; 2783 for (i = 0; i < M; i++) { 2784 for (j = io[i]; j < io[i + 1]; j++) { 2785 if (garray[jo[j]] > rstart) break; 2786 jj[cnt++] = garray[jo[j]]; 2787 } 2788 for (k = id[i]; k < id[i + 1]; k++) { 2789 if (jd[k] != i) jj[cnt++] = rstart + jd[k]; 2790 } 2791 for (; j < io[i + 1]; j++) jj[cnt++] = garray[jo[j]]; 2792 } 2793 PetscCall(MatCreateMPIAdj(PetscObjectComm((PetscObject)B), M, B->cmap->N / B->rmap->bs, ii, jj, NULL, adj)); 2794 PetscFunctionReturn(PETSC_SUCCESS); 2795 } 2796 2797 #include <../src/mat/impls/aij/mpi/mpiaij.h> 2798 2799 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType, MatReuse, Mat *); 2800 2801 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 2802 { 2803 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2804 Mat_MPIAIJ *b; 2805 Mat B; 2806 2807 PetscFunctionBegin; 2808 PetscCheck(A->assembled, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Matrix must be assembled"); 2809 2810 if (reuse == 
MAT_REUSE_MATRIX) { 2811 B = *newmat; 2812 } else { 2813 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 2814 PetscCall(MatSetType(B, MATMPIAIJ)); 2815 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 2816 PetscCall(MatSetBlockSizes(B, A->rmap->bs, A->cmap->bs)); 2817 PetscCall(MatSeqAIJSetPreallocation(B, 0, NULL)); 2818 PetscCall(MatMPIAIJSetPreallocation(B, 0, NULL, 0, NULL)); 2819 } 2820 b = (Mat_MPIAIJ *)B->data; 2821 2822 if (reuse == MAT_REUSE_MATRIX) { 2823 PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_REUSE_MATRIX, &b->A)); 2824 PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_REUSE_MATRIX, &b->B)); 2825 } else { 2826 PetscInt *garray = a->garray; 2827 Mat_SeqAIJ *bB; 2828 PetscInt bs, nnz; 2829 PetscCall(MatDestroy(&b->A)); 2830 PetscCall(MatDestroy(&b->B)); 2831 /* just clear out the data structure */ 2832 PetscCall(MatDisAssemble_MPIAIJ(B, PETSC_FALSE)); 2833 PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A)); 2834 PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B)); 2835 2836 /* Global numbering for b->B columns */ 2837 bB = (Mat_SeqAIJ *)b->B->data; 2838 bs = A->rmap->bs; 2839 nnz = bB->i[A->rmap->n]; 2840 for (PetscInt k = 0; k < nnz; k++) { 2841 PetscInt bj = bB->j[k] / bs; 2842 PetscInt br = bB->j[k] % bs; 2843 bB->j[k] = garray[bj] * bs + br; 2844 } 2845 } 2846 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 2847 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 2848 2849 if (reuse == MAT_INPLACE_MATRIX) { 2850 PetscCall(MatHeaderReplace(A, &B)); 2851 } else { 2852 *newmat = B; 2853 } 2854 PetscFunctionReturn(PETSC_SUCCESS); 2855 } 2856 2857 /*MC 2858 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 2859 2860 Options Database Keys: 2861 + -mat_type mpibaij - sets the matrix type to `MATMPIBAIJ` during a call to `MatSetFromOptions()` 2862 . 
-mat_block_size <bs> - set the blocksize used to store the matrix 2863 . -mat_baij_mult_version version - indicate the version of the matrix-vector product to use (0 often indicates using BLAS) 2864 - -mat_use_hash_table <fact> - set hash table factor 2865 2866 Level: beginner 2867 2868 Note: 2869 `MatSetOption(A, MAT_STRUCTURE_ONLY, PETSC_TRUE)` may be called for this matrix type. In this no 2870 space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored 2871 2872 .seealso: `Mat`, `MATBAIJ`, `MATSEQBAIJ`, `MatCreateBAIJ` 2873 M*/ 2874 2875 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat, MatType, MatReuse, Mat *); 2876 2877 PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B) 2878 { 2879 Mat_MPIBAIJ *b; 2880 PetscBool flg = PETSC_FALSE; 2881 2882 PetscFunctionBegin; 2883 PetscCall(PetscNew(&b)); 2884 B->data = (void *)b; 2885 B->ops[0] = MatOps_Values; 2886 B->assembled = PETSC_FALSE; 2887 2888 B->insertmode = NOT_SET_VALUES; 2889 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank)); 2890 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &b->size)); 2891 2892 /* build local table of row and column ownerships */ 2893 PetscCall(PetscMalloc1(b->size + 1, &b->rangebs)); 2894 2895 /* build cache for off array entries formed */ 2896 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash)); 2897 2898 b->donotstash = PETSC_FALSE; 2899 b->colmap = NULL; 2900 b->garray = NULL; 2901 b->roworiented = PETSC_TRUE; 2902 2903 /* stuff used in block assembly */ 2904 b->barray = NULL; 2905 2906 /* stuff used for matrix vector multiply */ 2907 b->lvec = NULL; 2908 b->Mvctx = NULL; 2909 2910 /* stuff for MatGetRow() */ 2911 b->rowindices = NULL; 2912 b->rowvalues = NULL; 2913 b->getrowactive = PETSC_FALSE; 2914 2915 /* hash table stuff */ 2916 b->ht = NULL; 2917 b->hd = NULL; 2918 b->ht_size = 0; 2919 b->ht_flag = PETSC_FALSE; 2920 b->ht_fact = 0; 2921 b->ht_total_ct = 0; 2922 
b->ht_insert_ct = 0; 2923 2924 /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */ 2925 b->ijonly = PETSC_FALSE; 2926 2927 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiadj_C", MatConvert_MPIBAIJ_MPIAdj)); 2928 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiaij_C", MatConvert_MPIBAIJ_MPIAIJ)); 2929 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpisbaij_C", MatConvert_MPIBAIJ_MPISBAIJ)); 2930 #if defined(PETSC_HAVE_HYPRE) 2931 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_hypre_C", MatConvert_AIJ_HYPRE)); 2932 #endif 2933 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIBAIJ)); 2934 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIBAIJ)); 2935 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocation_C", MatMPIBAIJSetPreallocation_MPIBAIJ)); 2936 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocationCSR_C", MatMPIBAIJSetPreallocationCSR_MPIBAIJ)); 2937 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIBAIJ)); 2938 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetHashTableFactor_C", MatSetHashTableFactor_MPIBAIJ)); 2939 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_is_C", MatConvert_XAIJ_IS)); 2940 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIBAIJ)); 2941 2942 PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Options for loading MPIBAIJ matrix 1", "Mat"); 2943 PetscCall(PetscOptionsName("-mat_use_hash_table", "Use hash table to save time in constructing matrix", "MatSetOption", &flg)); 2944 if (flg) { 2945 PetscReal fact = 1.39; 2946 PetscCall(MatSetOption(B, MAT_USE_HASH_TABLE, PETSC_TRUE)); 2947 PetscCall(PetscOptionsReal("-mat_use_hash_table", "Use hash table factor", "MatMPIBAIJSetHashTableFactor", 
fact, &fact, NULL)); 2948 if (fact <= 1.0) fact = 1.39; 2949 PetscCall(MatMPIBAIJSetHashTableFactor(B, fact)); 2950 PetscCall(PetscInfo(B, "Hash table Factor used %5.2g\n", (double)fact)); 2951 } 2952 PetscOptionsEnd(); 2953 PetscFunctionReturn(PETSC_SUCCESS); 2954 } 2955 2956 // PetscClangLinter pragma disable: -fdoc-section-header-unknown 2957 /*MC 2958 MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 2959 2960 This matrix type is identical to `MATSEQBAIJ` when constructed with a single process communicator, 2961 and `MATMPIBAIJ` otherwise. 2962 2963 Options Database Keys: 2964 . -mat_type baij - sets the matrix type to `MATBAIJ` during a call to `MatSetFromOptions()` 2965 2966 Level: beginner 2967 2968 .seealso: `Mat`, `MatCreateBAIJ()`, `MATSEQBAIJ`, `MATMPIBAIJ`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()` 2969 M*/ 2970 2971 /*@ 2972 MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in `MATMPIBAIJ` format 2973 (block compressed row). 2974 2975 Collective 2976 2977 Input Parameters: 2978 + B - the matrix 2979 . bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row 2980 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()` 2981 . d_nz - number of block nonzeros per block row in diagonal portion of local 2982 submatrix (same for all local rows) 2983 . d_nnz - array containing the number of block nonzeros in the various block rows 2984 of the in diagonal portion of the local (possibly different for each block 2985 row) or `NULL`. If you plan to factor the matrix you must leave room for the diagonal entry and 2986 set it even if it is zero. 2987 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2988 submatrix (same for all local rows). 
2989 - o_nnz - array containing the number of nonzeros in the various block rows of the 2990 off-diagonal portion of the local submatrix (possibly different for 2991 each block row) or `NULL`. 2992 2993 If the *_nnz parameter is given then the *_nz parameter is ignored 2994 2995 Options Database Keys: 2996 + -mat_block_size - size of the blocks to use 2997 - -mat_use_hash_table <fact> - set hash table factor 2998 2999 Level: intermediate 3000 3001 Notes: 3002 For good matrix assembly performance 3003 the user should preallocate the matrix storage by setting the parameters 3004 `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). By setting these parameters accurately, 3005 performance can be increased by more than a factor of 50. 3006 3007 If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor 3008 than it must be used on all processors that share the object for that argument. 3009 3010 Storage Information: 3011 For a square global matrix we define each processor's diagonal portion 3012 to be its local rows and the corresponding columns (a square submatrix); 3013 each processor's off-diagonal portion encompasses the remainder of the 3014 local matrix (a rectangular submatrix). 3015 3016 The user can specify preallocated storage for the diagonal part of 3017 the local submatrix with either `d_nz` or `d_nnz` (not both). Set 3018 `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic 3019 memory allocation. Likewise, specify preallocated storage for the 3020 off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both). 3021 3022 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3023 the figure below we depict these three local rows and all columns (0-11). 
3024 3025 .vb 3026 0 1 2 3 4 5 6 7 8 9 10 11 3027 -------------------------- 3028 row 3 |o o o d d d o o o o o o 3029 row 4 |o o o d d d o o o o o o 3030 row 5 |o o o d d d o o o o o o 3031 -------------------------- 3032 .ve 3033 3034 Thus, any entries in the d locations are stored in the d (diagonal) 3035 submatrix, and any entries in the o locations are stored in the 3036 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3037 stored simply in the `MATSEQBAIJ` format for compressed row storage. 3038 3039 Now `d_nz` should indicate the number of block nonzeros per row in the d matrix, 3040 and `o_nz` should indicate the number of block nonzeros per row in the o matrix. 3041 In general, for PDE problems in which most nonzeros are near the diagonal, 3042 one expects `d_nz` >> `o_nz`. 3043 3044 You can call `MatGetInfo()` to get information on how effective the preallocation was; 3045 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3046 You can also run with the option `-info` and look for messages with the string 3047 malloc in them to see if additional memory allocation was needed. 
3048 3049 .seealso: `Mat`, `MATMPIBAIJ`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatMPIBAIJSetPreallocationCSR()`, `PetscSplitOwnership()` 3050 @*/ 3051 PetscErrorCode MatMPIBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[]) 3052 { 3053 PetscFunctionBegin; 3054 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 3055 PetscValidType(B, 1); 3056 PetscValidLogicalCollectiveInt(B, bs, 2); 3057 PetscTryMethod(B, "MatMPIBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, bs, d_nz, d_nnz, o_nz, o_nnz)); 3058 PetscFunctionReturn(PETSC_SUCCESS); 3059 } 3060 3061 // PetscClangLinter pragma disable: -fdoc-section-header-unknown 3062 /*@ 3063 MatCreateBAIJ - Creates a sparse parallel matrix in `MATBAIJ` format 3064 (block compressed row). 3065 3066 Collective 3067 3068 Input Parameters: 3069 + comm - MPI communicator 3070 . bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row 3071 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()` 3072 . m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given) 3073 This value should be the same as the local size used in creating the 3074 y vector for the matrix-vector product y = Ax. 3075 . n - number of local columns (or `PETSC_DECIDE` to have calculated if N is given) 3076 This value should be the same as the local size used in creating the 3077 x vector for the matrix-vector product y = Ax. 3078 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 3079 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 3080 . d_nz - number of nonzero blocks per block row in diagonal portion of local 3081 submatrix (same for all local rows) 3082 . 
d_nnz - array containing the number of nonzero blocks in the various block rows 3083 of the in diagonal portion of the local (possibly different for each block 3084 row) or NULL. If you plan to factor the matrix you must leave room for the diagonal entry 3085 and set it even if it is zero. 3086 . o_nz - number of nonzero blocks per block row in the off-diagonal portion of local 3087 submatrix (same for all local rows). 3088 - o_nnz - array containing the number of nonzero blocks in the various block rows of the 3089 off-diagonal portion of the local submatrix (possibly different for 3090 each block row) or NULL. 3091 3092 Output Parameter: 3093 . A - the matrix 3094 3095 Options Database Keys: 3096 + -mat_block_size - size of the blocks to use 3097 - -mat_use_hash_table <fact> - set hash table factor 3098 3099 Level: intermediate 3100 3101 Notes: 3102 It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, 3103 MatXXXXSetPreallocation() paradigm instead of this routine directly. 3104 [MatXXXXSetPreallocation() is, for example, `MatSeqBAIJSetPreallocation()`] 3105 3106 For good matrix assembly performance 3107 the user should preallocate the matrix storage by setting the parameters 3108 `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). By setting these parameters accurately, 3109 performance can be increased by more than a factor of 50. 3110 3111 If the *_nnz parameter is given then the *_nz parameter is ignored 3112 3113 A nonzero block is any block that as 1 or more nonzeros in it 3114 3115 The user MUST specify either the local or global matrix dimensions 3116 (possibly both). 3117 3118 If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor 3119 than it must be used on all processors that share the object for that argument. 
3120 3121 If `m` and `n` are not `PETSC_DECIDE`, then the values determine the `PetscLayout` of the matrix and the ranges returned by 3122 `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, and `MatGetOwnershipRangesColumn()`. 3123 3124 Storage Information: 3125 For a square global matrix we define each processor's diagonal portion 3126 to be its local rows and the corresponding columns (a square submatrix); 3127 each processor's off-diagonal portion encompasses the remainder of the 3128 local matrix (a rectangular submatrix). 3129 3130 The user can specify preallocated storage for the diagonal part of 3131 the local submatrix with either d_nz or d_nnz (not both). Set 3132 `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic 3133 memory allocation. Likewise, specify preallocated storage for the 3134 off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both). 3135 3136 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3137 the figure below we depict these three local rows and all columns (0-11). 3138 3139 .vb 3140 0 1 2 3 4 5 6 7 8 9 10 11 3141 -------------------------- 3142 row 3 |o o o d d d o o o o o o 3143 row 4 |o o o d d d o o o o o o 3144 row 5 |o o o d d d o o o o o o 3145 -------------------------- 3146 .ve 3147 3148 Thus, any entries in the d locations are stored in the d (diagonal) 3149 submatrix, and any entries in the o locations are stored in the 3150 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3151 stored simply in the `MATSEQBAIJ` format for compressed row storage. 3152 3153 Now `d_nz` should indicate the number of block nonzeros per row in the d matrix, 3154 and `o_nz` should indicate the number of block nonzeros per row in the o matrix. 3155 In general, for PDE problems in which most nonzeros are near the diagonal, 3156 one expects `d_nz` >> `o_nz`. 
3157 3158 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()`, 3159 `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, `MatGetOwnershipRangesColumn()`, `PetscLayout` 3160 @*/ 3161 PetscErrorCode MatCreateBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A) 3162 { 3163 PetscMPIInt size; 3164 3165 PetscFunctionBegin; 3166 PetscCall(MatCreate(comm, A)); 3167 PetscCall(MatSetSizes(*A, m, n, M, N)); 3168 PetscCallMPI(MPI_Comm_size(comm, &size)); 3169 if (size > 1) { 3170 PetscCall(MatSetType(*A, MATMPIBAIJ)); 3171 PetscCall(MatMPIBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz)); 3172 } else { 3173 PetscCall(MatSetType(*A, MATSEQBAIJ)); 3174 PetscCall(MatSeqBAIJSetPreallocation(*A, bs, d_nz, d_nnz)); 3175 } 3176 PetscFunctionReturn(PETSC_SUCCESS); 3177 } 3178 3179 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat) 3180 { 3181 Mat mat; 3182 Mat_MPIBAIJ *a, *oldmat = (Mat_MPIBAIJ *)matin->data; 3183 PetscInt len = 0; 3184 3185 PetscFunctionBegin; 3186 *newmat = NULL; 3187 PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat)); 3188 PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N)); 3189 PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name)); 3190 3191 PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap)); 3192 PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap)); 3193 if (matin->hash_active) { 3194 PetscCall(MatSetUp(mat)); 3195 } else { 3196 mat->factortype = matin->factortype; 3197 mat->preallocated = PETSC_TRUE; 3198 mat->assembled = PETSC_TRUE; 3199 mat->insertmode = NOT_SET_VALUES; 3200 3201 a = (Mat_MPIBAIJ *)mat->data; 3202 mat->rmap->bs = matin->rmap->bs; 3203 a->bs2 = oldmat->bs2; 3204 a->mbs = oldmat->mbs; 3205 a->nbs = 
oldmat->nbs; 3206 a->Mbs = oldmat->Mbs; 3207 a->Nbs = oldmat->Nbs; 3208 3209 a->size = oldmat->size; 3210 a->rank = oldmat->rank; 3211 a->donotstash = oldmat->donotstash; 3212 a->roworiented = oldmat->roworiented; 3213 a->rowindices = NULL; 3214 a->rowvalues = NULL; 3215 a->getrowactive = PETSC_FALSE; 3216 a->barray = NULL; 3217 a->rstartbs = oldmat->rstartbs; 3218 a->rendbs = oldmat->rendbs; 3219 a->cstartbs = oldmat->cstartbs; 3220 a->cendbs = oldmat->cendbs; 3221 3222 /* hash table stuff */ 3223 a->ht = NULL; 3224 a->hd = NULL; 3225 a->ht_size = 0; 3226 a->ht_flag = oldmat->ht_flag; 3227 a->ht_fact = oldmat->ht_fact; 3228 a->ht_total_ct = 0; 3229 a->ht_insert_ct = 0; 3230 3231 PetscCall(PetscArraycpy(a->rangebs, oldmat->rangebs, a->size + 1)); 3232 if (oldmat->colmap) { 3233 #if defined(PETSC_USE_CTABLE) 3234 PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap)); 3235 #else 3236 PetscCall(PetscMalloc1(a->Nbs, &a->colmap)); 3237 PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, a->Nbs)); 3238 #endif 3239 } else a->colmap = NULL; 3240 3241 if (oldmat->garray && (len = ((Mat_SeqBAIJ *)oldmat->B->data)->nbs)) { 3242 PetscCall(PetscMalloc1(len, &a->garray)); 3243 PetscCall(PetscArraycpy(a->garray, oldmat->garray, len)); 3244 } else a->garray = NULL; 3245 3246 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin), matin->rmap->bs, &mat->bstash)); 3247 PetscCall(VecDuplicate(oldmat->lvec, &a->lvec)); 3248 PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx)); 3249 3250 PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A)); 3251 PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B)); 3252 } 3253 PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist)); 3254 *newmat = mat; 3255 PetscFunctionReturn(PETSC_SUCCESS); 3256 } 3257 3258 /* Used for both MPIBAIJ and MPISBAIJ matrices */ 3259 PetscErrorCode MatLoad_MPIBAIJ_Binary(Mat mat, PetscViewer viewer) 3260 { 3261 PetscInt header[4], M, N, nz, bs, m, n, mbs, nbs, rows, 
cols, sum, i, j, k; 3262 PetscInt *rowidxs, *colidxs, rs, cs, ce; 3263 PetscScalar *matvals; 3264 3265 PetscFunctionBegin; 3266 PetscCall(PetscViewerSetUp(viewer)); 3267 3268 /* read in matrix header */ 3269 PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT)); 3270 PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file"); 3271 M = header[1]; 3272 N = header[2]; 3273 nz = header[3]; 3274 PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M); 3275 PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N); 3276 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIBAIJ"); 3277 3278 /* set block sizes from the viewer's .info file */ 3279 PetscCall(MatLoad_Binary_BlockSizes(mat, viewer)); 3280 /* set local sizes if not set already */ 3281 if (mat->rmap->n < 0 && M == N) mat->rmap->n = mat->cmap->n; 3282 if (mat->cmap->n < 0 && M == N) mat->cmap->n = mat->rmap->n; 3283 /* set global sizes if not set already */ 3284 if (mat->rmap->N < 0) mat->rmap->N = M; 3285 if (mat->cmap->N < 0) mat->cmap->N = N; 3286 PetscCall(PetscLayoutSetUp(mat->rmap)); 3287 PetscCall(PetscLayoutSetUp(mat->cmap)); 3288 3289 /* check if the matrix sizes are correct */ 3290 PetscCall(MatGetSize(mat, &rows, &cols)); 3291 PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols); 3292 PetscCall(MatGetBlockSize(mat, &bs)); 3293 PetscCall(MatGetLocalSize(mat, &m, &n)); 3294 PetscCall(PetscLayoutGetRange(mat->rmap, &rs, NULL)); 3295 PetscCall(PetscLayoutGetRange(mat->cmap, &cs, &ce)); 
3296 mbs = m / bs; 3297 nbs = n / bs; 3298 3299 /* read in row lengths and build row indices */ 3300 PetscCall(PetscMalloc1(m + 1, &rowidxs)); 3301 PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT)); 3302 rowidxs[0] = 0; 3303 for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i]; 3304 PetscCallMPI(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer))); 3305 PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum); 3306 3307 /* read in column indices and matrix values */ 3308 PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals)); 3309 PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 3310 PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 3311 3312 { /* preallocate matrix storage */ 3313 PetscBT bt; /* helper bit set to count diagonal nonzeros */ 3314 PetscHSetI ht; /* helper hash set to count off-diagonal nonzeros */ 3315 PetscBool sbaij, done; 3316 PetscInt *d_nnz, *o_nnz; 3317 3318 PetscCall(PetscBTCreate(nbs, &bt)); 3319 PetscCall(PetscHSetICreate(&ht)); 3320 PetscCall(PetscCalloc2(mbs, &d_nnz, mbs, &o_nnz)); 3321 PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPISBAIJ, &sbaij)); 3322 for (i = 0; i < mbs; i++) { 3323 PetscCall(PetscBTMemzero(nbs, bt)); 3324 PetscCall(PetscHSetIClear(ht)); 3325 for (k = 0; k < bs; k++) { 3326 PetscInt row = bs * i + k; 3327 for (j = rowidxs[row]; j < rowidxs[row + 1]; j++) { 3328 PetscInt col = colidxs[j]; 3329 if (!sbaij || col >= row) { 3330 if (col >= cs && col < ce) { 3331 if (!PetscBTLookupSet(bt, (col - cs) / bs)) d_nnz[i]++; 3332 } else { 3333 PetscCall(PetscHSetIQueryAdd(ht, col / bs, &done)); 3334 if (done) o_nnz[i]++; 3335 } 3336 } 3337 } 3338 } 3339 } 3340 
    /* preallocation counting is done; release the per-block-row helper sets */
    PetscCall(PetscBTDestroy(&bt));
    PetscCall(PetscHSetIDestroy(&ht));
    /* NOTE(review): both preallocation calls are made; presumably the one that does not
       match the actual matrix type (BAIJ vs SBAIJ) is a no-op — confirm against the
       MatXXXSetPreallocation dispatch */
    PetscCall(MatMPIBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz));
    PetscCall(MatMPISBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz));
    PetscCall(PetscFree2(d_nnz, o_nnz));
  }

  /* store matrix values: insert each locally-owned row via the type's setvalues method */
  for (i = 0; i < m; i++) {
    PetscInt row = rs + i, s = rowidxs[i], e = rowidxs[i + 1];
    PetscUseTypeMethod(mat, setvalues, 1, &row, e - s, colidxs + s, matvals + s, INSERT_VALUES);
  }

  /* free the CSR staging arrays and finish assembly */
  PetscCall(PetscFree(rowidxs));
  PetscCall(PetscFree2(colidxs, matvals));
  PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatLoad dispatcher for MPIBAIJ: only binary viewers are supported; everything else errors out */
PetscErrorCode MatLoad_MPIBAIJ(Mat mat, PetscViewer viewer)
{
  PetscBool isbinary;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name);
  PetscCall(MatLoad_MPIBAIJ_Binary(mat, viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the matrices hash table

  Input Parameters:
+ mat - the matrix
- fact - factor

  Options Database Key:
. 
-mat_use_hash_table <fact> - provide the factor 3380 3381 Level: advanced 3382 3383 .seealso: `Mat`, `MATMPIBAIJ`, `MatSetOption()` 3384 @*/ 3385 PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat, PetscReal fact) 3386 { 3387 PetscFunctionBegin; 3388 PetscTryMethod(mat, "MatSetHashTableFactor_C", (Mat, PetscReal), (mat, fact)); 3389 PetscFunctionReturn(PETSC_SUCCESS); 3390 } 3391 3392 PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat, PetscReal fact) 3393 { 3394 Mat_MPIBAIJ *baij; 3395 3396 PetscFunctionBegin; 3397 baij = (Mat_MPIBAIJ *)mat->data; 3398 baij->ht_fact = fact; 3399 PetscFunctionReturn(PETSC_SUCCESS); 3400 } 3401 3402 PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[]) 3403 { 3404 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 3405 PetscBool flg; 3406 3407 PetscFunctionBegin; 3408 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIBAIJ, &flg)); 3409 PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIBAIJ matrix as input"); 3410 if (Ad) *Ad = a->A; 3411 if (Ao) *Ao = a->B; 3412 if (colmap) *colmap = a->garray; 3413 PetscFunctionReturn(PETSC_SUCCESS); 3414 } 3415 3416 /* 3417 Special version for direct calls from Fortran (to eliminate two function call overheads 3418 */ 3419 #if defined(PETSC_HAVE_FORTRAN_CAPS) 3420 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED 3421 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 3422 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked 3423 #endif 3424 3425 // PetscClangLinter pragma disable: -fdoc-synopsis-matching-symbol-name 3426 /*@C 3427 MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to `MatSetValuesBlocked()` 3428 3429 Collective 3430 3431 Input Parameters: 3432 + matin - the matrix 3433 . min - number of input rows 3434 . im - input rows 3435 . nin - number of input columns 3436 . in - input columns 3437 . 
v - numerical values input
- addvin - `INSERT_VALUES` or `ADD_VALUES`

  Level: advanced

  Developer Notes:
  This has a complete copy of `MatSetValuesBlocked_MPIBAIJ()` which is terrible code un-reuse.

.seealso: `Mat`, `MatSetValuesBlocked()`
@*/
PETSC_EXTERN PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin, PetscInt *min, const PetscInt im[], PetscInt *nin, const PetscInt in[], const MatScalar v[], InsertMode *addvin)
{
  /* convert input arguments to C version (Fortran passes everything by reference) */
  Mat        mat  = *matin;
  PetscInt   m    = *min, n = *nin;
  InsertMode addv = *addvin;

  Mat_MPIBAIJ     *baij        = (Mat_MPIBAIJ *)mat->data;
  const MatScalar *value;
  MatScalar       *barray      = baij->barray;      /* scratch buffer holding one bs x bs block */
  PetscBool        roworiented = baij->roworiented;
  PetscInt         i, j, ii, jj, row, col, rstart = baij->rstartbs;
  PetscInt         rend = baij->rendbs, cstart = baij->cstartbs, stepval;
  PetscInt         cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;

  PetscFunctionBegin;
  /* tasks normally handled by MatSetValuesBlocked() */
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
  else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values");
  PetscCheck(!mat->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix");
  if (mat->assembled) {
    mat->was_assembled = PETSC_TRUE;
    mat->assembled     = PETSC_FALSE;
  }
  PetscCall(PetscLogEventBegin(MAT_SetValues, mat, 0, 0, 0));

  /* lazily allocate the bs2-sized block scratch buffer on first use */
  if (!barray) {
    PetscCall(PetscMalloc1(bs2, &barray));
    baij->barray = barray;
  }

  /* distance (in scalars) from the end of one block row/column of v to the start of the next */
  if (roworiented) stepval = (n - 1) * bs;
  else stepval = (m - 1) * bs;

  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue; /* negative row indices are silently skipped */
    PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large, row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
    if (im[i] >= rstart && im[i] < rend) {
      /* locally owned block row */
      row = im[i] - rstart;
      for (j = 0; j < n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar *)v + i * bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar *)v + j * bs2;
        } else { /* Here a copy is required: gather the (i,j) block of v into contiguous barray */
          if (roworiented) {
            value = v + i * (stepval + bs) * bs + j * bs;
          } else {
            value = v + j * (stepval + bs) * bs + i * bs;
          }
          for (ii = 0; ii < bs; ii++, value += stepval) {
            for (jj = 0; jj < bs; jj++) *barray++ = *value++;
          }
          barray -= bs2; /* rewind to the start of the copied block */
        }

        if (in[j] >= cstart && in[j] < cend) {
          /* block column lands in the diagonal portion A */
          col = in[j] - cstart;
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
        } else if (in[j] < 0) {
          continue; /* negative column indices are silently skipped */
        } else {
          /* off-diagonal portion B: translate global block column through colmap */
          PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large, col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
          if (mat->was_assembled) {
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));

#if defined(PETSC_USE_DEBUG)
  #if defined(PETSC_USE_CTABLE)
            {
              PetscInt data;
              PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &data));
              PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
            }
  #else
            PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
  #endif
#endif
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col));
            col = (col - 1) / bs;
#else
            col = (baij->colmap[in[j]] - 1) / bs;
#endif
            /* NOTE(review): a colmap miss yields 0, so (0-1)/bs is relied on to be negative
               here; that truncates to 0 for bs > 1 in C — verify how unfound columns are
               flagged before touching this branch */
            if (col < 0 && !((Mat_SeqBAIJ *)baij->A->data)->nonew) {
              PetscCall(MatDisAssemble_MPIBAIJ(mat)); /* column unknown: fall back to unassembled state */
              col = in[j];
            }
          } else col = in[j];
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
        }
      }
    } else {
      /* row owned by another process: stash for communication at assembly time */
      if (!baij->donotstash) {
        if (roworiented) {
          PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        } else {
          PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        }
      }
    }
  }

  /* task normally handled by MatSetValuesBlocked() */
  PetscCall(PetscLogEventEnd(MAT_SetValues, mat, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatCreateMPIBAIJWithArrays - creates a `MATMPIBAIJ` matrix using arrays that contain in standard block CSR format for the local rows.

  Collective

  Input Parameters:
+ comm - MPI communicator
. bs - the block size, only a block size of 1 is supported
. m - number of local rows (Cannot be `PETSC_DECIDE`)
. n - This value should be the same as the local size used in creating the
      x vector for the matrix-vector product $ y = Ax $. (or `PETSC_DECIDE` to have
      calculated if `N` is given) For square matrices `n` is almost always `m`.
. M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
. N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
. i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that rowth block row of the matrix
. j - column indices
- a - matrix values

  Output Parameter:
. mat - the matrix

  Level: intermediate

  Notes:
  The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc;
  thus you CANNOT change the matrix entries by changing the values of a[] after you have
  called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
3582 3583 The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 3584 the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 3585 block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory 3586 with column-major ordering within blocks. 3587 3588 The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array. 3589 3590 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 3591 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()` 3592 @*/ 3593 PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat) 3594 { 3595 PetscFunctionBegin; 3596 PetscCheck(!i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 3597 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3598 PetscCall(MatCreate(comm, mat)); 3599 PetscCall(MatSetSizes(*mat, m, n, M, N)); 3600 PetscCall(MatSetType(*mat, MATMPIBAIJ)); 3601 PetscCall(MatSetBlockSize(*mat, bs)); 3602 PetscCall(MatSetUp(*mat)); 3603 PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_FALSE)); 3604 PetscCall(MatMPIBAIJSetPreallocationCSR(*mat, bs, i, j, a)); 3605 PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_TRUE)); 3606 PetscFunctionReturn(PETSC_SUCCESS); 3607 } 3608 3609 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 3610 { 3611 PetscInt m, N, i, rstart, nnz, Ii, bs, cbs; 3612 PetscInt *indx; 3613 PetscScalar *values; 3614 3615 PetscFunctionBegin; 3616 PetscCall(MatGetSize(inmat, &m, &N)); 3617 if (scall == 
MAT_INITIAL_MATRIX) { /* symbolic phase */ 3618 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)inmat->data; 3619 PetscInt *dnz, *onz, mbs, Nbs, nbs; 3620 PetscInt *bindx, rmax = a->rmax, j; 3621 PetscMPIInt rank, size; 3622 3623 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 3624 mbs = m / bs; 3625 Nbs = N / cbs; 3626 if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnershipBlock(comm, cbs, &n, &N)); 3627 nbs = n / cbs; 3628 3629 PetscCall(PetscMalloc1(rmax, &bindx)); 3630 MatPreallocateBegin(comm, mbs, nbs, dnz, onz); /* inline function, output __end and __rstart are used below */ 3631 3632 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 3633 PetscCallMPI(MPI_Comm_rank(comm, &size)); 3634 if (rank == size - 1) { 3635 /* Check sum(nbs) = Nbs */ 3636 PetscCheck(__end == Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT, __end, Nbs); 3637 } 3638 3639 rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */ 3640 for (i = 0; i < mbs; i++) { 3641 PetscCall(MatGetRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL)); /* non-blocked nnz and indx */ 3642 nnz = nnz / bs; 3643 for (j = 0; j < nnz; j++) bindx[j] = indx[j * bs] / bs; 3644 PetscCall(MatPreallocateSet(i + rstart, nnz, bindx, dnz, onz)); 3645 PetscCall(MatRestoreRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL)); 3646 } 3647 PetscCall(PetscFree(bindx)); 3648 3649 PetscCall(MatCreate(comm, outmat)); 3650 PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 3651 PetscCall(MatSetBlockSizes(*outmat, bs, cbs)); 3652 PetscCall(MatSetType(*outmat, MATBAIJ)); 3653 PetscCall(MatSeqBAIJSetPreallocation(*outmat, bs, 0, dnz)); 3654 PetscCall(MatMPIBAIJSetPreallocation(*outmat, bs, 0, dnz, 0, onz)); 3655 MatPreallocateEnd(dnz, onz); 3656 PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 3657 } 3658 3659 /* numeric phase */ 3660 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 3661 
PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL)); 3662 3663 for (i = 0; i < m; i++) { 3664 PetscCall(MatGetRow_SeqBAIJ(inmat, i, &nnz, &indx, &values)); 3665 Ii = i + rstart; 3666 PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES)); 3667 PetscCall(MatRestoreRow_SeqBAIJ(inmat, i, &nnz, &indx, &values)); 3668 } 3669 PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY)); 3670 PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY)); 3671 PetscFunctionReturn(PETSC_SUCCESS); 3672 } 3673