1 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/ 2 3 #include <petsc/private/hashseti.h> 4 #include <petscblaslapack.h> 5 #include <petscsf.h> 6 7 static PetscErrorCode MatDestroy_MPIBAIJ(Mat mat) 8 { 9 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 10 11 PetscFunctionBegin; 12 PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N)); 13 PetscCall(MatStashDestroy_Private(&mat->stash)); 14 PetscCall(MatStashDestroy_Private(&mat->bstash)); 15 PetscCall(MatDestroy(&baij->A)); 16 PetscCall(MatDestroy(&baij->B)); 17 #if defined(PETSC_USE_CTABLE) 18 PetscCall(PetscHMapIDestroy(&baij->colmap)); 19 #else 20 PetscCall(PetscFree(baij->colmap)); 21 #endif 22 PetscCall(PetscFree(baij->garray)); 23 PetscCall(VecDestroy(&baij->lvec)); 24 PetscCall(VecScatterDestroy(&baij->Mvctx)); 25 PetscCall(PetscFree2(baij->rowvalues, baij->rowindices)); 26 PetscCall(PetscFree(baij->barray)); 27 PetscCall(PetscFree2(baij->hd, baij->ht)); 28 PetscCall(PetscFree(baij->rangebs)); 29 PetscCall(PetscFree(mat->data)); 30 31 PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL)); 32 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL)); 33 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL)); 34 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocation_C", NULL)); 35 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocationCSR_C", NULL)); 36 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL)); 37 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetHashTableFactor_C", NULL)); 38 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpisbaij_C", NULL)); 39 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiadj_C", NULL)); 40 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiaij_C", 
NULL)); 41 #if defined(PETSC_HAVE_HYPRE) 42 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_hypre_C", NULL)); 43 #endif 44 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_is_C", NULL)); 45 PetscFunctionReturn(PETSC_SUCCESS); 46 } 47 48 /* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and MatAssemblyEnd_MPI_Hash() */ 49 #define TYPE BAIJ 50 #include "../src/mat/impls/aij/mpi/mpihashmat.h" 51 #undef TYPE 52 53 #if defined(PETSC_HAVE_HYPRE) 54 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *); 55 #endif 56 57 static PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A, Vec v, PetscInt idx[]) 58 { 59 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 60 PetscInt i, *idxb = NULL, m = A->rmap->n, bs = A->cmap->bs; 61 PetscScalar *va, *vv; 62 Vec vB, vA; 63 const PetscScalar *vb; 64 65 PetscFunctionBegin; 66 PetscCall(MatCreateVecs(a->A, NULL, &vA)); 67 PetscCall(MatGetRowMaxAbs(a->A, vA, idx)); 68 69 PetscCall(VecGetArrayWrite(vA, &va)); 70 if (idx) { 71 for (i = 0; i < m; i++) { 72 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 73 } 74 } 75 76 PetscCall(MatCreateVecs(a->B, NULL, &vB)); 77 PetscCall(PetscMalloc1(m, &idxb)); 78 PetscCall(MatGetRowMaxAbs(a->B, vB, idxb)); 79 80 PetscCall(VecGetArrayWrite(v, &vv)); 81 PetscCall(VecGetArrayRead(vB, &vb)); 82 for (i = 0; i < m; i++) { 83 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 84 vv[i] = vb[i]; 85 if (idx) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs); 86 } else { 87 vv[i] = va[i]; 88 if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > bs * a->garray[idxb[i] / bs] + (idxb[i] % bs)) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs); 89 } 90 } 91 PetscCall(VecRestoreArrayWrite(vA, &vv)); 92 PetscCall(VecRestoreArrayWrite(vA, &va)); 93 PetscCall(VecRestoreArrayRead(vB, &vb)); 94 PetscCall(PetscFree(idxb)); 95 PetscCall(VecDestroy(&vA)); 96 PetscCall(VecDestroy(&vB)); 97 
PetscFunctionReturn(PETSC_SUCCESS); 98 } 99 100 static PetscErrorCode MatGetRowSumAbs_MPIBAIJ(Mat A, Vec v) 101 { 102 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 103 Vec vB, vA; 104 105 PetscFunctionBegin; 106 PetscCall(MatCreateVecs(a->A, NULL, &vA)); 107 PetscCall(MatGetRowSumAbs(a->A, vA)); 108 PetscCall(MatCreateVecs(a->B, NULL, &vB)); 109 PetscCall(MatGetRowSumAbs(a->B, vB)); 110 PetscCall(VecAXPY(vA, 1.0, vB)); 111 PetscCall(VecDestroy(&vB)); 112 PetscCall(VecCopy(vA, v)); 113 PetscCall(VecDestroy(&vA)); 114 PetscFunctionReturn(PETSC_SUCCESS); 115 } 116 117 static PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat) 118 { 119 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 120 121 PetscFunctionBegin; 122 PetscCall(MatStoreValues(aij->A)); 123 PetscCall(MatStoreValues(aij->B)); 124 PetscFunctionReturn(PETSC_SUCCESS); 125 } 126 127 static PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat) 128 { 129 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 130 131 PetscFunctionBegin; 132 PetscCall(MatRetrieveValues(aij->A)); 133 PetscCall(MatRetrieveValues(aij->B)); 134 PetscFunctionReturn(PETSC_SUCCESS); 135 } 136 137 /* 138 Local utility routine that creates a mapping from the global column 139 number to the local number in the off-diagonal part of the local 140 storage of the matrix. This is done in a non scalable way since the 141 length of colmap equals the global matrix length. 
*/
PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ *B    = (Mat_SeqBAIJ *)baij->B->data;
  PetscInt     nbs  = B->nbs, i, bs = mat->rmap->bs;

  PetscFunctionBegin;
#if defined(PETSC_USE_CTABLE)
  /* hash-table variant: key is global block column + 1, value is local point column + 1
     (the +1 shifts avoid 0, which the table/array uses to mean "absent") */
  PetscCall(PetscHMapICreateWithSize(baij->nbs, &baij->colmap));
  for (i = 0; i < nbs; i++) PetscCall(PetscHMapISet(baij->colmap, baij->garray[i] + 1, i * bs + 1));
#else
  /* dense variant: one entry per global block column; 0 means "not in B" */
  PetscCall(PetscCalloc1(baij->Nbs + 1, &baij->colmap));
  for (i = 0; i < nbs; i++) baij->colmap[baij->garray[i]] = i * bs + 1;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Insert/add a single point value at (row,col) (local point indices) into the
   diagonal block A.  Expects the caller (MatSetValues_MPIBAIJ) to have the macro's
   working variables (rp, ap, brow, bcol, low, high, ...) in scope.  Performs a
   bounded binary search then a linear scan within the block row; reallocates the
   row (MatSeqXAIJReallocateAIJ) when a new nonzero block must be inserted.
   orow/ocol are the original global indices, used only in error messages. */
#define MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, orow, ocol) \
  do { \
    brow = row / bs; \
    rp   = PetscSafePointerPlusOffset(aj, ai[brow]); \
    ap   = PetscSafePointerPlusOffset(aa, bs2 * ai[brow]); \
    rmax = aimax[brow]; \
    nrow = ailen[brow]; \
    bcol = col / bs; \
    ridx = row % bs; \
    cidx = col % bs; \
    low  = 0; \
    high = nrow; \
    while (high - low > 3) { \
      t = (low + high) / 2; \
      if (rp[t] > bcol) high = t; \
      else low = t; \
    } \
    for (_i = low; _i < high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2 * _i + bs * cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else *bap = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    PetscCheck(a->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, aimax, a->nonew, MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
    PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
    rp[_i] = bcol; \
    ap[bs2 * _i + bs * cidx + ridx] = value; \
    a_noinsert:; \
    ailen[brow] = nrow; \
  } while (0)

/* Same as MatSetValues_SeqBAIJ_A_Private() but for the off-diagonal block B
   (uses b->*, bi, bj, ba, bimax, bilen and its own b_noinsert label). */
#define MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, orow, ocol) \
  do { \
    brow = row / bs; \
    rp   = PetscSafePointerPlusOffset(bj, bi[brow]); \
    ap   = PetscSafePointerPlusOffset(ba, bs2 * bi[brow]); \
    rmax = bimax[brow]; \
    nrow = bilen[brow]; \
    bcol = col / bs; \
    ridx = row % bs; \
    cidx = col % bs; \
    low  = 0; \
    high = nrow; \
    while (high - low > 3) { \
      t = (low + high) / 2; \
      if (rp[t] > bcol) high = t; \
      else low = t; \
    } \
    for (_i = low; _i < high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2 * _i + bs * cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else *bap = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    PetscCheck(b->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B, b->mbs, bs2, nrow, brow, bcol, rmax, ba, bi, bj, rp, ap, bimax, b->nonew, MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
    PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
    rp[_i] = bcol; \
    ap[bs2 * _i + bs * cidx + ridx] = value; \
    b_noinsert:; \
    bilen[brow] = nrow; \
  } while (0)

/* MatSetValues for MPIBAIJ with point (non-blocked) global indices: locally
   owned rows are routed to the A/B insertion macros above; off-process rows are
   stashed for communication at assembly time. */
PetscErrorCode MatSetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIBAIJ *baij        = (Mat_MPIBAIJ *)mat->data;
  MatScalar    value;
  PetscBool    roworiented = baij->roworiented;

  PetscInt     i, j, row, col;
  PetscInt     rstart_orig = mat->rmap->rstart;
  PetscInt     rend_orig = mat->rmap->rend, cstart_orig = mat->cmap->rstart;
  PetscInt     cend_orig = mat->cmap->rend, bs = mat->rmap->bs;

  /* Some Variables required in the macro */
  Mat          A     = baij->A;
  Mat_SeqBAIJ *a     = (Mat_SeqBAIJ *)(A)->data;
  PetscInt    *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
  MatScalar   *aa    = a->a;

  Mat          B     = baij->B;
  Mat_SeqBAIJ *b     = (Mat_SeqBAIJ *)(B)->data;
  PetscInt    *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j;
  MatScalar   *ba    = b->a;

  /* scratch variables written by the MatSetValues_SeqBAIJ_{A,B}_Private macros */
  PetscInt  *rp, ii, nrow, _i, rmax, N, brow, bcol;
  PetscInt   low, high, t, ridx, cidx, bs2 = a->bs2;
  MatScalar *ap, *bap;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue; /* negative rows are ignored by convention */
    PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    if (im[i] >= rstart_orig && im[i] < rend_orig) { /* locally owned row */
      row = im[i] - rstart_orig;
      for (j = 0; j < n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diagonal block A */
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, im[i], in[j]);
        } else if (in[j] < 0) {
          continue;
        } else { /* off-diagonal block B */
          PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          if (mat->was_assembled) {
            /* translate global column to B's compact local numbering via colmap */
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] / bs + 1, 0, &col));
            col = col - 1;
#else
            col = baij->colmap[in[j] / bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ *)baij->B->data)->nonew) {
              /* column not present in B: disassemble back to global numbering
                 so the new nonzero can be inserted */
              PetscCall(MatDisAssemble_MPIBAIJ(mat));
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B     = baij->B;
              b     = (Mat_SeqBAIJ *)(B)->data;
              bimax = b->imax;
              bi    = b->i;
              bilen = b->ilen;
              bj    = b->j;
              ba    = b->a;
            } else {
              PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
              col += in[j] % bs; /* colmap stores the block start; add the offset within the block */
            }
          } else col = in[j]; /* not yet assembled: B still uses global column indices */
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, im[i], in[j]);
          /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */
        }
      }
    } else { /* off-process row: stash for assembly-time communication */
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE));
        }
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Insert/add one bs-by-bs block of values at block position (row,col) of a
   SeqBAIJ matrix; v holds the bs2 values, is selects INSERT vs ADD, and
   orow/ocol are the original (block) indices used in error messages.
   Shared helper for the blocked MPIBAIJ setters below. */
static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol)
{
  Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
  PetscInt          *rp, low, high, t, ii, jj, nrow, i, rmax, N;
  PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
  PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs;
  PetscBool          roworiented = a->roworiented;
  const PetscScalar *value       = v;
  MatScalar         *ap, *aa = a->a, *bap;

  PetscFunctionBegin;
  rp    = aj + ai[row];
  ap    = aa + bs2 * ai[row];
  rmax  = imax[row];
  nrow  = ailen[row];
  value = v;
  low   = 0;
  high  = nrow;
  /* binary search narrowed to a small window, finished by the linear scan below */
  while (high - low > 7) {
    t = (low + high) / 2;
    if
 (rp[t] > col) high = t;
    else low = t;
  }
  for (i = low; i < high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) { /* block already exists: update it in place */
      bap = ap + bs2 * i;
      if (roworiented) {
        /* input block is row-major; storage is column-major within the block */
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
          }
        } else {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
          }
        }
      } else {
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++, value += bs) {
            for (jj = 0; jj < bs; jj++) bap[jj] += value[jj];
            bap += bs;
          }
        } else {
          for (ii = 0; ii < bs; ii++, value += bs) {
            for (jj = 0; jj < bs; jj++) bap[jj] = value[jj];
            bap += bs;
          }
        }
      }
      goto noinsert2;
    }
  }
  if (nonew == 1) goto noinsert2; /* silently ignore new blocks when requested */
  PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
  N = nrow++ - 1;
  high++;
  /* shift up all the later entries in this row */
  PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
  PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
  rp[i] = col;
  bap   = ap + bs2 * i;
  if (roworiented) {
    for (ii = 0; ii < bs; ii++) {
      for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
    }
  } else {
    for (ii = 0; ii < bs; ii++) {
      for (jj = 0; jj < bs; jj++) *bap++ = *value++;
    }
  }
noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
    This routine should be optimized so that the block copy at ** Here a copy is required ** below is not needed
    by passing additional stride information into the MatSetValuesBlocked_SeqBAIJ_Inlined() routine
*/
/* MatSetValuesBlocked for MPIBAIJ: im/in are global BLOCK indices, v holds
   m*n blocks of bs2 values each.  Blocks for owned rows go into A or B via
   MatSetValuesBlocked_SeqBAIJ_Inlined(); off-process rows go to the block stash. */
static PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIBAIJ       *baij   = (Mat_MPIBAIJ *)mat->data;
  const PetscScalar *value;
  MatScalar         *barray      = baij->barray;
  PetscBool          roworiented = baij->roworiented;
  PetscInt           i, j, ii, jj, row, col, rstart = baij->rstartbs;
  PetscInt           rend = baij->rendbs, cstart = baij->cstartbs, stepval;
  PetscInt           cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;

  PetscFunctionBegin;
  /* lazily allocate the bs2 scratch block used to repack strided input */
  if (!barray) {
    PetscCall(PetscMalloc1(bs2, &barray));
    baij->barray = barray;
  }

  /* stride (in scalars) between consecutive rows of one input block */
  if (roworiented) stepval = (n - 1) * bs;
  else stepval = (m - 1) * bs;

  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue;
    PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j = 0; j < n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar *)v + i * bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar *)v + j * bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + (i * (stepval + bs) + j) * bs;
          } else {
            value = v + (j * (stepval + bs) + i) * bs;
          }
          for (ii = 0; ii < bs; ii++, value += bs + stepval) {
            for (jj = 0; jj < bs; jj++) barray[jj] = value[jj];
            barray += bs;
          }
          barray -= bs2; /* rewind to the start of the packed block */
        }

        if (in[j] >= cstart && in[j] < cend) { /* diagonal block A */
          col = in[j] - cstart;
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
        } else if (in[j] < 0) {
          continue;
        } else { /* off-diagonal block B */
          PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
          if (mat->was_assembled) {
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));

#if defined(PETSC_USE_DEBUG)
  #if defined(PETSC_USE_CTABLE)
            {
              PetscInt data;
              PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &data));
              PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
            }
  #else
            PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
  #endif
#endif
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col));
            col = (col - 1) / bs; /* colmap stores point columns; convert to block column */
#else
            col = (baij->colmap[in[j]] - 1) / bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ *)baij->B->data)->nonew) {
              /* new off-diagonal block column: revert B to global numbering */
              PetscCall(MatDisAssemble_MPIBAIJ(mat));
              col = in[j];
            } else PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new blocked indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
          } else col = in[j];
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
        }
      }
    } else { /* off-process block row */
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        } else {
          PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        }
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Multiplicative (Fibonacci) hashing of a (block row, block col) key into the
   open-addressed table used by the MatSetValues*_HT fast paths below. */
#define HASH_KEY 0.6180339887
#define HASH(size, key, tmp) (tmp = (key) * HASH_KEY, (PetscInt)((size) * (tmp - (PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
static PetscErrorCode
MatSetValues_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIBAIJ *baij        = (Mat_MPIBAIJ *)mat->data;
  PetscBool    roworiented = baij->roworiented;
  PetscInt     i, j, row, col;
  PetscInt     rstart_orig = mat->rmap->rstart;
  PetscInt     rend_orig = mat->rmap->rend, Nbs = baij->Nbs;
  PetscInt     h1, key, size = baij->ht_size, bs = mat->rmap->bs, *HT = baij->ht, idx;
  PetscReal    tmp;
  MatScalar  **HD = baij->hd, value;
  PetscInt     total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct;

  PetscFunctionBegin;
  /* Fast point-indexed MatSetValues using the precomputed hash table: every
     (block row, block col) must already exist in the table — new nonzeros
     cannot be created on this path. */
  for (i = 0; i < m; i++) {
    if (PetscDefined(USE_DEBUG)) {
      PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row");
      PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    }
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j = 0; j < n; j++) {
        col = in[j];
        if (roworiented) value = v[i * n + j];
        else value = v[i + j * m];
        /* Look up into the Hash Table */
        key = (row / bs) * Nbs + (col / bs) + 1;
        h1  = HASH(size, key, tmp);

        idx = h1;
        if (PetscDefined(USE_DEBUG)) {
          /* debug variant also counts probes for the search statistics */
          insert_ct++;
          total_ct++;
          if (HT[idx] != key) {
            /* linear probe forward, wrapping once to the front of the table */
            for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++);
            if (idx == size) {
              for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++);
              PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
            }
          }
        } else if (HT[idx] != key) {
          for (idx = h1; (idx < size) && (HT[idx] != key); idx++);
          if (idx == size) {
            for (idx = 0; (idx < h1) && (HT[idx] != key); idx++);
            PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
          }
        }
        /* A HASH table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx] + (col % bs) * bs + (row % bs)) += value;
        else *(HD[idx] + (col % bs) * bs + (row % bs)) = value;
      }
    } else if (!baij->donotstash) {
      if (roworiented) {
        PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE));
      } else {
        PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE));
      }
    }
  }
  if (PetscDefined(USE_DEBUG)) {
    baij->ht_total_ct += total_ct;
    baij->ht_insert_ct += insert_ct;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Fast block-indexed MatSetValuesBlocked using the precomputed hash table;
   like MatSetValues_MPIBAIJ_HT() above, entries must already exist. */
static PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIBAIJ       *baij        = (Mat_MPIBAIJ *)mat->data;
  PetscBool          roworiented = baij->roworiented;
  PetscInt           i, j, ii, jj, row, col;
  PetscInt           rstart = baij->rstartbs;
  /* NOTE(review): rstart is in block units but rend is mat->rmap->rend (point
     units) — presumably intentional/long-standing, but verify against upstream */
  PetscInt           rend = mat->rmap->rend, stepval, bs = mat->rmap->bs, bs2 = baij->bs2, nbs2 = n * bs2;
  PetscInt           h1, key, size = baij->ht_size, idx, *HT = baij->ht, Nbs = baij->Nbs;
  PetscReal          tmp;
  MatScalar        **HD = baij->hd, *baij_a;
  const PetscScalar *v_t, *value;
  PetscInt           total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct;

  PetscFunctionBegin;
  if (roworiented) stepval = (n - 1) * bs;
  else stepval = (m - 1) * bs;

  for (i = 0; i < m; i++) {
    if (PetscDefined(USE_DEBUG)) {
      PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row: %" PetscInt_FMT, im[i]);
      PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
    }
    row = im[i];
    v_t = v + i * nbs2;
    if (row >= rstart && row < rend) {
      for (j = 0; j < n; j++) {
        col = in[j];

        /* Look up into
the Hash Table */
        key = row * Nbs + col + 1;
        h1  = HASH(size, key, tmp);

        idx = h1;
        if (PetscDefined(USE_DEBUG)) {
          total_ct++;
          insert_ct++;
          if (HT[idx] != key) {
            /* linear probe forward, wrapping once to the front of the table */
            for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++);
            if (idx == size) {
              for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++);
              PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
            }
          }
        } else if (HT[idx] != key) {
          for (idx = h1; (idx < size) && (HT[idx] != key); idx++);
          if (idx == size) {
            for (idx = 0; (idx < h1) && (HT[idx] != key); idx++);
            PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
          }
        }
        baij_a = HD[idx]; /* direct pointer to the stored bs2 block */
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t += bs;
          if (addv == ADD_VALUES) {
            for (ii = 0; ii < bs; ii++, value += stepval) {
              for (jj = ii; jj < bs2; jj += bs) baij_a[jj] += *value++;
            }
          } else {
            for (ii = 0; ii < bs; ii++, value += stepval) {
              for (jj = ii; jj < bs2; jj += bs) baij_a[jj] = *value++;
            }
          }
        } else {
          value = v + j * (stepval + bs) * bs + i * bs;
          if (addv == ADD_VALUES) {
            for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) {
              for (jj = 0; jj < bs; jj++) baij_a[jj] += *value++;
            }
          } else {
            for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) {
              for (jj = 0; jj < bs; jj++) baij_a[jj] = *value++;
            }
          }
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        } else {
          PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        }
      }
    }
  }
  if (PetscDefined(USE_DEBUG)) {
    baij->ht_total_ct += total_ct;
    baij->ht_insert_ct += insert_ct;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatGetValues for MPIBAIJ with global point indices; only locally owned rows
   are supported.  Off-diagonal columns are located through colmap/garray and a
   value of 0.0 is returned for positions not stored in B. */
static PetscErrorCode MatGetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  PetscInt     bs = mat->rmap->bs, i, j, bsrstart = mat->rmap->rstart, bsrend = mat->rmap->rend;
  PetscInt     bscstart = mat->cmap->rstart, bscend = mat->cmap->rend, row, col, data;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j = 0; j < n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          /* column in the diagonal block A */
          col = idxn[j] - bscstart;
          PetscCall(MatGetValues_SeqBAIJ(baij->A, 1, &row, 1, &col, v + i * n + j));
        } else {
          /* column in the off-diagonal block B: map the global block column
             to B's local numbering; 0 in colmap means the block is absent */
          if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
          PetscCall(PetscHMapIGetWithDefault(baij->colmap, idxn[j] / bs + 1, 0, &data));
          data--;
#else
          data = baij->colmap[idxn[j] / bs] - 1;
#endif
          if ((data < 0) || (baij->garray[data / bs] != idxn[j] / bs)) *(v + i * n + j) = 0.0;
          else {
            col = data + idxn[j] % bs;
            PetscCall(MatGetValues_SeqBAIJ(baij->B, 1, &row, 1, &col, v + i * n + j));
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode
MatNorm_MPIBAIJ(Mat mat, NormType type, PetscReal *nrm)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ *amat = (Mat_SeqBAIJ *)baij->A->data, *bmat = (Mat_SeqBAIJ *)baij->B->data;
  PetscInt     i, j, bs2 = baij->bs2, bs = baij->A->rmap->bs, nz, row, col;
  PetscReal    sum = 0.0;
  MatScalar   *v;

  PetscFunctionBegin;
  /* Compute the requested matrix norm.  Frobenius: sum |a_ij|^2 over A and B
     then Allreduce; 1-norm: per-column absolute sums reduced with SUM then max;
     infinity-norm: per-row sums (rows are not split across ranks) reduced with MAX. */
  if (baij->size == 1) {
    /* single-rank case: delegate to the sequential diagonal block */
    PetscCall(MatNorm(baij->A, type, nrm));
  } else {
    if (type == NORM_FROBENIUS) {
      v  = amat->a;
      nz = amat->nz * bs2;
      for (i = 0; i < nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      v  = bmat->a;
      nz = bmat->nz * bs2;
      for (i = 0; i < nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      *nrm = PetscSqrtReal(*nrm);
    } else if (type == NORM_1) { /* max column sum */
      PetscReal *tmp, *tmp2;
      /* NOTE(review): cstart is taken from rstartbs (row block start) — looks
         like it assumes a square/block-aligned layout; verify against upstream */
      PetscInt *jj, *garray = baij->garray, cstart = baij->rstartbs;
      PetscCall(PetscCalloc1(mat->cmap->N, &tmp));
      PetscCall(PetscMalloc1(mat->cmap->N, &tmp2));
      v  = amat->a;
      jj = amat->j;
      for (i = 0; i < amat->nz; i++) {
        for (j = 0; j < bs; j++) {
          col = bs * (cstart + *jj) + j; /* column index */
          for (row = 0; row < bs; row++) {
            tmp[col] += PetscAbsScalar(*v);
            v++;
          }
        }
        jj++;
      }
      v  = bmat->a;
      jj = bmat->j;
      for (i = 0; i < bmat->nz; i++) {
        for (j = 0; j < bs; j++) {
          col = bs * garray[*jj] + j; /* B's columns translated to global via garray */
          for (row = 0; row < bs; row++) {
            tmp[col] += PetscAbsScalar(*v);
            v++;
          }
        }
        jj++;
      }
      PetscCall(MPIU_Allreduce(tmp, tmp2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      *nrm = 0.0;
      for (j = 0; j < mat->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      PetscCall(PetscFree(tmp));
      PetscCall(PetscFree(tmp2));
    } else if (type == NORM_INFINITY) { /* max row sum */
      PetscReal *sums;
      PetscCall(PetscMalloc1(bs, &sums));
      sum = 0.0;
      /* accumulate the bs point-row sums of each block row from A and B */
      for (j = 0; j < amat->mbs; j++) {
        for (row = 0; row < bs; row++) sums[row] = 0.0;
        v  = amat->a + bs2 * amat->i[j];
        nz = amat->i[j + 1] - amat->i[j];
        for (i = 0; i < nz; i++) {
          for (col = 0; col < bs; col++) {
            for (row = 0; row < bs; row++) {
              sums[row] += PetscAbsScalar(*v);
              v++;
            }
          }
        }
        v  = bmat->a + bs2 * bmat->i[j];
        nz = bmat->i[j + 1] - bmat->i[j];
        for (i = 0; i < nz; i++) {
          for (col = 0; col < bs; col++) {
            for (row = 0; row < bs; row++) {
              sums[row] += PetscAbsScalar(*v);
              v++;
            }
          }
        }
        for (row = 0; row < bs; row++) {
          if (sums[row] > sum) sum = sums[row];
        }
      }
      PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat)));
      PetscCall(PetscFree(sums));
    } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for this norm yet");
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Creates the hash table, and sets the table
  This table is created only once.
  If new entries need to be added to the matrix
  then the hash table has to be destroyed and
  recreated.
*/
static PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat, PetscReal factor)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat          A = baij->A, B = baij->B;
  Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data;
  PetscInt     i, j, k, nz = a->nz + b->nz, h1, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
  PetscInt     ht_size, bs2 = baij->bs2, rstart = baij->rstartbs;
  PetscInt     cstart = baij->cstartbs, *garray = baij->garray, row, col, Nbs = baij->Nbs;
  PetscInt    *HT, key;
  MatScalar  **HD;
  PetscReal    tmp;
#if defined(PETSC_USE_INFO)
  PetscInt ct = 0, max = 0;
#endif

  PetscFunctionBegin;
  if (baij->ht) PetscFunctionReturn(PETSC_SUCCESS); /* already built */

  /* table size = factor * number of nonzero blocks; factor > 1 controls load */
  baij->ht_size = (PetscInt)(factor * nz);
  ht_size       = baij->ht_size;

  /* Allocate Memory for Hash Table */
  PetscCall(PetscCalloc2(ht_size, &baij->hd, ht_size, &baij->ht));
  HD = baij->hd;
  HT = baij->ht;

  /* Loop Over A: insert each block's key with open addressing (linear probing);
     HD stores a direct pointer to the block's values */
  for (i = 0; i < a->mbs; i++) {
    for (j = ai[i]; j < ai[i + 1]; j++) {
      row = i + rstart;
      col = aj[j] + cstart;

      key = row * Nbs + col + 1; /* +1 so that 0 means "empty slot" */
      h1  = HASH(ht_size, key, tmp);
      for (k = 0; k < ht_size; k++) {
        if (!HT[(h1 + k) % ht_size]) {
          HT[(h1 + k) % ht_size] = key;
          HD[(h1 + k) % ht_size] = a->a + j * bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }
  /* Loop Over B: same, with columns translated to global numbering via garray */
  for (i = 0; i < b->mbs; i++) {
    for (j = bi[i]; j < bi[i + 1]; j++) {
      row = i + rstart;
      col = garray[bj[j]];
      key = row * Nbs + col + 1;
      h1  = HASH(ht_size, key, tmp);
      for (k = 0; k < ht_size; k++) {
        if (!HT[(h1 + k) % ht_size]) {
          HT[(h1 + k) % ht_size] = key;
          HD[(h1 + k) % ht_size] = b->a + j * bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }

  /* Print Summary */
#if defined(PETSC_USE_INFO)
  for (i = 0, j = 0; i < ht_size; i++) {
    if (HT[i]) j++;
  }
  PetscCall(PetscInfo(mat, "Average Search = %5.2g,max search = %" PetscInt_FMT "\n", (!j) ? (double)0.0 : (double)(((PetscReal)(ct + j)) / (double)j), max));
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Begin parallel assembly: start communicating the stashed (off-process)
   point and block entries to their owning ranks. */
static PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  PetscInt     nstash, reallocs;

  PetscFunctionBegin;
  if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
  PetscCall(MatStashScatterBegin_Private(mat, &mat->bstash, baij->rangebs));
  PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
  PetscCall(PetscInfo(mat, "Stash has %" PetscInt_FMT " entries,uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscCall(MatStashGetInfo_Private(&mat->bstash, &nstash, &reallocs));
  PetscCall(PetscInfo(mat, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Finish parallel assembly: drain the point and block stashes into the local
   blocks, then assemble A (and, after the disassembly check, B). */
static PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ *a    = (Mat_SeqBAIJ *)baij->A->data;
  PetscInt     i, j, rstart, ncols, flg, bs2 = baij->bs2;
  PetscInt    *row, *col;
  PetscBool    r1, r2, r3, other_disassembled;
  MatScalar   *val;
  PetscMPIInt  n;

  PetscFunctionBegin;
  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  if (!baij->donotstash && !mat->nooffprocentries) {
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals
belonging to the same row */ 928 for (j = i, rstart = row[j]; j < n; j++) { 929 if (row[j] != rstart) break; 930 } 931 if (j < n) ncols = j - i; 932 else ncols = n - i; 933 /* Now assemble all these values with a single function call */ 934 PetscCall(MatSetValues_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode)); 935 i = j; 936 } 937 } 938 PetscCall(MatStashScatterEnd_Private(&mat->stash)); 939 /* Now process the block-stash. Since the values are stashed column-oriented, 940 set the row-oriented flag to column-oriented, and after MatSetValues() 941 restore the original flags */ 942 r1 = baij->roworiented; 943 r2 = a->roworiented; 944 r3 = ((Mat_SeqBAIJ *)baij->B->data)->roworiented; 945 946 baij->roworiented = PETSC_FALSE; 947 a->roworiented = PETSC_FALSE; 948 ((Mat_SeqBAIJ *)baij->B->data)->roworiented = PETSC_FALSE; 949 while (1) { 950 PetscCall(MatStashScatterGetMesg_Private(&mat->bstash, &n, &row, &col, &val, &flg)); 951 if (!flg) break; 952 953 for (i = 0; i < n;) { 954 /* Now identify the consecutive vals belonging to the same row */ 955 for (j = i, rstart = row[j]; j < n; j++) { 956 if (row[j] != rstart) break; 957 } 958 if (j < n) ncols = j - i; 959 else ncols = n - i; 960 PetscCall(MatSetValuesBlocked_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i * bs2, mat->insertmode)); 961 i = j; 962 } 963 } 964 PetscCall(MatStashScatterEnd_Private(&mat->bstash)); 965 966 baij->roworiented = r1; 967 a->roworiented = r2; 968 ((Mat_SeqBAIJ *)baij->B->data)->roworiented = r3; 969 } 970 971 PetscCall(MatAssemblyBegin(baij->A, mode)); 972 PetscCall(MatAssemblyEnd(baij->A, mode)); 973 974 /* determine if any processor has disassembled, if so we must 975 also disassemble ourselves, in order that we may reassemble. 
*/ 976 /* 977 if nonzero structure of submatrix B cannot change then we know that 978 no processor disassembled thus we can skip this stuff 979 */ 980 if (!((Mat_SeqBAIJ *)baij->B->data)->nonew) { 981 PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 982 if (mat->was_assembled && !other_disassembled) PetscCall(MatDisAssemble_MPIBAIJ(mat)); 983 } 984 985 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIBAIJ(mat)); 986 PetscCall(MatAssemblyBegin(baij->B, mode)); 987 PetscCall(MatAssemblyEnd(baij->B, mode)); 988 989 #if defined(PETSC_USE_INFO) 990 if (baij->ht && mode == MAT_FINAL_ASSEMBLY) { 991 PetscCall(PetscInfo(mat, "Average Hash Table Search in MatSetValues = %5.2f\n", (double)((PetscReal)baij->ht_total_ct) / baij->ht_insert_ct)); 992 993 baij->ht_total_ct = 0; 994 baij->ht_insert_ct = 0; 995 } 996 #endif 997 if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 998 PetscCall(MatCreateHashTable_MPIBAIJ_Private(mat, baij->ht_fact)); 999 1000 mat->ops->setvalues = MatSetValues_MPIBAIJ_HT; 1001 mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 1002 } 1003 1004 PetscCall(PetscFree2(baij->rowvalues, baij->rowindices)); 1005 1006 baij->rowvalues = NULL; 1007 1008 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 1009 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ *)baij->A->data)->nonew) { 1010 PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate; 1011 PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat))); 1012 } 1013 PetscFunctionReturn(PETSC_SUCCESS); 1014 } 1015 1016 extern PetscErrorCode MatView_SeqBAIJ(Mat, PetscViewer); 1017 #include <petscdraw.h> 1018 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer) 1019 { 1020 Mat_MPIBAIJ *baij = 
(Mat_MPIBAIJ *)mat->data;
  PetscMPIInt       rank = baij->rank;
  PetscInt          bs   = mat->rmap->bs;
  PetscBool         iascii, isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  if (iascii) {
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* per-rank statistics only; no entries are printed for this format */
      MatInfo info;
      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                   mat->rmap->bs, (double)info.memory));
      PetscCall(MatGetInfo(baij->A, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(MatGetInfo(baij->B, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(baij->Mvctx, viewer));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscCall(PetscViewerASCIIPrintf(viewer, "  block size is %" PetscInt_FMT "\n", bs));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(PETSC_SUCCESS);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    Mat_SeqBAIJ *Aloc;
    PetscInt     M = mat->rmap->N, N = mat->cmap->N, *ai, *aj, col, i, j, k, *rvals, mbs = baij->mbs;
    MatScalar   *a;
    const char  *matname;

    /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
    if (rank == 0) {
      PetscCall(MatSetSizes(A, M, N, M, N)); /* rank 0 owns everything */
    } else {
      PetscCall(MatSetSizes(A, 0, 0, M, N));
    }
    PetscCall(MatSetType(A, MATMPIBAIJ));
    PetscCall(MatMPIBAIJSetPreallocation(A, mat->rmap->bs, 0, NULL, 0, NULL));
    PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));

    /* copy over the A part: rvals holds the bs global point rows of block row i */
    Aloc = (Mat_SeqBAIJ *)baij->A->data;
    ai   = Aloc->i;
    aj   = Aloc->j;
    a    = Aloc->a;
    PetscCall(PetscMalloc1(bs, &rvals));

    for (i = 0; i < mbs; i++) {
      rvals[0] = bs * (baij->rstartbs + i);
      for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
      for (j = ai[i]; j < ai[i + 1]; j++) {
        col = (baij->cstartbs + aj[j]) * bs;
        for (k = 0; k < bs; k++) {
          /* blocks are stored column-by-column, so insert one column at a time */
          PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
          col++;
          a += bs;
        }
      }
    }
    /* copy over the B part; garray maps local block columns to global ones */
    Aloc = (Mat_SeqBAIJ *)baij->B->data;
    ai   = Aloc->i;
    aj   = Aloc->j;
    a    = Aloc->a;
    for (i = 0; i < mbs; i++) {
      rvals[0] = bs * (baij->rstartbs + i);
      for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
      for (j = ai[i]; j < ai[i + 1]; j++) {
        col = baij->garray[aj[j]] * bs;
        for (k = 0; k < bs; k++) {
          PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
          col++;
          a += bs;
        }
      }
    }
    PetscCall(PetscFree(rvals));
    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    if (((PetscObject)mat)->name) PetscCall(PetscObjectGetName((PetscObject)mat, &matname));
    if (rank == 0) {
      if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIBAIJ *)A->data)->A, matname));
      PetscCall(MatView_SeqBAIJ(((Mat_MPIBAIJ *)A->data)->A, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Used for both MPIBAIJ and MPISBAIJ matrices */
/* Writes the matrix in the PETSc binary (AIJ-expanded) format: header,
   per-point-row lengths, global column indices, then values.  Within each
   point row the off-diagonal columns left of the diagonal block come first,
   then the diagonal part, then the remaining off-diagonal columns, so the
   output is sorted by global column. */
PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat, PetscViewer viewer)
{
  Mat_MPIBAIJ    *aij    = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ    *A      = (Mat_SeqBAIJ *)aij->A->data;
  Mat_SeqBAIJ    *B      = (Mat_SeqBAIJ *)aij->B->data;
  const PetscInt *garray = aij->garray;
  PetscInt        header[4], M, N, m, rs, cs, bs, cnt, i, j, ja, jb, k, l;
  PetscInt64      nz, hnz;
  PetscInt       *rowlens, *colidxs;
  PetscScalar    *matvals;
  PetscMPIInt     rank;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  M  = mat->rmap->N;
  N  = mat->cmap->N;
  m  = mat->rmap->n;
  rs = mat->rmap->rstart;
  cs = mat->cmap->rstart;
  bs = mat->rmap->bs;
  nz = bs * bs * (A->nz + B->nz); /* local expanded (point) nonzero count */

  /* write matrix header */
  header[0] = MAT_FILE_CLASSID;
  header[1] = M;
  header[2] = N;
  PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_INT64, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
  /* header[3] is only meaningful (and written) on rank 0 */
  if (rank == 0) PetscCall(PetscIntCast(hnz, &header[3]));
  PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));

  /* fill in and store row lengths */
  PetscCall(PetscMalloc1(m, &rowlens));
  for (cnt = 0, i = 0; i < A->mbs; i++)
    for (j = 0; j < bs; j++) rowlens[cnt++] = bs * (A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i]);
  PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
  PetscCall(PetscFree(rowlens));

  /* fill in and store column indices */
  PetscCall(PetscMalloc1(nz, &colidxs));
  for (cnt = 0, i = 0; i < A->mbs; i++) {
    for (k = 0; k < bs; k++) {
      /* B columns left of the diagonal block (garray is sorted, so stop at the first one past cs/bs) */
      for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
        if (garray[B->j[jb]] > cs / bs) break;
        for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l;
      }
      /* diagonal-part columns, shifted to global numbering by cs */
      for (ja = A->i[i]; ja < A->i[i + 1]; ja++)
        for (l = 0; l < bs; l++) colidxs[cnt++] = bs * A->j[ja] + l + cs;
      /* remaining B columns right of the diagonal block */
      for (; jb < B->i[i + 1]; jb++)
        for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l;
    }
  }
  PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
  PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_INT));
  PetscCall(PetscFree(colidxs));

  /* fill in and store nonzero values, same traversal order as the column indices */
  PetscCall(PetscMalloc1(nz, &matvals));
  for (cnt = 0, i = 0; i < A->mbs; i++) {
    for (k = 0; k < bs; k++) {
      for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
        if (garray[B->j[jb]] > cs / bs) break;
        for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k];
      }
      for (ja = A->i[i]; ja < A->i[i + 1]; ja++)
        for (l = 0; l < bs; l++) matvals[cnt++] = A->a[bs * (bs * ja + l) + k];
      for (; jb < B->i[i + 1]; jb++)
        for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l)
+ k]; 1209 } 1210 } 1211 PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_SCALAR)); 1212 PetscCall(PetscFree(matvals)); 1213 1214 /* write block size option to the viewer's .info file */ 1215 PetscCall(MatView_Binary_BlockSizes(mat, viewer)); 1216 PetscFunctionReturn(PETSC_SUCCESS); 1217 } 1218 1219 PetscErrorCode MatView_MPIBAIJ(Mat mat, PetscViewer viewer) 1220 { 1221 PetscBool iascii, isdraw, issocket, isbinary; 1222 1223 PetscFunctionBegin; 1224 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 1225 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1226 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket)); 1227 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1228 if (iascii || isdraw || issocket) { 1229 PetscCall(MatView_MPIBAIJ_ASCIIorDraworSocket(mat, viewer)); 1230 } else if (isbinary) PetscCall(MatView_MPIBAIJ_Binary(mat, viewer)); 1231 PetscFunctionReturn(PETSC_SUCCESS); 1232 } 1233 1234 static PetscErrorCode MatMult_MPIBAIJ(Mat A, Vec xx, Vec yy) 1235 { 1236 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1237 PetscInt nt; 1238 1239 PetscFunctionBegin; 1240 PetscCall(VecGetLocalSize(xx, &nt)); 1241 PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A and xx"); 1242 PetscCall(VecGetLocalSize(yy, &nt)); 1243 PetscCheck(nt == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A and yy"); 1244 PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1245 PetscCall((*a->A->ops->mult)(a->A, xx, yy)); 1246 PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1247 PetscCall((*a->B->ops->multadd)(a->B, a->lvec, yy, yy)); 1248 PetscFunctionReturn(PETSC_SUCCESS); 1249 } 1250 1251 static PetscErrorCode MatMultAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz) 1252 { 1253 Mat_MPIBAIJ *a = 
(Mat_MPIBAIJ *)A->data; 1254 1255 PetscFunctionBegin; 1256 PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1257 PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz)); 1258 PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1259 PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz)); 1260 PetscFunctionReturn(PETSC_SUCCESS); 1261 } 1262 1263 static PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A, Vec xx, Vec yy) 1264 { 1265 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1266 1267 PetscFunctionBegin; 1268 /* do nondiagonal part */ 1269 PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec)); 1270 /* do local part */ 1271 PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy)); 1272 /* add partial results together */ 1273 PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE)); 1274 PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE)); 1275 PetscFunctionReturn(PETSC_SUCCESS); 1276 } 1277 1278 static PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz) 1279 { 1280 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1281 1282 PetscFunctionBegin; 1283 /* do nondiagonal part */ 1284 PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec)); 1285 /* do local part */ 1286 PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz)); 1287 /* add partial results together */ 1288 PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE)); 1289 PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE)); 1290 PetscFunctionReturn(PETSC_SUCCESS); 1291 } 1292 1293 /* 1294 This only works correctly for square matrices where the subblock A->A is the 1295 diagonal block 1296 */ 1297 static PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A, Vec v) 1298 { 1299 PetscFunctionBegin; 1300 PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block"); 1301 
PetscCall(MatGetDiagonal(((Mat_MPIBAIJ *)A->data)->A, v)); 1302 PetscFunctionReturn(PETSC_SUCCESS); 1303 } 1304 1305 static PetscErrorCode MatScale_MPIBAIJ(Mat A, PetscScalar aa) 1306 { 1307 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1308 1309 PetscFunctionBegin; 1310 PetscCall(MatScale(a->A, aa)); 1311 PetscCall(MatScale(a->B, aa)); 1312 PetscFunctionReturn(PETSC_SUCCESS); 1313 } 1314 1315 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1316 { 1317 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data; 1318 PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p; 1319 PetscInt bs = matin->rmap->bs, bs2 = mat->bs2, i, *cworkA, *cworkB, **pcA, **pcB; 1320 PetscInt nztot, nzA, nzB, lrow, brstart = matin->rmap->rstart, brend = matin->rmap->rend; 1321 PetscInt *cmap, *idx_p, cstart = mat->cstartbs; 1322 1323 PetscFunctionBegin; 1324 PetscCheck(row >= brstart && row < brend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local rows"); 1325 PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active"); 1326 mat->getrowactive = PETSC_TRUE; 1327 1328 if (!mat->rowvalues && (idx || v)) { 1329 /* 1330 allocate enough space to hold information from the longest row. 
1331 */ 1332 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *)mat->A->data, *Ba = (Mat_SeqBAIJ *)mat->B->data; 1333 PetscInt max = 1, mbs = mat->mbs, tmp; 1334 for (i = 0; i < mbs; i++) { 1335 tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i]; 1336 if (max < tmp) max = tmp; 1337 } 1338 PetscCall(PetscMalloc2(max * bs2, &mat->rowvalues, max * bs2, &mat->rowindices)); 1339 } 1340 lrow = row - brstart; 1341 1342 pvA = &vworkA; 1343 pcA = &cworkA; 1344 pvB = &vworkB; 1345 pcB = &cworkB; 1346 if (!v) { 1347 pvA = NULL; 1348 pvB = NULL; 1349 } 1350 if (!idx) { 1351 pcA = NULL; 1352 if (!v) pcB = NULL; 1353 } 1354 PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA)); 1355 PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB)); 1356 nztot = nzA + nzB; 1357 1358 cmap = mat->garray; 1359 if (v || idx) { 1360 if (nztot) { 1361 /* Sort by increasing column numbers, assuming A and B already sorted */ 1362 PetscInt imark = -1; 1363 if (v) { 1364 *v = v_p = mat->rowvalues; 1365 for (i = 0; i < nzB; i++) { 1366 if (cmap[cworkB[i] / bs] < cstart) v_p[i] = vworkB[i]; 1367 else break; 1368 } 1369 imark = i; 1370 for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i]; 1371 for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i]; 1372 } 1373 if (idx) { 1374 *idx = idx_p = mat->rowindices; 1375 if (imark > -1) { 1376 for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs; 1377 } else { 1378 for (i = 0; i < nzB; i++) { 1379 if (cmap[cworkB[i] / bs] < cstart) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs; 1380 else break; 1381 } 1382 imark = i; 1383 } 1384 for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart * bs + cworkA[i]; 1385 for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs; 1386 } 1387 } else { 1388 if (idx) *idx = NULL; 1389 if (v) *v = NULL; 1390 } 1391 } 1392 *nz = nztot; 1393 PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA)); 1394 PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, 
&nzB, pcB, pvB));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Releases the row obtained with MatGetRow_MPIBAIJ(); the work buffers are
   owned by the matrix, so only the active flag needs clearing. */
PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCheck(baij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow not called");
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Zeros all stored values (pattern unchanged) in both local parts */
static PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatZeroEntries(l->A));
  PetscCall(MatZeroEntries(l->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Combines the statistics of the diagonal and off-diagonal parts, reducing
   across ranks with MAX or SUM for the global flavors. */
static PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin, MatInfoType flag, MatInfo *info)
{
  Mat_MPIBAIJ   *a = (Mat_MPIBAIJ *)matin->data;
  Mat            A = a->A, B = a->B;
  PetscLogDouble isend[5], irecv[5];

  PetscFunctionBegin;
  info->block_size = (PetscReal)matin->rmap->bs;

  PetscCall(MatGetInfo(A, MAT_LOCAL, info));

  isend[0] = info->nz_used;
  isend[1] = info->nz_allocated;
  isend[2] = info->nz_unneeded;
  isend[3] = info->memory;
  isend[4] = info->mallocs;

  PetscCall(MatGetInfo(B, MAT_LOCAL, info));

  isend[0] += info->nz_used;
  isend[1] += info->nz_allocated;
  isend[2] += info->nz_unneeded;
  isend[3] += info->memory;
  isend[4] += info->mallocs;

  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_ARG_WRONG, "Unknown MatInfoType argument %d", (int)flag);
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Applies an option to the parallel matrix, forwarding to the local parts
   where they are affected; symmetry-type options are accepted but recorded
   at the parallel level only. */
static PetscErrorCode MatSetOption_MPIBAIJ(Mat A, MatOption op, PetscBool flg)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A, 1);
    a->roworiented = flg;

    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_SORTED_FULL:
    PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_USE_HASH_TABLE:
    a->ht_flag = flg;
    a->ht_fact = 1.39; /* default table-size factor; see MatSetHashTableFactor() */
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SUBMAT_SINGLEIS:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
  case MAT_SPD_ETERNAL:
    /* if the diagonal matrix is square it inherits some of the properties above */
    break;
  default:
    SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "unknown option %d", op);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Forms the transpose by inserting each local block column-by-column with
   rows and columns swapped; communication happens in the final assembly. */
static PetscErrorCode MatTranspose_MPIBAIJ(Mat A, MatReuse reuse, Mat *matout)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)A->data;
  Mat_SeqBAIJ *Aloc;
  Mat          B;
  PetscInt     M = A->rmap->N, N = A->cmap->N, *ai, *aj, i, *rvals, j, k, col;
  PetscInt     bs = A->rmap->bs, mbs = baij->mbs;
  MatScalar   *a;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
    PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
    /* Do not know preallocation information, but must set block size */
    PetscCall(MatMPIBAIJSetPreallocation(B, A->rmap->bs, PETSC_DECIDE, NULL, PETSC_DECIDE, NULL));
  } else {
    B = *matout;
  }

  /* copy over the A part; rvals lists the bs global point rows of block row i,
     inserted as columns of B to effect the transpose */
  Aloc = (Mat_SeqBAIJ *)baij->A->data;
  ai   = Aloc->i;
  aj   = Aloc->j;
  a    = Aloc->a;
  PetscCall(PetscMalloc1(bs, &rvals));

  for (i = 0; i < mbs; i++) {
    rvals[0] = bs * (baij->rstartbs + i);
    for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
    for (j = ai[i]; j < ai[i + 1]; j++) {
      col = (baij->cstartbs + aj[j]) * bs;
      for (k = 0; k < bs; k++) {
        PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES));

        col++;
        a += bs;
      }
    }
  }
  /* copy over the B part; garray maps local block columns to global ones */
  Aloc = (Mat_SeqBAIJ *)baij->B->data;
  ai   = Aloc->i;
  aj   = Aloc->j;
  a    = Aloc->a;
  for (i = 0; i < mbs; i++) {
    rvals[0] = bs * (baij->rstartbs + i);
    for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
    for (j = ai[i]; j < ai[i + 1]; j++) {
      col = baij->garray[aj[j]] * bs;
      for (k = 0; k < bs; k++) {
        PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES));
        col++;
        a += bs;
      }
    }
  }
  PetscCall(PetscFree(rvals));
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));

  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) *matout = B;
  else PetscCall(MatHeaderMerge(A, &B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* A = diag(ll)*A*diag(rr); the scatter of rr into lvec is overlapped with the
   left scaling and the diagonal-block scaling. */
static PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat, Vec ll, Vec rr)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat          a = baij->A, b = baij->B;
  PetscInt     s1, s2, s3;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(mat, &s2, &s3));
  if (rr) {
    PetscCall(VecGetLocalSize(rr, &s1));
    PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size");
    /* Overlap communication with computation. */
    PetscCall(VecScatterBegin(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
  }
  if (ll) {
    PetscCall(VecGetLocalSize(ll, &s1));
    PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size");
    PetscUseTypeMethod(b, diagonalscale, ll, NULL);
  }
  /* scale the diagonal block */
  PetscUseTypeMethod(a, diagonalscale, ll, rr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    PetscCall(VecScatterEnd(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscUseTypeMethod(b, diagonalscale, NULL, baij->lvec);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Zeros the locally owned rows listed in rows[] (global numbering), optionally
   placing diag on the diagonal and fixing up the right-hand side b from the
   solution x.  Collective; each rank receives the rows it owns. */
static PetscErrorCode MatZeroRows_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data;
  PetscInt    *lrows;
  PetscInt     r, len;
  PetscBool    cong;

  PetscFunctionBegin;
  /* get locally owned rows */
PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows));
  /* fix right-hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArray(b, &bb));
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArray(b, &bb));
  }

  /* actually zap the local rows */
  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
  PetscCall(MatZeroRows_SeqBAIJ(l->B, len, lrows, 0.0, NULL, NULL));
  PetscCall(MatHasCongruentLayouts(A, &cong));
  if ((diag != 0.0) && cong) {
    /* fast path: the sequential routine sets the diagonal in place */
    PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, diag, NULL, NULL));
  } else if (diag != 0.0) {
    /* non-congruent layouts: diagonal entries may live off the diagonal block,
       so they must be inserted via MatSetValues() followed by a reassembly */
    PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL));
    PetscCheck(!((Mat_SeqBAIJ *)l->A->data)->nonew, PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options MAT_NEW_NONZERO_LOCATIONS, MAT_NEW_NONZERO_LOCATION_ERR, and MAT_NEW_NONZERO_ALLOCATION_ERR");
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
    }
    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
  } else {
    PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL));
  }
  PetscCall(PetscFree(lrows));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqBAIJ *)l->A->data)->keepnonzeropattern || !((Mat_SeqBAIJ *)l->A->data)->nonew) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Zeros both the rows and the columns listed in rows[] (global numbering).
   Row ownership is communicated with a PetscSF; the zeroed columns of the
   off-diagonal part are found by scattering a 0/1 mask of zeroed rows into
   the ghost layout.  Optionally sets diag on the diagonal and updates b so
   that the solution values x at the eliminated unknowns are preserved. */
static PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIBAIJ       *l = (Mat_MPIBAIJ *)A->data;
  PetscMPIInt        n = A->rmap->n, p = 0;
  PetscInt           i, j, k, r, len = 0, row, col, count;
  PetscInt          *lrows, *owners = A->rmap->range;
  PetscSFNode       *rrows;
  PetscSF            sf;
  const PetscScalar *xx;
  PetscScalar       *bb, *mask;
  Vec                xmask, lmask;
  Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ *)l->B->data;
  PetscInt           bs = A->rmap->bs, bs2 = baij->bs2;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  PetscCall(PetscMalloc1(n, &lrows));
  for (r = 0; r < n; ++r) lrows[r] = -1;
  PetscCall(PetscMalloc1(N, &rrows));
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N);
    if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */
      PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p));
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
  /* Collect flags for rows to be zeroed */
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFDestroy(&sf));
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r)
    if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b));
  /* handle off-diagonal part of matrix: build a 0/1 mask of the zeroed rows
     and scatter it into the ghost layout to identify the zeroed columns */
  PetscCall(MatCreateVecs(A, &xmask, NULL));
  PetscCall(VecDuplicate(l->lvec, &lmask));
  PetscCall(VecGetArray(xmask, &bb));
  for (i = 0; i < len; i++) bb[lrows[i]] = 1;
  PetscCall(VecRestoreArray(xmask, &bb));
  PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecDestroy(&xmask));
  if (x) {
    /* bring in the ghost values of x needed for the right-hand-side update */
    PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecGetArrayRead(l->lvec, &xx));
    PetscCall(VecGetArray(b, &bb));
  }
  PetscCall(VecGetArray(lmask, &mask));
  /* remove zeroed rows of off-diagonal matrix */
  for (i = 0; i < len; ++i) {
    row   = lrows[i];
    count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs;
    aa    = ((MatScalar *)baij->a) + baij->i[row / bs] * bs2 + (row % bs);
    for (k = 0; k < count; ++k) {
      aa[0] = 0.0;
      aa += bs; /* blocks are column-major: stride bs stays within the same point row */
    }
  }
  /* loop over all elements of off process part of matrix zeroing removed columns*/
  for (i = 0; i < l->B->rmap->N; ++i) {
    row = i / bs;
    for (j = baij->i[row]; j < baij->i[row + 1]; ++j) {
      for (k = 0; k < bs; ++k) {
        col = bs * baij->j[j] + k;
        if (PetscAbsScalar(mask[col])) {
          aa = ((MatScalar *)baij->a) + j * bs2 + (i % bs) + bs * k;
          if (x) bb[i] -= aa[0] * xx[col]; /* move the eliminated term to the right-hand side */
          aa[0] = 0.0;
        }
      }
    }
  }
  if (x) {
    PetscCall(VecRestoreArray(b, &bb));
    PetscCall(VecRestoreArrayRead(l->lvec, &xx));
  }
  PetscCall(VecRestoreArray(lmask, &mask));
  PetscCall(VecDestroy(&lmask));
  PetscCall(PetscFree(lrows));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqBAIJ *)l->A->data)->nonew) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Clears the factored state of the diagonal block */
static PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatSetUnfactored(a->A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatDuplicate_MPIBAIJ(Mat, MatDuplicateOption, Mat *);

/* Compares the diagonal and off-diagonal parts rank-by-rank, then reduces
   with logical AND so all ranks return the same answer. */
static PetscErrorCode MatEqual_MPIBAIJ(Mat A, Mat B, PetscBool *flag)
{
  Mat_MPIBAIJ *matB = (Mat_MPIBAIJ *)B->data, *matA = (Mat_MPIBAIJ *)A->data;
  Mat          a, b, c, d;
  PetscBool    flg;

  PetscFunctionBegin;
  a = matA->A;
  b = matA->B;
  c = matB->A;
  d = matB->B;

  PetscCall(MatEqual(a, c, &flg));
  if (flg) PetscCall(MatEqual(b, d, &flg));
  /* every rank must participate in the reduction even if its local test failed */
  PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatCopy_MPIBAIJ(Mat A, Mat B, MatStructure str)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
  Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data;

  PetscFunctionBegin;
  /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy.
*/ 1806 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1807 PetscCall(MatCopy_Basic(A, B, str)); 1808 } else { 1809 PetscCall(MatCopy(a->A, b->A, str)); 1810 PetscCall(MatCopy(a->B, b->B, str)); 1811 } 1812 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 1813 PetscFunctionReturn(PETSC_SUCCESS); 1814 } 1815 1816 PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz) 1817 { 1818 PetscInt bs = Y->rmap->bs, m = Y->rmap->N / bs; 1819 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data; 1820 Mat_SeqBAIJ *y = (Mat_SeqBAIJ *)Y->data; 1821 1822 PetscFunctionBegin; 1823 PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz)); 1824 PetscFunctionReturn(PETSC_SUCCESS); 1825 } 1826 1827 static PetscErrorCode MatAXPY_MPIBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str) 1828 { 1829 Mat_MPIBAIJ *xx = (Mat_MPIBAIJ *)X->data, *yy = (Mat_MPIBAIJ *)Y->data; 1830 PetscBLASInt bnz, one = 1; 1831 Mat_SeqBAIJ *x, *y; 1832 PetscInt bs2 = Y->rmap->bs * Y->rmap->bs; 1833 1834 PetscFunctionBegin; 1835 if (str == SAME_NONZERO_PATTERN) { 1836 PetscScalar alpha = a; 1837 x = (Mat_SeqBAIJ *)xx->A->data; 1838 y = (Mat_SeqBAIJ *)yy->A->data; 1839 PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz)); 1840 PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one)); 1841 x = (Mat_SeqBAIJ *)xx->B->data; 1842 y = (Mat_SeqBAIJ *)yy->B->data; 1843 PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz)); 1844 PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one)); 1845 PetscCall(PetscObjectStateIncrease((PetscObject)Y)); 1846 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1847 PetscCall(MatAXPY_Basic(Y, a, X, str)); 1848 } else { 1849 Mat B; 1850 PetscInt *nnz_d, *nnz_o, bs = Y->rmap->bs; 1851 PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d)); 1852 PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o)); 1853 
PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B)); 1854 PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name)); 1855 PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N)); 1856 PetscCall(MatSetBlockSizesFromMats(B, Y, Y)); 1857 PetscCall(MatSetType(B, MATMPIBAIJ)); 1858 PetscCall(MatAXPYGetPreallocation_SeqBAIJ(yy->A, xx->A, nnz_d)); 1859 PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o)); 1860 PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, nnz_d, 0, nnz_o)); 1861 /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! */ 1862 PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str)); 1863 PetscCall(MatHeaderMerge(Y, &B)); 1864 PetscCall(PetscFree(nnz_d)); 1865 PetscCall(PetscFree(nnz_o)); 1866 } 1867 PetscFunctionReturn(PETSC_SUCCESS); 1868 } 1869 1870 static PetscErrorCode MatConjugate_MPIBAIJ(Mat mat) 1871 { 1872 PetscFunctionBegin; 1873 if (PetscDefined(USE_COMPLEX)) { 1874 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)mat->data; 1875 1876 PetscCall(MatConjugate_SeqBAIJ(a->A)); 1877 PetscCall(MatConjugate_SeqBAIJ(a->B)); 1878 } 1879 PetscFunctionReturn(PETSC_SUCCESS); 1880 } 1881 1882 static PetscErrorCode MatRealPart_MPIBAIJ(Mat A) 1883 { 1884 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1885 1886 PetscFunctionBegin; 1887 PetscCall(MatRealPart(a->A)); 1888 PetscCall(MatRealPart(a->B)); 1889 PetscFunctionReturn(PETSC_SUCCESS); 1890 } 1891 1892 static PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A) 1893 { 1894 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1895 1896 PetscFunctionBegin; 1897 PetscCall(MatImaginaryPart(a->A)); 1898 PetscCall(MatImaginaryPart(a->B)); 1899 PetscFunctionReturn(PETSC_SUCCESS); 1900 } 1901 1902 static PetscErrorCode MatCreateSubMatrix_MPIBAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat) 1903 { 1904 IS iscol_local; 1905 PetscInt csize; 1906 1907 PetscFunctionBegin; 1908 PetscCall(ISGetLocalSize(iscol, &csize)); 1909 if (call == 
MAT_REUSE_MATRIX) {
    /* retrieve the gathered column IS stashed by the MAT_INITIAL_MATRIX call */
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
    PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
  } else {
    PetscCall(ISAllGather(iscol, &iscol_local));
  }
  PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, isrow, iscol_local, csize, call, newmat, PETSC_FALSE));
  if (call == MAT_INITIAL_MATRIX) {
    PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
    PetscCall(ISDestroy(&iscol_local));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Not great since it makes two copies of the submatrix, first an SeqBAIJ
  in local and then by concatenating the local matrices the end result.
  Writing it directly would be much like MatCreateSubMatrices_MPIBAIJ().
  This routine is used for BAIJ and SBAIJ matrices (unfortunate dependency).
*/
PetscErrorCode MatCreateSubMatrix_MPIBAIJ_Private(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat, PetscBool sym)
{
  PetscMPIInt  rank, size;
  PetscInt     i, m, n, rstart, row, rend, nz, *cwork, j, bs;
  PetscInt    *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
  Mat          M, Mreuse;
  MatScalar   *vwork, *aa;
  MPI_Comm     comm;
  IS           isrow_new, iscol_new;
  Mat_SeqBAIJ *aij;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  /* The compression and expansion should be avoided. Doesn't point
     out errors, might change the indices, hence buggey */
  /* convert scalar index sets to block index sets for the block-row extraction routine */
  PetscCall(ISCompressIndicesGeneral(mat->rmap->N, mat->rmap->n, mat->rmap->bs, 1, &isrow, &isrow_new));
  if (isrow == iscol) {
    iscol_new = isrow_new;
    PetscCall(PetscObjectReference((PetscObject)iscol_new));
  } else PetscCall(ISCompressIndicesGeneral(mat->cmap->N, mat->cmap->n, mat->cmap->bs, 1, &iscol, &iscol_new));

  if (call == MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse));
    PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
    PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_REUSE_MATRIX, &Mreuse, sym));
  } else {
    PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_INITIAL_MATRIX, &Mreuse, sym));
  }
  PetscCall(ISDestroy(&isrow_new));
  PetscCall(ISDestroy(&iscol_new));
  /*
    m - number of local rows
    n - number of columns (same on all processors)
    rstart - first row in new global matrix generated
  */
  PetscCall(MatGetBlockSize(mat, &bs));
  PetscCall(MatGetSize(Mreuse, &m, &n));
  m = m / bs; /* work in block rows/columns from here on */
  n = n / bs;

  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqBAIJ *)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
      Determine the number of non-zeros in the diagonal and off-diagonal
      portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow, &mglobal));
      if (mglobal == n * bs) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = n / size + ((n % size) > rank);
      }
    } else {
      nlocal = csize / bs;
    }
    PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
    rstart = rend - nlocal;
    PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n);

    /* next, compute all the lengths */
    PetscCall(PetscMalloc2(m + 1, &dlens, m + 1, &olens));
    for (i = 0; i < m; i++) {
      jend = ii[i + 1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j = 0; j < jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    PetscCall(MatCreate(comm, &M));
    PetscCall(MatSetSizes(M, bs * m, bs * nlocal, PETSC_DECIDE, bs * n));
    PetscCall(MatSetType(M, sym ? ((PetscObject)mat)->type_name : MATMPIBAIJ));
    /* only the call matching M's actual type takes effect; the other is a no-op */
    PetscCall(MatMPIBAIJSetPreallocation(M, bs, 0, dlens, 0, olens));
    PetscCall(MatMPISBAIJSetPreallocation(M, bs, 0, dlens, 0, olens));
    PetscCall(PetscFree2(dlens, olens));
  } else {
    PetscInt ml, nl;

    M = *newmat;
    PetscCall(MatGetLocalSize(M, &ml, &nl));
    PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
      The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
      rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  PetscCall(MatSetOption(M, MAT_ROW_ORIENTED, PETSC_FALSE));
  PetscCall(MatGetOwnershipRange(M, &rstart, &rend));
  aij = (Mat_SeqBAIJ *)(Mreuse)->data;
  ii  = aij->i;
  jj  = aij->j;
  aa  = aij->a;
  /* insert the local block rows of Mreuse into the parallel matrix, one block row at a time */
  for (i = 0; i < m; i++) {
    row   = rstart / bs + i;
    nz    = ii[i + 1] - ii[i];
    cwork = jj;
    jj    = PetscSafePointerPlusOffset(jj, nz);
    vwork = aa;
    aa    = PetscSafePointerPlusOffset(aa, nz * bs * bs);
    PetscCall(MatSetValuesBlocked_MPIBAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES));
  }

  PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse));
    PetscCall(PetscObjectDereference((PetscObject)Mreuse));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Permutes rows and columns by extracting the submatrix selected by the permutation index sets;
   rowp is made collective on the matrix communicator and colp gathered locally first */
static PetscErrorCode MatPermute_MPIBAIJ(Mat A, IS rowp, IS colp, Mat *B)
{
  MPI_Comm        comm, pcomm;
  PetscInt        clocal_size, nrows;
  const PetscInt *rows;
  PetscMPIInt     size;
  IS              crowp, lcolp;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  /* make a collective version of 'rowp' */
  PetscCall(PetscObjectGetComm((PetscObject)rowp, &pcomm));
  if (pcomm == comm) {
    crowp = rowp;
  } else {
    PetscCall(ISGetSize(rowp, &nrows));
    PetscCall(ISGetIndices(rowp, &rows));
    PetscCall(ISCreateGeneral(comm, nrows, rows, PETSC_COPY_VALUES, &crowp));
    PetscCall(ISRestoreIndices(rowp, &rows));
  }
  PetscCall(ISSetPermutation(crowp));
  /* make a local version of 'colp' */
  PetscCall(PetscObjectGetComm((PetscObject)colp, &pcomm));
  PetscCallMPI(MPI_Comm_size(pcomm, &size));
  if (size == 1) {
    lcolp = colp;
  } else {
    PetscCall(ISAllGather(colp, &lcolp));
  }
  PetscCall(ISSetPermutation(lcolp));
  /* now we just get the submatrix */
  PetscCall(MatGetLocalSize(A, NULL, &clocal_size));
  PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(A, crowp, lcolp, clocal_size, MAT_INITIAL_MATRIX, B, PETSC_FALSE));
  /* clean up */
  if (pcomm != comm) PetscCall(ISDestroy(&crowp));
  if (size > 1) PetscCall(ISDestroy(&lcolp));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Returns the number of ghost block columns and the global block indices they map to */
static PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[])
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ *B    = (Mat_SeqBAIJ *)baij->B->data;

  PetscFunctionBegin;
  if (nghosts) *nghosts = B->nbs;
  if (ghosts) *ghosts = baij->garray;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatGetSeqNonzeroStructure_MPIBAIJ - Builds, on every rank, a sequential AIJ matrix holding the
  block-level nonzero structure of the whole parallel matrix (numerical values are not set).

  Two MPI_Allgatherv calls are used: one for per-block-row lengths, one for the column indices.
  Each rank writes its own indices directly into its slice of the shared j array, merging the
  below-diagonal part of B, then A, then the above-diagonal part of B so each row stays sorted.
*/
static PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A, Mat *newmat)
{
  Mat          B;
  Mat_MPIBAIJ *a  = (Mat_MPIBAIJ *)A->data;
  Mat_SeqBAIJ *ad = (Mat_SeqBAIJ *)a->A->data, *bd = (Mat_SeqBAIJ *)a->B->data;
  Mat_SeqAIJ  *b;
  PetscMPIInt  size, rank, *recvcounts = NULL, *displs = NULL;
  PetscInt     sendcount, i, *rstarts = A->rmap->range, n, cnt, j, bs = A->rmap->bs;
  PetscInt     m, *garray = a->garray, *lens, *jsendbuf, *a_jsendbuf, *b_jsendbuf;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));

  /* Tell every processor the number of nonzeros per row */
  PetscCall(PetscMalloc1(A->rmap->N / bs, &lens));
  for (i = A->rmap->rstart / bs; i < A->rmap->rend / bs; i++) lens[i] = ad->i[i - A->rmap->rstart / bs + 1] - ad->i[i - A->rmap->rstart / bs] + bd->i[i - A->rmap->rstart / bs + 1] - bd->i[i - A->rmap->rstart / bs];
  PetscCall(PetscMalloc1(2 * size, &recvcounts)); /* one allocation holds both recvcounts and displs */
  displs = recvcounts + size;
  for (i = 0; i < size; i++) {
    recvcounts[i] = A->rmap->range[i + 1] / bs - A->rmap->range[i] / bs;
    displs[i]     = A->rmap->range[i] / bs;
  }
  PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, lens, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A)));
  /* Create the sequential matrix of the same type as the local block diagonal */
  PetscCall(MatCreate(PETSC_COMM_SELF, &B));
  PetscCall(MatSetSizes(B, A->rmap->N / bs, A->cmap->N / bs, PETSC_DETERMINE, PETSC_DETERMINE));
  PetscCall(MatSetType(B, MATSEQAIJ));
  PetscCall(MatSeqAIJSetPreallocation(B, 0, lens));
  b = (Mat_SeqAIJ *)B->data;

  /* Copy my part of matrix column indices over */
  sendcount  = ad->nz + bd->nz;
  jsendbuf   = b->j + b->i[rstarts[rank] / bs]; /* this rank's slice of the global j array */
  a_jsendbuf = ad->j;
  b_jsendbuf = bd->j;
  n          = A->rmap->rend / bs - A->rmap->rstart / bs;
  cnt        = 0;
  for (i = 0; i < n; i++) {
    /* put in lower diagonal portion */
    m = bd->i[i + 1] - bd->i[i];
    while (m > 0) {
      /* is it above diagonal (in bd (compressed) numbering) */
      if (garray[*b_jsendbuf] > A->rmap->rstart / bs + i) break;
      jsendbuf[cnt++] = garray[*b_jsendbuf++];
      m--;
    }

    /* put in diagonal portion */
    for (j = ad->i[i]; j < ad->i[i + 1]; j++) jsendbuf[cnt++] = A->rmap->rstart / bs + *a_jsendbuf++;

    /* put in upper diagonal portion */
    while (m-- > 0) jsendbuf[cnt++] = garray[*b_jsendbuf++];
  }
  PetscCheck(cnt == sendcount, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Corrupted PETSc matrix: nz given %" PetscInt_FMT " actual nz %" PetscInt_FMT, sendcount, cnt);

  /* Gather all column indices to all processors */
  for (i = 0; i < size; i++) {
    recvcounts[i] = 0;
    for (j = A->rmap->range[i] / bs; j < A->rmap->range[i + 1] / bs; j++) recvcounts[i] += lens[j];
  }
  displs[0] = 0;
  for (i = 1; i < size; i++) displs[i] = displs[i - 1] + recvcounts[i - 1];
  PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, b->j, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A)));
  /* Assemble the matrix into usable form (note numerical values not yet set) */
  /* set the b->ilen (length of each row) values */
  PetscCall(PetscArraycpy(b->ilen, lens, A->rmap->N / bs));
  /* set the b->i indices */
  b->i[0] = 0;
  for (i = 1; i <= A->rmap->N / bs; i++) b->i[i] = b->i[i - 1] + lens[i - 1];
  PetscCall(PetscFree(lens));
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  PetscCall(PetscFree(recvcounts));

  PetscCall(MatPropagateSymmetryOptions(A, B));
  *newmat = B;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatSOR_MPIBAIJ - Parallel (processor-local) SOR/Gauss-Seidel relaxation.

  Only the SOR_LOCAL_* variants and SOR_APPLY_UPPER are supported: each iteration scatters the
  current solution's ghost values, forms bb1 = bb - B*x for the off-process coupling, and runs
  the sequential SOR kernel on the diagonal block.
*/
static PetscErrorCode MatSOR_MPIBAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data;
  Vec          bb1 = NULL;

  PetscFunctionBegin;
  if (flag == SOR_APPLY_UPPER) {
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  /* a work vector is needed unless this is a single zero-initial-guess sweep */
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) PetscCall(VecDuplicate(bb, &bb1));

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* with a zero guess the first sweep needs no ghost update: B*x is zero */
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }

    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag &
SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* with a zero guess the first sweep needs no ghost update: B*x is zero */
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel version of SOR requested not supported");

  PetscCall(VecDestroy(&bb1));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatGetColumnReductions_MPIBAIJ - Computes a per-column reduction of the matrix entries.

  type is a NORM_* or REDUCTION_* value.  Each rank accumulates its diagonal (A) and
  off-diagonal (B) block contributions into a global-length work array, which is then combined
  across ranks (max for NORM_INFINITY, sum otherwise).  NORM_2 results are square-rooted and
  the MEAN reductions divided by the global row count at the end.
*/
static PetscErrorCode MatGetColumnReductions_MPIBAIJ(Mat A, PetscInt type, PetscReal *reductions)
{
  Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)A->data;
  PetscInt     m, N, i, *garray = aij->garray;
  PetscInt     ib, jb, bs = A->rmap->bs;
  Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ *)aij->A->data;
  MatScalar   *a_val = a_aij->a;
  Mat_SeqBAIJ *b_aij = (Mat_SeqBAIJ *)aij->B->data;
  MatScalar   *b_val = b_aij->a;
  PetscReal   *work;

  PetscFunctionBegin;
  PetscCall(MatGetSize(A, &m, &N));
  PetscCall(PetscCalloc1(N, &work));
  /* in each branch a_val/b_val walk the raw value arrays block by block: jb indexes the
     block column, ib the block row, matching the storage order of a bs x bs block */
  if (type == NORM_2) {
    for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
      for (jb = 0; jb < bs; jb++) {
        for (ib = 0; ib < bs; ib++) {
          work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val);
          a_val++;
        }
      }
    }
    for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
      for (jb = 0; jb < bs; jb++) {
        for (ib = 0; ib < bs; ib++) {
          work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val);
          b_val++;
        }
      }
    }
  } else if (type == NORM_1) {
    for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
      for (jb = 0; jb < bs; jb++) {
        for (ib = 0; ib < bs; ib++) {
          work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
          a_val++;
        }
      }
    }
    for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
      for (jb = 0; jb < bs; jb++) {
        for (ib = 0; ib < bs; ib++) {
          work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val);
          b_val++;
        }
      }
    }
  } else if (type == NORM_INFINITY) {
    for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
      for (jb = 0; jb < bs; jb++) {
        for (ib = 0; ib < bs; ib++) {
          /* NOTE(review): 'int' narrows PetscInt arithmetic here; confirm adequate for very large N */
          int col   = A->cmap->rstart + a_aij->j[i] * bs + jb;
          work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]);
          a_val++;
        }
      }
    }
    for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
      for (jb = 0; jb < bs; jb++) {
        for (ib = 0; ib < bs; ib++) {
          int col   = garray[b_aij->j[i]] * bs + jb;
          work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]);
          b_val++;
        }
      }
    }
  } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
    for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
      for (jb = 0; jb < bs; jb++) {
        for (ib = 0; ib < bs; ib++) {
          work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val);
          a_val++;
        }
      }
    }
    for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
      for (jb = 0; jb < bs; jb++) {
        for (ib = 0; ib < bs; ib++) {
          work[garray[b_aij->j[i]] * bs + jb] += PetscRealPart(*b_val);
          b_val++;
        }
      }
    }
  } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
      for (jb = 0; jb < bs; jb++) {
        for (ib = 0; ib < bs; ib++) {
          work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val);
          a_val++;
        }
      }
    }
    for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
      for (jb = 0; jb < bs; jb++) {
        for (ib = 0; ib < bs; ib++) {
          work[garray[b_aij->j[i]] * bs + jb] += PetscImaginaryPart(*b_val);
          b_val++;
        }
      }
    }
  } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
  if (type == NORM_INFINITY) {
    PetscCall(MPIU_Allreduce(work, reductions, N, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
  } else {
    PetscCall(MPIU_Allreduce(work, reductions, N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscCall(PetscFree(work));
  if (type == NORM_2) {
    for (i = 0; i < N; i++) reductions[i] = PetscSqrtReal(reductions[i]);
  } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
    for (i = 0; i < N; i++) reductions[i] /= m;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Inverts the bs x bs diagonal blocks (delegated to the sequential diagonal part) and
   propagates any zero-pivot factorization error information up to the parallel matrix */
static PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A, const PetscScalar **values)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatInvertBlockDiagonal(a->A, values));
  A->factorerrortype = a->A->factorerrortype;
  A->factorerror_zeropivot_value = a->A->factorerror_zeropivot_value;
  A->factorerror_zeropivot_row   = a->A->factorerror_zeropivot_row;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Adds a to the diagonal: Y = Y + a*I.  Ensures the diagonal block is preallocated
   (one block per row) first so MatShift_Basic() can insert the diagonal entries. */
static PetscErrorCode MatShift_MPIBAIJ(Mat Y, PetscScalar a)
{
  Mat_MPIBAIJ *maij = (Mat_MPIBAIJ *)Y->data;
  Mat_SeqBAIJ *aij  = (Mat_SeqBAIJ *)maij->A->data;

  PetscFunctionBegin;
  if (!Y->preallocated) {
    PetscCall(MatMPIBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL, 0, NULL));
  } else if (!aij->nz) {
    PetscInt nonew = aij->nonew;
    PetscCall(MatSeqBAIJSetPreallocation(maij->A, Y->rmap->bs, 1, NULL));
    aij->nonew = nonew; /* re-preallocation resets nonew; restore the user's setting */
  }
  PetscCall(MatShift_Basic(Y, a));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Reports whether a diagonal entry is structurally missing; *d is shifted from the local
   block numbering of the diagonal block to global block numbering */
static PetscErrorCode MatMissingDiagonal_MPIBAIJ(Mat A, PetscBool *missing, PetscInt *d)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
  PetscCall(MatMissingDiagonal(a->A, missing, d));
  if (d) {
    PetscInt rstart;
    PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
    *d += rstart / A->rmap->bs;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Returns (borrows, no reference taken) the sequential diagonal block */
static PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A, Mat *a)
{
  PetscFunctionBegin;
  *a = ((Mat_MPIBAIJ *)A->data)->A;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Drops stored zeros from both blocks */
static PetscErrorCode MatEliminateZeros_MPIBAIJ(Mat A, PetscBool keep)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatEliminateZeros_SeqBAIJ(a->A, keep));        // possibly keep zero diagonal coefficients
  PetscCall(MatEliminateZeros_SeqBAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Operation table for MATMPIBAIJ; NULL slots fall back to generic Mat behavior.
   The numeric comments are the MatOps slot indices. */
static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ,
                                       MatGetRow_MPIBAIJ,
                                       MatRestoreRow_MPIBAIJ,
                                       MatMult_MPIBAIJ,
                                       /* 4*/ MatMultAdd_MPIBAIJ,
                                       MatMultTranspose_MPIBAIJ,
                                       MatMultTransposeAdd_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*10*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_MPIBAIJ,
                                       MatTranspose_MPIBAIJ,
                                       /*15*/ MatGetInfo_MPIBAIJ,
                                       MatEqual_MPIBAIJ,
                                       MatGetDiagonal_MPIBAIJ,
                                       MatDiagonalScale_MPIBAIJ,
                                       MatNorm_MPIBAIJ,
                                       /*20*/ MatAssemblyBegin_MPIBAIJ,
                                       MatAssemblyEnd_MPIBAIJ,
                                       MatSetOption_MPIBAIJ,
                                       MatZeroEntries_MPIBAIJ,
                                       /*24*/ MatZeroRows_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*29*/ MatSetUp_MPI_Hash,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIBAIJ,
                                       NULL,
                                       /*34*/ MatDuplicate_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*39*/ MatAXPY_MPIBAIJ,
                                       MatCreateSubMatrices_MPIBAIJ,
                                       MatIncreaseOverlap_MPIBAIJ,
                                       MatGetValues_MPIBAIJ,
                                       MatCopy_MPIBAIJ,
                                       /*44*/ NULL,
                                       MatScale_MPIBAIJ,
                                       MatShift_MPIBAIJ,
                                       NULL,
                                       MatZeroRowsColumns_MPIBAIJ,
                                       /*49*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*54*/ MatFDColoringCreate_MPIXAIJ,
                                       NULL,
                                       MatSetUnfactored_MPIBAIJ,
                                       MatPermute_MPIBAIJ,
                                       MatSetValuesBlocked_MPIBAIJ,
                                       /*59*/ MatCreateSubMatrix_MPIBAIJ,
                                       MatDestroy_MPIBAIJ,
                                       MatView_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       /*64*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*69*/ MatGetRowMaxAbs_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*74*/ NULL,
                                       MatFDColoringApply_BAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*79*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatLoad_MPIBAIJ,
                                       /*84*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*89*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*94*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*99*/ NULL,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIBAIJ,
                                       NULL,
                                       /*104*/ NULL,
                                       MatRealPart_MPIBAIJ,
                                       MatImaginaryPart_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       /*109*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatMissingDiagonal_MPIBAIJ,
                                       /*114*/ MatGetSeqNonzeroStructure_MPIBAIJ,
                                       NULL,
                                       MatGetGhosts_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       /*119*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatGetMultiProcBlock_MPIBAIJ,
                                       /*124*/ NULL,
                                       MatGetColumnReductions_MPIBAIJ,
                                       MatInvertBlockDiagonal_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       /*129*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*139*/ MatSetBlockSizes_Default,
                                       NULL,
                                       NULL,
                                       MatFDColoringSetUp_MPIXAIJ,
                                       NULL,
                                       /*144*/ MatCreateMPIMatConcatenateSeqMat_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*150*/ NULL,
                                       MatEliminateZeros_MPIBAIJ,
                                       MatGetRowSumAbs_MPIBAIJ};

PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *);
PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);

/*
  MatMPIBAIJSetPreallocationCSR_MPIBAIJ - Preallocates from a block CSR description (ii, jj) and
  optionally fills in the values V, then assembles the matrix.

  Column indices are split into diagonal/off-diagonal counts per block row for preallocation;
  insertion is row-at-a-time when the block value layout matches, otherwise block-at-a-time.
*/
static PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[])
{
  PetscInt        m, rstart, cstart, cend;
  PetscInt        i, j, dlen, olen, nz, nz_max = 0, *d_nnz = NULL, *o_nnz = NULL;
  const PetscInt *JJ = NULL;
  PetscScalar    *values = NULL;
  PetscBool       roworiented = ((Mat_MPIBAIJ *)B->data)->roworiented;
  PetscBool       nooffprocentries;

  PetscFunctionBegin;
  PetscCall(PetscLayoutSetBlockSize(B->rmap, bs));
  PetscCall(PetscLayoutSetBlockSize(B->cmap, bs));
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); /* layout may have adjusted the block size */
  m      = B->rmap->n / bs;
  rstart = B->rmap->rstart / bs;
  cstart = B->cmap->rstart / bs;
  cend   = B->cmap->rend / bs;

  PetscCheck(!ii[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]);
PetscCall(PetscMalloc2(m, &d_nnz, m, &o_nnz)); 2612 for (i = 0; i < m; i++) { 2613 nz = ii[i + 1] - ii[i]; 2614 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz); 2615 nz_max = PetscMax(nz_max, nz); 2616 dlen = 0; 2617 olen = 0; 2618 JJ = jj + ii[i]; 2619 for (j = 0; j < nz; j++) { 2620 if (*JJ < cstart || *JJ >= cend) olen++; 2621 else dlen++; 2622 JJ++; 2623 } 2624 d_nnz[i] = dlen; 2625 o_nnz[i] = olen; 2626 } 2627 PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, d_nnz, 0, o_nnz)); 2628 PetscCall(PetscFree2(d_nnz, o_nnz)); 2629 2630 values = (PetscScalar *)V; 2631 if (!values) PetscCall(PetscCalloc1(bs * bs * nz_max, &values)); 2632 for (i = 0; i < m; i++) { 2633 PetscInt row = i + rstart; 2634 PetscInt ncols = ii[i + 1] - ii[i]; 2635 const PetscInt *icols = jj + ii[i]; 2636 if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */ 2637 const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0); 2638 PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, ncols, icols, svals, INSERT_VALUES)); 2639 } else { /* block ordering does not match so we can only insert one block at a time. */ 2640 PetscInt j; 2641 for (j = 0; j < ncols; j++) { 2642 const PetscScalar *svals = values + (V ? 
(bs * bs * (ii[i] + j)) : 0); 2643 PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, 1, &icols[j], svals, INSERT_VALUES)); 2644 } 2645 } 2646 } 2647 2648 if (!V) PetscCall(PetscFree(values)); 2649 nooffprocentries = B->nooffprocentries; 2650 B->nooffprocentries = PETSC_TRUE; 2651 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 2652 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 2653 B->nooffprocentries = nooffprocentries; 2654 2655 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 2656 PetscFunctionReturn(PETSC_SUCCESS); 2657 } 2658 2659 /*@C 2660 MatMPIBAIJSetPreallocationCSR - Creates a sparse parallel matrix in `MATBAIJ` format using the given nonzero structure and (optional) numerical values 2661 2662 Collective 2663 2664 Input Parameters: 2665 + B - the matrix 2666 . bs - the block size 2667 . i - the indices into `j` for the start of each local row (starts with zero) 2668 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2669 - v - optional values in the matrix, use `NULL` if not provided 2670 2671 Level: advanced 2672 2673 Notes: 2674 The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc; 2675 thus you CANNOT change the matrix entries by changing the values of `v` after you have 2676 called this routine. 2677 2678 The order of the entries in values is specified by the `MatOption` `MAT_ROW_ORIENTED`. For example, C programs 2679 may want to use the default `MAT_ROW_ORIENTED` with value `PETSC_TRUE` and use an array v[nnz][bs][bs] where the second index is 2680 over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 2681 `MAT_ROW_ORIENTED` with value `PETSC_FALSE` and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 2682 block column and the second index is over columns within a block. 
  Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well

.seealso: `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MATMPIAIJ`, `MatCreateMPIBAIJWithArrays()`, `MATMPIBAIJ`
@*/
PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidType(B, 1);
  PetscValidLogicalCollectiveInt(B, bs, 2);
  /* dispatch to the type-specific implementation composed on the matrix (no-op for other types) */
  PetscTryMethod(B, "MatMPIBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Implementation of MatMPIBAIJSetPreallocation(): validates the per-row counts and
   (re)creates the sequential diagonal (b->A) and off-diagonal (b->B) blocks with the
   requested block preallocation. */
PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz)
{
  Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data;
  PetscInt     i;
  PetscMPIInt  size;

  PetscFunctionBegin;
  if (B->hash_active) { /* leave hash-table insertion mode: restore the cached ops table */
    B->ops[0]      = b->cops;
    B->hash_active = PETSC_FALSE;
  }
  if (!B->preallocated) PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), bs, &B->bstash));
  PetscCall(MatSetBlockSize(B, PetscAbs(bs)));
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  /* the layout may adjust the block size; use the settled value from here on */
  PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));

  if (d_nnz) {
    for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(d_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "d_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, d_nnz[i]);
  }
  if (o_nnz) {
    for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(o_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "o_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, o_nnz[i]);
  }

  /* cached sizes, all counted in blocks */
  b->bs2 = bs * bs;
  b->mbs = B->rmap->n / bs;
  b->nbs = B->cmap->n / bs;
  b->Mbs = B->rmap->N / bs;
  b->Nbs = B->cmap->N / bs;

  for (i = 0; i <= b->size; i++) b->rangebs[i] = B->rmap->range[i] / bs;
  b->rstartbs = B->rmap->rstart / bs;
  b->rendbs   = B->rmap->rend / bs;
  b->cstartbs = B->cmap->rstart / bs;
  b->cendbs   = B->cmap->rend / bs;

  /* repreallocation invalidates any existing column map, ghost list, and scatter */
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&b->colmap));
#else
  PetscCall(PetscFree(b->colmap));
#endif
  PetscCall(PetscFree(b->garray));
  PetscCall(VecDestroy(&b->lvec));
  PetscCall(VecScatterDestroy(&b->Mvctx));

  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));

  MatSeqXAIJGetOptions_Private(b->B);
  PetscCall(MatDestroy(&b->B));
  PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
  /* on a single process the off-diagonal part is empty */
  PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
  PetscCall(MatSetType(b->B, MATSEQBAIJ));
  MatSeqXAIJRestoreOptions_Private(b->B);

  MatSeqXAIJGetOptions_Private(b->A);
  PetscCall(MatDestroy(&b->A));
  PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
  PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
  PetscCall(MatSetType(b->A, MATSEQBAIJ));
  MatSeqXAIJRestoreOptions_Private(b->A);

  PetscCall(MatSeqBAIJSetPreallocation(b->A, bs, d_nz, d_nnz));
  PetscCall(MatSeqBAIJSetPreallocation(b->B, bs, o_nz, o_nnz));
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat, Vec);
extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat, PetscReal);

/* Convert to MATMPIADJ: builds the block adjacency graph of the matrix,
   dropping diagonal entries. */
PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype, MatReuse reuse, Mat *adj)
{
  Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data;
  Mat_SeqBAIJ    *d = (Mat_SeqBAIJ *)b->A->data, *o = (Mat_SeqBAIJ *)b->B->data;
  PetscInt        M = B->rmap->n / B->rmap->bs, i, *ii, *jj, cnt, j, k, rstart = B->rmap->rstart / B->rmap->bs;
  const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;

  PetscFunctionBegin;
  /* count the entries of each block row of the merged diag+offdiag graph */
  PetscCall(PetscMalloc1(M + 1, &ii));
  ii[0] = 0;
  for (i = 0; i < M; i++) {
    PetscCheck((id[i + 1] - id[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, id[i], id[i + 1]);
    PetscCheck((io[i + 1] - io[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, io[i], io[i + 1]);
    ii[i + 1] = ii[i] + id[i + 1] - id[i] + io[i + 1] - io[i];
    /* remove one from count if matrix has diagonal */
    for (j = id[i]; j < id[i + 1]; j++) {
      if (jd[j] == i) {
        ii[i + 1]--;
        break;
      }
    }
  }
  /* fill columns in ascending global order: off-diagonal entries left of the local
     block range, then the diagonal part (minus the diagonal itself), then the rest */
  PetscCall(PetscMalloc1(ii[M], &jj));
  cnt = 0;
  for (i = 0; i < M; i++) {
    for (j = io[i]; j < io[i + 1]; j++) {
      if (garray[jo[j]] > rstart) break;
      jj[cnt++] = garray[jo[j]];
    }
    for (k = id[i]; k < id[i + 1]; k++) {
      if (jd[k] != i) jj[cnt++] = rstart + jd[k];
    }
    for (; j < io[i + 1]; j++) jj[cnt++] = garray[jo[j]];
  }
  /* ii and jj are handed off to the adjacency matrix (no copy argument given) */
  PetscCall(MatCreateMPIAdj(PetscObjectComm((PetscObject)B), M, B->cmap->N / B->rmap->bs, ii, jj, NULL, adj));
  PetscFunctionReturn(PETSC_SUCCESS);
}

#include <../src/mat/impls/aij/mpi/mpiaij.h>

PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType, MatReuse, Mat *);

/* Convert to MATMPIAIJ by converting the sequential diagonal and off-diagonal blocks
   and renumbering the off-diagonal columns back to global indices via garray. */
PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
  Mat_MPIAIJ  *b;
  Mat          B;

  PetscFunctionBegin;
  PetscCheck(A->assembled, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Matrix must be assembled");

  if (reuse == MAT_REUSE_MATRIX) {
    B = *newmat;
  } else {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetType(B, MATMPIAIJ));
    PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
    PetscCall(MatSetBlockSizes(B, A->rmap->bs, A->cmap->bs));
    PetscCall(MatSeqAIJSetPreallocation(B, 0, NULL));
    PetscCall(MatMPIAIJSetPreallocation(B, 0, NULL, 0, NULL));
  }
  b = (Mat_MPIAIJ *)B->data;

  if (reuse == MAT_REUSE_MATRIX) {
    PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_REUSE_MATRIX, &b->A));
    PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_REUSE_MATRIX, &b->B));
  } else {
    PetscInt   *garray = a->garray;
    Mat_SeqAIJ *bB;
    PetscInt    bs, nnz;
    PetscCall(MatDestroy(&b->A));
    PetscCall(MatDestroy(&b->B));
    /* just clear out the data structure */
    PetscCall(MatDisAssemble_MPIAIJ(B));
    PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A));
    PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B));

    /* Global numbering for b->B columns: the converted off-diagonal part has
       compacted block columns; expand each to garray[block]*bs + offset */
    bB  = (Mat_SeqAIJ *)b->B->data;
    bs  = A->rmap->bs;
    nnz = bB->i[A->rmap->n];
    for (PetscInt k = 0; k < nnz; k++) {
      PetscInt bj = bB->j[k] / bs;
      PetscInt br = bB->j[k] % bs;
      bB->j[k]    = garray[bj] * bs + br;
    }
  }
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));

  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &B));
  } else {
    *newmat = B;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
   MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.

   Options Database Keys:
+ -mat_type mpibaij - sets the matrix type to `MATMPIBAIJ` during a call to `MatSetFromOptions()`
.
-mat_block_size <bs> - set the blocksize used to store the matrix
. -mat_baij_mult_version version - indicate the version of the matrix-vector product to use (0 often indicates using BLAS)
- -mat_use_hash_table <fact> - set hash table factor

   Level: beginner

   Note:
   `MatSetOption(A, MAT_STRUCTURE_ONLY, PETSC_TRUE)` may be called for this matrix type. In this case no
   space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored

.seealso: `Mat`, `MATBAIJ`, `MATSEQBAIJ`, `MatCreateBAIJ`
M*/

PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat, MatType, MatReuse, Mat *);

/* Type constructor for MATMPIBAIJ: installs the ops table, initializes the
   implementation data structure, and composes the type-specific methods. */
PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B)
{
  Mat_MPIBAIJ *b;
  PetscBool    flg = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PetscNew(&b));
  B->data      = (void *)b;
  B->ops[0]    = MatOps_Values;
  B->assembled = PETSC_FALSE;

  B->insertmode = NOT_SET_VALUES;
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &b->size));

  /* build local table of row and column ownerships */
  PetscCall(PetscMalloc1(b->size + 1, &b->rangebs));

  /* build cache for off array entries formed */
  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL;
  b->garray      = NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used in block assembly */
  b->barray = NULL;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = NULL;
  b->rowvalues    = NULL;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = NULL;
  b->hd           = NULL;
  b->ht_size      = 0;
  b->ht_flag      = PETSC_FALSE;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
  b->ijonly = PETSC_FALSE;

  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiadj_C", MatConvert_MPIBAIJ_MPIAdj));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiaij_C", MatConvert_MPIBAIJ_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpisbaij_C", MatConvert_MPIBAIJ_MPISBAIJ));
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_hypre_C", MatConvert_AIJ_HYPRE));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocation_C", MatMPIBAIJSetPreallocation_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocationCSR_C", MatMPIBAIJSetPreallocationCSR_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetHashTableFactor_C", MatSetHashTableFactor_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_is_C", MatConvert_XAIJ_IS));
  PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIBAIJ));

  PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Options for loading MPIBAIJ matrix 1", "Mat");
  PetscCall(PetscOptionsName("-mat_use_hash_table", "Use hash table to save time in constructing matrix", "MatSetOption", &flg));
  if (flg) {
    PetscReal fact = 1.39;
    PetscCall(MatSetOption(B, MAT_USE_HASH_TABLE, PETSC_TRUE));
    PetscCall(PetscOptionsReal("-mat_use_hash_table", "Use hash table factor", "MatMPIBAIJSetHashTableFactor", fact, &fact, NULL));
    if (fact <= 1.0) fact = 1.39; /* fall back to the default for nonsensical user factors */
    PetscCall(MatMPIBAIJSetHashTableFactor(B, fact));
    PetscCall(PetscInfo(B, "Hash table Factor used %5.2g\n", (double)fact));
  }
  PetscOptionsEnd();
  PetscFunctionReturn(PETSC_SUCCESS);
}

// PetscClangLinter pragma disable: -fdoc-section-header-unknown
/*MC
   MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.

   This matrix type is identical to `MATSEQBAIJ` when constructed with a single process communicator,
   and `MATMPIBAIJ` otherwise.

   Options Database Keys:
. -mat_type baij - sets the matrix type to `MATBAIJ` during a call to `MatSetFromOptions()`

   Level: beginner

.seealso: `Mat`, `MatCreateBAIJ()`, `MATSEQBAIJ`, `MATMPIBAIJ`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()`
M*/

/*@C
   MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in `MATMPIBAIJ` format
   (block compressed row).

   Collective

   Input Parameters:
+ B - the matrix
. bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
       blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
. d_nz - number of block nonzeros per block row in diagonal portion of local
         submatrix (same for all local rows)
. d_nnz - array containing the number of block nonzeros in the various block rows
          of the in diagonal portion of the local (possibly different for each block
          row) or `NULL`. If you plan to factor the matrix you must leave room for the diagonal entry and
          set it even if it is zero.
. o_nz - number of block nonzeros per block row in the off-diagonal portion of local
         submatrix (same for all local rows).
- o_nnz - array containing the number of nonzeros in the various block rows of the
          off-diagonal portion of the local submatrix (possibly different for
          each block row) or `NULL`.

   If the *_nnz parameter is given then the *_nz parameter is ignored

   Options Database Keys:
+ -mat_block_size - size of the blocks to use
- -mat_use_hash_table <fact> - set hash table factor

   Level: intermediate

   Notes:
   For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either `d_nz` or `d_nnz` (not both). Set
   `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic
   memory allocation. Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          --------------------------
   row 3  |o o o d d d o o o o o o
   row 4  |o o o d d d o o o o o o
   row 5  |o o o d d d o o o o o o
          --------------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix. Note that the d and the o submatrices are
   stored simply in the `MATSEQBAIJ` format for compressed row storage.

   Now `d_nz` should indicate the number of block nonzeros per row in the d matrix,
   and `o_nz` should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects `d_nz` >> `o_nz`.

   You can call `MatGetInfo()` to get information on how effective the preallocation was;
   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
   You can also run with the option `-info` and look for messages with the string
   malloc in them to see if additional memory allocation was needed.

.seealso: `Mat`, `MATMPIBAIJ`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatMPIBAIJSetPreallocationCSR()`, `PetscSplitOwnership()`
@*/
PetscErrorCode MatMPIBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidType(B, 1);
  PetscValidLogicalCollectiveInt(B, bs, 2);
  /* dispatch to the type-specific implementation composed on the matrix (no-op for other types) */
  PetscTryMethod(B, "MatMPIBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, bs, d_nz, d_nnz, o_nz, o_nnz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

// PetscClangLinter pragma disable: -fdoc-section-header-unknown
/*@C
   MatCreateBAIJ - Creates a sparse parallel matrix in `MATBAIJ` format
   (block compressed row).

   Collective

   Input Parameters:
+ comm - MPI communicator
. bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
       blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
. m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
      This value should be the same as the local size used in creating the
      y vector for the matrix-vector product y = Ax.
. n - number of local columns (or `PETSC_DECIDE` to have calculated if N is given)
      This value should be the same as the local size used in creating the
      x vector for the matrix-vector product y = Ax.
. M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
. N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
. d_nz - number of nonzero blocks per block row in diagonal portion of local
         submatrix (same for all local rows)
.
d_nnz - array containing the number of nonzero blocks in the various block rows
        of the in diagonal portion of the local (possibly different for each block
        row) or NULL. If you plan to factor the matrix you must leave room for the diagonal entry
        and set it even if it is zero.
. o_nz - number of nonzero blocks per block row in the off-diagonal portion of local
         submatrix (same for all local rows).
- o_nnz - array containing the number of nonzero blocks in the various block rows of the
          off-diagonal portion of the local submatrix (possibly different for
          each block row) or NULL.

   Output Parameter:
. A - the matrix

   Options Database Keys:
+ -mat_block_size - size of the blocks to use
- -mat_use_hash_table <fact> - set hash table factor

   Level: intermediate

   Notes:
   It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, `MatSeqBAIJSetPreallocation()`]

   For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   If the *_nnz parameter is given then the *_nz parameter is ignored

   A nonzero block is any block that has 1 or more nonzeros in it

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   If `m` and `n` are not `PETSC_DECIDE`, then the values determine the `PetscLayout` of the matrix and the ranges returned by
   `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, and `MatGetOwnershipRangesColumn()`.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both). Set
   `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic
   memory allocation. Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          --------------------------
   row 3  |o o o d d d o o o o o o
   row 4  |o o o d d d o o o o o o
   row 5  |o o o d d d o o o o o o
          --------------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix. Note that the d and the o submatrices are
   stored simply in the `MATSEQBAIJ` format for compressed row storage.

   Now `d_nz` should indicate the number of block nonzeros per row in the d matrix,
   and `o_nz` should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects `d_nz` >> `o_nz`.

.seealso: `Mat`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()`,
          `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, `MatGetOwnershipRangesColumn()`, `PetscLayout`
@*/
PetscErrorCode MatCreateBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
{
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCall(MatCreate(comm, A));
  PetscCall(MatSetSizes(*A, m, n, M, N));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  /* on one process a sequential matrix is created instead (same behavior, less overhead) */
  if (size > 1) {
    PetscCall(MatSetType(*A, MATMPIBAIJ));
    PetscCall(MatMPIBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz));
  } else {
    PetscCall(MatSetType(*A, MATSEQBAIJ));
    PetscCall(MatSeqBAIJSetPreallocation(*A, bs, d_nz, d_nnz));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Duplicate an MPIBAIJ matrix: copy layout and cached block information, then
   duplicate the diagonal/off-diagonal parts per cpvalues. */
static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
{
  Mat          mat;
  Mat_MPIBAIJ *a, *oldmat = (Mat_MPIBAIJ *)matin->data;
  PetscInt     len = 0;

  PetscFunctionBegin;
  *newmat = NULL;
  PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
  PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
  PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));

  PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
  PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));
  if (matin->hash_active) { /* hash-mode matrices have no data to copy; just set up */
    PetscCall(MatSetUp(mat));
  } else {
    mat->factortype   = matin->factortype;
    mat->preallocated = PETSC_TRUE;
    mat->assembled    = PETSC_TRUE;
    mat->insertmode   = NOT_SET_VALUES;

    a             = (Mat_MPIBAIJ *)mat->data;
    mat->rmap->bs = matin->rmap->bs;
    a->bs2        = oldmat->bs2;
    a->mbs        = oldmat->mbs;
    a->nbs =
oldmat->nbs; 3217 a->Mbs = oldmat->Mbs; 3218 a->Nbs = oldmat->Nbs; 3219 3220 a->size = oldmat->size; 3221 a->rank = oldmat->rank; 3222 a->donotstash = oldmat->donotstash; 3223 a->roworiented = oldmat->roworiented; 3224 a->rowindices = NULL; 3225 a->rowvalues = NULL; 3226 a->getrowactive = PETSC_FALSE; 3227 a->barray = NULL; 3228 a->rstartbs = oldmat->rstartbs; 3229 a->rendbs = oldmat->rendbs; 3230 a->cstartbs = oldmat->cstartbs; 3231 a->cendbs = oldmat->cendbs; 3232 3233 /* hash table stuff */ 3234 a->ht = NULL; 3235 a->hd = NULL; 3236 a->ht_size = 0; 3237 a->ht_flag = oldmat->ht_flag; 3238 a->ht_fact = oldmat->ht_fact; 3239 a->ht_total_ct = 0; 3240 a->ht_insert_ct = 0; 3241 3242 PetscCall(PetscArraycpy(a->rangebs, oldmat->rangebs, a->size + 1)); 3243 if (oldmat->colmap) { 3244 #if defined(PETSC_USE_CTABLE) 3245 PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap)); 3246 #else 3247 PetscCall(PetscMalloc1(a->Nbs, &a->colmap)); 3248 PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, a->Nbs)); 3249 #endif 3250 } else a->colmap = NULL; 3251 3252 if (oldmat->garray && (len = ((Mat_SeqBAIJ *)oldmat->B->data)->nbs)) { 3253 PetscCall(PetscMalloc1(len, &a->garray)); 3254 PetscCall(PetscArraycpy(a->garray, oldmat->garray, len)); 3255 } else a->garray = NULL; 3256 3257 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin), matin->rmap->bs, &mat->bstash)); 3258 PetscCall(VecDuplicate(oldmat->lvec, &a->lvec)); 3259 PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx)); 3260 3261 PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A)); 3262 PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B)); 3263 } 3264 PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist)); 3265 *newmat = mat; 3266 PetscFunctionReturn(PETSC_SUCCESS); 3267 } 3268 3269 /* Used for both MPIBAIJ and MPISBAIJ matrices */ 3270 PetscErrorCode MatLoad_MPIBAIJ_Binary(Mat mat, PetscViewer viewer) 3271 { 3272 PetscInt header[4], M, N, nz, bs, m, n, mbs, nbs, rows, 
cols, sum, i, j, k; 3273 PetscInt *rowidxs, *colidxs, rs, cs, ce; 3274 PetscScalar *matvals; 3275 3276 PetscFunctionBegin; 3277 PetscCall(PetscViewerSetUp(viewer)); 3278 3279 /* read in matrix header */ 3280 PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT)); 3281 PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file"); 3282 M = header[1]; 3283 N = header[2]; 3284 nz = header[3]; 3285 PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M); 3286 PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N); 3287 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIBAIJ"); 3288 3289 /* set block sizes from the viewer's .info file */ 3290 PetscCall(MatLoad_Binary_BlockSizes(mat, viewer)); 3291 /* set local sizes if not set already */ 3292 if (mat->rmap->n < 0 && M == N) mat->rmap->n = mat->cmap->n; 3293 if (mat->cmap->n < 0 && M == N) mat->cmap->n = mat->rmap->n; 3294 /* set global sizes if not set already */ 3295 if (mat->rmap->N < 0) mat->rmap->N = M; 3296 if (mat->cmap->N < 0) mat->cmap->N = N; 3297 PetscCall(PetscLayoutSetUp(mat->rmap)); 3298 PetscCall(PetscLayoutSetUp(mat->cmap)); 3299 3300 /* check if the matrix sizes are correct */ 3301 PetscCall(MatGetSize(mat, &rows, &cols)); 3302 PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols); 3303 PetscCall(MatGetBlockSize(mat, &bs)); 3304 PetscCall(MatGetLocalSize(mat, &m, &n)); 3305 PetscCall(PetscLayoutGetRange(mat->rmap, &rs, NULL)); 3306 PetscCall(PetscLayoutGetRange(mat->cmap, &cs, &ce)); 
3307 mbs = m / bs; 3308 nbs = n / bs; 3309 3310 /* read in row lengths and build row indices */ 3311 PetscCall(PetscMalloc1(m + 1, &rowidxs)); 3312 PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT)); 3313 rowidxs[0] = 0; 3314 for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i]; 3315 PetscCall(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer))); 3316 PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum); 3317 3318 /* read in column indices and matrix values */ 3319 PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals)); 3320 PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 3321 PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 3322 3323 { /* preallocate matrix storage */ 3324 PetscBT bt; /* helper bit set to count diagonal nonzeros */ 3325 PetscHSetI ht; /* helper hash set to count off-diagonal nonzeros */ 3326 PetscBool sbaij, done; 3327 PetscInt *d_nnz, *o_nnz; 3328 3329 PetscCall(PetscBTCreate(nbs, &bt)); 3330 PetscCall(PetscHSetICreate(&ht)); 3331 PetscCall(PetscCalloc2(mbs, &d_nnz, mbs, &o_nnz)); 3332 PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPISBAIJ, &sbaij)); 3333 for (i = 0; i < mbs; i++) { 3334 PetscCall(PetscBTMemzero(nbs, bt)); 3335 PetscCall(PetscHSetIClear(ht)); 3336 for (k = 0; k < bs; k++) { 3337 PetscInt row = bs * i + k; 3338 for (j = rowidxs[row]; j < rowidxs[row + 1]; j++) { 3339 PetscInt col = colidxs[j]; 3340 if (!sbaij || col >= row) { 3341 if (col >= cs && col < ce) { 3342 if (!PetscBTLookupSet(bt, (col - cs) / bs)) d_nnz[i]++; 3343 } else { 3344 PetscCall(PetscHSetIQueryAdd(ht, col / bs, &done)); 3345 if (done) o_nnz[i]++; 3346 } 3347 } 3348 } 3349 } 3350 } 3351 
    PetscCall(PetscBTDestroy(&bt));
    PetscCall(PetscHSetIDestroy(&ht));
    /* NOTE(review): both preallocation routines are invoked; presumably each is a no-op
       unless the matrix has the matching type (PetscTryMethod dispatch) — confirm */
    PetscCall(MatMPIBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz));
    PetscCall(MatMPISBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz));
    PetscCall(PetscFree2(d_nnz, o_nnz));
  }

  /* store matrix values */
  for (i = 0; i < m; i++) {
    PetscInt row = rs + i, s = rowidxs[i], e = rowidxs[i + 1]; /* global row; CSR span [s,e) */
    PetscUseTypeMethod(mat, setvalues, 1, &row, e - s, colidxs + s, matvals + s, INSERT_VALUES);
  }

  PetscCall(PetscFree(rowidxs));
  PetscCall(PetscFree2(colidxs, matvals));
  PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatLoad implementation for MATMPIBAIJ: only binary viewers are supported; the
   actual reading is delegated to MatLoad_MPIBAIJ_Binary() above */
PetscErrorCode MatLoad_MPIBAIJ(Mat mat, PetscViewer viewer)
{
  PetscBool isbinary;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name);
  PetscCall(MatLoad_MPIBAIJ_Binary(mat, viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the matrices hash table

  Input Parameters:
+ mat  - the matrix
- fact - factor

  Options Database Key:
.
-mat_use_hash_table <fact> - provide the factor 3391 3392 Level: advanced 3393 3394 .seealso: `Mat`, `MATMPIBAIJ`, `MatSetOption()` 3395 @*/ 3396 PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat, PetscReal fact) 3397 { 3398 PetscFunctionBegin; 3399 PetscTryMethod(mat, "MatSetHashTableFactor_C", (Mat, PetscReal), (mat, fact)); 3400 PetscFunctionReturn(PETSC_SUCCESS); 3401 } 3402 3403 PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat, PetscReal fact) 3404 { 3405 Mat_MPIBAIJ *baij; 3406 3407 PetscFunctionBegin; 3408 baij = (Mat_MPIBAIJ *)mat->data; 3409 baij->ht_fact = fact; 3410 PetscFunctionReturn(PETSC_SUCCESS); 3411 } 3412 3413 PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[]) 3414 { 3415 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 3416 PetscBool flg; 3417 3418 PetscFunctionBegin; 3419 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIBAIJ, &flg)); 3420 PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIBAIJ matrix as input"); 3421 if (Ad) *Ad = a->A; 3422 if (Ao) *Ao = a->B; 3423 if (colmap) *colmap = a->garray; 3424 PetscFunctionReturn(PETSC_SUCCESS); 3425 } 3426 3427 /* 3428 Special version for direct calls from Fortran (to eliminate two function call overheads 3429 */ 3430 #if defined(PETSC_HAVE_FORTRAN_CAPS) 3431 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED 3432 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 3433 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked 3434 #endif 3435 3436 // PetscClangLinter pragma disable: -fdoc-synopsis-matching-symbol-name 3437 /*@C 3438 MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to `MatSetValuesBlocked()` 3439 3440 Collective 3441 3442 Input Parameters: 3443 + matin - the matrix 3444 . min - number of input rows 3445 . im - input rows 3446 . nin - number of input columns 3447 . in - input columns 3448 . 
v - numerical values input
- addvin - `INSERT_VALUES` or `ADD_VALUES`

  Level: advanced

  Developer Notes:
  This has a complete copy of `MatSetValuesBlocked_MPIBAIJ()` which is terrible code un-reuse.

.seealso: `Mat`, `MatSetValuesBlocked()`
@*/
PETSC_EXTERN PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin, PetscInt *min, const PetscInt im[], PetscInt *nin, const PetscInt in[], const MatScalar v[], InsertMode *addvin)
{
  /* convert input arguments to C version */
  Mat        mat  = *matin;
  PetscInt   m    = *min, n = *nin;
  InsertMode addv = *addvin;

  Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ *)mat->data;
  const MatScalar *value;
  MatScalar       *barray      = baij->barray; /* scratch buffer holding one packed bs x bs block */
  PetscBool        roworiented = baij->roworiented;
  PetscInt         i, j, ii, jj, row, col, rstart = baij->rstartbs;
  PetscInt         rend = baij->rendbs, cstart = baij->cstartbs, stepval;
  PetscInt         cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;

  PetscFunctionBegin;
  /* tasks normally handled by MatSetValuesBlocked() */
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
  else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values");
  PetscCheck(!mat->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix");
  if (mat->assembled) {
    mat->was_assembled = PETSC_TRUE;
    mat->assembled     = PETSC_FALSE;
  }
  PetscCall(PetscLogEventBegin(MAT_SetValues, mat, 0, 0, 0));

  /* lazily allocate the bs2-sized block scratch buffer */
  if (!barray) {
    PetscCall(PetscMalloc1(bs2, &barray));
    baij->barray = barray;
  }

  /* stepval: scalar stride skipped between rows (row-oriented) or columns
     (column-oriented) of one block inside the caller-supplied array v */
  if (roworiented) stepval = (n - 1) * bs;
  else stepval = (m - 1) * bs;

  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue; /* negative block rows are ignored */
    PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large, row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
    if (im[i] >= rstart && im[i] < rend) { /* block row owned by this process */
      row = im[i] - rstart;
      for (j = 0; j < n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar *)v + i * bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar *)v + j * bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i * (stepval + bs) * bs + j * bs;
          } else {
            value = v + j * (stepval + bs) * bs + i * bs;
          }
          /* gather the bs x bs block into contiguous scratch storage */
          for (ii = 0; ii < bs; ii++, value += stepval) {
            for (jj = 0; jj < bs; jj++) *barray++ = *value++;
          }
          barray -= bs2; /* rewind to the start of the packed block */
        }

        if (in[j] >= cstart && in[j] < cend) { /* diagonal portion -> baij->A */
          col = in[j] - cstart;
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
        } else if (in[j] < 0) {
          continue; /* negative block columns are ignored */
        } else { /* off-diagonal portion -> baij->B */
          PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large, col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
          if (mat->was_assembled) {
            /* translate the global block column to a local column of B via colmap */
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));

#if defined(PETSC_USE_DEBUG)
  #if defined(PETSC_USE_CTABLE)
            {
              PetscInt data;
              PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &data));
              PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
            }
  #else
            PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
  #endif
#endif
#if defined(PETSC_USE_CTABLE)
            /* colmap entries are stored shifted by one, so 0 can mean "absent" */
            PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col));
            col = (col - 1) / bs;
#else
            col = (baij->colmap[in[j]] - 1) / bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ *)baij->A->data)->nonew) {
              /* column not present and new nonzeros allowed: drop back to global
                 indexing by disassembling the parallel matrix */
              PetscCall(MatDisAssemble_MPIBAIJ(mat));
              col = in[j];
            }
          } else col = in[j];
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
        }
      }
    } else { /* off-process block row: stash for exchange during assembly */
      if (!baij->donotstash) {
        if (roworiented) {
          PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        } else {
          PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        }
      }
    }
  }

  /* task normally handled by MatSetValuesBlocked() */
  PetscCall(PetscLogEventEnd(MAT_SetValues, mat, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatCreateMPIBAIJWithArrays - creates a `MATMPIBAIJ` matrix using arrays that contain in standard block CSR format for the local rows.

  Collective

  Input Parameters:
+ comm - MPI communicator
. bs   - the block size, only a block size of 1 is supported
. m    - number of local rows (Cannot be `PETSC_DECIDE`)
. n    - This value should be the same as the local size used in creating the
         x vector for the matrix-vector product $ y = Ax $. (or `PETSC_DECIDE` to have
         calculated if `N` is given) For square matrices `n` is almost always `m`.
. M    - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
. N    - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
. i    - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that rowth block row of the matrix
. j    - column indices
- a    - matrix values

  Output Parameter:
. mat - the matrix

  Level: intermediate

  Notes:
  The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc;
  thus you CANNOT change the matrix entries by changing the values of a[] after you have
  called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
3593 3594 The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 3595 the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 3596 block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory 3597 with column-major ordering within blocks. 3598 3599 The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array. 3600 3601 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 3602 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()` 3603 @*/ 3604 PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat) 3605 { 3606 PetscFunctionBegin; 3607 PetscCheck(!i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 3608 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3609 PetscCall(MatCreate(comm, mat)); 3610 PetscCall(MatSetSizes(*mat, m, n, M, N)); 3611 PetscCall(MatSetType(*mat, MATMPIBAIJ)); 3612 PetscCall(MatSetBlockSize(*mat, bs)); 3613 PetscCall(MatSetUp(*mat)); 3614 PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_FALSE)); 3615 PetscCall(MatMPIBAIJSetPreallocationCSR(*mat, bs, i, j, a)); 3616 PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_TRUE)); 3617 PetscFunctionReturn(PETSC_SUCCESS); 3618 } 3619 3620 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 3621 { 3622 PetscInt m, N, i, rstart, nnz, Ii, bs, cbs; 3623 PetscInt *indx; 3624 PetscScalar *values; 3625 3626 PetscFunctionBegin; 3627 PetscCall(MatGetSize(inmat, &m, &N)); 3628 if (scall == 
MAT_INITIAL_MATRIX) { /* symbolic phase */ 3629 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)inmat->data; 3630 PetscInt *dnz, *onz, mbs, Nbs, nbs; 3631 PetscInt *bindx, rmax = a->rmax, j; 3632 PetscMPIInt rank, size; 3633 3634 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 3635 mbs = m / bs; 3636 Nbs = N / cbs; 3637 if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnershipBlock(comm, cbs, &n, &N)); 3638 nbs = n / cbs; 3639 3640 PetscCall(PetscMalloc1(rmax, &bindx)); 3641 MatPreallocateBegin(comm, mbs, nbs, dnz, onz); /* inline function, output __end and __rstart are used below */ 3642 3643 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 3644 PetscCallMPI(MPI_Comm_rank(comm, &size)); 3645 if (rank == size - 1) { 3646 /* Check sum(nbs) = Nbs */ 3647 PetscCheck(__end == Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT, __end, Nbs); 3648 } 3649 3650 rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */ 3651 for (i = 0; i < mbs; i++) { 3652 PetscCall(MatGetRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL)); /* non-blocked nnz and indx */ 3653 nnz = nnz / bs; 3654 for (j = 0; j < nnz; j++) bindx[j] = indx[j * bs] / bs; 3655 PetscCall(MatPreallocateSet(i + rstart, nnz, bindx, dnz, onz)); 3656 PetscCall(MatRestoreRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL)); 3657 } 3658 PetscCall(PetscFree(bindx)); 3659 3660 PetscCall(MatCreate(comm, outmat)); 3661 PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 3662 PetscCall(MatSetBlockSizes(*outmat, bs, cbs)); 3663 PetscCall(MatSetType(*outmat, MATBAIJ)); 3664 PetscCall(MatSeqBAIJSetPreallocation(*outmat, bs, 0, dnz)); 3665 PetscCall(MatMPIBAIJSetPreallocation(*outmat, bs, 0, dnz, 0, onz)); 3666 MatPreallocateEnd(dnz, onz); 3667 PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 3668 } 3669 3670 /* numeric phase */ 3671 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 3672 
PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL)); 3673 3674 for (i = 0; i < m; i++) { 3675 PetscCall(MatGetRow_SeqBAIJ(inmat, i, &nnz, &indx, &values)); 3676 Ii = i + rstart; 3677 PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES)); 3678 PetscCall(MatRestoreRow_SeqBAIJ(inmat, i, &nnz, &indx, &values)); 3679 } 3680 PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY)); 3681 PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY)); 3682 PetscFunctionReturn(PETSC_SUCCESS); 3683 } 3684