1 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/ 2 3 #include <petsc/private/hashseti.h> 4 #include <petscblaslapack.h> 5 #include <petscsf.h> 6 7 static PetscErrorCode MatDestroy_MPIBAIJ(Mat mat) 8 { 9 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 10 11 PetscFunctionBegin; 12 PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N)); 13 PetscCall(MatStashDestroy_Private(&mat->stash)); 14 PetscCall(MatStashDestroy_Private(&mat->bstash)); 15 PetscCall(MatDestroy(&baij->A)); 16 PetscCall(MatDestroy(&baij->B)); 17 #if defined(PETSC_USE_CTABLE) 18 PetscCall(PetscHMapIDestroy(&baij->colmap)); 19 #else 20 PetscCall(PetscFree(baij->colmap)); 21 #endif 22 PetscCall(PetscFree(baij->garray)); 23 PetscCall(VecDestroy(&baij->lvec)); 24 PetscCall(VecScatterDestroy(&baij->Mvctx)); 25 PetscCall(PetscFree2(baij->rowvalues, baij->rowindices)); 26 PetscCall(PetscFree(baij->barray)); 27 PetscCall(PetscFree2(baij->hd, baij->ht)); 28 PetscCall(PetscFree(baij->rangebs)); 29 PetscCall(PetscFree(mat->data)); 30 31 PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL)); 32 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL)); 33 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL)); 34 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocation_C", NULL)); 35 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocationCSR_C", NULL)); 36 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL)); 37 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetHashTableFactor_C", NULL)); 38 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpisbaij_C", NULL)); 39 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiadj_C", NULL)); 40 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiaij_C", NULL)); 41 #if defined(PETSC_HAVE_HYPRE) 42 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_hypre_C", NULL)); 43 #endif 44 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_is_C", NULL)); 45 PetscFunctionReturn(PETSC_SUCCESS); 46 } 47 48 /* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and MatAssemblyEnd_MPI_Hash() */ 49 #define TYPE BAIJ 50 #include "../src/mat/impls/aij/mpi/mpihashmat.h" 51 #undef TYPE 52 53 #if defined(PETSC_HAVE_HYPRE) 54 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *); 55 #endif 56 57 static PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A, Vec v, PetscInt idx[]) 58 { 59 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 60 PetscInt i, *idxb = NULL, m = A->rmap->n, bs = A->cmap->bs; 61 PetscScalar *va, *vv; 62 Vec vB, vA; 63 const PetscScalar *vb; 64 65 PetscFunctionBegin; 66 PetscCall(MatCreateVecs(a->A, NULL, &vA)); 67 PetscCall(MatGetRowMaxAbs(a->A, vA, idx)); 68 69 PetscCall(VecGetArrayWrite(vA, &va)); 70 if (idx) { 71 for (i = 0; i < m; i++) { 72 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 73 } 74 } 75 76 PetscCall(MatCreateVecs(a->B, NULL, &vB)); 77 PetscCall(PetscMalloc1(m, &idxb)); 78 PetscCall(MatGetRowMaxAbs(a->B, vB, idxb)); 79 80 PetscCall(VecGetArrayWrite(v, &vv)); 81 PetscCall(VecGetArrayRead(vB, &vb)); 82 for (i = 0; i < m; i++) { 83 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 84 vv[i] = vb[i]; 85 if (idx) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs); 86 } else { 87 vv[i] = va[i]; 88 if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > bs * a->garray[idxb[i] / bs] + (idxb[i] % bs)) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs); 89 } 90 } 91 PetscCall(VecRestoreArrayWrite(vA, &vv)); 92 PetscCall(VecRestoreArrayWrite(vA, &va)); 93 PetscCall(VecRestoreArrayRead(vB, &vb)); 94 PetscCall(PetscFree(idxb)); 95 PetscCall(VecDestroy(&vA)); 96 PetscCall(VecDestroy(&vB)); 97 PetscFunctionReturn(PETSC_SUCCESS); 98 } 99 100 static PetscErrorCode MatGetRowSumAbs_MPIBAIJ(Mat A, Vec v) 101 { 102 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 103 Vec vB, vA; 104 105 PetscFunctionBegin; 106 PetscCall(MatCreateVecs(a->A, NULL, &vA)); 107 PetscCall(MatGetRowSumAbs(a->A, vA)); 108 PetscCall(MatCreateVecs(a->B, NULL, &vB)); 109 PetscCall(MatGetRowSumAbs(a->B, vB)); 110 PetscCall(VecAXPY(vA, 1.0, vB)); 111 PetscCall(VecDestroy(&vB)); 112 PetscCall(VecCopy(vA, v)); 113 PetscCall(VecDestroy(&vA)); 114 PetscFunctionReturn(PETSC_SUCCESS); 115 } 116 117 static PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat) 118 { 119 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 120 121 PetscFunctionBegin; 122 PetscCall(MatStoreValues(aij->A)); 123 PetscCall(MatStoreValues(aij->B)); 124 PetscFunctionReturn(PETSC_SUCCESS); 125 } 126 127 static PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat) 128 { 129 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 130 131 PetscFunctionBegin; 132 PetscCall(MatRetrieveValues(aij->A)); 133 PetscCall(MatRetrieveValues(aij->B)); 134 PetscFunctionReturn(PETSC_SUCCESS); 135 } 136 137 /* 138 Local utility routine that creates a mapping from the global column 139 number to the local number in the off-diagonal part of the local 140 storage of the matrix. This is done in a non scalable way since the 141 length of colmap equals the global matrix length. 142 */ 143 PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat) 144 { 145 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 146 Mat_SeqBAIJ *B = (Mat_SeqBAIJ *)baij->B->data; 147 PetscInt nbs = B->nbs, i, bs = mat->rmap->bs; 148 149 PetscFunctionBegin; 150 #if defined(PETSC_USE_CTABLE) 151 PetscCall(PetscHMapICreateWithSize(baij->nbs, &baij->colmap)); 152 for (i = 0; i < nbs; i++) PetscCall(PetscHMapISet(baij->colmap, baij->garray[i] + 1, i * bs + 1)); 153 #else 154 PetscCall(PetscCalloc1(baij->Nbs + 1, &baij->colmap)); 155 for (i = 0; i < nbs; i++) baij->colmap[baij->garray[i]] = i * bs + 1; 156 #endif 157 PetscFunctionReturn(PETSC_SUCCESS); 158 } 159 160 #define MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, orow, ocol) \ 161 do { \ 162 brow = row / bs; \ 163 rp = PetscSafePointerPlusOffset(aj, ai[brow]); \ 164 ap = PetscSafePointerPlusOffset(aa, bs2 * ai[brow]); \ 165 rmax = aimax[brow]; \ 166 nrow = ailen[brow]; \ 167 bcol = col / bs; \ 168 ridx = row % bs; \ 169 cidx = col % bs; \ 170 low = 0; \ 171 high = nrow; \ 172 while (high - low > 3) { \ 173 t = (low + high) / 2; \ 174 if (rp[t] > bcol) high = t; \ 175 else low = t; \ 176 } \ 177 for (_i = low; _i < high; _i++) { \ 178 if (rp[_i] > bcol) break; \ 179 if (rp[_i] == bcol) { \ 180 bap = ap + bs2 * _i + bs * cidx + ridx; \ 181 if (addv == ADD_VALUES) *bap += value; \ 182 else *bap = value; \ 183 goto a_noinsert; \ 184 } \ 185 } \ 186 if (a->nonew == 1) goto a_noinsert; \ 187 PetscCheck(a->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 188 MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, aimax, a->nonew, MatScalar); \ 189 N = nrow++ - 1; \ 190 /* shift up all the later entries in this row */ \ 191 PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \ 192 PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \ 193 PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \ 194 rp[_i] = bcol; \ 195 ap[bs2 * _i + bs * cidx + ridx] = value; \ 196 a_noinsert:; \ 197 ailen[brow] = nrow; \ 198 } while (0) 199 200 #define MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, orow, ocol) \ 201 do { \ 202 brow = row / bs; \ 203 rp = PetscSafePointerPlusOffset(bj, bi[brow]); \ 204 ap = PetscSafePointerPlusOffset(ba, bs2 * bi[brow]); \ 205 rmax = bimax[brow]; \ 206 nrow = bilen[brow]; \ 207 bcol = col / bs; \ 208 ridx = row % bs; \ 209 cidx = col % bs; \ 210 low = 0; \ 211 high = nrow; \ 212 while (high - low > 3) { \ 213 t = (low + high) / 2; \ 214 if (rp[t] > bcol) high = t; \ 215 else low = t; \ 216 } \ 217 for (_i = low; _i < high; _i++) { \ 218 if (rp[_i] > bcol) break; \ 219 if (rp[_i] == bcol) { \ 220 bap = ap + bs2 * _i + bs * cidx + ridx; \ 221 if (addv == ADD_VALUES) *bap += value; \ 222 else *bap = value; \ 223 goto b_noinsert; \ 224 } \ 225 } \ 226 if (b->nonew == 1) goto b_noinsert; \ 227 PetscCheck(b->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 228 MatSeqXAIJReallocateAIJ(B, b->mbs, bs2, nrow, brow, bcol, rmax, ba, bi, bj, rp, ap, bimax, b->nonew, MatScalar); \ 229 N = nrow++ - 1; \ 230 /* shift up all the later entries in this row */ \ 231 PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \ 232 PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \ 233 PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \ 234 rp[_i] = bcol; \ 235 ap[bs2 * _i + bs * cidx + ridx] = value; \ 236 b_noinsert:; \ 237 bilen[brow] = nrow; \ 238 } while (0) 239 240 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) 241 { 242 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 243 MatScalar value; 244 PetscBool roworiented = baij->roworiented; 245 PetscInt i, j, row, col; 246 PetscInt rstart_orig = mat->rmap->rstart; 247 PetscInt rend_orig = mat->rmap->rend, cstart_orig = mat->cmap->rstart; 248 PetscInt cend_orig = mat->cmap->rend, bs = mat->rmap->bs; 249 250 /* Some Variables required in the macro */ 251 Mat A = baij->A; 252 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)(A)->data; 253 PetscInt *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j; 254 MatScalar *aa = a->a; 255 256 Mat B = baij->B; 257 Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)(B)->data; 258 PetscInt *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j; 259 MatScalar *ba = b->a; 260 261 PetscInt *rp, ii, nrow, _i, rmax, N, brow, bcol; 262 PetscInt low, high, t, ridx, cidx, bs2 = a->bs2; 263 MatScalar *ap, *bap; 264 265 PetscFunctionBegin; 266 for (i = 0; i < m; i++) { 267 if (im[i] < 0) continue; 268 PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1); 269 if (im[i] >= rstart_orig && im[i] < rend_orig) { 270 row = im[i] - rstart_orig; 271 for (j = 0; j < n; j++) { 272 if (in[j] >= cstart_orig && in[j] < cend_orig) { 273 col = in[j] - cstart_orig; 274 if (roworiented) value = v[i * n + j]; 275 else value = v[i + j * m]; 276 MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, im[i], in[j]); 277 } else if (in[j] < 0) { 278 continue; 279 } else { 280 PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1); 281 if (mat->was_assembled) { 282 if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 283 #if defined(PETSC_USE_CTABLE) 284 PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] / bs + 1, 0, &col)); 285 col = col - 1; 286 #else 287 col = baij->colmap[in[j] / bs] - 1; 288 #endif 289 if (col < 0 && !((Mat_SeqBAIJ *)baij->B->data)->nonew) { 290 PetscCall(MatDisAssemble_MPIBAIJ(mat)); 291 col = in[j]; 292 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 293 B = baij->B; 294 b = (Mat_SeqBAIJ *)(B)->data; 295 bimax = b->imax; 296 bi = b->i; 297 bilen = b->ilen; 298 bj = b->j; 299 ba = b->a; 300 } else { 301 PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]); 302 col += in[j] % bs; 303 } 304 } else col = in[j]; 305 if (roworiented) value = v[i * n + j]; 306 else value = v[i + j * m]; 307 MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, im[i], in[j]); 308 /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */ 309 } 310 } 311 } else { 312 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]); 313 if (!baij->donotstash) { 314 mat->assembled = PETSC_FALSE; 315 if (roworiented) { 316 PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE)); 317 } else { 318 PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE)); 319 } 320 } 321 } 322 } 323 PetscFunctionReturn(PETSC_SUCCESS); 324 } 325 326 static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol) 327 { 328 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 329 PetscInt *rp, low, high, t, ii, jj, nrow, i, rmax, N; 330 PetscInt *imax = a->imax, *ai = a->i, *ailen = a->ilen; 331 PetscInt *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs; 332 PetscBool roworiented = a->roworiented; 333 const PetscScalar *value = v; 334 MatScalar *ap, *aa = a->a, *bap; 335 336 PetscFunctionBegin; 337 rp = aj + ai[row]; 338 ap = aa + bs2 * ai[row]; 339 rmax = imax[row]; 340 nrow = ailen[row]; 341 value = v; 342 low = 0; 343 high = nrow; 344 while (high - low > 7) { 345 t = (low + high) / 2; 346 if (rp[t] > col) high = t; 347 else low = t; 348 } 349 for (i = low; i < high; i++) { 350 if (rp[i] > col) break; 351 if (rp[i] == col) { 352 bap = ap + bs2 * i; 353 if (roworiented) { 354 if (is == ADD_VALUES) { 355 for (ii = 0; ii < bs; ii++) { 356 for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++; 357 } 358 } else { 359 for (ii = 0; ii < bs; ii++) { 360 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 361 } 362 } 363 } else { 364 if (is == ADD_VALUES) { 365 for (ii = 0; ii < bs; ii++, value += bs) { 366 for (jj = 0; jj < bs; jj++) bap[jj] += value[jj]; 367 bap += bs; 368 } 369 } else { 370 for (ii = 0; ii < bs; ii++, value += bs) { 371 for (jj = 0; jj < bs; jj++) bap[jj] = value[jj]; 372 bap += bs; 373 } 374 } 375 } 376 goto noinsert2; 377 } 378 } 379 if (nonew == 1) goto noinsert2; 380 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol); 381 MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar); 382 N = nrow++ - 1; 383 high++; 384 /* shift up all the later entries in this row */ 385 PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1)); 386 PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1))); 387 rp[i] = col; 388 bap = ap + bs2 * i; 389 if (roworiented) { 390 for (ii = 0; ii < bs; ii++) { 391 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 392 } 393 } else { 394 for (ii = 0; ii < bs; ii++) { 395 for (jj = 0; jj < bs; jj++) *bap++ = *value++; 396 } 397 } 398 noinsert2:; 399 ailen[row] = nrow; 400 PetscFunctionReturn(PETSC_SUCCESS); 401 } 402 403 /* 404 This routine should be optimized so that the block copy at ** Here a copy is required ** below is not needed 405 by passing additional stride information into the MatSetValuesBlocked_SeqBAIJ_Inlined() routine 406 */ 407 static PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) 408 { 409 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 410 const PetscScalar *value; 411 MatScalar *barray = baij->barray; 412 PetscBool roworiented = baij->roworiented; 413 PetscInt i, j, ii, jj, row, col, rstart = baij->rstartbs; 414 PetscInt rend = baij->rendbs, cstart = baij->cstartbs, stepval; 415 PetscInt cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2; 416 417 PetscFunctionBegin; 418 if (!barray) { 419 PetscCall(PetscMalloc1(bs2, &barray)); 420 baij->barray = barray; 421 } 422 423 if (roworiented) stepval = (n - 1) * bs; 424 else stepval = (m - 1) * bs; 425 426 for (i = 0; i < m; i++) { 427 if (im[i] < 0) continue; 428 PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1); 429 if (im[i] >= rstart && im[i] < rend) { 430 row = im[i] - rstart; 431 for (j = 0; j < n; j++) { 432 /* If NumCol = 1 then a copy is not required */ 433 if ((roworiented) && (n == 1)) { 434 barray = (MatScalar *)v + i * bs2; 435 } else if ((!roworiented) && (m == 1)) { 436 barray = (MatScalar *)v + j * bs2; 437 } else { /* Here a copy is required */ 438 if (roworiented) { 439 value = v + (i * (stepval + bs) + j) * bs; 440 } else { 441 value = v + (j * (stepval + bs) + i) * bs; 442 } 443 for (ii = 0; ii < bs; ii++, value += bs + stepval) { 444 for (jj = 0; jj < bs; jj++) barray[jj] = value[jj]; 445 barray += bs; 446 } 447 barray -= bs2; 448 } 449 450 if (in[j] >= cstart && in[j] < cend) { 451 col = in[j] - cstart; 452 PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j])); 453 } else if (in[j] < 0) { 454 continue; 455 } else { 456 PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1); 457 if (mat->was_assembled) { 458 if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 459 460 #if defined(PETSC_USE_CTABLE) 461 PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col)); 462 col = col < 1 ? -1 : (col - 1) / bs; 463 #else 464 col = baij->colmap[in[j]] < 1 ? -1 : (baij->colmap[in[j]] - 1) / bs; 465 #endif 466 if (col < 0 && !((Mat_SeqBAIJ *)baij->B->data)->nonew) { 467 PetscCall(MatDisAssemble_MPIBAIJ(mat)); 468 col = in[j]; 469 } else PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new blocked indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]); 470 } else col = in[j]; 471 PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j])); 472 } 473 } 474 } else { 475 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]); 476 if (!baij->donotstash) { 477 if (roworiented) { 478 PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 479 } else { 480 PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 481 } 482 } 483 } 484 } 485 PetscFunctionReturn(PETSC_SUCCESS); 486 } 487 488 #define HASH_KEY 0.6180339887 489 #define HASH(size, key, tmp) (tmp = (key) * HASH_KEY, (PetscInt)((size) * (tmp - (PetscInt)tmp))) 490 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 491 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 492 static PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) 493 { 494 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 495 PetscBool roworiented = baij->roworiented; 496 PetscInt i, j, row, col; 497 PetscInt rstart_orig = mat->rmap->rstart; 498 PetscInt rend_orig = mat->rmap->rend, Nbs = baij->Nbs; 499 PetscInt h1, key, size = baij->ht_size, bs = mat->rmap->bs, *HT = baij->ht, idx; 500 PetscReal tmp; 501 MatScalar **HD = baij->hd, value; 502 PetscInt total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct; 503 504 PetscFunctionBegin; 505 for (i = 0; i < m; i++) { 506 if (PetscDefined(USE_DEBUG)) { 507 PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row"); 508 PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1); 509 } 510 row = im[i]; 511 if (row >= rstart_orig && row < rend_orig) { 512 for (j = 0; j < n; j++) { 513 col = in[j]; 514 if (roworiented) value = v[i * n + j]; 515 else value = v[i + j * m]; 516 /* Look up PetscInto the Hash Table */ 517 key = (row / bs) * Nbs + (col / bs) + 1; 518 h1 = HASH(size, key, tmp); 519 520 idx = h1; 521 if (PetscDefined(USE_DEBUG)) { 522 insert_ct++; 523 total_ct++; 524 if (HT[idx] != key) { 525 for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++); 526 if (idx == size) { 527 for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++); 528 PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col); 529 } 530 } 531 } else if (HT[idx] != key) { 532 for (idx = h1; (idx < size) && (HT[idx] != key); idx++); 533 if (idx == size) { 534 for (idx = 0; (idx < h1) && (HT[idx] != key); idx++); 535 PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col); 536 } 537 } 538 /* A HASH table entry is found, so insert the values at the correct address */ 539 if (addv == ADD_VALUES) *(HD[idx] + (col % bs) * bs + (row % bs)) += value; 540 else *(HD[idx] + (col % bs) * bs + (row % bs)) = value; 541 } 542 } else if (!baij->donotstash) { 543 if (roworiented) { 544 PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE)); 545 } else { 546 PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE)); 547 } 548 } 549 } 550 if (PetscDefined(USE_DEBUG)) { 551 baij->ht_total_ct += total_ct; 552 baij->ht_insert_ct += insert_ct; 553 } 554 PetscFunctionReturn(PETSC_SUCCESS); 555 } 556 557 static PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) 558 { 559 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 560 PetscBool roworiented = baij->roworiented; 561 PetscInt i, j, ii, jj, row, col; 562 PetscInt rstart = baij->rstartbs; 563 PetscInt rend = mat->rmap->rend, stepval, bs = mat->rmap->bs, bs2 = baij->bs2, nbs2 = n * bs2; 564 PetscInt h1, key, size = baij->ht_size, idx, *HT = baij->ht, Nbs = baij->Nbs; 565 PetscReal tmp; 566 MatScalar **HD = baij->hd, *baij_a; 567 const PetscScalar *v_t, *value; 568 PetscInt total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct; 569 570 PetscFunctionBegin; 571 if (roworiented) stepval = (n - 1) * bs; 572 else stepval = (m - 1) * bs; 573 574 for (i = 0; i < m; i++) { 575 if (PetscDefined(USE_DEBUG)) { 576 PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row: %" PetscInt_FMT, im[i]); 577 PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1); 578 } 579 row = im[i]; 580 v_t = v + i * nbs2; 581 if (row >= rstart && row < rend) { 582 for (j = 0; j < n; j++) { 583 col = in[j]; 584 585 /* Look up into the Hash Table */ 586 key = row * Nbs + col + 1; 587 h1 = HASH(size, key, tmp); 588 589 idx = h1; 590 if (PetscDefined(USE_DEBUG)) { 591 total_ct++; 592 insert_ct++; 593 if (HT[idx] != key) { 594 for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++); 595 if (idx == size) { 596 for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++); 597 PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col); 598 } 599 } 600 } else if (HT[idx] != key) { 601 for (idx = h1; (idx < size) && (HT[idx] != key); idx++); 602 if (idx == size) { 603 for (idx = 0; (idx < h1) && (HT[idx] != key); idx++); 604 PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col); 605 } 606 } 607 baij_a = HD[idx]; 608 if (roworiented) { 609 /*value = v + i*(stepval+bs)*bs + j*bs;*/ 610 /* value = v + (i*(stepval+bs)+j)*bs; */ 611 value = v_t; 612 v_t += bs; 613 if (addv == ADD_VALUES) { 614 for (ii = 0; ii < bs; ii++, value += stepval) { 615 for (jj = ii; jj < bs2; jj += bs) baij_a[jj] += *value++; 616 } 617 } else { 618 for (ii = 0; ii < bs; ii++, value += stepval) { 619 for (jj = ii; jj < bs2; jj += bs) baij_a[jj] = *value++; 620 } 621 } 622 } else { 623 value = v + j * (stepval + bs) * bs + i * bs; 624 if (addv == ADD_VALUES) { 625 for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) { 626 for (jj = 0; jj < bs; jj++) baij_a[jj] += *value++; 627 } 628 } else { 629 for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) { 630 for (jj = 0; jj < bs; jj++) baij_a[jj] = *value++; 631 } 632 } 633 } 634 } 635 } else { 636 if (!baij->donotstash) { 637 if (roworiented) { 638 PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 639 } else { 640 PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 641 } 642 } 643 } 644 } 645 if (PetscDefined(USE_DEBUG)) { 646 baij->ht_total_ct += total_ct; 647 baij->ht_insert_ct += insert_ct; 648 } 649 PetscFunctionReturn(PETSC_SUCCESS); 650 } 651 652 static PetscErrorCode MatGetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[]) 653 { 654 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 655 PetscInt bs = mat->rmap->bs, i, j, bsrstart = mat->rmap->rstart, bsrend = mat->rmap->rend; 656 PetscInt bscstart = mat->cmap->rstart, bscend = mat->cmap->rend, row, col, data; 657 658 PetscFunctionBegin; 659 for (i = 0; i < m; i++) { 660 if (idxm[i] < 0) continue; /* negative row */ 661 PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1); 662 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 663 row = idxm[i] - bsrstart; 664 for (j = 0; j < n; j++) { 665 if (idxn[j] < 0) continue; /* negative column */ 666 PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1); 667 if (idxn[j] >= bscstart && idxn[j] < bscend) { 668 col = idxn[j] - bscstart; 669 PetscCall(MatGetValues_SeqBAIJ(baij->A, 1, &row, 1, &col, v + i * n + j)); 670 } else { 671 if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 672 #if defined(PETSC_USE_CTABLE) 673 PetscCall(PetscHMapIGetWithDefault(baij->colmap, idxn[j] / bs + 1, 0, &data)); 674 data--; 675 #else 676 data = baij->colmap[idxn[j] / bs] - 1; 677 #endif 678 if ((data < 0) || (baij->garray[data / bs] != idxn[j] / bs)) *(v + i * n + j) = 0.0; 679 else { 680 col = data + idxn[j] % bs; 681 PetscCall(MatGetValues_SeqBAIJ(baij->B, 1, &row, 1, &col, v + i * n + j)); 682 } 683 } 684 } 685 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported"); 686 } 687 PetscFunctionReturn(PETSC_SUCCESS); 688 } 689 690 static PetscErrorCode MatNorm_MPIBAIJ(Mat mat, NormType type, PetscReal *nrm) 691 { 692 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 693 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ *)baij->A->data, *bmat = (Mat_SeqBAIJ *)baij->B->data; 694 PetscInt i, j, bs2 = baij->bs2, bs = baij->A->rmap->bs, nz, row, col; 695 PetscReal sum = 0.0; 696 MatScalar *v; 697 698 PetscFunctionBegin; 699 if (baij->size == 1) { 700 PetscCall(MatNorm(baij->A, type, nrm)); 701 } else { 702 if (type == NORM_FROBENIUS) { 703 v = amat->a; 704 nz = amat->nz * bs2; 705 for (i = 0; i < nz; i++) { 706 sum += PetscRealPart(PetscConj(*v) * (*v)); 707 v++; 708 } 709 v = bmat->a; 710 nz = bmat->nz * bs2; 711 for (i = 0; i < nz; i++) { 712 sum += PetscRealPart(PetscConj(*v) * (*v)); 713 v++; 714 } 715 PetscCallMPI(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 716 *nrm = PetscSqrtReal(*nrm); 717 } else if (type == NORM_1) { /* max column sum */ 718 PetscReal *tmp, *tmp2; 719 PetscInt *jj, *garray = baij->garray, cstart = baij->rstartbs; 720 PetscMPIInt iN; 721 722 PetscCall(PetscCalloc1(mat->cmap->N, &tmp)); 723 PetscCall(PetscMalloc1(mat->cmap->N, &tmp2)); 724 v = amat->a; 725 jj = amat->j; 726 for (i = 0; i < amat->nz; i++) { 727 for (j = 0; j < bs; j++) { 728 col = bs * (cstart + *jj) + j; /* column index */ 729 for (row = 0; row < bs; row++) { 730 tmp[col] += PetscAbsScalar(*v); 731 v++; 732 } 733 } 734 jj++; 735 } 736 v = bmat->a; 737 jj = bmat->j; 738 for (i = 0; i < bmat->nz; i++) { 739 for (j = 0; j < bs; j++) { 740 col = bs * garray[*jj] + j; 741 for (row = 0; row < bs; row++) { 742 tmp[col] += PetscAbsScalar(*v); 743 v++; 744 } 745 } 746 jj++; 747 } 748 PetscCall(PetscMPIIntCast(mat->cmap->N, &iN)); 749 PetscCallMPI(MPIU_Allreduce(tmp, tmp2, iN, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 750 *nrm = 0.0; 751 for (j = 0; j < mat->cmap->N; j++) { 752 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 753 } 754 PetscCall(PetscFree(tmp)); 755 PetscCall(PetscFree(tmp2)); 756 } else if (type == NORM_INFINITY) { /* max row sum */ 757 PetscReal *sums; 758 PetscCall(PetscMalloc1(bs, &sums)); 759 sum = 0.0; 760 for (j = 0; j < amat->mbs; j++) { 761 for (row = 0; row < bs; row++) sums[row] = 0.0; 762 v = amat->a + bs2 * amat->i[j]; 763 nz = amat->i[j + 1] - amat->i[j]; 764 for (i = 0; i < nz; i++) { 765 for (col = 0; col < bs; col++) { 766 for (row = 0; row < bs; row++) { 767 sums[row] += PetscAbsScalar(*v); 768 v++; 769 } 770 } 771 } 772 v = bmat->a + bs2 * bmat->i[j]; 773 nz = bmat->i[j + 1] - bmat->i[j]; 774 for (i = 0; i < nz; i++) { 775 for (col = 0; col < bs; col++) { 776 for (row = 0; row < bs; row++) { 777 sums[row] += PetscAbsScalar(*v); 778 v++; 779 } 780 } 781 } 782 for (row = 0; row < bs; row++) { 783 if (sums[row] > sum) sum = sums[row]; 784 } 785 } 786 PetscCallMPI(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat))); 787 PetscCall(PetscFree(sums)); 788 } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for this norm yet"); 789 } 790 PetscFunctionReturn(PETSC_SUCCESS); 791 } 792 793 /* 794 Creates the hash table, and sets the table 795 This table is created only once. 796 If new entried need to be added to the matrix 797 then the hash table has to be destroyed and 798 recreated. 799 */ 800 static PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat, PetscReal factor) 801 { 802 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 803 Mat A = baij->A, B = baij->B; 804 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data; 805 PetscInt i, j, k, nz = a->nz + b->nz, h1, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j; 806 PetscInt ht_size, bs2 = baij->bs2, rstart = baij->rstartbs; 807 PetscInt cstart = baij->cstartbs, *garray = baij->garray, row, col, Nbs = baij->Nbs; 808 PetscInt *HT, key; 809 MatScalar **HD; 810 PetscReal tmp; 811 #if defined(PETSC_USE_INFO) 812 PetscInt ct = 0, max = 0; 813 #endif 814 815 PetscFunctionBegin; 816 if (baij->ht) PetscFunctionReturn(PETSC_SUCCESS); 817 818 baij->ht_size = (PetscInt)(factor * nz); 819 ht_size = baij->ht_size; 820 821 /* Allocate Memory for Hash Table */ 822 PetscCall(PetscCalloc2(ht_size, &baij->hd, ht_size, &baij->ht)); 823 HD = baij->hd; 824 HT = baij->ht; 825 826 /* Loop Over A */ 827 for (i = 0; i < a->mbs; i++) { 828 for (j = ai[i]; j < ai[i + 1]; j++) { 829 row = i + rstart; 830 col = aj[j] + cstart; 831 832 key = row * Nbs + col + 1; 833 h1 = HASH(ht_size, key, tmp); 834 for (k = 0; k < ht_size; k++) { 835 if (!HT[(h1 + k) % ht_size]) { 836 HT[(h1 + k) % ht_size] = key; 837 HD[(h1 + k) % ht_size] = a->a + j * bs2; 838 break; 839 #if defined(PETSC_USE_INFO) 840 } else { 841 ct++; 842 #endif 843 } 844 } 845 #if defined(PETSC_USE_INFO) 846 if (k > max) max = k; 847 #endif 848 } 849 } 850 /* Loop Over B */ 851 for (i = 0; i < b->mbs; i++) { 852 for (j = bi[i]; j < bi[i + 1]; j++) { 853 row = i + rstart; 854 col = garray[bj[j]]; 855 key = row * Nbs + col + 1; 856 h1 = HASH(ht_size, key, tmp); 857 for (k = 0; k < ht_size; k++) { 858 if (!HT[(h1 + k) % ht_size]) { 859 HT[(h1 + k) % ht_size] = key; 860 HD[(h1 + k) % ht_size] = b->a + j * bs2; 861 break; 862 #if defined(PETSC_USE_INFO) 863 } else { 864 ct++; 865 #endif 866 } 867 } 868 #if defined(PETSC_USE_INFO) 869 if (k > max) max = k; 870 #endif 871 } 872 } 873 874 /* Print Summary */ 875 #if defined(PETSC_USE_INFO) 876 for (i = 0, j = 0; i < ht_size; i++) { 877 if (HT[i]) j++; 878 } 879 PetscCall(PetscInfo(mat, "Average Search = %5.2g,max search = %" PetscInt_FMT "\n", (!j) ? (double)0.0 : (double)(((PetscReal)(ct + j)) / (double)j), max)); 880 #endif 881 PetscFunctionReturn(PETSC_SUCCESS); 882 } 883 884 static PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat, MatAssemblyType mode) 885 { 886 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 887 PetscInt nstash, reallocs; 888 889 PetscFunctionBegin; 890 if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS); 891 892 PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range)); 893 PetscCall(MatStashScatterBegin_Private(mat, &mat->bstash, baij->rangebs)); 894 PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs)); 895 PetscCall(PetscInfo(mat, "Stash has %" PetscInt_FMT " entries,uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs)); 896 PetscCall(MatStashGetInfo_Private(&mat->bstash, &nstash, &reallocs)); 897 PetscCall(PetscInfo(mat, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs)); 898 PetscFunctionReturn(PETSC_SUCCESS); 899 } 900 901 static PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat, MatAssemblyType mode) 902 { 903 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 904 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)baij->A->data; 905 PetscInt i, j, rstart, ncols, flg, bs2 = baij->bs2; 906 PetscInt *row, *col; 907 PetscBool r1, r2, r3, other_disassembled; 908 MatScalar *val; 909 PetscMPIInt n; 910 911 PetscFunctionBegin; 912 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 913 if (!baij->donotstash && !mat->nooffprocentries) { 914 while (1) { 915 PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg)); 916 if (!flg) break; 917 918 for (i = 0; i < n;) { 919 /* Now identify the consecutive vals belonging to the same row */ 920 for (j = i, rstart = row[j]; j < n; j++) { 921 if (row[j] != rstart) break; 922 } 923 if (j < n) ncols = j - i; 924 else ncols = n - i; 925 /* Now assemble all these values with a single function call */ 926 PetscCall(MatSetValues_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode)); 927 i = j; 928 } 929 } 930 PetscCall(MatStashScatterEnd_Private(&mat->stash)); 931 /* Now process the block-stash. Since the values are stashed column-oriented, 932 set the row-oriented flag to column-oriented, and after MatSetValues() 933 restore the original flags */ 934 r1 = baij->roworiented; 935 r2 = a->roworiented; 936 r3 = ((Mat_SeqBAIJ *)baij->B->data)->roworiented; 937 938 baij->roworiented = PETSC_FALSE; 939 a->roworiented = PETSC_FALSE; 940 ((Mat_SeqBAIJ *)baij->B->data)->roworiented = PETSC_FALSE; 941 while (1) { 942 PetscCall(MatStashScatterGetMesg_Private(&mat->bstash, &n, &row, &col, &val, &flg)); 943 if (!flg) break; 944 945 for (i = 0; i < n;) { 946 /* Now identify the consecutive vals belonging to the same row */ 947 for (j = i, rstart = row[j]; j < n; j++) { 948 if (row[j] != rstart) break; 949 } 950 if (j < n) ncols = j - i; 951 else ncols = n - i; 952 PetscCall(MatSetValuesBlocked_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i * bs2, mat->insertmode)); 953 i = j; 954 } 955 } 956 PetscCall(MatStashScatterEnd_Private(&mat->bstash)); 957 958 baij->roworiented = r1; 959 a->roworiented = r2; 960 ((Mat_SeqBAIJ *)baij->B->data)->roworiented = r3; 961 } 962 963 PetscCall(MatAssemblyBegin(baij->A, mode)); 964 PetscCall(MatAssemblyEnd(baij->A, mode)); 965 966 /* determine if any processor has disassembled, if so we must 967 also disassemble ourselves, in order that we may reassemble. */ 968 /* 969 if nonzero structure of submatrix B cannot change then we know that 970 no processor disassembled thus we can skip this stuff 971 */ 972 if (!((Mat_SeqBAIJ *)baij->B->data)->nonew) { 973 PetscCallMPI(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 974 if (mat->was_assembled && !other_disassembled) PetscCall(MatDisAssemble_MPIBAIJ(mat)); 975 } 976 977 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIBAIJ(mat)); 978 PetscCall(MatAssemblyBegin(baij->B, mode)); 979 PetscCall(MatAssemblyEnd(baij->B, mode)); 980 981 #if defined(PETSC_USE_INFO) 982 if (baij->ht && mode == MAT_FINAL_ASSEMBLY) { 983 PetscCall(PetscInfo(mat, "Average Hash Table Search in MatSetValues = %5.2f\n", (double)((PetscReal)baij->ht_total_ct) / baij->ht_insert_ct)); 984 985 baij->ht_total_ct = 0; 986 baij->ht_insert_ct = 0; 987 } 988 #endif 989 if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 990 PetscCall(MatCreateHashTable_MPIBAIJ_Private(mat, baij->ht_fact)); 991 992 mat->ops->setvalues = MatSetValues_MPIBAIJ_HT; 993 mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 994 } 995 996 PetscCall(PetscFree2(baij->rowvalues, baij->rowindices)); 997 998 baij->rowvalues = NULL; 999 1000 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 1001 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ *)baij->A->data)->nonew) { 1002 PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate; 1003 PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat))); 1004 } 1005 PetscFunctionReturn(PETSC_SUCCESS); 1006 } 1007 1008 extern PetscErrorCode MatView_SeqBAIJ(Mat, PetscViewer); 1009 #include <petscdraw.h> 1010 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer) 1011 { 1012 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 1013 PetscMPIInt rank = baij->rank; 1014 PetscInt bs = mat->rmap->bs; 1015 PetscBool iascii, isdraw; 1016 PetscViewer sviewer; 1017 PetscViewerFormat format; 1018 1019 PetscFunctionBegin; 1020 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 1021 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1022 if (iascii) { 1023 PetscCall(PetscViewerGetFormat(viewer, &format)); 1024 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1025 MatInfo info; 1026 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 1027 PetscCall(MatGetInfo(mat, MAT_LOCAL, &info)); 1028 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 1029 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, 1030 mat->rmap->bs, (double)info.memory)); 1031 PetscCall(MatGetInfo(baij->A, MAT_LOCAL, &info)); 1032 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used)); 1033 PetscCall(MatGetInfo(baij->B, MAT_LOCAL, &info)); 1034 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used)); 1035 PetscCall(PetscViewerFlush(viewer)); 1036 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 1037 PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n")); 1038 PetscCall(VecScatterView(baij->Mvctx, viewer)); 1039 PetscFunctionReturn(PETSC_SUCCESS); 1040 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1041 PetscCall(PetscViewerASCIIPrintf(viewer, " block size is %" PetscInt_FMT "\n", bs)); 1042 PetscFunctionReturn(PETSC_SUCCESS); 1043 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1044 PetscFunctionReturn(PETSC_SUCCESS); 1045 } 1046 } 1047 1048 if (isdraw) { 1049 PetscDraw draw; 1050 PetscBool isnull; 1051 PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); 1052 PetscCall(PetscDrawIsNull(draw, &isnull)); 1053 if (isnull) PetscFunctionReturn(PETSC_SUCCESS); 1054 } 1055 1056 { 1057 /* assemble the entire matrix onto first processor. */ 1058 Mat A; 1059 Mat_SeqBAIJ *Aloc; 1060 PetscInt M = mat->rmap->N, N = mat->cmap->N, *ai, *aj, col, i, j, k, *rvals, mbs = baij->mbs; 1061 MatScalar *a; 1062 const char *matname; 1063 1064 /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */ 1065 /* Perhaps this should be the type of mat? */ 1066 PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A)); 1067 if (rank == 0) { 1068 PetscCall(MatSetSizes(A, M, N, M, N)); 1069 } else { 1070 PetscCall(MatSetSizes(A, 0, 0, M, N)); 1071 } 1072 PetscCall(MatSetType(A, MATMPIBAIJ)); 1073 PetscCall(MatMPIBAIJSetPreallocation(A, mat->rmap->bs, 0, NULL, 0, NULL)); 1074 PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE)); 1075 1076 /* copy over the A part */ 1077 Aloc = (Mat_SeqBAIJ *)baij->A->data; 1078 ai = Aloc->i; 1079 aj = Aloc->j; 1080 a = Aloc->a; 1081 PetscCall(PetscMalloc1(bs, &rvals)); 1082 1083 for (i = 0; i < mbs; i++) { 1084 rvals[0] = bs * (baij->rstartbs + i); 1085 for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1; 1086 for (j = ai[i]; j < ai[i + 1]; j++) { 1087 col = (baij->cstartbs + aj[j]) * bs; 1088 for (k = 0; k < bs; k++) { 1089 PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES)); 1090 col++; 1091 a += bs; 1092 } 1093 } 1094 } 1095 /* copy over the B part */ 1096 Aloc = (Mat_SeqBAIJ *)baij->B->data; 1097 ai = Aloc->i; 1098 aj = Aloc->j; 1099 a = Aloc->a; 1100 for (i = 0; i < mbs; i++) { 1101 rvals[0] = bs * (baij->rstartbs + i); 1102 for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1; 1103 for (j = ai[i]; j < ai[i + 1]; j++) { 1104 col = baij->garray[aj[j]] * bs; 1105 for (k = 0; k < bs; k++) { 1106 PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES)); 1107 col++; 1108 a += bs; 1109 } 1110 } 1111 } 1112 PetscCall(PetscFree(rvals)); 1113 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 1114 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 1115 /* 1116 Everyone has to call to draw the matrix since the graphics waits are 1117 synchronized across all processors that share the PetscDraw object 1118 */ 1119 PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 1120 if (((PetscObject)mat)->name) PetscCall(PetscObjectGetName((PetscObject)mat, &matname)); 1121 if (rank == 0) { 1122 if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIBAIJ *)A->data)->A, matname)); 1123 PetscCall(MatView_SeqBAIJ(((Mat_MPIBAIJ *)A->data)->A, sviewer)); 1124 } 1125 PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 1126 PetscCall(MatDestroy(&A)); 1127 } 1128 PetscFunctionReturn(PETSC_SUCCESS); 1129 } 1130 1131 /* Used for both MPIBAIJ and MPISBAIJ matrices */ 1132 PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat, PetscViewer viewer) 1133 { 1134 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 1135 Mat_SeqBAIJ *A = (Mat_SeqBAIJ *)aij->A->data; 1136 Mat_SeqBAIJ *B = (Mat_SeqBAIJ *)aij->B->data; 1137 const PetscInt *garray = aij->garray; 1138 PetscInt header[4], M, N, m, rs, cs, bs, cnt, i, j, ja, jb, k, l; 1139 PetscCount nz, hnz; 1140 PetscInt *rowlens, *colidxs; 1141 PetscScalar *matvals; 1142 PetscMPIInt rank; 1143 1144 PetscFunctionBegin; 1145 PetscCall(PetscViewerSetUp(viewer)); 1146 1147 M = mat->rmap->N; 1148 N = mat->cmap->N; 1149 m = mat->rmap->n; 1150 rs = mat->rmap->rstart; 1151 cs = mat->cmap->rstart; 1152 bs = mat->rmap->bs; 1153 nz = bs * bs * (A->nz + B->nz); 1154 1155 /* write matrix header */ 1156 header[0] = MAT_FILE_CLASSID; 1157 header[1] = M; 1158 header[2] = N; 1159 PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_COUNT, MPI_SUM, 0, PetscObjectComm((PetscObject)mat))); 1160 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 1161 if (rank == 0) PetscCall(PetscIntCast(hnz, &header[3])); 1162 PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT)); 1163 1164 /* fill in and store row lengths */ 1165 PetscCall(PetscMalloc1(m, &rowlens)); 1166 for (cnt = 0, i = 0; i < A->mbs; i++) 1167 for (j = 0; j < bs; j++) rowlens[cnt++] = bs * (A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i]); 1168 PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT)); 1169 PetscCall(PetscFree(rowlens)); 1170 1171 /* fill in and store column indices */ 1172 PetscCall(PetscMalloc1(nz, &colidxs)); 1173 for (cnt = 0, i = 0; i < A->mbs; i++) { 1174 for (k = 0; k < bs; k++) { 1175 for (jb = B->i[i]; jb < B->i[i + 1]; jb++) { 1176 if (garray[B->j[jb]] > cs / bs) break; 1177 for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l; 1178 } 1179 for (ja = A->i[i]; ja < A->i[i + 1]; ja++) 1180 for (l = 0; l < bs; l++) colidxs[cnt++] = bs * A->j[ja] + l + cs; 1181 for (; jb < B->i[i + 1]; jb++) 1182 for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l; 1183 } 1184 } 1185 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscCount_FMT, cnt, nz); 1186 PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_INT)); 1187 PetscCall(PetscFree(colidxs)); 1188 1189 /* fill in and store nonzero values */ 1190 PetscCall(PetscMalloc1(nz, &matvals)); 1191 for (cnt = 0, i = 0; i < A->mbs; i++) { 1192 for (k = 0; k < bs; k++) { 1193 for (jb = B->i[i]; jb < B->i[i + 1]; jb++) { 1194 if (garray[B->j[jb]] > cs / bs) break; 1195 for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k]; 1196 } 1197 for (ja = A->i[i]; ja < A->i[i + 1]; ja++) 1198 for (l = 0; l < bs; l++) matvals[cnt++] = A->a[bs * (bs * ja + l) + k]; 1199 for (; jb < B->i[i + 1]; jb++) 1200 for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k]; 1201 } 1202 } 1203 PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_SCALAR)); 1204 PetscCall(PetscFree(matvals)); 1205 1206 /* write block size option to the viewer's .info file */ 1207 PetscCall(MatView_Binary_BlockSizes(mat, viewer)); 1208 PetscFunctionReturn(PETSC_SUCCESS); 1209 } 1210 1211 PetscErrorCode MatView_MPIBAIJ(Mat mat, PetscViewer viewer) 1212 { 1213 PetscBool iascii, isdraw, issocket, isbinary; 1214 1215 PetscFunctionBegin; 1216 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 1217 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1218 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket)); 1219 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1220 if (iascii || isdraw || issocket) { 1221 PetscCall(MatView_MPIBAIJ_ASCIIorDraworSocket(mat, viewer)); 1222 } else if (isbinary) PetscCall(MatView_MPIBAIJ_Binary(mat, viewer)); 1223 PetscFunctionReturn(PETSC_SUCCESS); 1224 } 1225 1226 static PetscErrorCode MatMult_MPIBAIJ(Mat A, Vec xx, Vec yy) 1227 { 1228 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1229 PetscInt nt; 1230 1231 PetscFunctionBegin; 1232 PetscCall(VecGetLocalSize(xx, &nt)); 1233 PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A and xx"); 1234 PetscCall(VecGetLocalSize(yy, &nt)); 1235 PetscCheck(nt == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A and yy"); 1236 PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1237 PetscCall((*a->A->ops->mult)(a->A, xx, yy)); 1238 PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1239 PetscCall((*a->B->ops->multadd)(a->B, a->lvec, yy, yy)); 1240 PetscFunctionReturn(PETSC_SUCCESS); 1241 } 1242 1243 static PetscErrorCode MatMultAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz) 1244 { 1245 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1246 1247 PetscFunctionBegin; 1248 PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1249 PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz)); 1250 PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1251 PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz)); 1252 PetscFunctionReturn(PETSC_SUCCESS); 1253 } 1254 1255 static PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A, Vec xx, Vec yy) 1256 { 1257 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1258 1259 PetscFunctionBegin; 1260 /* do nondiagonal part */ 1261 PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec)); 1262 /* do local part */ 1263 PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy)); 1264 /* add partial results together */ 1265 PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE)); 1266 PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE)); 1267 PetscFunctionReturn(PETSC_SUCCESS); 1268 } 1269 1270 static PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz) 1271 { 1272 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1273 1274 PetscFunctionBegin; 1275 /* do nondiagonal part */ 1276 PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec)); 1277 /* do local part */ 1278 PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz)); 1279 /* add partial results together */ 1280 PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE)); 1281 PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE)); 1282 PetscFunctionReturn(PETSC_SUCCESS); 1283 } 1284 1285 /* 1286 This only works correctly for square matrices where the subblock A->A is the 1287 diagonal block 1288 */ 1289 static PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A, Vec v) 1290 { 1291 PetscFunctionBegin; 1292 PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block"); 1293 PetscCall(MatGetDiagonal(((Mat_MPIBAIJ *)A->data)->A, v)); 1294 PetscFunctionReturn(PETSC_SUCCESS); 1295 } 1296 1297 static PetscErrorCode MatScale_MPIBAIJ(Mat A, PetscScalar aa) 1298 { 1299 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1300 1301 PetscFunctionBegin; 1302 PetscCall(MatScale(a->A, aa)); 1303 PetscCall(MatScale(a->B, aa)); 1304 PetscFunctionReturn(PETSC_SUCCESS); 1305 } 1306 1307 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1308 { 1309 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data; 1310 PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p; 1311 PetscInt bs = matin->rmap->bs, bs2 = mat->bs2, i, *cworkA, *cworkB, **pcA, **pcB; 1312 PetscInt nztot, nzA, nzB, lrow, brstart = matin->rmap->rstart, brend = matin->rmap->rend; 1313 PetscInt *cmap, *idx_p, cstart = mat->cstartbs; 1314 1315 PetscFunctionBegin; 1316 PetscCheck(row >= brstart && row < brend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local rows"); 1317 PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active"); 1318 mat->getrowactive = PETSC_TRUE; 1319 1320 if (!mat->rowvalues && (idx || v)) { 1321 /* 1322 allocate enough space to hold information from the longest row. 1323 */ 1324 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *)mat->A->data, *Ba = (Mat_SeqBAIJ *)mat->B->data; 1325 PetscInt max = 1, mbs = mat->mbs, tmp; 1326 for (i = 0; i < mbs; i++) { 1327 tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i]; 1328 if (max < tmp) max = tmp; 1329 } 1330 PetscCall(PetscMalloc2(max * bs2, &mat->rowvalues, max * bs2, &mat->rowindices)); 1331 } 1332 lrow = row - brstart; 1333 1334 pvA = &vworkA; 1335 pcA = &cworkA; 1336 pvB = &vworkB; 1337 pcB = &cworkB; 1338 if (!v) { 1339 pvA = NULL; 1340 pvB = NULL; 1341 } 1342 if (!idx) { 1343 pcA = NULL; 1344 if (!v) pcB = NULL; 1345 } 1346 PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA)); 1347 PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB)); 1348 nztot = nzA + nzB; 1349 1350 cmap = mat->garray; 1351 if (v || idx) { 1352 if (nztot) { 1353 /* Sort by increasing column numbers, assuming A and B already sorted */ 1354 PetscInt imark = -1; 1355 if (v) { 1356 *v = v_p = mat->rowvalues; 1357 for (i = 0; i < nzB; i++) { 1358 if (cmap[cworkB[i] / bs] < cstart) v_p[i] = vworkB[i]; 1359 else break; 1360 } 1361 imark = i; 1362 for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i]; 1363 for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i]; 1364 } 1365 if (idx) { 1366 *idx = idx_p = mat->rowindices; 1367 if (imark > -1) { 1368 for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs; 1369 } else { 1370 for (i = 0; i < nzB; i++) { 1371 if (cmap[cworkB[i] / bs] < cstart) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs; 1372 else break; 1373 } 1374 imark = i; 1375 } 1376 for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart * bs + cworkA[i]; 1377 for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs; 1378 } 1379 } else { 1380 if (idx) *idx = NULL; 1381 if (v) *v = NULL; 1382 } 1383 } 1384 *nz = nztot; 1385 PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA)); 1386 PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB)); 1387 PetscFunctionReturn(PETSC_SUCCESS); 1388 } 1389 1390 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1391 { 1392 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 1393 1394 PetscFunctionBegin; 1395 PetscCheck(baij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow not called"); 1396 baij->getrowactive = PETSC_FALSE; 1397 PetscFunctionReturn(PETSC_SUCCESS); 1398 } 1399 1400 static PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A) 1401 { 1402 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data; 1403 1404 PetscFunctionBegin; 1405 PetscCall(MatZeroEntries(l->A)); 1406 PetscCall(MatZeroEntries(l->B)); 1407 PetscFunctionReturn(PETSC_SUCCESS); 1408 } 1409 1410 static PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin, MatInfoType flag, MatInfo *info) 1411 { 1412 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)matin->data; 1413 Mat A = a->A, B = a->B; 1414 PetscLogDouble isend[5], irecv[5]; 1415 1416 PetscFunctionBegin; 1417 info->block_size = (PetscReal)matin->rmap->bs; 1418 1419 PetscCall(MatGetInfo(A, MAT_LOCAL, info)); 1420 1421 isend[0] = info->nz_used; 1422 isend[1] = info->nz_allocated; 1423 isend[2] = info->nz_unneeded; 1424 isend[3] = info->memory; 1425 isend[4] = info->mallocs; 1426 1427 PetscCall(MatGetInfo(B, MAT_LOCAL, info)); 1428 1429 isend[0] += info->nz_used; 1430 isend[1] += info->nz_allocated; 1431 isend[2] += info->nz_unneeded; 1432 isend[3] += info->memory; 1433 isend[4] += info->mallocs; 1434 1435 if (flag == MAT_LOCAL) { 1436 info->nz_used = isend[0]; 1437 info->nz_allocated = isend[1]; 1438 info->nz_unneeded = isend[2]; 1439 info->memory = isend[3]; 1440 info->mallocs = isend[4]; 1441 } else if (flag == MAT_GLOBAL_MAX) { 1442 PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin))); 1443 1444 info->nz_used = irecv[0]; 1445 info->nz_allocated = irecv[1]; 1446 info->nz_unneeded = irecv[2]; 1447 info->memory = irecv[3]; 1448 info->mallocs = irecv[4]; 1449 } else if (flag == MAT_GLOBAL_SUM) { 1450 PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin))); 1451 1452 info->nz_used = irecv[0]; 1453 info->nz_allocated = irecv[1]; 1454 info->nz_unneeded = irecv[2]; 1455 info->memory = irecv[3]; 1456 info->mallocs = irecv[4]; 1457 } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_ARG_WRONG, "Unknown MatInfoType argument %d", (int)flag); 1458 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1459 info->fill_ratio_needed = 0; 1460 info->factor_mallocs = 0; 1461 PetscFunctionReturn(PETSC_SUCCESS); 1462 } 1463 1464 static PetscErrorCode MatSetOption_MPIBAIJ(Mat A, MatOption op, PetscBool flg) 1465 { 1466 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1467 1468 PetscFunctionBegin; 1469 switch (op) { 1470 case MAT_NEW_NONZERO_LOCATIONS: 1471 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1472 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1473 case MAT_KEEP_NONZERO_PATTERN: 1474 case MAT_NEW_NONZERO_LOCATION_ERR: 1475 MatCheckPreallocated(A, 1); 1476 PetscCall(MatSetOption(a->A, op, flg)); 1477 PetscCall(MatSetOption(a->B, op, flg)); 1478 break; 1479 case MAT_ROW_ORIENTED: 1480 MatCheckPreallocated(A, 1); 1481 a->roworiented = flg; 1482 1483 PetscCall(MatSetOption(a->A, op, flg)); 1484 PetscCall(MatSetOption(a->B, op, flg)); 1485 break; 1486 case MAT_FORCE_DIAGONAL_ENTRIES: 1487 case MAT_SORTED_FULL: 1488 PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); 1489 break; 1490 case MAT_IGNORE_OFF_PROC_ENTRIES: 1491 a->donotstash = flg; 1492 break; 1493 case MAT_USE_HASH_TABLE: 1494 a->ht_flag = flg; 1495 a->ht_fact = 1.39; 1496 break; 1497 case MAT_SYMMETRIC: 1498 case MAT_STRUCTURALLY_SYMMETRIC: 1499 case MAT_HERMITIAN: 1500 case MAT_SUBMAT_SINGLEIS: 1501 case MAT_SYMMETRY_ETERNAL: 1502 case MAT_STRUCTURAL_SYMMETRY_ETERNAL: 1503 case MAT_SPD_ETERNAL: 1504 /* if the diagonal matrix is square it inherits some of the properties above */ 1505 break; 1506 default: 1507 SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "unknown option %d", op); 1508 } 1509 PetscFunctionReturn(PETSC_SUCCESS); 1510 } 1511 1512 static PetscErrorCode MatTranspose_MPIBAIJ(Mat A, MatReuse reuse, Mat *matout) 1513 { 1514 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)A->data; 1515 Mat_SeqBAIJ *Aloc; 1516 Mat B; 1517 PetscInt M = A->rmap->N, N = A->cmap->N, *ai, *aj, i, *rvals, j, k, col; 1518 PetscInt bs = A->rmap->bs, mbs = baij->mbs; 1519 MatScalar *a; 1520 1521 PetscFunctionBegin; 1522 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout)); 1523 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 1524 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 1525 PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M)); 1526 PetscCall(MatSetType(B, ((PetscObject)A)->type_name)); 1527 /* Do not know preallocation information, but must set block size */ 1528 PetscCall(MatMPIBAIJSetPreallocation(B, A->rmap->bs, PETSC_DECIDE, NULL, PETSC_DECIDE, NULL)); 1529 } else { 1530 B = *matout; 1531 } 1532 1533 /* copy over the A part */ 1534 Aloc = (Mat_SeqBAIJ *)baij->A->data; 1535 ai = Aloc->i; 1536 aj = Aloc->j; 1537 a = Aloc->a; 1538 PetscCall(PetscMalloc1(bs, &rvals)); 1539 1540 for (i = 0; i < mbs; i++) { 1541 rvals[0] = bs * (baij->rstartbs + i); 1542 for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1; 1543 for (j = ai[i]; j < ai[i + 1]; j++) { 1544 col = (baij->cstartbs + aj[j]) * bs; 1545 for (k = 0; k < bs; k++) { 1546 PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES)); 1547 1548 col++; 1549 a += bs; 1550 } 1551 } 1552 } 1553 /* copy over the B part */ 1554 Aloc = (Mat_SeqBAIJ *)baij->B->data; 1555 ai = Aloc->i; 1556 aj = Aloc->j; 1557 a = Aloc->a; 1558 for (i = 0; i < mbs; i++) { 1559 rvals[0] = bs * (baij->rstartbs + i); 1560 for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1; 1561 for (j = ai[i]; j < ai[i + 1]; j++) { 1562 col = baij->garray[aj[j]] * bs; 1563 for (k = 0; k < bs; k++) { 1564 PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES)); 1565 col++; 1566 a += bs; 1567 } 1568 } 1569 } 1570 PetscCall(PetscFree(rvals)); 1571 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 1572 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 1573 1574 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) *matout = B; 1575 else PetscCall(MatHeaderMerge(A, &B)); 1576 PetscFunctionReturn(PETSC_SUCCESS); 1577 } 1578 1579 static PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat, Vec ll, Vec rr) 1580 { 1581 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 1582 Mat a = baij->A, b = baij->B; 1583 PetscInt s1, s2, s3; 1584 1585 PetscFunctionBegin; 1586 PetscCall(MatGetLocalSize(mat, &s2, &s3)); 1587 if (rr) { 1588 PetscCall(VecGetLocalSize(rr, &s1)); 1589 PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size"); 1590 /* Overlap communication with computation. */ 1591 PetscCall(VecScatterBegin(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1592 } 1593 if (ll) { 1594 PetscCall(VecGetLocalSize(ll, &s1)); 1595 PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size"); 1596 PetscUseTypeMethod(b, diagonalscale, ll, NULL); 1597 } 1598 /* scale the diagonal block */ 1599 PetscUseTypeMethod(a, diagonalscale, ll, rr); 1600 1601 if (rr) { 1602 /* Do a scatter end and then right scale the off-diagonal block */ 1603 PetscCall(VecScatterEnd(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1604 PetscUseTypeMethod(b, diagonalscale, NULL, baij->lvec); 1605 } 1606 PetscFunctionReturn(PETSC_SUCCESS); 1607 } 1608 1609 static PetscErrorCode MatZeroRows_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 1610 { 1611 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data; 1612 PetscInt *lrows; 1613 PetscInt r, len; 1614 PetscBool cong; 1615 1616 PetscFunctionBegin; 1617 /* get locally owned rows */ 1618 PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows)); 1619 /* fix right-hand side if needed */ 1620 if (x && b) { 1621 const PetscScalar *xx; 1622 PetscScalar *bb; 1623 1624 PetscCall(VecGetArrayRead(x, &xx)); 1625 PetscCall(VecGetArray(b, &bb)); 1626 for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]]; 1627 PetscCall(VecRestoreArrayRead(x, &xx)); 1628 PetscCall(VecRestoreArray(b, &bb)); 1629 } 1630 1631 /* actually zap the local rows */ 1632 /* 1633 Zero the required rows. If the "diagonal block" of the matrix 1634 is square and the user wishes to set the diagonal we use separate 1635 code so that MatSetValues() is not called for each diagonal allocating 1636 new memory, thus calling lots of mallocs and slowing things down. 1637 1638 */ 1639 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1640 PetscCall(MatZeroRows_SeqBAIJ(l->B, len, lrows, 0.0, NULL, NULL)); 1641 PetscCall(MatHasCongruentLayouts(A, &cong)); 1642 if ((diag != 0.0) && cong) { 1643 PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, diag, NULL, NULL)); 1644 } else if (diag != 0.0) { 1645 PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL)); 1646 PetscCheck(!((Mat_SeqBAIJ *)l->A->data)->nonew, PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options MAT_NEW_NONZERO_LOCATIONS, MAT_NEW_NONZERO_LOCATION_ERR, and MAT_NEW_NONZERO_ALLOCATION_ERR"); 1647 for (r = 0; r < len; ++r) { 1648 const PetscInt row = lrows[r] + A->rmap->rstart; 1649 PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES)); 1650 } 1651 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 1652 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 1653 } else { 1654 PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL)); 1655 } 1656 PetscCall(PetscFree(lrows)); 1657 1658 /* only change matrix nonzero state if pattern was allowed to be changed */ 1659 if (!((Mat_SeqBAIJ *)l->A->data)->keepnonzeropattern || !((Mat_SeqBAIJ *)l->A->data)->nonew) { 1660 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1661 PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A))); 1662 } 1663 PetscFunctionReturn(PETSC_SUCCESS); 1664 } 1665 1666 static PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 1667 { 1668 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data; 1669 PetscMPIInt n, p = 0; 1670 PetscInt i, j, k, r, len = 0, row, col, count; 1671 PetscInt *lrows, *owners = A->rmap->range; 1672 PetscSFNode *rrows; 1673 PetscSF sf; 1674 const PetscScalar *xx; 1675 PetscScalar *bb, *mask; 1676 Vec xmask, lmask; 1677 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)l->B->data; 1678 PetscInt bs = A->rmap->bs, bs2 = baij->bs2; 1679 PetscScalar *aa; 1680 1681 PetscFunctionBegin; 1682 PetscCall(PetscMPIIntCast(A->rmap->n, &n)); 1683 /* Create SF where leaves are input rows and roots are owned rows */ 1684 PetscCall(PetscMalloc1(n, &lrows)); 1685 for (r = 0; r < n; ++r) lrows[r] = -1; 1686 PetscCall(PetscMalloc1(N, &rrows)); 1687 for (r = 0; r < N; ++r) { 1688 const PetscInt idx = rows[r]; 1689 PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N); 1690 if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */ 1691 PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p)); 1692 } 1693 rrows[r].rank = p; 1694 rrows[r].index = rows[r] - owners[p]; 1695 } 1696 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1697 PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER)); 1698 /* Collect flags for rows to be zeroed */ 1699 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR)); 1700 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR)); 1701 PetscCall(PetscSFDestroy(&sf)); 1702 /* Compress and put in row numbers */ 1703 for (r = 0; r < n; ++r) 1704 if (lrows[r] >= 0) lrows[len++] = r; 1705 /* zero diagonal part of matrix */ 1706 PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b)); 1707 /* handle off-diagonal part of matrix */ 1708 PetscCall(MatCreateVecs(A, &xmask, NULL)); 1709 PetscCall(VecDuplicate(l->lvec, &lmask)); 1710 PetscCall(VecGetArray(xmask, &bb)); 1711 for (i = 0; i < len; i++) bb[lrows[i]] = 1; 1712 PetscCall(VecRestoreArray(xmask, &bb)); 1713 PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD)); 1714 PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD)); 1715 PetscCall(VecDestroy(&xmask)); 1716 if (x) { 1717 PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1718 PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1719 PetscCall(VecGetArrayRead(l->lvec, &xx)); 1720 PetscCall(VecGetArray(b, &bb)); 1721 } 1722 PetscCall(VecGetArray(lmask, &mask)); 1723 /* remove zeroed rows of off-diagonal matrix */ 1724 for (i = 0; i < len; ++i) { 1725 row = lrows[i]; 1726 count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs; 1727 aa = ((MatScalar *)baij->a) + baij->i[row / bs] * bs2 + (row % bs); 1728 for (k = 0; k < count; ++k) { 1729 aa[0] = 0.0; 1730 aa += bs; 1731 } 1732 } 1733 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1734 for (i = 0; i < l->B->rmap->N; ++i) { 1735 row = i / bs; 1736 for (j = baij->i[row]; j < baij->i[row + 1]; ++j) { 1737 for (k = 0; k < bs; ++k) { 1738 col = bs * baij->j[j] + k; 1739 if (PetscAbsScalar(mask[col])) { 1740 aa = ((MatScalar *)baij->a) + j * bs2 + (i % bs) + bs * k; 1741 if (x) bb[i] -= aa[0] * xx[col]; 1742 aa[0] = 0.0; 1743 } 1744 } 1745 } 1746 } 1747 if (x) { 1748 PetscCall(VecRestoreArray(b, &bb)); 1749 PetscCall(VecRestoreArrayRead(l->lvec, &xx)); 1750 } 1751 PetscCall(VecRestoreArray(lmask, &mask)); 1752 PetscCall(VecDestroy(&lmask)); 1753 PetscCall(PetscFree(lrows)); 1754 1755 /* only change matrix nonzero state if pattern was allowed to be changed */ 1756 if (!((Mat_SeqBAIJ *)l->A->data)->nonew) { 1757 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1758 PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A))); 1759 } 1760 PetscFunctionReturn(PETSC_SUCCESS); 1761 } 1762 1763 static PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1764 { 1765 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1766 1767 PetscFunctionBegin; 1768 PetscCall(MatSetUnfactored(a->A)); 1769 PetscFunctionReturn(PETSC_SUCCESS); 1770 } 1771 1772 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat, MatDuplicateOption, Mat *); 1773 1774 static PetscErrorCode MatEqual_MPIBAIJ(Mat A, Mat B, PetscBool *flag) 1775 { 1776 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ *)B->data, *matA = (Mat_MPIBAIJ *)A->data; 1777 Mat a, b, c, d; 1778 PetscBool flg; 1779 1780 PetscFunctionBegin; 1781 a = matA->A; 1782 b = matA->B; 1783 c = matB->A; 1784 d = matB->B; 1785 1786 PetscCall(MatEqual(a, c, &flg)); 1787 if (flg) PetscCall(MatEqual(b, d, &flg)); 1788 PetscCallMPI(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A))); 1789 PetscFunctionReturn(PETSC_SUCCESS); 1790 } 1791 1792 static PetscErrorCode MatCopy_MPIBAIJ(Mat A, Mat B, MatStructure str) 1793 { 1794 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1795 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 1796 1797 PetscFunctionBegin; 1798 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1799 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1800 PetscCall(MatCopy_Basic(A, B, str)); 1801 } else { 1802 PetscCall(MatCopy(a->A, b->A, str)); 1803 PetscCall(MatCopy(a->B, b->B, str)); 1804 } 1805 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 1806 PetscFunctionReturn(PETSC_SUCCESS); 1807 } 1808 1809 PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz) 1810 { 1811 PetscInt bs = Y->rmap->bs, m = Y->rmap->N / bs; 1812 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data; 1813 Mat_SeqBAIJ *y = (Mat_SeqBAIJ *)Y->data; 1814 1815 PetscFunctionBegin; 1816 PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz)); 1817 PetscFunctionReturn(PETSC_SUCCESS); 1818 } 1819 1820 static PetscErrorCode MatAXPY_MPIBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str) 1821 { 1822 Mat_MPIBAIJ *xx = (Mat_MPIBAIJ *)X->data, *yy = (Mat_MPIBAIJ *)Y->data; 1823 PetscBLASInt bnz, one = 1; 1824 Mat_SeqBAIJ *x, *y; 1825 PetscInt bs2 = Y->rmap->bs * Y->rmap->bs; 1826 1827 PetscFunctionBegin; 1828 if (str == SAME_NONZERO_PATTERN) { 1829 PetscScalar alpha = a; 1830 x = (Mat_SeqBAIJ *)xx->A->data; 1831 y = (Mat_SeqBAIJ *)yy->A->data; 1832 PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz)); 1833 PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one)); 1834 x = (Mat_SeqBAIJ *)xx->B->data; 1835 y = (Mat_SeqBAIJ *)yy->B->data; 1836 PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz)); 1837 PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one)); 1838 PetscCall(PetscObjectStateIncrease((PetscObject)Y)); 1839 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1840 PetscCall(MatAXPY_Basic(Y, a, X, str)); 1841 } else { 1842 Mat B; 1843 PetscInt *nnz_d, *nnz_o, bs = Y->rmap->bs; 1844 PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d)); 1845 PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o)); 1846 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B)); 1847 PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name)); 1848 PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N)); 1849 PetscCall(MatSetBlockSizesFromMats(B, Y, Y)); 1850 PetscCall(MatSetType(B, MATMPIBAIJ)); 1851 PetscCall(MatAXPYGetPreallocation_SeqBAIJ(yy->A, xx->A, nnz_d)); 1852 PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o)); 1853 PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, nnz_d, 0, nnz_o)); 1854 /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! */ 1855 PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str)); 1856 PetscCall(MatHeaderMerge(Y, &B)); 1857 PetscCall(PetscFree(nnz_d)); 1858 PetscCall(PetscFree(nnz_o)); 1859 } 1860 PetscFunctionReturn(PETSC_SUCCESS); 1861 } 1862 1863 static PetscErrorCode MatConjugate_MPIBAIJ(Mat mat) 1864 { 1865 PetscFunctionBegin; 1866 if (PetscDefined(USE_COMPLEX)) { 1867 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)mat->data; 1868 1869 PetscCall(MatConjugate_SeqBAIJ(a->A)); 1870 PetscCall(MatConjugate_SeqBAIJ(a->B)); 1871 } 1872 PetscFunctionReturn(PETSC_SUCCESS); 1873 } 1874 1875 static PetscErrorCode MatRealPart_MPIBAIJ(Mat A) 1876 { 1877 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1878 1879 PetscFunctionBegin; 1880 PetscCall(MatRealPart(a->A)); 1881 PetscCall(MatRealPart(a->B)); 1882 PetscFunctionReturn(PETSC_SUCCESS); 1883 } 1884 1885 static PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A) 1886 { 1887 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1888 1889 PetscFunctionBegin; 1890 PetscCall(MatImaginaryPart(a->A)); 1891 PetscCall(MatImaginaryPart(a->B)); 1892 PetscFunctionReturn(PETSC_SUCCESS); 1893 } 1894 1895 static PetscErrorCode MatCreateSubMatrix_MPIBAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat) 1896 { 1897 IS iscol_local; 1898 PetscInt csize; 1899 1900 PetscFunctionBegin; 1901 PetscCall(ISGetLocalSize(iscol, &csize)); 1902 if (call == MAT_REUSE_MATRIX) { 1903 PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local)); 1904 PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 1905 } else { 1906 PetscCall(ISAllGather(iscol, &iscol_local)); 1907 } 1908 PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, isrow, iscol_local, csize, call, newmat, PETSC_FALSE)); 1909 if (call == MAT_INITIAL_MATRIX) { 1910 PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local)); 1911 PetscCall(ISDestroy(&iscol_local)); 1912 } 1913 PetscFunctionReturn(PETSC_SUCCESS); 1914 } 1915 1916 /* 1917 Not great since it makes two copies of the submatrix, first an SeqBAIJ 1918 in local and then by concatenating the local matrices the end result. 1919 Writing it directly would be much like MatCreateSubMatrices_MPIBAIJ(). 1920 This routine is used for BAIJ and SBAIJ matrices (unfortunate dependency). 1921 */ 1922 PetscErrorCode MatCreateSubMatrix_MPIBAIJ_Private(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat, PetscBool sym) 1923 { 1924 PetscMPIInt rank, size; 1925 PetscInt i, m, n, rstart, row, rend, nz, *cwork, j, bs; 1926 PetscInt *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal; 1927 Mat M, Mreuse; 1928 MatScalar *vwork, *aa; 1929 MPI_Comm comm; 1930 IS isrow_new, iscol_new; 1931 Mat_SeqBAIJ *aij; 1932 1933 PetscFunctionBegin; 1934 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 1935 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 1936 PetscCallMPI(MPI_Comm_size(comm, &size)); 1937 /* The compression and expansion should be avoided. Doesn't point 1938 out errors, might change the indices, hence buggey */ 1939 PetscCall(ISCompressIndicesGeneral(mat->rmap->N, mat->rmap->n, mat->rmap->bs, 1, &isrow, &isrow_new)); 1940 if (isrow == iscol) { 1941 iscol_new = isrow_new; 1942 PetscCall(PetscObjectReference((PetscObject)iscol_new)); 1943 } else PetscCall(ISCompressIndicesGeneral(mat->cmap->N, mat->cmap->n, mat->cmap->bs, 1, &iscol, &iscol_new)); 1944 1945 if (call == MAT_REUSE_MATRIX) { 1946 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse)); 1947 PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 1948 PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_REUSE_MATRIX, &Mreuse, sym)); 1949 } else { 1950 PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_INITIAL_MATRIX, &Mreuse, sym)); 1951 } 1952 PetscCall(ISDestroy(&isrow_new)); 1953 PetscCall(ISDestroy(&iscol_new)); 1954 /* 1955 m - number of local rows 1956 n - number of columns (same on all processors) 1957 rstart - first row in new global matrix generated 1958 */ 1959 PetscCall(MatGetBlockSize(mat, &bs)); 1960 PetscCall(MatGetSize(Mreuse, &m, &n)); 1961 m = m / bs; 1962 n = n / bs; 1963 1964 if (call == MAT_INITIAL_MATRIX) { 1965 aij = (Mat_SeqBAIJ *)(Mreuse)->data; 1966 ii = aij->i; 1967 jj = aij->j; 1968 1969 /* 1970 Determine the number of non-zeros in the diagonal and off-diagonal 1971 portions of the matrix in order to do correct preallocation 1972 */ 1973 1974 /* first get start and end of "diagonal" columns */ 1975 if (csize == PETSC_DECIDE) { 1976 PetscCall(ISGetSize(isrow, &mglobal)); 1977 if (mglobal == n * bs) { /* square matrix */ 1978 nlocal = m; 1979 } else { 1980 nlocal = n / size + ((n % size) > rank); 1981 } 1982 } else { 1983 nlocal = csize / bs; 1984 } 1985 PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm)); 1986 rstart = rend - nlocal; 1987 PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n); 1988 1989 /* next, compute all the lengths */ 1990 PetscCall(PetscMalloc2(m + 1, &dlens, m + 1, &olens)); 1991 for (i = 0; i < m; i++) { 1992 jend = ii[i + 1] - ii[i]; 1993 olen = 0; 1994 dlen = 0; 1995 for (j = 0; j < jend; j++) { 1996 if (*jj < rstart || *jj >= rend) olen++; 1997 else dlen++; 1998 jj++; 1999 } 2000 olens[i] = olen; 2001 dlens[i] = dlen; 2002 } 2003 PetscCall(MatCreate(comm, &M)); 2004 PetscCall(MatSetSizes(M, bs * m, bs * nlocal, PETSC_DECIDE, bs * n)); 2005 PetscCall(MatSetType(M, sym ? ((PetscObject)mat)->type_name : MATMPIBAIJ)); 2006 PetscCall(MatMPIBAIJSetPreallocation(M, bs, 0, dlens, 0, olens)); 2007 PetscCall(MatMPISBAIJSetPreallocation(M, bs, 0, dlens, 0, olens)); 2008 PetscCall(PetscFree2(dlens, olens)); 2009 } else { 2010 PetscInt ml, nl; 2011 2012 M = *newmat; 2013 PetscCall(MatGetLocalSize(M, &ml, &nl)); 2014 PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request"); 2015 PetscCall(MatZeroEntries(M)); 2016 /* 2017 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2018 rather than the slower MatSetValues(). 2019 */ 2020 M->was_assembled = PETSC_TRUE; 2021 M->assembled = PETSC_FALSE; 2022 } 2023 PetscCall(MatSetOption(M, MAT_ROW_ORIENTED, PETSC_FALSE)); 2024 PetscCall(MatGetOwnershipRange(M, &rstart, &rend)); 2025 aij = (Mat_SeqBAIJ *)(Mreuse)->data; 2026 ii = aij->i; 2027 jj = aij->j; 2028 aa = aij->a; 2029 for (i = 0; i < m; i++) { 2030 row = rstart / bs + i; 2031 nz = ii[i + 1] - ii[i]; 2032 cwork = jj; 2033 jj = PetscSafePointerPlusOffset(jj, nz); 2034 vwork = aa; 2035 aa = PetscSafePointerPlusOffset(aa, nz * bs * bs); 2036 PetscCall(MatSetValuesBlocked_MPIBAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES)); 2037 } 2038 2039 PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY)); 2040 PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY)); 2041 *newmat = M; 2042 2043 /* save submatrix used in processor for next request */ 2044 if (call == MAT_INITIAL_MATRIX) { 2045 PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse)); 2046 PetscCall(PetscObjectDereference((PetscObject)Mreuse)); 2047 } 2048 PetscFunctionReturn(PETSC_SUCCESS); 2049 } 2050 2051 static PetscErrorCode MatPermute_MPIBAIJ(Mat A, IS rowp, IS colp, Mat *B) 2052 { 2053 MPI_Comm comm, pcomm; 2054 PetscInt clocal_size, nrows; 2055 const PetscInt *rows; 2056 PetscMPIInt size; 2057 IS crowp, lcolp; 2058 2059 PetscFunctionBegin; 2060 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 2061 /* make a collective version of 'rowp' */ 2062 PetscCall(PetscObjectGetComm((PetscObject)rowp, &pcomm)); 2063 if (pcomm == comm) { 2064 crowp = rowp; 2065 } else { 2066 PetscCall(ISGetSize(rowp, &nrows)); 2067 PetscCall(ISGetIndices(rowp, &rows)); 2068 PetscCall(ISCreateGeneral(comm, nrows, rows, PETSC_COPY_VALUES, &crowp)); 2069 PetscCall(ISRestoreIndices(rowp, &rows)); 2070 } 2071 PetscCall(ISSetPermutation(crowp)); 2072 /* make a local version of 'colp' */ 2073 PetscCall(PetscObjectGetComm((PetscObject)colp, &pcomm)); 2074 PetscCallMPI(MPI_Comm_size(pcomm, &size)); 2075 if (size == 1) { 2076 lcolp = colp; 2077 } else { 2078 PetscCall(ISAllGather(colp, &lcolp)); 2079 } 2080 PetscCall(ISSetPermutation(lcolp)); 2081 /* now we just get the submatrix */ 2082 PetscCall(MatGetLocalSize(A, NULL, &clocal_size)); 2083 PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(A, crowp, lcolp, clocal_size, MAT_INITIAL_MATRIX, B, PETSC_FALSE)); 2084 /* clean up */ 2085 if (pcomm != comm) PetscCall(ISDestroy(&crowp)); 2086 if (size > 1) PetscCall(ISDestroy(&lcolp)); 2087 PetscFunctionReturn(PETSC_SUCCESS); 2088 } 2089 2090 static PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[]) 2091 { 2092 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 2093 Mat_SeqBAIJ *B = (Mat_SeqBAIJ *)baij->B->data; 2094 2095 PetscFunctionBegin; 2096 if (nghosts) *nghosts = B->nbs; 2097 if (ghosts) *ghosts = baij->garray; 2098 PetscFunctionReturn(PETSC_SUCCESS); 2099 } 2100 2101 static PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A, Mat *newmat) 2102 { 2103 Mat B; 2104 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2105 Mat_SeqBAIJ *ad = (Mat_SeqBAIJ *)a->A->data, *bd = (Mat_SeqBAIJ *)a->B->data; 2106 Mat_SeqAIJ *b; 2107 PetscMPIInt size, rank, *recvcounts = NULL, *displs = NULL; 2108 PetscInt sendcount, i, *rstarts = A->rmap->range, n, cnt, j, bs = A->rmap->bs; 2109 PetscInt m, *garray = a->garray, *lens, *jsendbuf, *a_jsendbuf, *b_jsendbuf; 2110 2111 PetscFunctionBegin; 2112 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 2113 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank)); 2114 2115 /* Tell every processor the number of nonzeros per row */ 2116 PetscCall(PetscMalloc1(A->rmap->N / bs, &lens)); 2117 for (i = A->rmap->rstart / bs; i < A->rmap->rend / bs; i++) lens[i] = ad->i[i - A->rmap->rstart / bs + 1] - ad->i[i - A->rmap->rstart / bs] + bd->i[i - A->rmap->rstart / bs + 1] - bd->i[i - A->rmap->rstart / bs]; 2118 PetscCall(PetscMalloc1(2 * size, &recvcounts)); 2119 displs = recvcounts + size; 2120 for (i = 0; i < size; i++) { 2121 PetscCall(PetscMPIIntCast(A->rmap->range[i + 1] / bs - A->rmap->range[i] / bs, &recvcounts[i])); 2122 PetscCall(PetscMPIIntCast(A->rmap->range[i] / bs, &displs[i])); 2123 } 2124 PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, lens, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A))); 2125 /* Create the sequential matrix of the same type as the local block diagonal */ 2126 PetscCall(MatCreate(PETSC_COMM_SELF, &B)); 2127 PetscCall(MatSetSizes(B, A->rmap->N / bs, A->cmap->N / bs, PETSC_DETERMINE, PETSC_DETERMINE)); 2128 PetscCall(MatSetType(B, MATSEQAIJ)); 2129 PetscCall(MatSeqAIJSetPreallocation(B, 0, lens)); 2130 b = (Mat_SeqAIJ *)B->data; 2131 2132 /* Copy my part of matrix column indices over */ 2133 sendcount = ad->nz + bd->nz; 2134 jsendbuf = b->j + b->i[rstarts[rank] / bs]; 2135 a_jsendbuf = ad->j; 2136 b_jsendbuf = bd->j; 2137 n = A->rmap->rend / bs - A->rmap->rstart / bs; 2138 cnt = 0; 2139 for (i = 0; i < n; i++) { 2140 /* put in lower diagonal portion */ 2141 m = bd->i[i + 1] - bd->i[i]; 2142 while (m > 0) { 2143 /* is it above diagonal (in bd (compressed) numbering) */ 2144 if (garray[*b_jsendbuf] > A->rmap->rstart / bs + i) break; 2145 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2146 m--; 2147 } 2148 2149 /* put in diagonal portion */ 2150 for (j = ad->i[i]; j < ad->i[i + 1]; j++) jsendbuf[cnt++] = A->rmap->rstart / bs + *a_jsendbuf++; 2151 2152 /* put in upper diagonal portion */ 2153 while (m-- > 0) jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2154 } 2155 PetscCheck(cnt == sendcount, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Corrupted PETSc matrix: nz given %" PetscInt_FMT " actual nz %" PetscInt_FMT, sendcount, cnt); 2156 2157 /* Gather all column indices to all processors */ 2158 for (i = 0; i < size; i++) { 2159 recvcounts[i] = 0; 2160 for (j = A->rmap->range[i] / bs; j < A->rmap->range[i + 1] / bs; j++) recvcounts[i] += lens[j]; 2161 } 2162 displs[0] = 0; 2163 for (i = 1; i < size; i++) displs[i] = displs[i - 1] + recvcounts[i - 1]; 2164 PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, b->j, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A))); 2165 /* Assemble the matrix into usable form (note numerical values not yet set) */ 2166 /* set the b->ilen (length of each row) values */ 2167 PetscCall(PetscArraycpy(b->ilen, lens, A->rmap->N / bs)); 2168 /* set the b->i indices */ 2169 b->i[0] = 0; 2170 for (i = 1; i <= A->rmap->N / bs; i++) b->i[i] = b->i[i - 1] + lens[i - 1]; 2171 PetscCall(PetscFree(lens)); 2172 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 2173 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 2174 PetscCall(PetscFree(recvcounts)); 2175 2176 PetscCall(MatPropagateSymmetryOptions(A, B)); 2177 *newmat = B; 2178 PetscFunctionReturn(PETSC_SUCCESS); 2179 } 2180 2181 static PetscErrorCode MatSOR_MPIBAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) 2182 { 2183 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data; 2184 Vec bb1 = NULL; 2185 2186 PetscFunctionBegin; 2187 if (flag == SOR_APPLY_UPPER) { 2188 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2189 PetscFunctionReturn(PETSC_SUCCESS); 2190 } 2191 2192 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) PetscCall(VecDuplicate(bb, &bb1)); 2193 2194 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2195 if (flag & SOR_ZERO_INITIAL_GUESS) { 2196 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2197 its--; 2198 } 2199 2200 while (its--) { 2201 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2202 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2203 2204 /* update rhs: bb1 = bb - B*x */ 2205 PetscCall(VecScale(mat->lvec, -1.0)); 2206 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 2207 2208 /* local sweep */ 2209 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx)); 2210 } 2211 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 2212 if (flag & SOR_ZERO_INITIAL_GUESS) { 2213 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2214 its--; 2215 } 2216 while (its--) { 2217 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2218 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2219 2220 /* update rhs: bb1 = bb - B*x */ 2221 PetscCall(VecScale(mat->lvec, -1.0)); 2222 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 2223 2224 /* local sweep */ 2225 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx)); 2226 } 2227 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 2228 if (flag & SOR_ZERO_INITIAL_GUESS) { 2229 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2230 its--; 2231 } 2232 while (its--) { 2233 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2234 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2235 2236 /* update rhs: bb1 = bb - B*x */ 2237 PetscCall(VecScale(mat->lvec, -1.0)); 2238 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 2239 2240 /* local sweep */ 2241 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx)); 2242 } 2243 } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel version of SOR requested not supported"); 2244 2245 PetscCall(VecDestroy(&bb1)); 2246 PetscFunctionReturn(PETSC_SUCCESS); 2247 } 2248 2249 static PetscErrorCode MatGetColumnReductions_MPIBAIJ(Mat A, PetscInt type, PetscReal *reductions) 2250 { 2251 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)A->data; 2252 PetscInt m, N, i, *garray = aij->garray; 2253 PetscInt ib, jb, bs = A->rmap->bs; 2254 Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ *)aij->A->data; 2255 MatScalar *a_val = a_aij->a; 2256 Mat_SeqBAIJ *b_aij = (Mat_SeqBAIJ *)aij->B->data; 2257 MatScalar *b_val = b_aij->a; 2258 PetscReal *work; 2259 PetscMPIInt iN; 2260 2261 PetscFunctionBegin; 2262 PetscCall(MatGetSize(A, &m, &N)); 2263 PetscCall(PetscCalloc1(N, &work)); 2264 if (type == NORM_2) { 2265 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2266 for (jb = 0; jb < bs; jb++) { 2267 for (ib = 0; ib < bs; ib++) { 2268 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val); 2269 a_val++; 2270 } 2271 } 2272 } 2273 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2274 for (jb = 0; jb < bs; jb++) { 2275 for (ib = 0; ib < bs; ib++) { 2276 work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val); 2277 b_val++; 2278 } 2279 } 2280 } 2281 } else if (type == NORM_1) { 2282 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2283 for (jb = 0; jb < bs; jb++) { 2284 for (ib = 0; ib < bs; ib++) { 2285 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val); 2286 a_val++; 2287 } 2288 } 2289 } 2290 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2291 for (jb = 0; jb < bs; jb++) { 2292 for (ib = 0; ib < bs; ib++) { 2293 work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val); 2294 b_val++; 2295 } 2296 } 2297 } 2298 } else if (type == NORM_INFINITY) { 2299 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2300 for (jb = 0; jb < bs; jb++) { 2301 for (ib = 0; ib < bs; ib++) { 2302 PetscInt col = A->cmap->rstart + a_aij->j[i] * bs + jb; 2303 work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]); 2304 a_val++; 2305 } 2306 } 2307 } 2308 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2309 for (jb = 0; jb < bs; jb++) { 2310 for (ib = 0; ib < bs; ib++) { 2311 PetscInt col = garray[b_aij->j[i]] * bs + jb; 2312 work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]); 2313 b_val++; 2314 } 2315 } 2316 } 2317 } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) { 2318 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2319 for (jb = 0; jb < bs; jb++) { 2320 for (ib = 0; ib < bs; ib++) { 2321 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val); 2322 a_val++; 2323 } 2324 } 2325 } 2326 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2327 for (jb = 0; jb < bs; jb++) { 2328 for (ib = 0; ib < bs; ib++) { 2329 work[garray[b_aij->j[i]] * bs + jb] += PetscRealPart(*b_val); 2330 b_val++; 2331 } 2332 } 2333 } 2334 } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) { 2335 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2336 for (jb = 0; jb < bs; jb++) { 2337 for (ib = 0; ib < bs; ib++) { 2338 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val); 2339 a_val++; 2340 } 2341 } 2342 } 2343 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2344 for (jb = 0; jb < bs; jb++) { 2345 for (ib = 0; ib < bs; ib++) { 2346 work[garray[b_aij->j[i]] * bs + jb] += PetscImaginaryPart(*b_val); 2347 b_val++; 2348 } 2349 } 2350 } 2351 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type"); 2352 PetscCall(PetscMPIIntCast(N, &iN)); 2353 if (type == NORM_INFINITY) { 2354 PetscCallMPI(MPIU_Allreduce(work, reductions, iN, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A))); 2355 } else { 2356 PetscCallMPI(MPIU_Allreduce(work, reductions, iN, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A))); 2357 } 2358 PetscCall(PetscFree(work)); 2359 if (type == NORM_2) { 2360 for (i = 0; i < N; i++) reductions[i] = PetscSqrtReal(reductions[i]); 2361 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 2362 for (i = 0; i < N; i++) reductions[i] /= m; 2363 } 2364 PetscFunctionReturn(PETSC_SUCCESS); 2365 } 2366 2367 static PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A, const PetscScalar **values) 2368 { 2369 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2370 2371 PetscFunctionBegin; 2372 PetscCall(MatInvertBlockDiagonal(a->A, values)); 2373 A->factorerrortype = a->A->factorerrortype; 2374 A->factorerror_zeropivot_value = a->A->factorerror_zeropivot_value; 2375 A->factorerror_zeropivot_row = a->A->factorerror_zeropivot_row; 2376 PetscFunctionReturn(PETSC_SUCCESS); 2377 } 2378 2379 static PetscErrorCode MatShift_MPIBAIJ(Mat Y, PetscScalar a) 2380 { 2381 Mat_MPIBAIJ *maij = (Mat_MPIBAIJ *)Y->data; 2382 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)maij->A->data; 2383 2384 PetscFunctionBegin; 2385 if (!Y->preallocated) { 2386 PetscCall(MatMPIBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL, 0, NULL)); 2387 } else if (!aij->nz) { 2388 PetscInt nonew = aij->nonew; 2389 PetscCall(MatSeqBAIJSetPreallocation(maij->A, Y->rmap->bs, 1, NULL)); 2390 aij->nonew = nonew; 2391 } 2392 PetscCall(MatShift_Basic(Y, a)); 2393 PetscFunctionReturn(PETSC_SUCCESS); 2394 } 2395 2396 static PetscErrorCode MatMissingDiagonal_MPIBAIJ(Mat A, PetscBool *missing, PetscInt *d) 2397 { 2398 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2399 2400 PetscFunctionBegin; 2401 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices"); 2402 PetscCall(MatMissingDiagonal(a->A, missing, d)); 2403 if (d) { 2404 PetscInt rstart; 2405 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 2406 *d += rstart / A->rmap->bs; 2407 } 2408 PetscFunctionReturn(PETSC_SUCCESS); 2409 } 2410 2411 static PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A, Mat *a) 2412 { 2413 PetscFunctionBegin; 2414 *a = ((Mat_MPIBAIJ *)A->data)->A; 2415 PetscFunctionReturn(PETSC_SUCCESS); 2416 } 2417 2418 static PetscErrorCode MatEliminateZeros_MPIBAIJ(Mat A, PetscBool keep) 2419 { 2420 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2421 2422 PetscFunctionBegin; 2423 PetscCall(MatEliminateZeros_SeqBAIJ(a->A, keep)); // possibly keep zero diagonal coefficients 2424 PetscCall(MatEliminateZeros_SeqBAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients 2425 PetscFunctionReturn(PETSC_SUCCESS); 2426 } 2427 2428 static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ, 2429 MatGetRow_MPIBAIJ, 2430 MatRestoreRow_MPIBAIJ, 2431 MatMult_MPIBAIJ, 2432 /* 4*/ MatMultAdd_MPIBAIJ, 2433 MatMultTranspose_MPIBAIJ, 2434 MatMultTransposeAdd_MPIBAIJ, 2435 NULL, 2436 NULL, 2437 NULL, 2438 /*10*/ NULL, 2439 NULL, 2440 NULL, 2441 MatSOR_MPIBAIJ, 2442 MatTranspose_MPIBAIJ, 2443 /*15*/ MatGetInfo_MPIBAIJ, 2444 MatEqual_MPIBAIJ, 2445 MatGetDiagonal_MPIBAIJ, 2446 MatDiagonalScale_MPIBAIJ, 2447 MatNorm_MPIBAIJ, 2448 /*20*/ MatAssemblyBegin_MPIBAIJ, 2449 MatAssemblyEnd_MPIBAIJ, 2450 MatSetOption_MPIBAIJ, 2451 MatZeroEntries_MPIBAIJ, 2452 /*24*/ MatZeroRows_MPIBAIJ, 2453 NULL, 2454 NULL, 2455 NULL, 2456 NULL, 2457 /*29*/ MatSetUp_MPI_Hash, 2458 NULL, 2459 NULL, 2460 MatGetDiagonalBlock_MPIBAIJ, 2461 NULL, 2462 /*34*/ MatDuplicate_MPIBAIJ, 2463 NULL, 2464 NULL, 2465 NULL, 2466 NULL, 2467 /*39*/ MatAXPY_MPIBAIJ, 2468 MatCreateSubMatrices_MPIBAIJ, 2469 MatIncreaseOverlap_MPIBAIJ, 2470 MatGetValues_MPIBAIJ, 2471 MatCopy_MPIBAIJ, 2472 /*44*/ NULL, 2473 MatScale_MPIBAIJ, 2474 MatShift_MPIBAIJ, 2475 NULL, 2476 MatZeroRowsColumns_MPIBAIJ, 2477 /*49*/ NULL, 2478 NULL, 2479 NULL, 2480 NULL, 2481 NULL, 2482 /*54*/ MatFDColoringCreate_MPIXAIJ, 2483 NULL, 2484 MatSetUnfactored_MPIBAIJ, 2485 MatPermute_MPIBAIJ, 2486 MatSetValuesBlocked_MPIBAIJ, 2487 /*59*/ MatCreateSubMatrix_MPIBAIJ, 2488 MatDestroy_MPIBAIJ, 2489 MatView_MPIBAIJ, 2490 NULL, 2491 NULL, 2492 /*64*/ NULL, 2493 NULL, 2494 NULL, 2495 NULL, 2496 NULL, 2497 /*69*/ MatGetRowMaxAbs_MPIBAIJ, 2498 NULL, 2499 NULL, 2500 NULL, 2501 NULL, 2502 /*74*/ NULL, 2503 MatFDColoringApply_BAIJ, 2504 NULL, 2505 NULL, 2506 NULL, 2507 /*79*/ NULL, 2508 NULL, 2509 NULL, 2510 NULL, 2511 MatLoad_MPIBAIJ, 2512 /*84*/ NULL, 2513 NULL, 2514 NULL, 2515 NULL, 2516 NULL, 2517 /*89*/ NULL, 2518 NULL, 2519 NULL, 2520 NULL, 2521 NULL, 2522 /*94*/ NULL, 2523 NULL, 2524 NULL, 2525 NULL, 2526 NULL, 2527 /*99*/ NULL, 2528 NULL, 2529 NULL, 2530 MatConjugate_MPIBAIJ, 2531 NULL, 2532 /*104*/ NULL, 2533 MatRealPart_MPIBAIJ, 2534 MatImaginaryPart_MPIBAIJ, 2535 NULL, 2536 NULL, 2537 /*109*/ NULL, 2538 NULL, 2539 NULL, 2540 NULL, 2541 MatMissingDiagonal_MPIBAIJ, 2542 /*114*/ MatGetSeqNonzeroStructure_MPIBAIJ, 2543 NULL, 2544 MatGetGhosts_MPIBAIJ, 2545 NULL, 2546 NULL, 2547 /*119*/ NULL, 2548 NULL, 2549 NULL, 2550 NULL, 2551 MatGetMultiProcBlock_MPIBAIJ, 2552 /*124*/ NULL, 2553 MatGetColumnReductions_MPIBAIJ, 2554 MatInvertBlockDiagonal_MPIBAIJ, 2555 NULL, 2556 NULL, 2557 /*129*/ NULL, 2558 NULL, 2559 NULL, 2560 NULL, 2561 NULL, 2562 /*134*/ NULL, 2563 NULL, 2564 NULL, 2565 NULL, 2566 NULL, 2567 /*139*/ MatSetBlockSizes_Default, 2568 NULL, 2569 NULL, 2570 MatFDColoringSetUp_MPIXAIJ, 2571 NULL, 2572 /*144*/ MatCreateMPIMatConcatenateSeqMat_MPIBAIJ, 2573 NULL, 2574 NULL, 2575 NULL, 2576 NULL, 2577 NULL, 2578 /*150*/ NULL, 2579 MatEliminateZeros_MPIBAIJ, 2580 MatGetRowSumAbs_MPIBAIJ, 2581 NULL, 2582 NULL, 2583 NULL}; 2584 2585 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *); 2586 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *); 2587 2588 static PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[]) 2589 { 2590 PetscInt m, rstart, cstart, cend; 2591 PetscInt i, j, dlen, olen, nz, nz_max = 0, *d_nnz = NULL, *o_nnz = NULL; 2592 const PetscInt *JJ = NULL; 2593 PetscScalar *values = NULL; 2594 PetscBool roworiented = ((Mat_MPIBAIJ *)B->data)->roworiented; 2595 PetscBool nooffprocentries; 2596 2597 PetscFunctionBegin; 2598 PetscCall(PetscLayoutSetBlockSize(B->rmap, bs)); 2599 PetscCall(PetscLayoutSetBlockSize(B->cmap, bs)); 2600 PetscCall(PetscLayoutSetUp(B->rmap)); 2601 PetscCall(PetscLayoutSetUp(B->cmap)); 2602 PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 2603 m = B->rmap->n / bs; 2604 rstart = B->rmap->rstart / bs; 2605 cstart = B->cmap->rstart / bs; 2606 cend = B->cmap->rend / bs; 2607 2608 PetscCheck(!ii[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]); 2609 PetscCall(PetscMalloc2(m, &d_nnz, m, &o_nnz)); 2610 for (i = 0; i < m; i++) { 2611 nz = ii[i + 1] - ii[i]; 2612 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz); 2613 nz_max = PetscMax(nz_max, nz); 2614 dlen = 0; 2615 olen = 0; 2616 JJ = jj + ii[i]; 2617 for (j = 0; j < nz; j++) { 2618 if (*JJ < cstart || *JJ >= cend) olen++; 2619 else dlen++; 2620 JJ++; 2621 } 2622 d_nnz[i] = dlen; 2623 o_nnz[i] = olen; 2624 } 2625 PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, d_nnz, 0, o_nnz)); 2626 PetscCall(PetscFree2(d_nnz, o_nnz)); 2627 2628 values = (PetscScalar *)V; 2629 if (!values) PetscCall(PetscCalloc1(bs * bs * nz_max, &values)); 2630 for (i = 0; i < m; i++) { 2631 PetscInt row = i + rstart; 2632 PetscInt ncols = ii[i + 1] - ii[i]; 2633 const PetscInt *icols = jj + ii[i]; 2634 if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */ 2635 const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0); 2636 PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, ncols, icols, svals, INSERT_VALUES)); 2637 } else { /* block ordering does not match so we can only insert one block at a time. */ 2638 PetscInt j; 2639 for (j = 0; j < ncols; j++) { 2640 const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0); 2641 PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, 1, &icols[j], svals, INSERT_VALUES)); 2642 } 2643 } 2644 } 2645 2646 if (!V) PetscCall(PetscFree(values)); 2647 nooffprocentries = B->nooffprocentries; 2648 B->nooffprocentries = PETSC_TRUE; 2649 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 2650 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 2651 B->nooffprocentries = nooffprocentries; 2652 2653 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 2654 PetscFunctionReturn(PETSC_SUCCESS); 2655 } 2656 2657 /*@C 2658 MatMPIBAIJSetPreallocationCSR - Creates a sparse parallel matrix in `MATBAIJ` format using the given nonzero structure and (optional) numerical values 2659 2660 Collective 2661 2662 Input Parameters: 2663 + B - the matrix 2664 . bs - the block size 2665 . i - the indices into `j` for the start of each local row (starts with zero) 2666 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2667 - v - optional values in the matrix, use `NULL` if not provided 2668 2669 Level: advanced 2670 2671 Notes: 2672 The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc; 2673 thus you CANNOT change the matrix entries by changing the values of `v` after you have 2674 called this routine. 2675 2676 The order of the entries in values is specified by the `MatOption` `MAT_ROW_ORIENTED`. For example, C programs 2677 may want to use the default `MAT_ROW_ORIENTED` with value `PETSC_TRUE` and use an array v[nnz][bs][bs] where the second index is 2678 over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 2679 `MAT_ROW_ORIENTED` with value `PETSC_FALSE` and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 2680 block column and the second index is over columns within a block. 2681 2682 Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well 2683 2684 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MATMPIAIJ`, `MatCreateMPIBAIJWithArrays()`, `MATMPIBAIJ` 2685 @*/ 2686 PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[]) 2687 { 2688 PetscFunctionBegin; 2689 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 2690 PetscValidType(B, 1); 2691 PetscValidLogicalCollectiveInt(B, bs, 2); 2692 PetscTryMethod(B, "MatMPIBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v)); 2693 PetscFunctionReturn(PETSC_SUCCESS); 2694 } 2695 2696 PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz) 2697 { 2698 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 2699 PetscInt i; 2700 PetscMPIInt size; 2701 2702 PetscFunctionBegin; 2703 if (B->hash_active) { 2704 B->ops[0] = b->cops; 2705 B->hash_active = PETSC_FALSE; 2706 } 2707 if (!B->preallocated) PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), bs, &B->bstash)); 2708 PetscCall(MatSetBlockSize(B, PetscAbs(bs))); 2709 PetscCall(PetscLayoutSetUp(B->rmap)); 2710 PetscCall(PetscLayoutSetUp(B->cmap)); 2711 PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 2712 2713 if (d_nnz) { 2714 for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(d_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "d_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, d_nnz[i]); 2715 } 2716 if (o_nnz) { 2717 for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(o_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "o_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, o_nnz[i]); 2718 } 2719 2720 b->bs2 = bs * bs; 2721 b->mbs = B->rmap->n / bs; 2722 b->nbs = B->cmap->n / bs; 2723 b->Mbs = B->rmap->N / bs; 2724 b->Nbs = B->cmap->N / bs; 2725 2726 for (i = 0; i <= b->size; i++) b->rangebs[i] = B->rmap->range[i] / bs; 2727 b->rstartbs = B->rmap->rstart / bs; 2728 b->rendbs = B->rmap->rend / bs; 2729 b->cstartbs = B->cmap->rstart / bs; 2730 b->cendbs = B->cmap->rend / bs; 2731 2732 #if defined(PETSC_USE_CTABLE) 2733 PetscCall(PetscHMapIDestroy(&b->colmap)); 2734 #else 2735 PetscCall(PetscFree(b->colmap)); 2736 #endif 2737 PetscCall(PetscFree(b->garray)); 2738 PetscCall(VecDestroy(&b->lvec)); 2739 PetscCall(VecScatterDestroy(&b->Mvctx)); 2740 2741 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 2742 2743 MatSeqXAIJGetOptions_Private(b->B); 2744 PetscCall(MatDestroy(&b->B)); 2745 PetscCall(MatCreate(PETSC_COMM_SELF, &b->B)); 2746 PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0)); 2747 PetscCall(MatSetType(b->B, MATSEQBAIJ)); 2748 MatSeqXAIJRestoreOptions_Private(b->B); 2749 2750 MatSeqXAIJGetOptions_Private(b->A); 2751 PetscCall(MatDestroy(&b->A)); 2752 PetscCall(MatCreate(PETSC_COMM_SELF, &b->A)); 2753 PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n)); 2754 PetscCall(MatSetType(b->A, MATSEQBAIJ)); 2755 MatSeqXAIJRestoreOptions_Private(b->A); 2756 2757 PetscCall(MatSeqBAIJSetPreallocation(b->A, bs, d_nz, d_nnz)); 2758 PetscCall(MatSeqBAIJSetPreallocation(b->B, bs, o_nz, o_nnz)); 2759 B->preallocated = PETSC_TRUE; 2760 B->was_assembled = PETSC_FALSE; 2761 B->assembled = PETSC_FALSE; 2762 PetscFunctionReturn(PETSC_SUCCESS); 2763 } 2764 2765 extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat, Vec); 2766 extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat, PetscReal); 2767 2768 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype, MatReuse reuse, Mat *adj) 2769 { 2770 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 2771 Mat_SeqBAIJ *d = (Mat_SeqBAIJ *)b->A->data, *o = (Mat_SeqBAIJ *)b->B->data; 2772 PetscInt M = B->rmap->n / B->rmap->bs, i, *ii, *jj, cnt, j, k, rstart = B->rmap->rstart / B->rmap->bs; 2773 const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray; 2774 2775 PetscFunctionBegin; 2776 PetscCall(PetscMalloc1(M + 1, &ii)); 2777 ii[0] = 0; 2778 for (i = 0; i < M; i++) { 2779 PetscCheck((id[i + 1] - id[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, id[i], id[i + 1]); 2780 PetscCheck((io[i + 1] - io[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, io[i], io[i + 1]); 2781 ii[i + 1] = ii[i] + id[i + 1] - id[i] + io[i + 1] - io[i]; 2782 /* remove one from count of matrix has diagonal */ 2783 for (j = id[i]; j < id[i + 1]; j++) { 2784 if (jd[j] == i) { 2785 ii[i + 1]--; 2786 break; 2787 } 2788 } 2789 } 2790 PetscCall(PetscMalloc1(ii[M], &jj)); 2791 cnt = 0; 2792 for (i = 0; i < M; i++) { 2793 for (j = io[i]; j < io[i + 1]; j++) { 2794 if (garray[jo[j]] > rstart) break; 2795 jj[cnt++] = garray[jo[j]]; 2796 } 2797 for (k = id[i]; k < id[i + 1]; k++) { 2798 if (jd[k] != i) jj[cnt++] = rstart + jd[k]; 2799 } 2800 for (; j < io[i + 1]; j++) jj[cnt++] = garray[jo[j]]; 2801 } 2802 PetscCall(MatCreateMPIAdj(PetscObjectComm((PetscObject)B), M, B->cmap->N / B->rmap->bs, ii, jj, NULL, adj)); 2803 PetscFunctionReturn(PETSC_SUCCESS); 2804 } 2805 2806 #include <../src/mat/impls/aij/mpi/mpiaij.h> 2807 2808 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType, MatReuse, Mat *); 2809 2810 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 2811 { 2812 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2813 Mat_MPIAIJ *b; 2814 Mat B; 2815 2816 PetscFunctionBegin; 2817 PetscCheck(A->assembled, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Matrix must be assembled"); 2818 2819 if (reuse == MAT_REUSE_MATRIX) { 2820 B = *newmat; 2821 } else { 2822 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 2823 PetscCall(MatSetType(B, MATMPIAIJ)); 2824 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 2825 PetscCall(MatSetBlockSizes(B, A->rmap->bs, A->cmap->bs)); 2826 PetscCall(MatSeqAIJSetPreallocation(B, 0, NULL)); 2827 PetscCall(MatMPIAIJSetPreallocation(B, 0, NULL, 0, NULL)); 2828 } 2829 b = (Mat_MPIAIJ *)B->data; 2830 2831 if (reuse == MAT_REUSE_MATRIX) { 2832 PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_REUSE_MATRIX, &b->A)); 2833 PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_REUSE_MATRIX, &b->B)); 2834 } else { 2835 PetscInt *garray = a->garray; 2836 Mat_SeqAIJ *bB; 2837 PetscInt bs, nnz; 2838 PetscCall(MatDestroy(&b->A)); 2839 PetscCall(MatDestroy(&b->B)); 2840 /* just clear out the data structure */ 2841 PetscCall(MatDisAssemble_MPIAIJ(B)); 2842 PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A)); 2843 PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B)); 2844 2845 /* Global numbering for b->B columns */ 2846 bB = (Mat_SeqAIJ *)b->B->data; 2847 bs = A->rmap->bs; 2848 nnz = bB->i[A->rmap->n]; 2849 for (PetscInt k = 0; k < nnz; k++) { 2850 PetscInt bj = bB->j[k] / bs; 2851 PetscInt br = bB->j[k] % bs; 2852 bB->j[k] = garray[bj] * bs + br; 2853 } 2854 } 2855 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 2856 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 2857 2858 if (reuse == MAT_INPLACE_MATRIX) { 2859 PetscCall(MatHeaderReplace(A, &B)); 2860 } else { 2861 *newmat = B; 2862 } 2863 PetscFunctionReturn(PETSC_SUCCESS); 2864 } 2865 2866 /*MC 2867 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 2868 2869 Options Database Keys: 2870 + -mat_type mpibaij - sets the matrix type to `MATMPIBAIJ` during a call to `MatSetFromOptions()` 2871 . -mat_block_size <bs> - set the blocksize used to store the matrix 2872 . -mat_baij_mult_version version - indicate the version of the matrix-vector product to use (0 often indicates using BLAS) 2873 - -mat_use_hash_table <fact> - set hash table factor 2874 2875 Level: beginner 2876 2877 Note: 2878 `MatSetOption(A, MAT_STRUCTURE_ONLY, PETSC_TRUE)` may be called for this matrix type. In this no 2879 space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored 2880 2881 .seealso: `Mat`, `MATBAIJ`, `MATSEQBAIJ`, `MatCreateBAIJ` 2882 M*/ 2883 2884 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat, MatType, MatReuse, Mat *); 2885 2886 PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B) 2887 { 2888 Mat_MPIBAIJ *b; 2889 PetscBool flg = PETSC_FALSE; 2890 2891 PetscFunctionBegin; 2892 PetscCall(PetscNew(&b)); 2893 B->data = (void *)b; 2894 B->ops[0] = MatOps_Values; 2895 B->assembled = PETSC_FALSE; 2896 2897 B->insertmode = NOT_SET_VALUES; 2898 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank)); 2899 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &b->size)); 2900 2901 /* build local table of row and column ownerships */ 2902 PetscCall(PetscMalloc1(b->size + 1, &b->rangebs)); 2903 2904 /* build cache for off array entries formed */ 2905 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash)); 2906 2907 b->donotstash = PETSC_FALSE; 2908 b->colmap = NULL; 2909 b->garray = NULL; 2910 b->roworiented = PETSC_TRUE; 2911 2912 /* stuff used in block assembly */ 2913 b->barray = NULL; 2914 2915 /* stuff used for matrix vector multiply */ 2916 b->lvec = NULL; 2917 b->Mvctx = NULL; 2918 2919 /* stuff for MatGetRow() */ 2920 b->rowindices = NULL; 2921 b->rowvalues = NULL; 2922 b->getrowactive = PETSC_FALSE; 2923 2924 /* hash table stuff */ 2925 b->ht = NULL; 2926 b->hd = NULL; 2927 b->ht_size = 0; 2928 b->ht_flag = PETSC_FALSE; 2929 b->ht_fact = 0; 2930 b->ht_total_ct = 0; 2931 b->ht_insert_ct = 0; 2932 2933 /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */ 2934 b->ijonly = PETSC_FALSE; 2935 2936 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiadj_C", MatConvert_MPIBAIJ_MPIAdj)); 2937 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiaij_C", MatConvert_MPIBAIJ_MPIAIJ)); 2938 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpisbaij_C", MatConvert_MPIBAIJ_MPISBAIJ)); 2939 #if defined(PETSC_HAVE_HYPRE) 2940 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_hypre_C", MatConvert_AIJ_HYPRE)); 2941 #endif 2942 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIBAIJ)); 2943 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIBAIJ)); 2944 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocation_C", MatMPIBAIJSetPreallocation_MPIBAIJ)); 2945 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocationCSR_C", MatMPIBAIJSetPreallocationCSR_MPIBAIJ)); 2946 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIBAIJ)); 2947 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetHashTableFactor_C", MatSetHashTableFactor_MPIBAIJ)); 2948 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_is_C", MatConvert_XAIJ_IS)); 2949 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIBAIJ)); 2950 2951 PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Options for loading MPIBAIJ matrix 1", "Mat"); 2952 PetscCall(PetscOptionsName("-mat_use_hash_table", "Use hash table to save time in constructing matrix", "MatSetOption", &flg)); 2953 if (flg) { 2954 PetscReal fact = 1.39; 2955 PetscCall(MatSetOption(B, MAT_USE_HASH_TABLE, PETSC_TRUE)); 2956 PetscCall(PetscOptionsReal("-mat_use_hash_table", "Use hash table factor", "MatMPIBAIJSetHashTableFactor", fact, &fact, NULL)); 2957 if (fact <= 1.0) fact = 1.39; 2958 PetscCall(MatMPIBAIJSetHashTableFactor(B, fact)); 2959 PetscCall(PetscInfo(B, "Hash table Factor used %5.2g\n", (double)fact)); 2960 } 2961 PetscOptionsEnd(); 2962 PetscFunctionReturn(PETSC_SUCCESS); 2963 } 2964 2965 // PetscClangLinter pragma disable: -fdoc-section-header-unknown 2966 /*MC 2967 MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 2968 2969 This matrix type is identical to `MATSEQBAIJ` when constructed with a single process communicator, 2970 and `MATMPIBAIJ` otherwise. 2971 2972 Options Database Keys: 2973 . -mat_type baij - sets the matrix type to `MATBAIJ` during a call to `MatSetFromOptions()` 2974 2975 Level: beginner 2976 2977 .seealso: `Mat`, `MatCreateBAIJ()`, `MATSEQBAIJ`, `MATMPIBAIJ`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()` 2978 M*/ 2979 2980 /*@ 2981 MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in `MATMPIBAIJ` format 2982 (block compressed row). 2983 2984 Collective 2985 2986 Input Parameters: 2987 + B - the matrix 2988 . bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row 2989 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()` 2990 . d_nz - number of block nonzeros per block row in diagonal portion of local 2991 submatrix (same for all local rows) 2992 . d_nnz - array containing the number of block nonzeros in the various block rows 2993 of the in diagonal portion of the local (possibly different for each block 2994 row) or `NULL`. If you plan to factor the matrix you must leave room for the diagonal entry and 2995 set it even if it is zero. 2996 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2997 submatrix (same for all local rows). 2998 - o_nnz - array containing the number of nonzeros in the various block rows of the 2999 off-diagonal portion of the local submatrix (possibly different for 3000 each block row) or `NULL`. 3001 3002 If the *_nnz parameter is given then the *_nz parameter is ignored 3003 3004 Options Database Keys: 3005 + -mat_block_size - size of the blocks to use 3006 - -mat_use_hash_table <fact> - set hash table factor 3007 3008 Level: intermediate 3009 3010 Notes: 3011 For good matrix assembly performance 3012 the user should preallocate the matrix storage by setting the parameters 3013 `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). By setting these parameters accurately, 3014 performance can be increased by more than a factor of 50. 3015 3016 If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor 3017 than it must be used on all processors that share the object for that argument. 3018 3019 Storage Information: 3020 For a square global matrix we define each processor's diagonal portion 3021 to be its local rows and the corresponding columns (a square submatrix); 3022 each processor's off-diagonal portion encompasses the remainder of the 3023 local matrix (a rectangular submatrix). 3024 3025 The user can specify preallocated storage for the diagonal part of 3026 the local submatrix with either `d_nz` or `d_nnz` (not both). Set 3027 `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic 3028 memory allocation. Likewise, specify preallocated storage for the 3029 off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both). 3030 3031 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3032 the figure below we depict these three local rows and all columns (0-11). 3033 3034 .vb 3035 0 1 2 3 4 5 6 7 8 9 10 11 3036 -------------------------- 3037 row 3 |o o o d d d o o o o o o 3038 row 4 |o o o d d d o o o o o o 3039 row 5 |o o o d d d o o o o o o 3040 -------------------------- 3041 .ve 3042 3043 Thus, any entries in the d locations are stored in the d (diagonal) 3044 submatrix, and any entries in the o locations are stored in the 3045 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3046 stored simply in the `MATSEQBAIJ` format for compressed row storage. 3047 3048 Now `d_nz` should indicate the number of block nonzeros per row in the d matrix, 3049 and `o_nz` should indicate the number of block nonzeros per row in the o matrix. 3050 In general, for PDE problems in which most nonzeros are near the diagonal, 3051 one expects `d_nz` >> `o_nz`. 3052 3053 You can call `MatGetInfo()` to get information on how effective the preallocation was; 3054 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3055 You can also run with the option `-info` and look for messages with the string 3056 malloc in them to see if additional memory allocation was needed. 3057 3058 .seealso: `Mat`, `MATMPIBAIJ`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatMPIBAIJSetPreallocationCSR()`, `PetscSplitOwnership()` 3059 @*/ 3060 PetscErrorCode MatMPIBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[]) 3061 { 3062 PetscFunctionBegin; 3063 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 3064 PetscValidType(B, 1); 3065 PetscValidLogicalCollectiveInt(B, bs, 2); 3066 PetscTryMethod(B, "MatMPIBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, bs, d_nz, d_nnz, o_nz, o_nnz)); 3067 PetscFunctionReturn(PETSC_SUCCESS); 3068 } 3069 3070 // PetscClangLinter pragma disable: -fdoc-section-header-unknown 3071 /*@ 3072 MatCreateBAIJ - Creates a sparse parallel matrix in `MATBAIJ` format 3073 (block compressed row). 3074 3075 Collective 3076 3077 Input Parameters: 3078 + comm - MPI communicator 3079 . bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row 3080 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()` 3081 . m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given) 3082 This value should be the same as the local size used in creating the 3083 y vector for the matrix-vector product y = Ax. 3084 . n - number of local columns (or `PETSC_DECIDE` to have calculated if N is given) 3085 This value should be the same as the local size used in creating the 3086 x vector for the matrix-vector product y = Ax. 3087 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 3088 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 3089 . d_nz - number of nonzero blocks per block row in diagonal portion of local 3090 submatrix (same for all local rows) 3091 . d_nnz - array containing the number of nonzero blocks in the various block rows 3092 of the in diagonal portion of the local (possibly different for each block 3093 row) or NULL. If you plan to factor the matrix you must leave room for the diagonal entry 3094 and set it even if it is zero. 3095 . o_nz - number of nonzero blocks per block row in the off-diagonal portion of local 3096 submatrix (same for all local rows). 3097 - o_nnz - array containing the number of nonzero blocks in the various block rows of the 3098 off-diagonal portion of the local submatrix (possibly different for 3099 each block row) or NULL. 3100 3101 Output Parameter: 3102 . A - the matrix 3103 3104 Options Database Keys: 3105 + -mat_block_size - size of the blocks to use 3106 - -mat_use_hash_table <fact> - set hash table factor 3107 3108 Level: intermediate 3109 3110 Notes: 3111 It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, 3112 MatXXXXSetPreallocation() paradigm instead of this routine directly. 3113 [MatXXXXSetPreallocation() is, for example, `MatSeqBAIJSetPreallocation()`] 3114 3115 For good matrix assembly performance 3116 the user should preallocate the matrix storage by setting the parameters 3117 `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). By setting these parameters accurately, 3118 performance can be increased by more than a factor of 50. 3119 3120 If the *_nnz parameter is given then the *_nz parameter is ignored 3121 3122 A nonzero block is any block that as 1 or more nonzeros in it 3123 3124 The user MUST specify either the local or global matrix dimensions 3125 (possibly both). 3126 3127 If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor 3128 than it must be used on all processors that share the object for that argument. 3129 3130 If `m` and `n` are not `PETSC_DECIDE`, then the values determine the `PetscLayout` of the matrix and the ranges returned by 3131 `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, and `MatGetOwnershipRangesColumn()`. 3132 3133 Storage Information: 3134 For a square global matrix we define each processor's diagonal portion 3135 to be its local rows and the corresponding columns (a square submatrix); 3136 each processor's off-diagonal portion encompasses the remainder of the 3137 local matrix (a rectangular submatrix). 3138 3139 The user can specify preallocated storage for the diagonal part of 3140 the local submatrix with either d_nz or d_nnz (not both). Set 3141 `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic 3142 memory allocation. Likewise, specify preallocated storage for the 3143 off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both). 3144 3145 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3146 the figure below we depict these three local rows and all columns (0-11). 3147 3148 .vb 3149 0 1 2 3 4 5 6 7 8 9 10 11 3150 -------------------------- 3151 row 3 |o o o d d d o o o o o o 3152 row 4 |o o o d d d o o o o o o 3153 row 5 |o o o d d d o o o o o o 3154 -------------------------- 3155 .ve 3156 3157 Thus, any entries in the d locations are stored in the d (diagonal) 3158 submatrix, and any entries in the o locations are stored in the 3159 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3160 stored simply in the `MATSEQBAIJ` format for compressed row storage. 3161 3162 Now `d_nz` should indicate the number of block nonzeros per row in the d matrix, 3163 and `o_nz` should indicate the number of block nonzeros per row in the o matrix. 3164 In general, for PDE problems in which most nonzeros are near the diagonal, 3165 one expects `d_nz` >> `o_nz`. 3166 3167 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()`, 3168 `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, `MatGetOwnershipRangesColumn()`, `PetscLayout` 3169 @*/ 3170 PetscErrorCode MatCreateBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A) 3171 { 3172 PetscMPIInt size; 3173 3174 PetscFunctionBegin; 3175 PetscCall(MatCreate(comm, A)); 3176 PetscCall(MatSetSizes(*A, m, n, M, N)); 3177 PetscCallMPI(MPI_Comm_size(comm, &size)); 3178 if (size > 1) { 3179 PetscCall(MatSetType(*A, MATMPIBAIJ)); 3180 PetscCall(MatMPIBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz)); 3181 } else { 3182 PetscCall(MatSetType(*A, MATSEQBAIJ)); 3183 PetscCall(MatSeqBAIJSetPreallocation(*A, bs, d_nz, d_nnz)); 3184 } 3185 PetscFunctionReturn(PETSC_SUCCESS); 3186 } 3187 3188 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat) 3189 { 3190 Mat mat; 3191 Mat_MPIBAIJ *a, *oldmat = (Mat_MPIBAIJ *)matin->data; 3192 PetscInt len = 0; 3193 3194 PetscFunctionBegin; 3195 *newmat = NULL; 3196 PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat)); 3197 PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N)); 3198 PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name)); 3199 3200 PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap)); 3201 PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap)); 3202 if (matin->hash_active) { 3203 PetscCall(MatSetUp(mat)); 3204 } else { 3205 mat->factortype = matin->factortype; 3206 mat->preallocated = PETSC_TRUE; 3207 mat->assembled = PETSC_TRUE; 3208 mat->insertmode = NOT_SET_VALUES; 3209 3210 a = (Mat_MPIBAIJ *)mat->data; 3211 mat->rmap->bs = matin->rmap->bs; 3212 a->bs2 = oldmat->bs2; 3213 a->mbs = oldmat->mbs; 3214 a->nbs = oldmat->nbs; 3215 a->Mbs = oldmat->Mbs; 3216 a->Nbs = oldmat->Nbs; 3217 3218 a->size = oldmat->size; 3219 a->rank = oldmat->rank; 3220 a->donotstash = oldmat->donotstash; 3221 a->roworiented = oldmat->roworiented; 3222 a->rowindices = NULL; 3223 a->rowvalues = NULL; 3224 a->getrowactive = PETSC_FALSE; 3225 a->barray = NULL; 3226 a->rstartbs = oldmat->rstartbs; 3227 a->rendbs = oldmat->rendbs; 3228 a->cstartbs = oldmat->cstartbs; 3229 a->cendbs = oldmat->cendbs; 3230 3231 /* hash table stuff */ 3232 a->ht = NULL; 3233 a->hd = NULL; 3234 a->ht_size = 0; 3235 a->ht_flag = oldmat->ht_flag; 3236 a->ht_fact = oldmat->ht_fact; 3237 a->ht_total_ct = 0; 3238 a->ht_insert_ct = 0; 3239 3240 PetscCall(PetscArraycpy(a->rangebs, oldmat->rangebs, a->size + 1)); 3241 if (oldmat->colmap) { 3242 #if defined(PETSC_USE_CTABLE) 3243 PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap)); 3244 #else 3245 PetscCall(PetscMalloc1(a->Nbs, &a->colmap)); 3246 PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, a->Nbs)); 3247 #endif 3248 } else a->colmap = NULL; 3249 3250 if (oldmat->garray && (len = ((Mat_SeqBAIJ *)oldmat->B->data)->nbs)) { 3251 PetscCall(PetscMalloc1(len, &a->garray)); 3252 PetscCall(PetscArraycpy(a->garray, oldmat->garray, len)); 3253 } else a->garray = NULL; 3254 3255 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin), matin->rmap->bs, &mat->bstash)); 3256 PetscCall(VecDuplicate(oldmat->lvec, &a->lvec)); 3257 PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx)); 3258 3259 PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A)); 3260 PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B)); 3261 } 3262 PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist)); 3263 *newmat = mat; 3264 PetscFunctionReturn(PETSC_SUCCESS); 3265 } 3266 3267 /* Used for both MPIBAIJ and MPISBAIJ matrices */ 3268 PetscErrorCode MatLoad_MPIBAIJ_Binary(Mat mat, PetscViewer viewer) 3269 { 3270 PetscInt header[4], M, N, nz, bs, m, n, mbs, nbs, rows, cols, sum, i, j, k; 3271 PetscInt *rowidxs, *colidxs, rs, cs, ce; 3272 PetscScalar *matvals; 3273 3274 PetscFunctionBegin; 3275 PetscCall(PetscViewerSetUp(viewer)); 3276 3277 /* read in matrix header */ 3278 PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT)); 3279 PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file"); 3280 M = header[1]; 3281 N = header[2]; 3282 nz = header[3]; 3283 PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M); 3284 PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N); 3285 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIBAIJ"); 3286 3287 /* set block sizes from the viewer's .info file */ 3288 PetscCall(MatLoad_Binary_BlockSizes(mat, viewer)); 3289 /* set local sizes if not set already */ 3290 if (mat->rmap->n < 0 && M == N) mat->rmap->n = mat->cmap->n; 3291 if (mat->cmap->n < 0 && M == N) mat->cmap->n = mat->rmap->n; 3292 /* set global sizes if not set already */ 3293 if (mat->rmap->N < 0) mat->rmap->N = M; 3294 if (mat->cmap->N < 0) mat->cmap->N = N; 3295 PetscCall(PetscLayoutSetUp(mat->rmap)); 3296 PetscCall(PetscLayoutSetUp(mat->cmap)); 3297 3298 /* check if the matrix sizes are correct */ 3299 PetscCall(MatGetSize(mat, &rows, &cols)); 3300 PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols); 3301 PetscCall(MatGetBlockSize(mat, &bs)); 3302 PetscCall(MatGetLocalSize(mat, &m, &n)); 3303 PetscCall(PetscLayoutGetRange(mat->rmap, &rs, NULL)); 3304 PetscCall(PetscLayoutGetRange(mat->cmap, &cs, &ce)); 3305 mbs = m / bs; 3306 nbs = n / bs; 3307 3308 /* read in row lengths and build row indices */ 3309 PetscCall(PetscMalloc1(m + 1, &rowidxs)); 3310 PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT)); 3311 rowidxs[0] = 0; 3312 for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i]; 3313 PetscCallMPI(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer))); 3314 PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum); 3315 3316 /* read in column indices and matrix values */ 3317 PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals)); 3318 PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 3319 PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 3320 3321 { /* preallocate matrix storage */ 3322 PetscBT bt; /* helper bit set to count diagonal nonzeros */ 3323 PetscHSetI ht; /* helper hash set to count off-diagonal nonzeros */ 3324 PetscBool sbaij, done; 3325 PetscInt *d_nnz, *o_nnz; 3326 3327 PetscCall(PetscBTCreate(nbs, &bt)); 3328 PetscCall(PetscHSetICreate(&ht)); 3329 PetscCall(PetscCalloc2(mbs, &d_nnz, mbs, &o_nnz)); 3330 PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPISBAIJ, &sbaij)); 3331 for (i = 0; i < mbs; i++) { 3332 PetscCall(PetscBTMemzero(nbs, bt)); 3333 PetscCall(PetscHSetIClear(ht)); 3334 for (k = 0; k < bs; k++) { 3335 PetscInt row = bs * i + k; 3336 for (j = rowidxs[row]; j < rowidxs[row + 1]; j++) { 3337 PetscInt col = colidxs[j]; 3338 if (!sbaij || col >= row) { 3339 if (col >= cs && col < ce) { 3340 if (!PetscBTLookupSet(bt, (col - cs) / bs)) d_nnz[i]++; 3341 } else { 3342 PetscCall(PetscHSetIQueryAdd(ht, col / bs, &done)); 3343 if (done) o_nnz[i]++; 3344 } 3345 } 3346 } 3347 } 3348 } 3349 PetscCall(PetscBTDestroy(&bt)); 3350 PetscCall(PetscHSetIDestroy(&ht)); 3351 PetscCall(MatMPIBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz)); 3352 PetscCall(MatMPISBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz)); 3353 PetscCall(PetscFree2(d_nnz, o_nnz)); 3354 } 3355 3356 /* store matrix values */ 3357 for (i = 0; i < m; i++) { 3358 PetscInt row = rs + i, s = rowidxs[i], e = rowidxs[i + 1]; 3359 PetscUseTypeMethod(mat, setvalues, 1, &row, e - s, colidxs + s, matvals + s, INSERT_VALUES); 3360 } 3361 3362 PetscCall(PetscFree(rowidxs)); 3363 PetscCall(PetscFree2(colidxs, matvals)); 3364 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 3365 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 3366 PetscFunctionReturn(PETSC_SUCCESS); 3367 } 3368 3369 PetscErrorCode MatLoad_MPIBAIJ(Mat mat, PetscViewer viewer) 3370 { 3371 PetscBool isbinary; 3372 3373 PetscFunctionBegin; 3374 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 3375 PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name); 3376 PetscCall(MatLoad_MPIBAIJ_Binary(mat, viewer)); 3377 PetscFunctionReturn(PETSC_SUCCESS); 3378 } 3379 3380 /*@ 3381 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the matrices hash table 3382 3383 Input Parameters: 3384 + mat - the matrix 3385 - fact - factor 3386 3387 Options Database Key: 3388 . -mat_use_hash_table <fact> - provide the factor 3389 3390 Level: advanced 3391 3392 .seealso: `Mat`, `MATMPIBAIJ`, `MatSetOption()` 3393 @*/ 3394 PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat, PetscReal fact) 3395 { 3396 PetscFunctionBegin; 3397 PetscTryMethod(mat, "MatSetHashTableFactor_C", (Mat, PetscReal), (mat, fact)); 3398 PetscFunctionReturn(PETSC_SUCCESS); 3399 } 3400 3401 PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat, PetscReal fact) 3402 { 3403 Mat_MPIBAIJ *baij; 3404 3405 PetscFunctionBegin; 3406 baij = (Mat_MPIBAIJ *)mat->data; 3407 baij->ht_fact = fact; 3408 PetscFunctionReturn(PETSC_SUCCESS); 3409 } 3410 3411 PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[]) 3412 { 3413 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 3414 PetscBool flg; 3415 3416 PetscFunctionBegin; 3417 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIBAIJ, &flg)); 3418 PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIBAIJ matrix as input"); 3419 if (Ad) *Ad = a->A; 3420 if (Ao) *Ao = a->B; 3421 if (colmap) *colmap = a->garray; 3422 PetscFunctionReturn(PETSC_SUCCESS); 3423 } 3424 3425 /* 3426 Special version for direct calls from Fortran (to eliminate two function call overheads 3427 */ 3428 #if defined(PETSC_HAVE_FORTRAN_CAPS) 3429 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED 3430 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 3431 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked 3432 #endif 3433 3434 // PetscClangLinter pragma disable: -fdoc-synopsis-matching-symbol-name 3435 /*@C 3436 MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to `MatSetValuesBlocked()` 3437 3438 Collective 3439 3440 Input Parameters: 3441 + matin - the matrix 3442 . min - number of input rows 3443 . im - input rows 3444 . nin - number of input columns 3445 . in - input columns 3446 . v - numerical values input 3447 - addvin - `INSERT_VALUES` or `ADD_VALUES` 3448 3449 Level: advanced 3450 3451 Developer Notes: 3452 This has a complete copy of `MatSetValuesBlocked_MPIBAIJ()` which is terrible code un-reuse. 3453 3454 .seealso: `Mat`, `MatSetValuesBlocked()` 3455 @*/ 3456 PETSC_EXTERN PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin, PetscInt *min, const PetscInt im[], PetscInt *nin, const PetscInt in[], const MatScalar v[], InsertMode *addvin) 3457 { 3458 /* convert input arguments to C version */ 3459 Mat mat = *matin; 3460 PetscInt m = *min, n = *nin; 3461 InsertMode addv = *addvin; 3462 3463 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 3464 const MatScalar *value; 3465 MatScalar *barray = baij->barray; 3466 PetscBool roworiented = baij->roworiented; 3467 PetscInt i, j, ii, jj, row, col, rstart = baij->rstartbs; 3468 PetscInt rend = baij->rendbs, cstart = baij->cstartbs, stepval; 3469 PetscInt cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2; 3470 3471 PetscFunctionBegin; 3472 /* tasks normally handled by MatSetValuesBlocked() */ 3473 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 3474 else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values"); 3475 PetscCheck(!mat->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix"); 3476 if (mat->assembled) { 3477 mat->was_assembled = PETSC_TRUE; 3478 mat->assembled = PETSC_FALSE; 3479 } 3480 PetscCall(PetscLogEventBegin(MAT_SetValues, mat, 0, 0, 0)); 3481 3482 if (!barray) { 3483 PetscCall(PetscMalloc1(bs2, &barray)); 3484 baij->barray = barray; 3485 } 3486 3487 if (roworiented) stepval = (n - 1) * bs; 3488 else stepval = (m - 1) * bs; 3489 3490 for (i = 0; i < m; i++) { 3491 if (im[i] < 0) continue; 3492 PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large, row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1); 3493 if (im[i] >= rstart && im[i] < rend) { 3494 row = im[i] - rstart; 3495 for (j = 0; j < n; j++) { 3496 /* If NumCol = 1 then a copy is not required */ 3497 if ((roworiented) && (n == 1)) { 3498 barray = (MatScalar *)v + i * bs2; 3499 } else if ((!roworiented) && (m == 1)) { 3500 barray = (MatScalar *)v + j * bs2; 3501 } else { /* Here a copy is required */ 3502 if (roworiented) { 3503 value = v + i * (stepval + bs) * bs + j * bs; 3504 } else { 3505 value = v + j * (stepval + bs) * bs + i * bs; 3506 } 3507 for (ii = 0; ii < bs; ii++, value += stepval) { 3508 for (jj = 0; jj < bs; jj++) *barray++ = *value++; 3509 } 3510 barray -= bs2; 3511 } 3512 3513 if (in[j] >= cstart && in[j] < cend) { 3514 col = in[j] - cstart; 3515 PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j])); 3516 } else if (in[j] < 0) { 3517 continue; 3518 } else { 3519 PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large, col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1); 3520 if (mat->was_assembled) { 3521 if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 3522 3523 #if defined(PETSC_USE_DEBUG) 3524 #if defined(PETSC_USE_CTABLE) 3525 { 3526 PetscInt data; 3527 PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &data)); 3528 PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap"); 3529 } 3530 #else 3531 PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap"); 3532 #endif 3533 #endif 3534 #if defined(PETSC_USE_CTABLE) 3535 PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col)); 3536 col = (col - 1) / bs; 3537 #else 3538 col = (baij->colmap[in[j]] - 1) / bs; 3539 #endif 3540 if (col < 0 && !((Mat_SeqBAIJ *)baij->A->data)->nonew) { 3541 PetscCall(MatDisAssemble_MPIBAIJ(mat)); 3542 col = in[j]; 3543 } 3544 } else col = in[j]; 3545 PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j])); 3546 } 3547 } 3548 } else { 3549 if (!baij->donotstash) { 3550 if (roworiented) { 3551 PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 3552 } else { 3553 PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 3554 } 3555 } 3556 } 3557 } 3558 3559 /* task normally handled by MatSetValuesBlocked() */ 3560 PetscCall(PetscLogEventEnd(MAT_SetValues, mat, 0, 0, 0)); 3561 PetscFunctionReturn(PETSC_SUCCESS); 3562 } 3563 3564 /*@ 3565 MatCreateMPIBAIJWithArrays - creates a `MATMPIBAIJ` matrix using arrays that contain in standard block CSR format for the local rows. 3566 3567 Collective 3568 3569 Input Parameters: 3570 + comm - MPI communicator 3571 . bs - the block size, only a block size of 1 is supported 3572 . m - number of local rows (Cannot be `PETSC_DECIDE`) 3573 . n - This value should be the same as the local size used in creating the 3574 x vector for the matrix-vector product $ y = Ax $. (or `PETSC_DECIDE` to have 3575 calculated if `N` is given) For square matrices `n` is almost always `m`. 3576 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given) 3577 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given) 3578 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that rowth block row of the matrix 3579 . j - column indices 3580 - a - matrix values 3581 3582 Output Parameter: 3583 . mat - the matrix 3584 3585 Level: intermediate 3586 3587 Notes: 3588 The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc; 3589 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3590 called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays. 3591 3592 The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 3593 the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 3594 block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory 3595 with column-major ordering within blocks. 3596 3597 The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array. 3598 3599 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 3600 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()` 3601 @*/ 3602 PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat) 3603 { 3604 PetscFunctionBegin; 3605 PetscCheck(!i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 3606 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3607 PetscCall(MatCreate(comm, mat)); 3608 PetscCall(MatSetSizes(*mat, m, n, M, N)); 3609 PetscCall(MatSetType(*mat, MATMPIBAIJ)); 3610 PetscCall(MatSetBlockSize(*mat, bs)); 3611 PetscCall(MatSetUp(*mat)); 3612 PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_FALSE)); 3613 PetscCall(MatMPIBAIJSetPreallocationCSR(*mat, bs, i, j, a)); 3614 PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_TRUE)); 3615 PetscFunctionReturn(PETSC_SUCCESS); 3616 } 3617 3618 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 3619 { 3620 PetscInt m, N, i, rstart, nnz, Ii, bs, cbs; 3621 PetscInt *indx; 3622 PetscScalar *values; 3623 3624 PetscFunctionBegin; 3625 PetscCall(MatGetSize(inmat, &m, &N)); 3626 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 3627 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)inmat->data; 3628 PetscInt *dnz, *onz, mbs, Nbs, nbs; 3629 PetscInt *bindx, rmax = a->rmax, j; 3630 PetscMPIInt rank, size; 3631 3632 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 3633 mbs = m / bs; 3634 Nbs = N / cbs; 3635 if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnershipBlock(comm, cbs, &n, &N)); 3636 nbs = n / cbs; 3637 3638 PetscCall(PetscMalloc1(rmax, &bindx)); 3639 MatPreallocateBegin(comm, mbs, nbs, dnz, onz); /* inline function, output __end and __rstart are used below */ 3640 3641 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 3642 PetscCallMPI(MPI_Comm_rank(comm, &size)); 3643 if (rank == size - 1) { 3644 /* Check sum(nbs) = Nbs */ 3645 PetscCheck(__end == Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT, __end, Nbs); 3646 } 3647 3648 rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */ 3649 for (i = 0; i < mbs; i++) { 3650 PetscCall(MatGetRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL)); /* non-blocked nnz and indx */ 3651 nnz = nnz / bs; 3652 for (j = 0; j < nnz; j++) bindx[j] = indx[j * bs] / bs; 3653 PetscCall(MatPreallocateSet(i + rstart, nnz, bindx, dnz, onz)); 3654 PetscCall(MatRestoreRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL)); 3655 } 3656 PetscCall(PetscFree(bindx)); 3657 3658 PetscCall(MatCreate(comm, outmat)); 3659 PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 3660 PetscCall(MatSetBlockSizes(*outmat, bs, cbs)); 3661 PetscCall(MatSetType(*outmat, MATBAIJ)); 3662 PetscCall(MatSeqBAIJSetPreallocation(*outmat, bs, 0, dnz)); 3663 PetscCall(MatMPIBAIJSetPreallocation(*outmat, bs, 0, dnz, 0, onz)); 3664 MatPreallocateEnd(dnz, onz); 3665 PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 3666 } 3667 3668 /* numeric phase */ 3669 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 3670 PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL)); 3671 3672 for (i = 0; i < m; i++) { 3673 PetscCall(MatGetRow_SeqBAIJ(inmat, i, &nnz, &indx, &values)); 3674 Ii = i + rstart; 3675 PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES)); 3676 PetscCall(MatRestoreRow_SeqBAIJ(inmat, i, &nnz, &indx, &values)); 3677 } 3678 PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY)); 3679 PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY)); 3680 PetscFunctionReturn(PETSC_SUCCESS); 3681 } 3682