1 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/ 2 3 #include <petsc/private/hashseti.h> 4 #include <petscblaslapack.h> 5 #include <petscsf.h> 6 7 static PetscErrorCode MatDestroy_MPIBAIJ(Mat mat) 8 { 9 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 10 11 PetscFunctionBegin; 12 PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N)); 13 PetscCall(MatStashDestroy_Private(&mat->stash)); 14 PetscCall(MatStashDestroy_Private(&mat->bstash)); 15 PetscCall(MatDestroy(&baij->A)); 16 PetscCall(MatDestroy(&baij->B)); 17 #if defined(PETSC_USE_CTABLE) 18 PetscCall(PetscHMapIDestroy(&baij->colmap)); 19 #else 20 PetscCall(PetscFree(baij->colmap)); 21 #endif 22 PetscCall(PetscFree(baij->garray)); 23 PetscCall(VecDestroy(&baij->lvec)); 24 PetscCall(VecScatterDestroy(&baij->Mvctx)); 25 PetscCall(PetscFree2(baij->rowvalues, baij->rowindices)); 26 PetscCall(PetscFree(baij->barray)); 27 PetscCall(PetscFree2(baij->hd, baij->ht)); 28 PetscCall(PetscFree(baij->rangebs)); 29 PetscCall(PetscFree(mat->data)); 30 31 PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL)); 32 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL)); 33 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL)); 34 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocation_C", NULL)); 35 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocationCSR_C", NULL)); 36 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL)); 37 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetHashTableFactor_C", NULL)); 38 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpisbaij_C", NULL)); 39 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiadj_C", NULL)); 40 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiaij_C", 
NULL)); 41 #if defined(PETSC_HAVE_HYPRE) 42 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_hypre_C", NULL)); 43 #endif 44 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_is_C", NULL)); 45 PetscFunctionReturn(PETSC_SUCCESS); 46 } 47 48 /* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and MatAssemblyEnd_MPI_Hash() */ 49 #define TYPE BAIJ 50 #include "../src/mat/impls/aij/mpi/mpihashmat.h" 51 #undef TYPE 52 53 #if defined(PETSC_HAVE_HYPRE) 54 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *); 55 #endif 56 57 static PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A, Vec v, PetscInt idx[]) 58 { 59 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 60 PetscInt i, *idxb = NULL, m = A->rmap->n, bs = A->cmap->bs; 61 PetscScalar *va, *vv; 62 Vec vB, vA; 63 const PetscScalar *vb; 64 65 PetscFunctionBegin; 66 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vA)); 67 PetscCall(MatGetRowMaxAbs(a->A, vA, idx)); 68 69 PetscCall(VecGetArrayWrite(vA, &va)); 70 if (idx) { 71 for (i = 0; i < m; i++) { 72 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 73 } 74 } 75 76 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vB)); 77 PetscCall(PetscMalloc1(m, &idxb)); 78 PetscCall(MatGetRowMaxAbs(a->B, vB, idxb)); 79 80 PetscCall(VecGetArrayWrite(v, &vv)); 81 PetscCall(VecGetArrayRead(vB, &vb)); 82 for (i = 0; i < m; i++) { 83 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 84 vv[i] = vb[i]; 85 if (idx) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs); 86 } else { 87 vv[i] = va[i]; 88 if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > bs * a->garray[idxb[i] / bs] + (idxb[i] % bs)) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs); 89 } 90 } 91 PetscCall(VecRestoreArrayWrite(vA, &vv)); 92 PetscCall(VecRestoreArrayWrite(vA, &va)); 93 PetscCall(VecRestoreArrayRead(vB, &vb)); 94 PetscCall(PetscFree(idxb)); 95 PetscCall(VecDestroy(&vA)); 96 PetscCall(VecDestroy(&vB)); 
97 PetscFunctionReturn(PETSC_SUCCESS); 98 } 99 100 static PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat) 101 { 102 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 103 104 PetscFunctionBegin; 105 PetscCall(MatStoreValues(aij->A)); 106 PetscCall(MatStoreValues(aij->B)); 107 PetscFunctionReturn(PETSC_SUCCESS); 108 } 109 110 static PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat) 111 { 112 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 113 114 PetscFunctionBegin; 115 PetscCall(MatRetrieveValues(aij->A)); 116 PetscCall(MatRetrieveValues(aij->B)); 117 PetscFunctionReturn(PETSC_SUCCESS); 118 } 119 120 /* 121 Local utility routine that creates a mapping from the global column 122 number to the local number in the off-diagonal part of the local 123 storage of the matrix. This is done in a non scalable way since the 124 length of colmap equals the global matrix length. 125 */ 126 PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat) 127 { 128 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 129 Mat_SeqBAIJ *B = (Mat_SeqBAIJ *)baij->B->data; 130 PetscInt nbs = B->nbs, i, bs = mat->rmap->bs; 131 132 PetscFunctionBegin; 133 #if defined(PETSC_USE_CTABLE) 134 PetscCall(PetscHMapICreateWithSize(baij->nbs, &baij->colmap)); 135 for (i = 0; i < nbs; i++) PetscCall(PetscHMapISet(baij->colmap, baij->garray[i] + 1, i * bs + 1)); 136 #else 137 PetscCall(PetscCalloc1(baij->Nbs + 1, &baij->colmap)); 138 for (i = 0; i < nbs; i++) baij->colmap[baij->garray[i]] = i * bs + 1; 139 #endif 140 PetscFunctionReturn(PETSC_SUCCESS); 141 } 142 143 #define MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, orow, ocol) \ 144 do { \ 145 brow = row / bs; \ 146 rp = aj + ai[brow]; \ 147 ap = aa + bs2 * ai[brow]; \ 148 rmax = aimax[brow]; \ 149 nrow = ailen[brow]; \ 150 bcol = col / bs; \ 151 ridx = row % bs; \ 152 cidx = col % bs; \ 153 low = 0; \ 154 high = nrow; \ 155 while (high - low > 3) { \ 156 t = (low + high) / 2; \ 157 if (rp[t] > bcol) high = t; \ 158 else low = t; \ 159 } \ 160 for (_i = low; _i < 
high; _i++) { \ 161 if (rp[_i] > bcol) break; \ 162 if (rp[_i] == bcol) { \ 163 bap = ap + bs2 * _i + bs * cidx + ridx; \ 164 if (addv == ADD_VALUES) *bap += value; \ 165 else *bap = value; \ 166 goto a_noinsert; \ 167 } \ 168 } \ 169 if (a->nonew == 1) goto a_noinsert; \ 170 PetscCheck(a->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 171 MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, aimax, a->nonew, MatScalar); \ 172 N = nrow++ - 1; \ 173 /* shift up all the later entries in this row */ \ 174 PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \ 175 PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \ 176 PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \ 177 rp[_i] = bcol; \ 178 ap[bs2 * _i + bs * cidx + ridx] = value; \ 179 a_noinsert:; \ 180 ailen[brow] = nrow; \ 181 } while (0) 182 183 #define MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, orow, ocol) \ 184 do { \ 185 brow = row / bs; \ 186 rp = bj + bi[brow]; \ 187 ap = ba + bs2 * bi[brow]; \ 188 rmax = bimax[brow]; \ 189 nrow = bilen[brow]; \ 190 bcol = col / bs; \ 191 ridx = row % bs; \ 192 cidx = col % bs; \ 193 low = 0; \ 194 high = nrow; \ 195 while (high - low > 3) { \ 196 t = (low + high) / 2; \ 197 if (rp[t] > bcol) high = t; \ 198 else low = t; \ 199 } \ 200 for (_i = low; _i < high; _i++) { \ 201 if (rp[_i] > bcol) break; \ 202 if (rp[_i] == bcol) { \ 203 bap = ap + bs2 * _i + bs * cidx + ridx; \ 204 if (addv == ADD_VALUES) *bap += value; \ 205 else *bap = value; \ 206 goto b_noinsert; \ 207 } \ 208 } \ 209 if (b->nonew == 1) goto b_noinsert; \ 210 PetscCheck(b->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 211 MatSeqXAIJReallocateAIJ(B, b->mbs, bs2, nrow, brow, bcol, rmax, 
ba, bi, bj, rp, ap, bimax, b->nonew, MatScalar); \ 212 N = nrow++ - 1; \ 213 /* shift up all the later entries in this row */ \ 214 PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \ 215 PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \ 216 PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \ 217 rp[_i] = bcol; \ 218 ap[bs2 * _i + bs * cidx + ridx] = value; \ 219 b_noinsert:; \ 220 bilen[brow] = nrow; \ 221 } while (0) 222 223 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) 224 { 225 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 226 MatScalar value; 227 PetscBool roworiented = baij->roworiented; 228 PetscInt i, j, row, col; 229 PetscInt rstart_orig = mat->rmap->rstart; 230 PetscInt rend_orig = mat->rmap->rend, cstart_orig = mat->cmap->rstart; 231 PetscInt cend_orig = mat->cmap->rend, bs = mat->rmap->bs; 232 233 /* Some Variables required in the macro */ 234 Mat A = baij->A; 235 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)(A)->data; 236 PetscInt *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j; 237 MatScalar *aa = a->a; 238 239 Mat B = baij->B; 240 Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)(B)->data; 241 PetscInt *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j; 242 MatScalar *ba = b->a; 243 244 PetscInt *rp, ii, nrow, _i, rmax, N, brow, bcol; 245 PetscInt low, high, t, ridx, cidx, bs2 = a->bs2; 246 MatScalar *ap, *bap; 247 248 PetscFunctionBegin; 249 for (i = 0; i < m; i++) { 250 if (im[i] < 0) continue; 251 PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1); 252 if (im[i] >= rstart_orig && im[i] < rend_orig) { 253 row = im[i] - rstart_orig; 254 for (j = 0; j < n; j++) { 255 if (in[j] >= cstart_orig && in[j] < cend_orig) { 256 col = in[j] - cstart_orig; 257 if (roworiented) value = v[i * n + j]; 258 else value = v[i 
+ j * m]; 259 MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, im[i], in[j]); 260 } else if (in[j] < 0) { 261 continue; 262 } else { 263 PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1); 264 if (mat->was_assembled) { 265 if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 266 #if defined(PETSC_USE_CTABLE) 267 PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] / bs + 1, 0, &col)); 268 col = col - 1; 269 #else 270 col = baij->colmap[in[j] / bs] - 1; 271 #endif 272 if (col < 0 && !((Mat_SeqBAIJ *)(baij->B->data))->nonew) { 273 PetscCall(MatDisAssemble_MPIBAIJ(mat)); 274 col = in[j]; 275 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 276 B = baij->B; 277 b = (Mat_SeqBAIJ *)(B)->data; 278 bimax = b->imax; 279 bi = b->i; 280 bilen = b->ilen; 281 bj = b->j; 282 ba = b->a; 283 } else { 284 PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]); 285 col += in[j] % bs; 286 } 287 } else col = in[j]; 288 if (roworiented) value = v[i * n + j]; 289 else value = v[i + j * m]; 290 MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, im[i], in[j]); 291 /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */ 292 } 293 } 294 } else { 295 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]); 296 if (!baij->donotstash) { 297 mat->assembled = PETSC_FALSE; 298 if (roworiented) { 299 PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE)); 300 } else { 301 PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE)); 302 } 303 } 304 } 305 } 306 PetscFunctionReturn(PETSC_SUCCESS); 307 } 308 309 static inline 
PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol) 310 { 311 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 312 PetscInt *rp, low, high, t, ii, jj, nrow, i, rmax, N; 313 PetscInt *imax = a->imax, *ai = a->i, *ailen = a->ilen; 314 PetscInt *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs; 315 PetscBool roworiented = a->roworiented; 316 const PetscScalar *value = v; 317 MatScalar *ap, *aa = a->a, *bap; 318 319 PetscFunctionBegin; 320 rp = aj + ai[row]; 321 ap = aa + bs2 * ai[row]; 322 rmax = imax[row]; 323 nrow = ailen[row]; 324 value = v; 325 low = 0; 326 high = nrow; 327 while (high - low > 7) { 328 t = (low + high) / 2; 329 if (rp[t] > col) high = t; 330 else low = t; 331 } 332 for (i = low; i < high; i++) { 333 if (rp[i] > col) break; 334 if (rp[i] == col) { 335 bap = ap + bs2 * i; 336 if (roworiented) { 337 if (is == ADD_VALUES) { 338 for (ii = 0; ii < bs; ii++) { 339 for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++; 340 } 341 } else { 342 for (ii = 0; ii < bs; ii++) { 343 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 344 } 345 } 346 } else { 347 if (is == ADD_VALUES) { 348 for (ii = 0; ii < bs; ii++, value += bs) { 349 for (jj = 0; jj < bs; jj++) bap[jj] += value[jj]; 350 bap += bs; 351 } 352 } else { 353 for (ii = 0; ii < bs; ii++, value += bs) { 354 for (jj = 0; jj < bs; jj++) bap[jj] = value[jj]; 355 bap += bs; 356 } 357 } 358 } 359 goto noinsert2; 360 } 361 } 362 if (nonew == 1) goto noinsert2; 363 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol); 364 MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar); 365 N = nrow++ - 1; 366 high++; 367 /* shift up all the later entries in this row */ 368 PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1)); 
369 PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1))); 370 rp[i] = col; 371 bap = ap + bs2 * i; 372 if (roworiented) { 373 for (ii = 0; ii < bs; ii++) { 374 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 375 } 376 } else { 377 for (ii = 0; ii < bs; ii++) { 378 for (jj = 0; jj < bs; jj++) *bap++ = *value++; 379 } 380 } 381 noinsert2:; 382 ailen[row] = nrow; 383 PetscFunctionReturn(PETSC_SUCCESS); 384 } 385 386 /* 387 This routine should be optimized so that the block copy at ** Here a copy is required ** below is not needed 388 by passing additional stride information into the MatSetValuesBlocked_SeqBAIJ_Inlined() routine 389 */ 390 static PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) 391 { 392 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 393 const PetscScalar *value; 394 MatScalar *barray = baij->barray; 395 PetscBool roworiented = baij->roworiented; 396 PetscInt i, j, ii, jj, row, col, rstart = baij->rstartbs; 397 PetscInt rend = baij->rendbs, cstart = baij->cstartbs, stepval; 398 PetscInt cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2; 399 400 PetscFunctionBegin; 401 if (!barray) { 402 PetscCall(PetscMalloc1(bs2, &barray)); 403 baij->barray = barray; 404 } 405 406 if (roworiented) stepval = (n - 1) * bs; 407 else stepval = (m - 1) * bs; 408 409 for (i = 0; i < m; i++) { 410 if (im[i] < 0) continue; 411 PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1); 412 if (im[i] >= rstart && im[i] < rend) { 413 row = im[i] - rstart; 414 for (j = 0; j < n; j++) { 415 /* If NumCol = 1 then a copy is not required */ 416 if ((roworiented) && (n == 1)) { 417 barray = (MatScalar *)v + i * bs2; 418 } else if ((!roworiented) && (m == 1)) { 419 barray = (MatScalar *)v + j * bs2; 420 } else { /* Here a copy is required 
*/ 421 if (roworiented) { 422 value = v + (i * (stepval + bs) + j) * bs; 423 } else { 424 value = v + (j * (stepval + bs) + i) * bs; 425 } 426 for (ii = 0; ii < bs; ii++, value += bs + stepval) { 427 for (jj = 0; jj < bs; jj++) barray[jj] = value[jj]; 428 barray += bs; 429 } 430 barray -= bs2; 431 } 432 433 if (in[j] >= cstart && in[j] < cend) { 434 col = in[j] - cstart; 435 PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j])); 436 } else if (in[j] < 0) { 437 continue; 438 } else { 439 PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1); 440 if (mat->was_assembled) { 441 if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 442 443 #if defined(PETSC_USE_DEBUG) 444 #if defined(PETSC_USE_CTABLE) 445 { 446 PetscInt data; 447 PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &data)); 448 PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap"); 449 } 450 #else 451 PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap"); 452 #endif 453 #endif 454 #if defined(PETSC_USE_CTABLE) 455 PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col)); 456 col = (col - 1) / bs; 457 #else 458 col = (baij->colmap[in[j]] - 1) / bs; 459 #endif 460 if (col < 0 && !((Mat_SeqBAIJ *)(baij->B->data))->nonew) { 461 PetscCall(MatDisAssemble_MPIBAIJ(mat)); 462 col = in[j]; 463 } else PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new blocked indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]); 464 } else col = in[j]; 465 PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j])); 466 } 467 } 468 } else { 469 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process block indexed row %" PetscInt_FMT 
" even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]); 470 if (!baij->donotstash) { 471 if (roworiented) { 472 PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 473 } else { 474 PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 475 } 476 } 477 } 478 } 479 PetscFunctionReturn(PETSC_SUCCESS); 480 } 481 482 #define HASH_KEY 0.6180339887 483 #define HASH(size, key, tmp) (tmp = (key)*HASH_KEY, (PetscInt)((size) * (tmp - (PetscInt)tmp))) 484 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 485 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 486 static PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) 487 { 488 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 489 PetscBool roworiented = baij->roworiented; 490 PetscInt i, j, row, col; 491 PetscInt rstart_orig = mat->rmap->rstart; 492 PetscInt rend_orig = mat->rmap->rend, Nbs = baij->Nbs; 493 PetscInt h1, key, size = baij->ht_size, bs = mat->rmap->bs, *HT = baij->ht, idx; 494 PetscReal tmp; 495 MatScalar **HD = baij->hd, value; 496 PetscInt total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct; 497 498 PetscFunctionBegin; 499 for (i = 0; i < m; i++) { 500 if (PetscDefined(USE_DEBUG)) { 501 PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row"); 502 PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1); 503 } 504 row = im[i]; 505 if (row >= rstart_orig && row < rend_orig) { 506 for (j = 0; j < n; j++) { 507 col = in[j]; 508 if (roworiented) value = v[i * n + j]; 509 else value = v[i + j * m]; 510 /* Look up PetscInto the Hash Table */ 511 key = (row / bs) * Nbs + (col / bs) + 1; 512 h1 = HASH(size, key, tmp); 513 514 idx = h1; 515 if 
(PetscDefined(USE_DEBUG)) { 516 insert_ct++; 517 total_ct++; 518 if (HT[idx] != key) { 519 for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++) 520 ; 521 if (idx == size) { 522 for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++) 523 ; 524 PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col); 525 } 526 } 527 } else if (HT[idx] != key) { 528 for (idx = h1; (idx < size) && (HT[idx] != key); idx++) 529 ; 530 if (idx == size) { 531 for (idx = 0; (idx < h1) && (HT[idx] != key); idx++) 532 ; 533 PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col); 534 } 535 } 536 /* A HASH table entry is found, so insert the values at the correct address */ 537 if (addv == ADD_VALUES) *(HD[idx] + (col % bs) * bs + (row % bs)) += value; 538 else *(HD[idx] + (col % bs) * bs + (row % bs)) = value; 539 } 540 } else if (!baij->donotstash) { 541 if (roworiented) { 542 PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE)); 543 } else { 544 PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE)); 545 } 546 } 547 } 548 if (PetscDefined(USE_DEBUG)) { 549 baij->ht_total_ct += total_ct; 550 baij->ht_insert_ct += insert_ct; 551 } 552 PetscFunctionReturn(PETSC_SUCCESS); 553 } 554 555 static PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) 556 { 557 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 558 PetscBool roworiented = baij->roworiented; 559 PetscInt i, j, ii, jj, row, col; 560 PetscInt rstart = baij->rstartbs; 561 PetscInt rend = mat->rmap->rend, stepval, bs = mat->rmap->bs, bs2 = baij->bs2, nbs2 = n * bs2; 562 PetscInt h1, key, size = baij->ht_size, idx, *HT = baij->ht, Nbs = baij->Nbs; 563 PetscReal tmp; 564 
MatScalar **HD = baij->hd, *baij_a; 565 const PetscScalar *v_t, *value; 566 PetscInt total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct; 567 568 PetscFunctionBegin; 569 if (roworiented) stepval = (n - 1) * bs; 570 else stepval = (m - 1) * bs; 571 572 for (i = 0; i < m; i++) { 573 if (PetscDefined(USE_DEBUG)) { 574 PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row: %" PetscInt_FMT, im[i]); 575 PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1); 576 } 577 row = im[i]; 578 v_t = v + i * nbs2; 579 if (row >= rstart && row < rend) { 580 for (j = 0; j < n; j++) { 581 col = in[j]; 582 583 /* Look up into the Hash Table */ 584 key = row * Nbs + col + 1; 585 h1 = HASH(size, key, tmp); 586 587 idx = h1; 588 if (PetscDefined(USE_DEBUG)) { 589 total_ct++; 590 insert_ct++; 591 if (HT[idx] != key) { 592 for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++) 593 ; 594 if (idx == size) { 595 for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++) 596 ; 597 PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col); 598 } 599 } 600 } else if (HT[idx] != key) { 601 for (idx = h1; (idx < size) && (HT[idx] != key); idx++) 602 ; 603 if (idx == size) { 604 for (idx = 0; (idx < h1) && (HT[idx] != key); idx++) 605 ; 606 PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col); 607 } 608 } 609 baij_a = HD[idx]; 610 if (roworiented) { 611 /*value = v + i*(stepval+bs)*bs + j*bs;*/ 612 /* value = v + (i*(stepval+bs)+j)*bs; */ 613 value = v_t; 614 v_t += bs; 615 if (addv == ADD_VALUES) { 616 for (ii = 0; ii < bs; ii++, value += stepval) { 617 for (jj = ii; jj < bs2; jj += bs) baij_a[jj] += *value++; 618 } 619 } else { 620 for (ii = 0; ii < bs; ii++, value 
+= stepval) { 621 for (jj = ii; jj < bs2; jj += bs) baij_a[jj] = *value++; 622 } 623 } 624 } else { 625 value = v + j * (stepval + bs) * bs + i * bs; 626 if (addv == ADD_VALUES) { 627 for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) { 628 for (jj = 0; jj < bs; jj++) baij_a[jj] += *value++; 629 } 630 } else { 631 for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) { 632 for (jj = 0; jj < bs; jj++) baij_a[jj] = *value++; 633 } 634 } 635 } 636 } 637 } else { 638 if (!baij->donotstash) { 639 if (roworiented) { 640 PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 641 } else { 642 PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 643 } 644 } 645 } 646 } 647 if (PetscDefined(USE_DEBUG)) { 648 baij->ht_total_ct += total_ct; 649 baij->ht_insert_ct += insert_ct; 650 } 651 PetscFunctionReturn(PETSC_SUCCESS); 652 } 653 654 static PetscErrorCode MatGetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[]) 655 { 656 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 657 PetscInt bs = mat->rmap->bs, i, j, bsrstart = mat->rmap->rstart, bsrend = mat->rmap->rend; 658 PetscInt bscstart = mat->cmap->rstart, bscend = mat->cmap->rend, row, col, data; 659 660 PetscFunctionBegin; 661 for (i = 0; i < m; i++) { 662 if (idxm[i] < 0) continue; /* negative row */ 663 PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1); 664 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 665 row = idxm[i] - bsrstart; 666 for (j = 0; j < n; j++) { 667 if (idxn[j] < 0) continue; /* negative column */ 668 PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1); 669 if (idxn[j] >= bscstart && idxn[j] < bscend) { 670 col = idxn[j] - bscstart; 671 
PetscCall(MatGetValues_SeqBAIJ(baij->A, 1, &row, 1, &col, v + i * n + j)); 672 } else { 673 if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 674 #if defined(PETSC_USE_CTABLE) 675 PetscCall(PetscHMapIGetWithDefault(baij->colmap, idxn[j] / bs + 1, 0, &data)); 676 data--; 677 #else 678 data = baij->colmap[idxn[j] / bs] - 1; 679 #endif 680 if ((data < 0) || (baij->garray[data / bs] != idxn[j] / bs)) *(v + i * n + j) = 0.0; 681 else { 682 col = data + idxn[j] % bs; 683 PetscCall(MatGetValues_SeqBAIJ(baij->B, 1, &row, 1, &col, v + i * n + j)); 684 } 685 } 686 } 687 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported"); 688 } 689 PetscFunctionReturn(PETSC_SUCCESS); 690 } 691 692 static PetscErrorCode MatNorm_MPIBAIJ(Mat mat, NormType type, PetscReal *nrm) 693 { 694 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 695 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ *)baij->A->data, *bmat = (Mat_SeqBAIJ *)baij->B->data; 696 PetscInt i, j, bs2 = baij->bs2, bs = baij->A->rmap->bs, nz, row, col; 697 PetscReal sum = 0.0; 698 MatScalar *v; 699 700 PetscFunctionBegin; 701 if (baij->size == 1) { 702 PetscCall(MatNorm(baij->A, type, nrm)); 703 } else { 704 if (type == NORM_FROBENIUS) { 705 v = amat->a; 706 nz = amat->nz * bs2; 707 for (i = 0; i < nz; i++) { 708 sum += PetscRealPart(PetscConj(*v) * (*v)); 709 v++; 710 } 711 v = bmat->a; 712 nz = bmat->nz * bs2; 713 for (i = 0; i < nz; i++) { 714 sum += PetscRealPart(PetscConj(*v) * (*v)); 715 v++; 716 } 717 PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 718 *nrm = PetscSqrtReal(*nrm); 719 } else if (type == NORM_1) { /* max column sum */ 720 PetscReal *tmp, *tmp2; 721 PetscInt *jj, *garray = baij->garray, cstart = baij->rstartbs; 722 PetscCall(PetscCalloc1(mat->cmap->N, &tmp)); 723 PetscCall(PetscMalloc1(mat->cmap->N, &tmp2)); 724 v = amat->a; 725 jj = amat->j; 726 for (i = 0; i < amat->nz; i++) { 727 for (j = 0; j < bs; j++) { 728 col = bs * 
(cstart + *jj) + j; /* column index */ 729 for (row = 0; row < bs; row++) { 730 tmp[col] += PetscAbsScalar(*v); 731 v++; 732 } 733 } 734 jj++; 735 } 736 v = bmat->a; 737 jj = bmat->j; 738 for (i = 0; i < bmat->nz; i++) { 739 for (j = 0; j < bs; j++) { 740 col = bs * garray[*jj] + j; 741 for (row = 0; row < bs; row++) { 742 tmp[col] += PetscAbsScalar(*v); 743 v++; 744 } 745 } 746 jj++; 747 } 748 PetscCall(MPIU_Allreduce(tmp, tmp2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 749 *nrm = 0.0; 750 for (j = 0; j < mat->cmap->N; j++) { 751 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 752 } 753 PetscCall(PetscFree(tmp)); 754 PetscCall(PetscFree(tmp2)); 755 } else if (type == NORM_INFINITY) { /* max row sum */ 756 PetscReal *sums; 757 PetscCall(PetscMalloc1(bs, &sums)); 758 sum = 0.0; 759 for (j = 0; j < amat->mbs; j++) { 760 for (row = 0; row < bs; row++) sums[row] = 0.0; 761 v = amat->a + bs2 * amat->i[j]; 762 nz = amat->i[j + 1] - amat->i[j]; 763 for (i = 0; i < nz; i++) { 764 for (col = 0; col < bs; col++) { 765 for (row = 0; row < bs; row++) { 766 sums[row] += PetscAbsScalar(*v); 767 v++; 768 } 769 } 770 } 771 v = bmat->a + bs2 * bmat->i[j]; 772 nz = bmat->i[j + 1] - bmat->i[j]; 773 for (i = 0; i < nz; i++) { 774 for (col = 0; col < bs; col++) { 775 for (row = 0; row < bs; row++) { 776 sums[row] += PetscAbsScalar(*v); 777 v++; 778 } 779 } 780 } 781 for (row = 0; row < bs; row++) { 782 if (sums[row] > sum) sum = sums[row]; 783 } 784 } 785 PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat))); 786 PetscCall(PetscFree(sums)); 787 } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for this norm yet"); 788 } 789 PetscFunctionReturn(PETSC_SUCCESS); 790 } 791 792 /* 793 Creates the hash table, and sets the table 794 This table is created only once. 795 If new entried need to be added to the matrix 796 then the hash table has to be destroyed and 797 recreated. 
798 */ 799 static PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat, PetscReal factor) 800 { 801 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 802 Mat A = baij->A, B = baij->B; 803 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data; 804 PetscInt i, j, k, nz = a->nz + b->nz, h1, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j; 805 PetscInt ht_size, bs2 = baij->bs2, rstart = baij->rstartbs; 806 PetscInt cstart = baij->cstartbs, *garray = baij->garray, row, col, Nbs = baij->Nbs; 807 PetscInt *HT, key; 808 MatScalar **HD; 809 PetscReal tmp; 810 #if defined(PETSC_USE_INFO) 811 PetscInt ct = 0, max = 0; 812 #endif 813 814 PetscFunctionBegin; 815 if (baij->ht) PetscFunctionReturn(PETSC_SUCCESS); 816 817 baij->ht_size = (PetscInt)(factor * nz); 818 ht_size = baij->ht_size; 819 820 /* Allocate Memory for Hash Table */ 821 PetscCall(PetscCalloc2(ht_size, &baij->hd, ht_size, &baij->ht)); 822 HD = baij->hd; 823 HT = baij->ht; 824 825 /* Loop Over A */ 826 for (i = 0; i < a->mbs; i++) { 827 for (j = ai[i]; j < ai[i + 1]; j++) { 828 row = i + rstart; 829 col = aj[j] + cstart; 830 831 key = row * Nbs + col + 1; 832 h1 = HASH(ht_size, key, tmp); 833 for (k = 0; k < ht_size; k++) { 834 if (!HT[(h1 + k) % ht_size]) { 835 HT[(h1 + k) % ht_size] = key; 836 HD[(h1 + k) % ht_size] = a->a + j * bs2; 837 break; 838 #if defined(PETSC_USE_INFO) 839 } else { 840 ct++; 841 #endif 842 } 843 } 844 #if defined(PETSC_USE_INFO) 845 if (k > max) max = k; 846 #endif 847 } 848 } 849 /* Loop Over B */ 850 for (i = 0; i < b->mbs; i++) { 851 for (j = bi[i]; j < bi[i + 1]; j++) { 852 row = i + rstart; 853 col = garray[bj[j]]; 854 key = row * Nbs + col + 1; 855 h1 = HASH(ht_size, key, tmp); 856 for (k = 0; k < ht_size; k++) { 857 if (!HT[(h1 + k) % ht_size]) { 858 HT[(h1 + k) % ht_size] = key; 859 HD[(h1 + k) % ht_size] = b->a + j * bs2; 860 break; 861 #if defined(PETSC_USE_INFO) 862 } else { 863 ct++; 864 #endif 865 } 866 } 867 #if defined(PETSC_USE_INFO) 868 if (k > max) max = 
k; 869 #endif 870 } 871 } 872 873 /* Print Summary */ 874 #if defined(PETSC_USE_INFO) 875 for (i = 0, j = 0; i < ht_size; i++) { 876 if (HT[i]) j++; 877 } 878 PetscCall(PetscInfo(mat, "Average Search = %5.2g,max search = %" PetscInt_FMT "\n", (!j) ? (double)0.0 : (double)(((PetscReal)(ct + j)) / (double)j), max)); 879 #endif 880 PetscFunctionReturn(PETSC_SUCCESS); 881 } 882 883 static PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat, MatAssemblyType mode) 884 { 885 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 886 PetscInt nstash, reallocs; 887 888 PetscFunctionBegin; 889 if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS); 890 891 PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range)); 892 PetscCall(MatStashScatterBegin_Private(mat, &mat->bstash, baij->rangebs)); 893 PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs)); 894 PetscCall(PetscInfo(mat, "Stash has %" PetscInt_FMT " entries,uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs)); 895 PetscCall(MatStashGetInfo_Private(&mat->bstash, &nstash, &reallocs)); 896 PetscCall(PetscInfo(mat, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs)); 897 PetscFunctionReturn(PETSC_SUCCESS); 898 } 899 900 static PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat, MatAssemblyType mode) 901 { 902 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 903 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)baij->A->data; 904 PetscInt i, j, rstart, ncols, flg, bs2 = baij->bs2; 905 PetscInt *row, *col; 906 PetscBool r1, r2, r3, other_disassembled; 907 MatScalar *val; 908 PetscMPIInt n; 909 910 PetscFunctionBegin; 911 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 912 if (!baij->donotstash && !mat->nooffprocentries) { 913 while (1) { 914 PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg)); 915 if (!flg) break; 916 917 for (i = 0; i < n;) { 918 /* Now identify the consecutive vals 
/*
  MatAssemblyEnd_MPIBAIJ - Completes assembly: drains the point and block stashes,
  assembles the local A (diagonal) and B (off-diagonal) parts, coordinates a possible
  collective disassembly, sets up the multiply scatter on first final assembly, and
  optionally installs the hash-table MatSetValues fast path.

  Collective; the MPIU_Allreduce calls below must be reached by all ranks.
*/
static PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ *a    = (Mat_SeqBAIJ *)baij->A->data;
  PetscInt     i, j, rstart, ncols, flg, bs2 = baij->bs2;
  PetscInt    *row, *col;
  PetscBool    r1, r2, r3, other_disassembled;
  MatScalar   *val;
  PetscMPIInt  n;

  PetscFunctionBegin;
  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  if (!baij->donotstash && !mat->nooffprocentries) {
    /* drain the scalar (point-entry) stash, one received message at a time */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ *)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;

    (((Mat_SeqBAIJ *)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->bstash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* each stashed block is bs2 values, hence the val + i*bs2 stride */
        PetscCall(MatSetValuesBlocked_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i * bs2, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->bstash));

    baij->roworiented = r1;
    a->roworiented    = r2;

    ((Mat_SeqBAIJ *)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  PetscCall(MatAssemblyBegin(baij->A, mode));
  PetscCall(MatAssemblyEnd(baij->A, mode));

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ *)baij->B->data)->nonew) {
    PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !other_disassembled) PetscCall(MatDisAssemble_MPIBAIJ(mat));
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIBAIJ(mat));
  PetscCall(MatAssemblyBegin(baij->B, mode));
  PetscCall(MatAssemblyEnd(baij->B, mode));

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    PetscCall(PetscInfo(mat, "Average Hash Table Search in MatSetValues = %5.2f\n", (double)((PetscReal)baij->ht_total_ct) / baij->ht_insert_ct));

    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  /* on first final assembly with the hash-table option, build the table and switch to the fast SetValues paths */
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    PetscCall(MatCreateHashTable_MPIBAIJ_Private(mat, baij->ht_fact));

    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  /* scratch space used by MatGetRow; sized for the old pattern, so discard it */
  PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));

  baij->rowvalues = NULL;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ *)(baij->A->data))->nonew) {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

extern PetscErrorCode MatView_SeqBAIJ(Mat, PetscViewer);
#include <petscdraw.h>
/*
  MatView_MPIBAIJ_ASCIIorDraworSocket - Views an MPIBAIJ matrix on an ASCII, draw, or
  socket viewer.

  The INFO/INFO_DETAIL/FACTOR_INFO ASCII formats print summaries and return early.
  Otherwise the whole matrix is gathered onto rank 0 as a temporary MPIBAIJ matrix
  (rank 0 owns all rows) and that rank views its sequential diagonal part.  All ranks
  participate in the gather and in the sub-viewer calls, which are collective.
*/
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
{
  Mat_MPIBAIJ      *baij = (Mat_MPIBAIJ *)mat->data;
  PetscMPIInt       rank = baij->rank;
  PetscInt          bs   = mat->rmap->bs;
  PetscBool         iascii, isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  if (iascii) {
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                   mat->rmap->bs, (double)info.memory));
      PetscCall(MatGetInfo(baij->A, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(MatGetInfo(baij->B, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(baij->Mvctx, viewer));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscCall(PetscViewerASCIIPrintf(viewer, "  block size is %" PetscInt_FMT "\n", bs));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(PETSC_SUCCESS);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    Mat_SeqBAIJ *Aloc;
    PetscInt     M = mat->rmap->N, N = mat->cmap->N, *ai, *aj, col, i, j, k, *rvals, mbs = baij->mbs;
    MatScalar   *a;
    const char  *matname;

    /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
    if (rank == 0) {
      PetscCall(MatSetSizes(A, M, N, M, N));
    } else {
      PetscCall(MatSetSizes(A, 0, 0, M, N));
    }
    PetscCall(MatSetType(A, MATMPIBAIJ));
    PetscCall(MatMPIBAIJSetPreallocation(A, mat->rmap->bs, 0, NULL, 0, NULL));
    PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ *)baij->A->data;
    ai   = Aloc->i;
    aj   = Aloc->j;
    a    = Aloc->a;
    /* rvals holds the bs consecutive global (point) row numbers of block row i */
    PetscCall(PetscMalloc1(bs, &rvals));

    for (i = 0; i < mbs; i++) {
      rvals[0] = bs * (baij->rstartbs + i);
      for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
      for (j = ai[i]; j < ai[i + 1]; j++) {
        col = (baij->cstartbs + aj[j]) * bs;
        for (k = 0; k < bs; k++) {
          /* insert one point column of the bs x bs block at a time; `a` walks the values column-wise */
          PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
          col++;
          a += bs;
        }
      }
    }
    /* copy over the B part; garray maps local block columns to global */
    Aloc = (Mat_SeqBAIJ *)baij->B->data;
    ai   = Aloc->i;
    aj   = Aloc->j;
    a    = Aloc->a;
    for (i = 0; i < mbs; i++) {
      rvals[0] = bs * (baij->rstartbs + i);
      for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
      for (j = ai[i]; j < ai[i + 1]; j++) {
        col = baij->garray[aj[j]] * bs;
        for (k = 0; k < bs; k++) {
          PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
          col++;
          a += bs;
        }
      }
    }
    PetscCall(PetscFree(rvals));
    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    if (((PetscObject)mat)->name) PetscCall(PetscObjectGetName((PetscObject)mat, &matname));
    if (rank == 0) {
      if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIBAIJ *)(A->data))->A, matname));
      PetscCall(MatView_SeqBAIJ(((Mat_MPIBAIJ *)(A->data))->A, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
/* Used for both MPIBAIJ and MPISBAIJ matrices */
/*
  MatView_MPIBAIJ_Binary - Writes the matrix in PETSc's binary (AIJ-style) format:
  header, per-point-row lengths, column indices, then values.

  Each block row is expanded into bs point rows.  Within a point row the columns are
  emitted in globally increasing order by interleaving the three segments:
  B-blocks left of the diagonal block (garray[...] <= cs/bs), then all A-blocks
  (shifted by cs), then the remaining B-blocks.  The cnt==nz PetscCheck guards this
  merge invariant.  Values are pulled from the column-major bs x bs block storage,
  hence the B->a[bs*(bs*jb + l) + k] indexing (k = point row within block, l = point
  column within block).
*/
PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat, PetscViewer viewer)
{
  Mat_MPIBAIJ    *aij    = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ    *A      = (Mat_SeqBAIJ *)aij->A->data;
  Mat_SeqBAIJ    *B      = (Mat_SeqBAIJ *)aij->B->data;
  const PetscInt *garray = aij->garray;
  PetscInt        header[4], M, N, m, rs, cs, bs, cnt, i, j, ja, jb, k, l;
  PetscInt64      nz, hnz;
  PetscInt       *rowlens, *colidxs;
  PetscScalar    *matvals;
  PetscMPIInt     rank;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  M  = mat->rmap->N;
  N  = mat->cmap->N;
  m  = mat->rmap->n;
  rs = mat->rmap->rstart;
  cs = mat->cmap->rstart;
  bs = mat->rmap->bs;
  nz = bs * bs * (A->nz + B->nz); /* local point nonzeros; PetscInt64 to avoid overflow in the global sum */

  /* write matrix header */
  header[0] = MAT_FILE_CLASSID;
  header[1] = M;
  header[2] = N;
  PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_INT64, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
  if (rank == 0) PetscCall(PetscIntCast(hnz, &header[3])); /* only rank 0's header is used by the viewer */
  PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));

  /* fill in and store row lengths */
  PetscCall(PetscMalloc1(m, &rowlens));
  for (cnt = 0, i = 0; i < A->mbs; i++)
    for (j = 0; j < bs; j++) rowlens[cnt++] = bs * (A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i]);
  PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
  PetscCall(PetscFree(rowlens));

  /* fill in and store column indices */
  PetscCall(PetscMalloc1(nz, &colidxs));
  for (cnt = 0, i = 0; i < A->mbs; i++) {
    for (k = 0; k < bs; k++) {
      /* B-blocks with global block column <= cs/bs come before the diagonal part */
      for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
        if (garray[B->j[jb]] > cs / bs) break;
        for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l;
      }
      for (ja = A->i[i]; ja < A->i[i + 1]; ja++)
        for (l = 0; l < bs; l++) colidxs[cnt++] = bs * A->j[ja] + l + cs;
      /* remaining B-blocks lie right of the diagonal part */
      for (; jb < B->i[i + 1]; jb++)
        for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l;
    }
  }
  PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
  PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_INT));
  PetscCall(PetscFree(colidxs));

  /* fill in and store nonzero values, in the same interleaved order as the column indices */
  PetscCall(PetscMalloc1(nz, &matvals));
  for (cnt = 0, i = 0; i < A->mbs; i++) {
    for (k = 0; k < bs; k++) {
      for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
        if (garray[B->j[jb]] > cs / bs) break;
        for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k];
      }
      for (ja = A->i[i]; ja < A->i[i + 1]; ja++)
        for (l = 0; l < bs; l++) matvals[cnt++] = A->a[bs * (bs * ja + l) + k];
      for (; jb = B->i[i + 1], 0; ) { } /* see note below */
      for (; jb < B->i[i + 1]; jb++)
        for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k];
    }
  }
  PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_SCALAR));
  PetscCall(PetscFree(matvals));

  /* write block size option to the viewer's .info file */
  PetscCall(MatView_Binary_BlockSizes(mat, viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}
+ k]; 1202 } 1203 } 1204 PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_SCALAR)); 1205 PetscCall(PetscFree(matvals)); 1206 1207 /* write block size option to the viewer's .info file */ 1208 PetscCall(MatView_Binary_BlockSizes(mat, viewer)); 1209 PetscFunctionReturn(PETSC_SUCCESS); 1210 } 1211 1212 PetscErrorCode MatView_MPIBAIJ(Mat mat, PetscViewer viewer) 1213 { 1214 PetscBool iascii, isdraw, issocket, isbinary; 1215 1216 PetscFunctionBegin; 1217 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 1218 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1219 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket)); 1220 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1221 if (iascii || isdraw || issocket) { 1222 PetscCall(MatView_MPIBAIJ_ASCIIorDraworSocket(mat, viewer)); 1223 } else if (isbinary) PetscCall(MatView_MPIBAIJ_Binary(mat, viewer)); 1224 PetscFunctionReturn(PETSC_SUCCESS); 1225 } 1226 1227 static PetscErrorCode MatMult_MPIBAIJ(Mat A, Vec xx, Vec yy) 1228 { 1229 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1230 PetscInt nt; 1231 1232 PetscFunctionBegin; 1233 PetscCall(VecGetLocalSize(xx, &nt)); 1234 PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A and xx"); 1235 PetscCall(VecGetLocalSize(yy, &nt)); 1236 PetscCheck(nt == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A and yy"); 1237 PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1238 PetscCall((*a->A->ops->mult)(a->A, xx, yy)); 1239 PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1240 PetscCall((*a->B->ops->multadd)(a->B, a->lvec, yy, yy)); 1241 PetscFunctionReturn(PETSC_SUCCESS); 1242 } 1243 1244 static PetscErrorCode MatMultAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz) 1245 { 1246 Mat_MPIBAIJ *a = 
(Mat_MPIBAIJ *)A->data; 1247 1248 PetscFunctionBegin; 1249 PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1250 PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz)); 1251 PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1252 PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz)); 1253 PetscFunctionReturn(PETSC_SUCCESS); 1254 } 1255 1256 static PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A, Vec xx, Vec yy) 1257 { 1258 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1259 1260 PetscFunctionBegin; 1261 /* do nondiagonal part */ 1262 PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec)); 1263 /* do local part */ 1264 PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy)); 1265 /* add partial results together */ 1266 PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE)); 1267 PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE)); 1268 PetscFunctionReturn(PETSC_SUCCESS); 1269 } 1270 1271 static PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz) 1272 { 1273 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1274 1275 PetscFunctionBegin; 1276 /* do nondiagonal part */ 1277 PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec)); 1278 /* do local part */ 1279 PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz)); 1280 /* add partial results together */ 1281 PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE)); 1282 PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE)); 1283 PetscFunctionReturn(PETSC_SUCCESS); 1284 } 1285 1286 /* 1287 This only works correctly for square matrices where the subblock A->A is the 1288 diagonal block 1289 */ 1290 static PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A, Vec v) 1291 { 1292 PetscFunctionBegin; 1293 PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block"); 1294 
/*
   This only works correctly for square matrices where the subblock A->A is the
   diagonal block
*/
static PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A, Vec v)
{
  PetscFunctionBegin;
  PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
  PetscCall(MatGetDiagonal(((Mat_MPIBAIJ *)A->data)->A, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Scales the whole matrix by aa: both local parts are scaled independently. */
static PetscErrorCode MatScale_MPIBAIJ(Mat A, PetscScalar aa)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatScale(a->A, aa));
  PetscCall(MatScale(a->B, aa));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatGetRow_MPIBAIJ - Returns one locally owned (point) row, merging the rows of the
  diagonal part A and off-diagonal part B into globally sorted column order.

  B's columns split into those left of the diagonal block (cmap[...] < cstart) and those
  right of it; `imark` records the split point so the merged order is
  [left B | all of A | right B].  Results are copied into the lazily allocated
  rowvalues/rowindices scratch (sized for the longest row); a matching
  MatRestoreRow_MPIBAIJ() call is required before the next MatGetRow().
*/
PetscErrorCode MatGetRow_MPIBAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data;
  PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
  PetscInt     bs = matin->rmap->bs, bs2 = mat->bs2, i, *cworkA, *cworkB, **pcA, **pcB;
  PetscInt     nztot, nzA, nzB, lrow, brstart = matin->rmap->rstart, brend = matin->rmap->rend;
  PetscInt    *cmap, *idx_p, cstart = mat->cstartbs;

  PetscFunctionBegin;
  PetscCheck(row >= brstart && row < brend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local rows");
  PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *)mat->A->data, *Ba = (Mat_SeqBAIJ *)mat->B->data;
    PetscInt     max = 1, mbs = mat->mbs, tmp;
    for (i = 0; i < mbs; i++) {
      tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    PetscCall(PetscMalloc2(max * bs2, &mat->rowvalues, max * bs2, &mat->rowindices));
  }
  lrow = row - brstart;

  /* request only the outputs the caller wants from the Seq getrow's */
  pvA = &vworkA;
  pcA = &cworkA;
  pvB = &vworkB;
  pcB = &cworkB;
  if (!v) {
    pvA = NULL;
    pvB = NULL;
  }
  if (!idx) {
    pcA = NULL;
    if (!v) pcB = NULL; /* B's columns are still needed to position values when v is requested */
  }
  PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i = 0; i < nzB; i++) {
          if (cmap[cworkB[i] / bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
        for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* split point already found while copying values */
          for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
        } else {
          for (i = 0; i < nzB; i++) {
            if (cmap[cworkB[i] / bs] < cstart) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
            else break;
          }
          imark = i;
        }
        for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart * bs + cworkA[i];
        for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
      }
    } else {
      if (idx) *idx = NULL;
      if (v) *v = NULL;
    }
  }
  *nz = nztot;
  PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
  PetscFunctionReturn(PETSC_SUCCESS);
}
&nzB, pcB, pvB)); 1388 PetscFunctionReturn(PETSC_SUCCESS); 1389 } 1390 1391 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1392 { 1393 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 1394 1395 PetscFunctionBegin; 1396 PetscCheck(baij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow not called"); 1397 baij->getrowactive = PETSC_FALSE; 1398 PetscFunctionReturn(PETSC_SUCCESS); 1399 } 1400 1401 static PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A) 1402 { 1403 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data; 1404 1405 PetscFunctionBegin; 1406 PetscCall(MatZeroEntries(l->A)); 1407 PetscCall(MatZeroEntries(l->B)); 1408 PetscFunctionReturn(PETSC_SUCCESS); 1409 } 1410 1411 static PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin, MatInfoType flag, MatInfo *info) 1412 { 1413 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)matin->data; 1414 Mat A = a->A, B = a->B; 1415 PetscLogDouble isend[5], irecv[5]; 1416 1417 PetscFunctionBegin; 1418 info->block_size = (PetscReal)matin->rmap->bs; 1419 1420 PetscCall(MatGetInfo(A, MAT_LOCAL, info)); 1421 1422 isend[0] = info->nz_used; 1423 isend[1] = info->nz_allocated; 1424 isend[2] = info->nz_unneeded; 1425 isend[3] = info->memory; 1426 isend[4] = info->mallocs; 1427 1428 PetscCall(MatGetInfo(B, MAT_LOCAL, info)); 1429 1430 isend[0] += info->nz_used; 1431 isend[1] += info->nz_allocated; 1432 isend[2] += info->nz_unneeded; 1433 isend[3] += info->memory; 1434 isend[4] += info->mallocs; 1435 1436 if (flag == MAT_LOCAL) { 1437 info->nz_used = isend[0]; 1438 info->nz_allocated = isend[1]; 1439 info->nz_unneeded = isend[2]; 1440 info->memory = isend[3]; 1441 info->mallocs = isend[4]; 1442 } else if (flag == MAT_GLOBAL_MAX) { 1443 PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin))); 1444 1445 info->nz_used = irecv[0]; 1446 info->nz_allocated = irecv[1]; 1447 info->nz_unneeded = irecv[2]; 1448 info->memory = irecv[3]; 1449 info->mallocs = 
/*
  MatSetOption_MPIBAIJ - Handles matrix options for the parallel BAIJ format.

  Most structural options are simply forwarded to both sequential parts; stashing and
  hash-table options are recorded in the parallel data structure; symmetry-type options
  are accepted but need no action here.
*/
static PetscErrorCode MatSetOption_MPIBAIJ(Mat A, MatOption op, PetscBool flg)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    /* forwarded to both local parts; requires the storage to exist */
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A, 1);
    a->roworiented = flg;

    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_SORTED_FULL:
    PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_USE_HASH_TABLE:
    a->ht_flag = flg;
    a->ht_fact = 1.39; /* default hash-table size factor (slots per nonzero block); resettable via MatSetHashTableFactor() — see composed function in MatDestroy */
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SUBMAT_SINGLEIS:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
  case MAT_SPD_ETERNAL:
    /* if the diagonal matrix is square it inherits some of the properties above */
    break;
  default:
    SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "unknown option %d", op);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
/*
  MatTranspose_MPIBAIJ - Forms the transpose by walking every bs x bs block of the
  local A and B parts and inserting its point columns as rows of the result.

  Preallocation of the result is unknown here, so B is created with default
  (possibly reallocating) preallocation.  For MAT_INPLACE_MATRIX the freshly built
  transpose replaces A via MatHeaderMerge().
*/
static PetscErrorCode MatTranspose_MPIBAIJ(Mat A, MatReuse reuse, Mat *matout)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)A->data;
  Mat_SeqBAIJ *Aloc;
  Mat          B;
  PetscInt     M = A->rmap->N, N = A->cmap->N, *ai, *aj, i, *rvals, j, k, col;
  PetscInt     bs = A->rmap->bs, mbs = baij->mbs;
  MatScalar   *a;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
    PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
    /* Do not know preallocation information, but must set block size */
    PetscCall(MatMPIBAIJSetPreallocation(B, A->rmap->bs, PETSC_DECIDE, NULL, PETSC_DECIDE, NULL));
  } else {
    B = *matout;
  }

  /* copy over the A part */
  Aloc = (Mat_SeqBAIJ *)baij->A->data;
  ai   = Aloc->i;
  aj   = Aloc->j;
  a    = Aloc->a;
  /* rvals holds the bs consecutive global point-row numbers of block row i */
  PetscCall(PetscMalloc1(bs, &rvals));

  for (i = 0; i < mbs; i++) {
    rvals[0] = bs * (baij->rstartbs + i);
    for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
    for (j = ai[i]; j < ai[i + 1]; j++) {
      col = (baij->cstartbs + aj[j]) * bs;
      for (k = 0; k < bs; k++) {
        /* insert one point column of this block as a row of B; `a` walks the
           column-major block values bs entries at a time */
        PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES));

        col++;
        a += bs;
      }
    }
  }
  /* copy over the B part; garray maps local block columns to global */
  Aloc = (Mat_SeqBAIJ *)baij->B->data;
  ai   = Aloc->i;
  aj   = Aloc->j;
  a    = Aloc->a;
  for (i = 0; i < mbs; i++) {
    rvals[0] = bs * (baij->rstartbs + i);
    for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
    for (j = ai[i]; j < ai[i + 1]; j++) {
      col = baij->garray[aj[j]] * bs;
      for (k = 0; k < bs; k++) {
        PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES));
        col++;
        a += bs;
      }
    }
  }
  PetscCall(PetscFree(rvals));
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));

  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) *matout = B;
  else PetscCall(MatHeaderMerge(A, &B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  MatDiagonalScale_MPIBAIJ - Computes diag(ll)*mat*diag(rr).

  The left scaling of B uses locally owned rows, so it needs no communication; the
  right scaling of B needs the off-process entries of rr, which are scattered into
  lvec while the diagonal block is being scaled.
*/
static PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat, Vec ll, Vec rr)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat          a = baij->A, b = baij->B;
  PetscInt     s1, s2, s3;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(mat, &s2, &s3));
  if (rr) {
    PetscCall(VecGetLocalSize(rr, &s1));
    PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size");
    /* Overlap communication with computation. */
    PetscCall(VecScatterBegin(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
  }
  if (ll) {
    PetscCall(VecGetLocalSize(ll, &s1));
    PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size");
    PetscUseTypeMethod(b, diagonalscale, ll, NULL);
  }
  /* scale the diagonal block */
  PetscUseTypeMethod(a, diagonalscale, ll, rr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    PetscCall(VecScatterEnd(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscUseTypeMethod(b, diagonalscale, NULL, baij->lvec);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
/*
  MatZeroRows_MPIBAIJ - Zeros the given global rows, optionally placing `diag` on the
  diagonal and fixing the right-hand side b so that x retains its value in those rows.

  Rows are first mapped to local indices (possibly communicating rows owned elsewhere).
  B must be zeroed before A because the non-congruent-layout diagonal path below may
  insert entries that land in B.
*/
static PetscErrorCode MatZeroRows_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data;
  PetscInt    *lrows;
  PetscInt     r, len;
  PetscBool    cong;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows));
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArray(b, &bb));
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArray(b, &bb));
  }

  /* actually zap the local rows */
  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
  PetscCall(MatZeroRows_SeqBAIJ(l->B, len, lrows, 0.0, NULL, NULL));
  PetscCall(MatHasCongruentLayouts(A, &cong));
  if ((diag != 0.0) && cong) {
    /* congruent layouts: the diagonal entry lives in l->A, set it there directly */
    PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, diag, NULL, NULL));
  } else if (diag != 0.0) {
    /* non-congruent layouts: insert diagonal entries via MatSetValues, which requires
       that new nonzero locations be allowed */
    PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL));
    PetscCheck(!((Mat_SeqBAIJ *)l->A->data)->nonew, PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
       MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
    }
    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
  } else {
    PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL));
  }
  PetscCall(PetscFree(lrows));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqBAIJ *)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
/*
  MatZeroRowsColumns_MPIBAIJ - Zeros both the given global rows AND the matching
  columns, optionally placing `diag` on the diagonal and adjusting the rhs b.

  The set of rows to zero is communicated to owning ranks via a PetscSF reduction,
  then:
    - the diagonal part l->A is handled by its own MatZeroRowsColumns;
    - the off-diagonal part has its zeroed *rows* cleared directly in the BAIJ value
      array (strided walk through the bs x bs blocks), and its zeroed *columns*
      discovered through a scattered 0/1 mask (lmask) of the zeroed rows, with the
      rhs updated (bb[i] -= a*x[col]) before each entry is cleared.
*/
static PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIBAIJ       *l = (Mat_MPIBAIJ *)A->data;
  PetscMPIInt        n = A->rmap->n, p = 0;
  PetscInt           i, j, k, r, len = 0, row, col, count;
  PetscInt          *lrows, *owners = A->rmap->range;
  PetscSFNode       *rrows;
  PetscSF            sf;
  const PetscScalar *xx;
  PetscScalar       *bb, *mask;
  Vec                xmask, lmask;
  Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ *)l->B->data;
  PetscInt           bs = A->rmap->bs, bs2 = baij->bs2;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  PetscCall(PetscMalloc1(n, &lrows));
  for (r = 0; r < n; ++r) lrows[r] = -1;
  PetscCall(PetscMalloc1(N, &rrows));
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N);
    if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */
      PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p));
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
  /* Collect flags for rows to be zeroed: any nonnegative value reduced into lrows[r] marks row r */
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFDestroy(&sf));
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r)
    if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b));
  /* handle off-diagonal part of matrix */
  PetscCall(MatCreateVecs(A, &xmask, NULL));
  PetscCall(VecDuplicate(l->lvec, &lmask));
  PetscCall(VecGetArray(xmask, &bb));
  for (i = 0; i < len; i++) bb[lrows[i]] = 1; /* 1 marks a zeroed row; scatter spreads marks to ghost entries */
  PetscCall(VecRestoreArray(xmask, &bb));
  PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecDestroy(&xmask));
  if (x) {
    /* ghost values of x are needed to fix the rhs for zeroed columns */
    PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecGetArrayRead(l->lvec, &xx));
    PetscCall(VecGetArray(b, &bb));
  }
  PetscCall(VecGetArray(lmask, &mask));
  /* remove zeroed rows of off-diagonal matrix: clear one point row inside each block,
     striding by bs through the column-major block storage */
  for (i = 0; i < len; ++i) {
    row   = lrows[i];
    count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs;
    aa    = ((MatScalar *)(baij->a)) + baij->i[row / bs] * bs2 + (row % bs);
    for (k = 0; k < count; ++k) {
      aa[0] = 0.0;
      aa += bs;
    }
  }
  /* loop over all elements of off process part of matrix zeroing removed columns*/
  for (i = 0; i < l->B->rmap->N; ++i) {
    row = i / bs;
    for (j = baij->i[row]; j < baij->i[row + 1]; ++j) {
      for (k = 0; k < bs; ++k) {
        col = bs * baij->j[j] + k;
        if (PetscAbsScalar(mask[col])) {
          aa = ((MatScalar *)(baij->a)) + j * bs2 + (i % bs) + bs * k;
          if (x) bb[i] -= aa[0] * xx[col];
          aa[0] = 0.0;
        }
      }
    }
  }
  if (x) {
    PetscCall(VecRestoreArray(b, &bb));
    PetscCall(VecRestoreArrayRead(l->lvec, &xx));
  }
  PetscCall(VecRestoreArray(lmask, &mask));
  PetscCall(VecDestroy(&lmask));
  PetscCall(PetscFree(lrows));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqBAIJ *)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Marks the (local) diagonal part as unfactored; B holds no factor data. */
static PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatSetUnfactored(a->A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatDuplicate_MPIBAIJ(Mat, MatDuplicateOption, Mat *);

/*
  MatEqual_MPIBAIJ - Two MPIBAIJ matrices are equal iff on every rank the diagonal parts
  match and the off-diagonal parts match; the per-rank results are combined with a
  logical-AND reduction (collective).
*/
static PetscErrorCode MatEqual_MPIBAIJ(Mat A, Mat B, PetscBool *flag)
{
  Mat_MPIBAIJ *matB = (Mat_MPIBAIJ *)B->data, *matA = (Mat_MPIBAIJ *)A->data;
  Mat          a, b, c, d;
  PetscBool    flg;

  PetscFunctionBegin;
  a = matA->A;
  b = matA->B;
  c = matB->A;
  d = matB->B;

  PetscCall(MatEqual(a, c, &flg));
  if (flg) PetscCall(MatEqual(b, d, &flg));
  PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
  PetscFunctionReturn(PETSC_SUCCESS);
}
*/
  if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
    PetscCall(MatCopy_Basic(A, B, str));
  } else {
    /* same pattern and implementation: copy the two constituent sequential blocks directly */
    PetscCall(MatCopy(a->A, b->A, str));
    PetscCall(MatCopy(a->B, b->B, str));
  }
  PetscCall(PetscObjectStateIncrease((PetscObject)B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Compute per-block-row nonzero counts for the union of the patterns of Y and X (their
   off-diagonal blocks, mapped to global block columns through yltog/xltog) so that Y += a*X
   with DIFFERENT_NONZERO_PATTERN can be preallocated exactly. */
PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz)
{
  PetscInt     bs = Y->rmap->bs, m = Y->rmap->N / bs;
  Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data;
  Mat_SeqBAIJ *y = (Mat_SeqBAIJ *)Y->data;

  PetscFunctionBegin;
  PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Y = a*X + Y. SAME_NONZERO_PATTERN uses direct BLAS axpy on the value arrays; otherwise a new
   matrix with the union pattern is built and merged back into Y's header. */
static PetscErrorCode MatAXPY_MPIBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
{
  Mat_MPIBAIJ *xx = (Mat_MPIBAIJ *)X->data, *yy = (Mat_MPIBAIJ *)Y->data;
  PetscBLASInt bnz, one = 1;
  Mat_SeqBAIJ *x, *y;
  PetscInt     bs2 = Y->rmap->bs * Y->rmap->bs;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscScalar alpha = a;
    /* identical layouts: axpy the raw value arrays of the diagonal blocks ... */
    x = (Mat_SeqBAIJ *)xx->A->data;
    y = (Mat_SeqBAIJ *)yy->A->data;
    PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz));
    PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one));
    /* ... and of the off-diagonal blocks */
    x = (Mat_SeqBAIJ *)xx->B->data;
    y = (Mat_SeqBAIJ *)yy->B->data;
    PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz));
    PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one));
    PetscCall(PetscObjectStateIncrease((PetscObject)Y));
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    PetscCall(MatAXPY_Basic(Y, a, X, str));
  } else {
    Mat       B;
    PetscInt *nnz_d, *nnz_o, bs = Y->rmap->bs;
    PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
    PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
    PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
    PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
    PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N));
    PetscCall(MatSetBlockSizesFromMats(B, Y, Y));
    PetscCall(MatSetType(B, MATMPIBAIJ));
    PetscCall(MatAXPYGetPreallocation_SeqBAIJ(yy->A, xx->A, nnz_d));
    PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
    PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, nnz_d, 0, nnz_o));
    /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! */
    PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
    /* B replaces Y's contents; Y's header (and callers' handles) stay valid */
    PetscCall(MatHeaderMerge(Y, &B));
    PetscCall(PetscFree(nnz_d));
    PetscCall(PetscFree(nnz_o));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Complex-conjugate all entries; a no-op for real scalars. */
static PetscErrorCode MatConjugate_MPIBAIJ(Mat mat)
{
  PetscFunctionBegin;
  if (PetscDefined(USE_COMPLEX)) {
    Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)mat->data;

    PetscCall(MatConjugate_SeqBAIJ(a->A));
    PetscCall(MatConjugate_SeqBAIJ(a->B));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Replace every entry with its real part (applied to both sequential blocks). */
static PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatRealPart(a->A));
  PetscCall(MatRealPart(a->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Replace every entry with its imaginary part (applied to both sequential blocks). */
static PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatImaginaryPart(a->A));
  PetscCall(MatImaginaryPart(a->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Extract the submatrix mat[isrow, iscol]. The column IS is gathered to all processes (cached on
   the result under "ISAllGather" so MAT_REUSE_MATRIX can recover it) and the work is delegated to
   MatCreateSubMatrix_MPIBAIJ_Private(). */
static PetscErrorCode MatCreateSubMatrix_MPIBAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
{
  IS       iscol_local;
  PetscInt csize;

  PetscFunctionBegin;
  PetscCall(ISGetLocalSize(iscol, &csize));
  if (call ==
MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
    PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
  } else {
    PetscCall(ISAllGather(iscol, &iscol_local));
  }
  PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, isrow, iscol_local, csize, call, newmat, PETSC_FALSE));
  if (call == MAT_INITIAL_MATRIX) {
    /* cache the gathered column IS on the submatrix for later MAT_REUSE_MATRIX calls */
    PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
    PetscCall(ISDestroy(&iscol_local));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
    Not great since it makes two copies of the submatrix, first an SeqBAIJ
  in local and then by concatenating the local matrices the end result.
  Writing it directly would be much like MatCreateSubMatrices_MPIBAIJ().
  This routine is used for BAIJ and SBAIJ matrices (unfortunate dependency).
*/
PetscErrorCode MatCreateSubMatrix_MPIBAIJ_Private(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat, PetscBool sym)
{
  PetscMPIInt  rank, size;
  PetscInt     i, m, n, rstart, row, rend, nz, *cwork, j, bs;
  PetscInt    *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
  Mat          M, Mreuse;
  MatScalar   *vwork, *aa;
  MPI_Comm     comm;
  IS           isrow_new, iscol_new;
  Mat_SeqBAIJ *aij;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  /* The compression and expansion should be avoided. Doesn't point
     out errors, might change the indices, hence buggey */
  /* convert scalar index sets into block index sets expected by the BAIJ extraction kernel */
  PetscCall(ISCompressIndicesGeneral(mat->rmap->N, mat->rmap->n, mat->rmap->bs, 1, &isrow, &isrow_new));
  if (isrow == iscol) {
    iscol_new = isrow_new;
    PetscCall(PetscObjectReference((PetscObject)iscol_new));
  } else PetscCall(ISCompressIndicesGeneral(mat->cmap->N, mat->cmap->n, mat->cmap->bs, 1, &iscol, &iscol_new));

  if (call == MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse));
    PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
    PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_REUSE_MATRIX, &Mreuse, sym));
  } else {
    PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_INITIAL_MATRIX, &Mreuse, sym));
  }
  PetscCall(ISDestroy(&isrow_new));
  PetscCall(ISDestroy(&iscol_new));
  /*
    m - number of local rows
    n - number of columns (same on all processors)
    rstart - first row in new global matrix generated
  */
  PetscCall(MatGetBlockSize(mat, &bs));
  PetscCall(MatGetSize(Mreuse, &m, &n));
  m = m / bs;
  n = n / bs;

  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqBAIJ *)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow, &mglobal));
      if (mglobal == n * bs) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = n / size + ((n % size) > rank);
      }
    } else {
      nlocal = csize / bs;
    }
    /* prefix-sum of local column counts yields this rank's [rstart,rend) block-column range */
    PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
    rstart = rend - nlocal;
    PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n);

    /* next, compute all the lengths */
    PetscCall(PetscMalloc2(m + 1, &dlens, m + 1, &olens));
    for (i = 0; i < m; i++) {
      jend = ii[i + 1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j = 0; j < jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    PetscCall(MatCreate(comm, &M));
    PetscCall(MatSetSizes(M, bs * m, bs * nlocal, PETSC_DECIDE, bs * n));
    PetscCall(MatSetType(M, sym ? ((PetscObject)mat)->type_name : MATMPIBAIJ));
    /* both preallocation calls are issued; only the one matching M's type takes effect */
    PetscCall(MatMPIBAIJSetPreallocation(M, bs, 0, dlens, 0, olens));
    PetscCall(MatMPISBAIJSetPreallocation(M, bs, 0, dlens, 0, olens));
    PetscCall(PetscFree2(dlens, olens));
  } else {
    PetscInt ml, nl;

    M = *newmat;
    PetscCall(MatGetLocalSize(M, &ml, &nl));
    PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
      The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
      rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  PetscCall(MatSetOption(M, MAT_ROW_ORIENTED, PETSC_FALSE));
  PetscCall(MatGetOwnershipRange(M, &rstart, &rend));
  aij = (Mat_SeqBAIJ *)(Mreuse)->data;
  ii  = aij->i;
  jj  = aij->j;
  aa  = aij->a;
  /* insert the local SeqBAIJ rows into the distributed result, one block row at a time */
  for (i = 0; i < m; i++) {
    row   = rstart / bs + i;
    nz    = ii[i + 1] - ii[i];
    cwork = jj;
    jj += nz;
    vwork = aa;
    aa += nz * bs * bs;
    PetscCall(MatSetValuesBlocked_MPIBAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES));
  }

  PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse));
    PetscCall(PetscObjectDereference((PetscObject)Mreuse));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Permute A by row permutation rowp and column permutation colp: B = P_r * A * P_c. The row IS is
   promoted to the matrix communicator and the column IS gathered locally, then the permuted matrix
   is extracted as a submatrix. */
static PetscErrorCode MatPermute_MPIBAIJ(Mat A, IS rowp, IS colp, Mat *B)
{
  MPI_Comm        comm, pcomm;
  PetscInt        clocal_size, nrows;
  const PetscInt *rows;
  PetscMPIInt     size;
  IS              crowp, lcolp;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  /* make a collective version of 'rowp' */
  PetscCall(PetscObjectGetComm((PetscObject)rowp, &pcomm));
  if (pcomm == comm) {
    crowp = rowp;
  } else {
    PetscCall(ISGetSize(rowp, &nrows));
    PetscCall(ISGetIndices(rowp, &rows));
    PetscCall(ISCreateGeneral(comm, nrows, rows, PETSC_COPY_VALUES, &crowp));
    PetscCall(ISRestoreIndices(rowp, &rows));
  }
  PetscCall(ISSetPermutation(crowp));
  /* make a local version of 'colp' */
  PetscCall(PetscObjectGetComm((PetscObject)colp, &pcomm));
  PetscCallMPI(MPI_Comm_size(pcomm, &size));
  if (size == 1) {
    lcolp = colp;
  } else {
    PetscCall(ISAllGather(colp, &lcolp));
  }
  PetscCall(ISSetPermutation(lcolp));
  /* now we just get the submatrix */
  PetscCall(MatGetLocalSize(A, NULL, &clocal_size));
  PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(A, crowp, lcolp, clocal_size, MAT_INITIAL_MATRIX, B, PETSC_FALSE));
  /* clean up */
  if (pcomm != comm) PetscCall(ISDestroy(&crowp));
  if (size > 1) PetscCall(ISDestroy(&lcolp));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Return the number of ghost (off-process) block columns and, optionally, their global block
   indices (the garray mapping of the off-diagonal block). */
static PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[])
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ *B    = (Mat_SeqBAIJ *)baij->B->data;

  PetscFunctionBegin;
  if (nghosts) *nghosts = B->nbs;
  if (ghosts) *ghosts = baij->garray;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Gather the global block-nonzero structure of A onto every process as a sequential AIJ matrix
   with one entry per bs x bs block; numerical values are left unset. Rows are assembled in
   sorted column order by merging the off-diagonal entries around the diagonal block. */
static PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A, Mat *newmat)
{
  Mat          B;
  Mat_MPIBAIJ *a  = (Mat_MPIBAIJ *)A->data;
  Mat_SeqBAIJ *ad = (Mat_SeqBAIJ *)a->A->data, *bd = (Mat_SeqBAIJ *)a->B->data;
  Mat_SeqAIJ  *b;
  PetscMPIInt  size, rank, *recvcounts = NULL, *displs = NULL;
  PetscInt     sendcount, i, *rstarts = A->rmap->range, n, cnt, j, bs = A->rmap->bs;
  PetscInt     m, *garray = a->garray, *lens, *jsendbuf, *a_jsendbuf, *b_jsendbuf;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));

  /* Tell every processor the number of nonzeros per row */
  PetscCall(PetscMalloc1(A->rmap->N / bs, &lens));
  for (i = A->rmap->rstart / bs; i < A->rmap->rend / bs; i++) lens[i] = ad->i[i - A->rmap->rstart / bs + 1] - ad->i[i - A->rmap->rstart / bs] + bd->i[i - A->rmap->rstart / bs + 1] - bd->i[i - A->rmap->rstart / bs];
  /* single allocation holds both recvcounts and displs */
  PetscCall(PetscMalloc1(2 * size, &recvcounts));
  displs = recvcounts + size;
  for (i = 0; i < size; i++) {
    recvcounts[i] = A->rmap->range[i + 1] / bs - A->rmap->range[i] / bs;
    displs[i]     = A->rmap->range[i] / bs;
  }
  PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, lens, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A)));
  /* Create the sequential matrix of the same type as the local block diagonal */
  PetscCall(MatCreate(PETSC_COMM_SELF, &B));
  PetscCall(MatSetSizes(B, A->rmap->N / bs, A->cmap->N / bs, PETSC_DETERMINE, PETSC_DETERMINE));
  PetscCall(MatSetType(B, MATSEQAIJ));
  PetscCall(MatSeqAIJSetPreallocation(B, 0, lens));
  b = (Mat_SeqAIJ *)B->data;

  /* Copy my part of matrix column indices over */
  sendcount  = ad->nz + bd->nz;
  jsendbuf   = b->j + b->i[rstarts[rank] / bs]; /* write directly into B's j array at my rows */
  a_jsendbuf = ad->j;
  b_jsendbuf = bd->j;
  n          = A->rmap->rend / bs - A->rmap->rstart / bs;
  cnt        = 0;
  for (i = 0; i < n; i++) {
    /* put in lower diagonal portion */
    m = bd->i[i + 1] - bd->i[i];
    while (m > 0) {
      /* is it above diagonal (in bd (compressed) numbering) */
      if (garray[*b_jsendbuf] > A->rmap->rstart / bs + i) break;
      jsendbuf[cnt++] = garray[*b_jsendbuf++];
      m--;
    }

    /* put in diagonal portion */
    for (j = ad->i[i]; j < ad->i[i + 1]; j++) jsendbuf[cnt++] = A->rmap->rstart / bs + *a_jsendbuf++;

    /* put in upper diagonal portion */
    while (m-- > 0) jsendbuf[cnt++] = garray[*b_jsendbuf++];
  }
  PetscCheck(cnt == sendcount, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Corrupted PETSc matrix: nz given %" PetscInt_FMT " actual nz %" PetscInt_FMT, sendcount, cnt);

  /* Gather all column indices to all processors */
  for (i = 0; i < size; i++) {
    recvcounts[i] = 0;
    for (j = A->rmap->range[i] / bs; j < A->rmap->range[i + 1] / bs; j++) recvcounts[i] += lens[j];
  }
  displs[0] = 0;
  for (i = 1; i < size; i++) displs[i] = displs[i - 1] + recvcounts[i - 1];
  PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, b->j, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A)));
  /* Assemble the matrix into useable form (note numerical values not yet set) */
  /* set the b->ilen (length of each row) values */
  PetscCall(PetscArraycpy(b->ilen, lens, A->rmap->N / bs));
  /* set the b->i indices */
  b->i[0] = 0;
  for (i = 1; i <= A->rmap->N / bs; i++) b->i[i] = b->i[i - 1] + lens[i - 1];
  PetscCall(PetscFree(lens));
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  PetscCall(PetscFree(recvcounts));

  PetscCall(MatPropagateSymmetryOptions(A, B));
  *newmat = B;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Parallel SOR: only the "local" variants are supported. Each outer iteration scatters the
   current solution into the ghost vector, folds the off-process coupling into the right-hand
   side (bb1 = bb - B*x), and runs the sequential SOR kernel on the diagonal block. */
static PetscErrorCode MatSOR_MPIBAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data;
  Vec          bb1 = NULL;

  PetscFunctionBegin;
  if (flag == SOR_APPLY_UPPER) {
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  /* a work vector is needed unless this is a single sweep with zero initial guess */
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) PetscCall(VecDuplicate(bb, &bb1));

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* first sweep needs no ghost update since x starts at zero */
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }

    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag &
SOR_ZERO_INITIAL_GUESS) { 2214 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2215 its--; 2216 } 2217 while (its--) { 2218 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2219 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2220 2221 /* update rhs: bb1 = bb - B*x */ 2222 PetscCall(VecScale(mat->lvec, -1.0)); 2223 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 2224 2225 /* local sweep */ 2226 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx)); 2227 } 2228 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 2229 if (flag & SOR_ZERO_INITIAL_GUESS) { 2230 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2231 its--; 2232 } 2233 while (its--) { 2234 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2235 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2236 2237 /* update rhs: bb1 = bb - B*x */ 2238 PetscCall(VecScale(mat->lvec, -1.0)); 2239 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 2240 2241 /* local sweep */ 2242 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx)); 2243 } 2244 } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel version of SOR requested not supported"); 2245 2246 PetscCall(VecDestroy(&bb1)); 2247 PetscFunctionReturn(PETSC_SUCCESS); 2248 } 2249 2250 static PetscErrorCode MatGetColumnReductions_MPIBAIJ(Mat A, PetscInt type, PetscReal *reductions) 2251 { 2252 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)A->data; 2253 PetscInt m, N, i, *garray = aij->garray; 2254 PetscInt ib, jb, bs = A->rmap->bs; 2255 Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ *)aij->A->data; 2256 MatScalar *a_val = a_aij->a; 2257 Mat_SeqBAIJ *b_aij = (Mat_SeqBAIJ *)aij->B->data; 2258 MatScalar *b_val = b_aij->a; 2259 PetscReal *work; 2260 2261 PetscFunctionBegin; 2262 
PetscCall(MatGetSize(A, &m, &N)); 2263 PetscCall(PetscCalloc1(N, &work)); 2264 if (type == NORM_2) { 2265 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2266 for (jb = 0; jb < bs; jb++) { 2267 for (ib = 0; ib < bs; ib++) { 2268 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val); 2269 a_val++; 2270 } 2271 } 2272 } 2273 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2274 for (jb = 0; jb < bs; jb++) { 2275 for (ib = 0; ib < bs; ib++) { 2276 work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val); 2277 b_val++; 2278 } 2279 } 2280 } 2281 } else if (type == NORM_1) { 2282 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2283 for (jb = 0; jb < bs; jb++) { 2284 for (ib = 0; ib < bs; ib++) { 2285 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val); 2286 a_val++; 2287 } 2288 } 2289 } 2290 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2291 for (jb = 0; jb < bs; jb++) { 2292 for (ib = 0; ib < bs; ib++) { 2293 work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val); 2294 b_val++; 2295 } 2296 } 2297 } 2298 } else if (type == NORM_INFINITY) { 2299 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2300 for (jb = 0; jb < bs; jb++) { 2301 for (ib = 0; ib < bs; ib++) { 2302 int col = A->cmap->rstart + a_aij->j[i] * bs + jb; 2303 work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]); 2304 a_val++; 2305 } 2306 } 2307 } 2308 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2309 for (jb = 0; jb < bs; jb++) { 2310 for (ib = 0; ib < bs; ib++) { 2311 int col = garray[b_aij->j[i]] * bs + jb; 2312 work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]); 2313 b_val++; 2314 } 2315 } 2316 } 2317 } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) { 2318 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2319 for (jb = 0; jb < bs; jb++) { 2320 for (ib = 0; ib < bs; ib++) { 2321 
work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val); 2322 a_val++; 2323 } 2324 } 2325 } 2326 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2327 for (jb = 0; jb < bs; jb++) { 2328 for (ib = 0; ib < bs; ib++) { 2329 work[garray[b_aij->j[i]] * bs + jb] += PetscRealPart(*b_val); 2330 b_val++; 2331 } 2332 } 2333 } 2334 } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) { 2335 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2336 for (jb = 0; jb < bs; jb++) { 2337 for (ib = 0; ib < bs; ib++) { 2338 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val); 2339 a_val++; 2340 } 2341 } 2342 } 2343 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2344 for (jb = 0; jb < bs; jb++) { 2345 for (ib = 0; ib < bs; ib++) { 2346 work[garray[b_aij->j[i]] * bs + jb] += PetscImaginaryPart(*b_val); 2347 b_val++; 2348 } 2349 } 2350 } 2351 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type"); 2352 if (type == NORM_INFINITY) { 2353 PetscCall(MPIU_Allreduce(work, reductions, N, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A))); 2354 } else { 2355 PetscCall(MPIU_Allreduce(work, reductions, N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A))); 2356 } 2357 PetscCall(PetscFree(work)); 2358 if (type == NORM_2) { 2359 for (i = 0; i < N; i++) reductions[i] = PetscSqrtReal(reductions[i]); 2360 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 2361 for (i = 0; i < N; i++) reductions[i] /= m; 2362 } 2363 PetscFunctionReturn(PETSC_SUCCESS); 2364 } 2365 2366 static PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A, const PetscScalar **values) 2367 { 2368 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2369 2370 PetscFunctionBegin; 2371 PetscCall(MatInvertBlockDiagonal(a->A, values)); 2372 A->factorerrortype = a->A->factorerrortype; 2373 A->factorerror_zeropivot_value = 
a->A->factorerror_zeropivot_value;
  A->factorerror_zeropivot_row = a->A->factorerror_zeropivot_row;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Y = Y + a*I. Ensures the diagonal block has (at least) diagonal preallocation before
   delegating to MatShift_Basic(). */
static PetscErrorCode MatShift_MPIBAIJ(Mat Y, PetscScalar a)
{
  Mat_MPIBAIJ *maij = (Mat_MPIBAIJ *)Y->data;
  Mat_SeqBAIJ *aij  = (Mat_SeqBAIJ *)maij->A->data;

  PetscFunctionBegin;
  if (!Y->preallocated) {
    PetscCall(MatMPIBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL, 0, NULL));
  } else if (!aij->nz) {
    /* diagonal block is empty: re-preallocate it but preserve the caller's nonew setting */
    PetscInt nonew = aij->nonew;
    PetscCall(MatSeqBAIJSetPreallocation(maij->A, Y->rmap->bs, 1, NULL));
    aij->nonew = nonew;
  }
  PetscCall(MatShift_Basic(Y, a));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Report whether any diagonal entry is structurally missing; `d` (if requested) is shifted from
   the diagonal block's local block row to the global block row. */
static PetscErrorCode MatMissingDiagonal_MPIBAIJ(Mat A, PetscBool *missing, PetscInt *d)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
  PetscCall(MatMissingDiagonal(a->A, missing, d));
  if (d) {
    PetscInt rstart;
    PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
    *d += rstart / A->rmap->bs;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Hand out (no new reference) the sequential diagonal block. */
static PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A, Mat *a)
{
  PetscFunctionBegin;
  *a = ((Mat_MPIBAIJ *)A->data)->A;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Drop stored zeros from both sequential blocks. */
static PetscErrorCode MatEliminateZeros_MPIBAIJ(Mat A, PetscBool keep)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatEliminateZeros_SeqBAIJ(a->A, keep));        // possibly keep zero diagonal coefficients
  PetscCall(MatEliminateZeros_SeqBAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Virtual function table for MATMPIBAIJ; slot positions are fixed by struct _MatOps. */
static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ,
                                       MatGetRow_MPIBAIJ,
                                       MatRestoreRow_MPIBAIJ,
                                       MatMult_MPIBAIJ,
                                       /* 4*/ MatMultAdd_MPIBAIJ,
                                       MatMultTranspose_MPIBAIJ,
                                       MatMultTransposeAdd_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*10*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_MPIBAIJ,
                                       MatTranspose_MPIBAIJ,
                                       /*15*/ MatGetInfo_MPIBAIJ,
                                       MatEqual_MPIBAIJ,
                                       MatGetDiagonal_MPIBAIJ,
                                       MatDiagonalScale_MPIBAIJ,
                                       MatNorm_MPIBAIJ,
                                       /*20*/ MatAssemblyBegin_MPIBAIJ,
                                       MatAssemblyEnd_MPIBAIJ,
                                       MatSetOption_MPIBAIJ,
                                       MatZeroEntries_MPIBAIJ,
                                       /*24*/ MatZeroRows_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*29*/ MatSetUp_MPI_Hash,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIBAIJ,
                                       NULL,
                                       /*34*/ MatDuplicate_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*39*/ MatAXPY_MPIBAIJ,
                                       MatCreateSubMatrices_MPIBAIJ,
                                       MatIncreaseOverlap_MPIBAIJ,
                                       MatGetValues_MPIBAIJ,
                                       MatCopy_MPIBAIJ,
                                       /*44*/ NULL,
                                       MatScale_MPIBAIJ,
                                       MatShift_MPIBAIJ,
                                       NULL,
                                       MatZeroRowsColumns_MPIBAIJ,
                                       /*49*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*54*/ MatFDColoringCreate_MPIXAIJ,
                                       NULL,
                                       MatSetUnfactored_MPIBAIJ,
                                       MatPermute_MPIBAIJ,
                                       MatSetValuesBlocked_MPIBAIJ,
                                       /*59*/ MatCreateSubMatrix_MPIBAIJ,
                                       MatDestroy_MPIBAIJ,
                                       MatView_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       /*64*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*69*/ MatGetRowMaxAbs_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*74*/ NULL,
                                       MatFDColoringApply_BAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*79*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatLoad_MPIBAIJ,
                                       /*84*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*89*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*94*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*99*/ NULL,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIBAIJ,
                                       NULL,
                                       /*104*/ NULL,
                                       MatRealPart_MPIBAIJ,
                                       MatImaginaryPart_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       /*109*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatMissingDiagonal_MPIBAIJ,
                                       /*114*/ MatGetSeqNonzeroStructure_MPIBAIJ,
                                       NULL,
                                       MatGetGhosts_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       /*119*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatGetMultiProcBlock_MPIBAIJ,
                                       /*124*/ NULL,
                                       MatGetColumnReductions_MPIBAIJ,
                                       MatInvertBlockDiagonal_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       /*129*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*139*/ MatSetBlockSizes_Default,
                                       NULL,
                                       NULL,
                                       MatFDColoringSetUp_MPIXAIJ,
                                       NULL,
                                       /*144*/ MatCreateMPIMatConcatenateSeqMat_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*150*/ NULL,
                                       MatEliminateZeros_MPIBAIJ};

PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *);
PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);

/* Implementation of MatMPIBAIJSetPreallocationCSR(): preallocates B exactly from a block CSR
   description (ii/jj in block indices) and, if V is given, inserts the numerical values. */
static PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[])
{
  PetscInt        m, rstart, cstart, cend;
  PetscInt        i, j, dlen, olen, nz, nz_max = 0, *d_nnz = NULL, *o_nnz = NULL;
  const PetscInt *JJ = NULL;
  PetscScalar    *values = NULL;
  PetscBool       roworiented = ((Mat_MPIBAIJ *)B->data)->roworiented;
  PetscBool       nooffprocentries;

  PetscFunctionBegin;
  PetscCall(PetscLayoutSetBlockSize(B->rmap, bs));
  PetscCall(PetscLayoutSetBlockSize(B->cmap, bs));
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
  /* all counts below are in blocks, not scalars */
  m      = B->rmap->n / bs;
  rstart = B->rmap->rstart / bs;
  cstart = B->cmap->rstart / bs;
  cend   = B->cmap->rend / bs;

  PetscCheck(!ii[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]);
  PetscCall(PetscMalloc2(m, &d_nnz, m, &o_nnz));
  for (i = 0; i < m;
i++) { 2606 nz = ii[i + 1] - ii[i]; 2607 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz); 2608 nz_max = PetscMax(nz_max, nz); 2609 dlen = 0; 2610 olen = 0; 2611 JJ = jj + ii[i]; 2612 for (j = 0; j < nz; j++) { 2613 if (*JJ < cstart || *JJ >= cend) olen++; 2614 else dlen++; 2615 JJ++; 2616 } 2617 d_nnz[i] = dlen; 2618 o_nnz[i] = olen; 2619 } 2620 PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, d_nnz, 0, o_nnz)); 2621 PetscCall(PetscFree2(d_nnz, o_nnz)); 2622 2623 values = (PetscScalar *)V; 2624 if (!values) PetscCall(PetscCalloc1(bs * bs * nz_max, &values)); 2625 for (i = 0; i < m; i++) { 2626 PetscInt row = i + rstart; 2627 PetscInt ncols = ii[i + 1] - ii[i]; 2628 const PetscInt *icols = jj + ii[i]; 2629 if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */ 2630 const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0); 2631 PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, ncols, icols, svals, INSERT_VALUES)); 2632 } else { /* block ordering does not match so we can only insert one block at a time. */ 2633 PetscInt j; 2634 for (j = 0; j < ncols; j++) { 2635 const PetscScalar *svals = values + (V ? 
(bs * bs * (ii[i] + j)) : 0); 2636 PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, 1, &icols[j], svals, INSERT_VALUES)); 2637 } 2638 } 2639 } 2640 2641 if (!V) PetscCall(PetscFree(values)); 2642 nooffprocentries = B->nooffprocentries; 2643 B->nooffprocentries = PETSC_TRUE; 2644 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 2645 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 2646 B->nooffprocentries = nooffprocentries; 2647 2648 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 2649 PetscFunctionReturn(PETSC_SUCCESS); 2650 } 2651 2652 /*@C 2653 MatMPIBAIJSetPreallocationCSR - Creates a sparse parallel matrix in `MATBAIJ` format using the given nonzero structure and (optional) numerical values 2654 2655 Collective 2656 2657 Input Parameters: 2658 + B - the matrix 2659 . bs - the block size 2660 . i - the indices into `j` for the start of each local row (starts with zero) 2661 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2662 - v - optional values in the matrix 2663 2664 Level: advanced 2665 2666 Notes: 2667 The order of the entries in values is specified by the `MatOption` `MAT_ROW_ORIENTED`. For example, C programs 2668 may want to use the default `MAT_ROW_ORIENTED` with value `PETSC_TRUE` and use an array v[nnz][bs][bs] where the second index is 2669 over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 2670 `MAT_ROW_ORIENTED` with value `PETSC_FALSE` and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 2671 block column and the second index is over columns within a block. 

   Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well

.seealso: `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MATMPIAIJ`, `MatCreateMPIBAIJWithArrays()`, `MATMPIBAIJ`
@*/
PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidType(B, 1);
  PetscValidLogicalCollectiveInt(B, bs, 2);
  /* dispatch to the implementation composed on the concrete matrix type; a no-op for other types */
  PetscTryMethod(B, "MatMPIBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Type-specific preallocation for MATMPIBAIJ: validates the per-block-row counts, computes the
   block bookkeeping (mbs, nbs, block ownership ranges), discards any previous colmap/garray and
   communication objects, and (re)creates the two sequential blocks: b->A holds the diagonal
   portion and b->B the off-diagonal portion. Collective: every rank must call it. */
PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz)
{
  Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data;
  PetscInt     i;
  PetscMPIInt  size;

  PetscFunctionBegin;
  if (B->hash_active) {
    /* matrix was in hash-table assembly mode; restore the saved (regular) function table */
    B->ops[0]      = b->cops;
    B->hash_active = PETSC_FALSE;
  }
  if (!B->preallocated) PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), bs, &B->bstash));
  PetscCall(MatSetBlockSize(B, PetscAbs(bs)));
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  /* the layout may adjust the block size; re-fetch the value actually in effect */
  PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));

  /* NOTE(review): the check requires >= 0 while the message says "cannot be less than -1";
     message and condition look inconsistent — confirm intended lower bound */
  if (d_nnz) {
    for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(d_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "d_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, d_nnz[i]);
  }
  if (o_nnz) {
    for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(o_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "o_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, o_nnz[i]);
  }

  /* block-level sizes: local/global number of block rows and block columns */
  b->bs2 = bs * bs;
  b->mbs = B->rmap->n / bs;
  b->nbs = B->cmap->n / bs;
  b->Mbs = B->rmap->N / bs;
  b->Nbs = B->cmap->N / bs;

  /* block-row ownership ranges, derived from the point-row layout */
  for (i = 0; i <= b->size; i++) b->rangebs[i] = B->rmap->range[i] / bs;
  b->rstartbs = B->rmap->rstart / bs;
  b->rendbs   = B->rmap->rend / bs;
  b->cstartbs = B->cmap->rstart / bs;
  b->cendbs   = B->cmap->rend / bs;

  /* drop any state left over from a previous preallocation/assembly */
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&b->colmap));
#else
  PetscCall(PetscFree(b->colmap));
#endif
  PetscCall(PetscFree(b->garray));
  PetscCall(VecDestroy(&b->lvec));
  PetscCall(VecScatterDestroy(&b->Mvctx));

  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
  /* off-diagonal block: on a single rank it is empty (0 columns) */
  PetscCall(MatDestroy(&b->B));
  PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
  PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0));
  PetscCall(MatSetType(b->B, MATSEQBAIJ));

  /* diagonal block: the local square portion */
  PetscCall(MatDestroy(&b->A));
  PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
  PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
  PetscCall(MatSetType(b->A, MATSEQBAIJ));

  PetscCall(MatSeqBAIJSetPreallocation(b->A, bs, d_nz, d_nnz));
  PetscCall(MatSeqBAIJSetPreallocation(b->B, bs, o_nz, o_nnz));
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat, Vec);
extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat, PetscReal);

/* Convert an MPIBAIJ matrix to MATMPIADJ (block-level adjacency graph): one vertex per block
   row, diagonal blocks removed, columns in globally sorted order (off-diagonal columns left of
   the diagonal range first, then local columns, then the remaining off-diagonal columns).
   Ownership of ii/jj passes to the created MPIAdj matrix. */
PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype, MatReuse reuse, Mat *adj)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ *)B->data;
  Mat_SeqBAIJ    *d = (Mat_SeqBAIJ *)b->A->data, *o = (Mat_SeqBAIJ *)b->B->data;
  PetscInt        M = B->rmap->n / B->rmap->bs, i, *ii, *jj, cnt, j, k, rstart = B->rmap->rstart / B->rmap->bs;
  const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;

  PetscFunctionBegin;
  PetscCall(PetscMalloc1(M + 1, &ii));
  ii[0] = 0;
  for (i = 0; i < M; i++) {
    PetscCheck((id[i + 1] - id[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, id[i], id[i + 1]);
    PetscCheck((io[i + 1] - io[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, io[i], io[i + 1]);
    ii[i + 1] = ii[i] + id[i + 1] - id[i] + io[i + 1] - io[i];
    /* remove one from count if matrix has a diagonal entry in this block row */
    for (j = id[i]; j < id[i + 1]; j++) {
      if (jd[j] == i) {
        ii[i + 1]--;
        break;
      }
    }
  }
  PetscCall(PetscMalloc1(ii[M], &jj));
  cnt = 0;
  for (i = 0; i < M; i++) {
    /* off-diagonal columns that come before this rank's diagonal range */
    for (j = io[i]; j < io[i + 1]; j++) {
      if (garray[jo[j]] > rstart) break;
      jj[cnt++] = garray[jo[j]];
    }
    /* local (diagonal-block) columns, skipping the diagonal itself */
    for (k = id[i]; k < id[i + 1]; k++) {
      if (jd[k] != i) jj[cnt++] = rstart + jd[k];
    }
    /* remaining off-diagonal columns, to the right of the diagonal range */
    for (; j < io[i + 1]; j++) jj[cnt++] = garray[jo[j]];
  }
  PetscCall(MatCreateMPIAdj(PetscObjectComm((PetscObject)B), M, B->cmap->N / B->rmap->bs, ii, jj, NULL, adj));
  PetscFunctionReturn(PETSC_SUCCESS);
}

#include <../src/mat/impls/aij/mpi/mpiaij.h>

PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType, MatReuse, Mat *);

/* Convert an assembled MPIBAIJ matrix to MATMPIAIJ by converting its two sequential blocks
   and renumbering the off-diagonal block's column indices back to global point columns.
   Supports MAT_INITIAL_MATRIX, MAT_REUSE_MATRIX and MAT_INPLACE_MATRIX. */
PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
  Mat_MPIAIJ  *b;
  Mat          B;

  PetscFunctionBegin;
  PetscCheck(A->assembled, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Matrix must be assembled");

  if (reuse == MAT_REUSE_MATRIX) {
    B = *newmat;
  } else {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetType(B, MATMPIAIJ));
    PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
    PetscCall(MatSetBlockSizes(B, A->rmap->bs, A->cmap->bs));
    PetscCall(MatSeqAIJSetPreallocation(B, 0, NULL));
    PetscCall(MatMPIAIJSetPreallocation(B, 0, NULL, 0, NULL));
  }
  b = (Mat_MPIAIJ *)B->data;

  if (reuse == MAT_REUSE_MATRIX) {
    PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_REUSE_MATRIX, &b->A));
    PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_REUSE_MATRIX, &b->B));
  } else {
    PetscInt   *garray = a->garray;
    Mat_SeqAIJ *bB;
    PetscInt    bs, nnz;
    PetscCall(MatDestroy(&b->A));
    PetscCall(MatDestroy(&b->B));
    /* just clear out the data structure */
    PetscCall(MatDisAssemble_MPIAIJ(B));
    PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A));
    PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B));

    /* Global numbering for b->B columns: the converted off-diagonal block still carries
       compacted local block columns; map block index via garray and restore the point offset */
    bB  = (Mat_SeqAIJ *)b->B->data;
    bs  = A->rmap->bs;
    nnz = bB->i[A->rmap->n];
    for (PetscInt k = 0; k < nnz; k++) {
      PetscInt bj = bB->j[k] / bs;
      PetscInt br = bB->j[k] % bs;
      bB->j[k]    = garray[bj] * bs + br;
    }
  }
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));

  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &B));
  } else {
    *newmat = B;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
   MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.

   Options Database Keys:
+ -mat_type mpibaij - sets the matrix type to `MATMPIBAIJ` during a call to `MatSetFromOptions()`
. -mat_block_size <bs> - set the blocksize used to store the matrix
.
-mat_baij_mult_version version - indicate the version of the matrix-vector product to use (0 often indicates using BLAS)
- -mat_use_hash_table <fact> - set hash table factor

   Level: beginner

   Note:
    `MatSetOption(A, MAT_STRUCTURE_ONLY, PETSC_TRUE)` may be called for this matrix type. In this case no
    space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored

.seealso: `Mat`, `MATBAIJ`, `MATSEQBAIJ`, `MatCreateBAIJ`
M*/

PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat, MatType, MatReuse, Mat *);

/* Type constructor for MATMPIBAIJ: allocates and zero/default-initializes the type-specific
   data, installs the function table, and composes the type-specific methods and conversions
   on the object so the generic Mat interfaces can dispatch to them. */
PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B)
{
  Mat_MPIBAIJ *b;
  PetscBool    flg = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PetscNew(&b));
  B->data      = (void *)b;
  B->ops[0]    = MatOps_Values;
  B->assembled = PETSC_FALSE;

  B->insertmode = NOT_SET_VALUES;
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &b->size));

  /* build local table of row and column ownerships */
  PetscCall(PetscMalloc1(b->size + 1, &b->rangebs));

  /* build cache for off array entries formed */
  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL;
  b->garray      = NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used in block assembly */
  b->barray = NULL;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = NULL;
  b->rowvalues    = NULL;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = NULL;
  b->hd           = NULL;
  b->ht_size      = 0;
  b->ht_flag      = PETSC_FALSE;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
  b->ijonly = PETSC_FALSE;

  /* compose type-specific methods; the "*_C" names are how the generic interfaces find them */
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiadj_C", MatConvert_MPIBAIJ_MPIAdj));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiaij_C", MatConvert_MPIBAIJ_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpisbaij_C", MatConvert_MPIBAIJ_MPISBAIJ));
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_hypre_C", MatConvert_AIJ_HYPRE));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocation_C", MatMPIBAIJSetPreallocation_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocationCSR_C", MatMPIBAIJSetPreallocationCSR_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetHashTableFactor_C", MatSetHashTableFactor_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_is_C", MatConvert_XAIJ_IS));
  PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIBAIJ));

  PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Options for loading MPIBAIJ matrix 1", "Mat");
  PetscCall(PetscOptionsName("-mat_use_hash_table", "Use hash table to save time in constructing matrix", "MatSetOption", &flg));
  if (flg) {
    PetscReal fact = 1.39;
    PetscCall(MatSetOption(B, MAT_USE_HASH_TABLE, PETSC_TRUE));
    PetscCall(PetscOptionsReal("-mat_use_hash_table", "Use hash table factor", "MatMPIBAIJSetHashTableFactor", fact, &fact, NULL));
    /* factors <= 1.0 would make the hash table no larger than the data; fall back to the default */
    if (fact <= 1.0) fact = 1.39;
    PetscCall(MatMPIBAIJSetHashTableFactor(B, fact));
    PetscCall(PetscInfo(B, "Hash table Factor used %5.2g\n", (double)fact));
  }
  PetscOptionsEnd();
  PetscFunctionReturn(PETSC_SUCCESS);
}

// PetscClangLinter pragma disable: -fdoc-section-header-unknown
/*MC
   MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.

   This matrix type is identical to `MATSEQBAIJ` when constructed with a single process communicator,
   and `MATMPIBAIJ` otherwise.

   Options Database Keys:
. -mat_type baij - sets the matrix type to `MATBAIJ` during a call to `MatSetFromOptions()`

   Level: beginner

.seealso: `Mat`, `MatCreateBAIJ()`, `MATSEQBAIJ`, `MATMPIBAIJ`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()`
M*/

/*@C
   MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in `MATMPIBAIJ` format
   (block compressed row).

   Collective

   Input Parameters:
+  B - the matrix
.  bs   - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
          blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
.  d_nz  - number of block nonzeros per block row in diagonal portion of local
           submatrix  (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           of the in diagonal portion of the local (possibly different for each block
           row) or `NULL`. If you plan to factor the matrix you must leave room for the diagonal entry and
           set it even if it is zero.
.  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
           submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or `NULL`.

   If the *_nnz parameter is given then the *_nz parameter is ignored

   Options Database Keys:
+   -mat_block_size - size of the blocks to use
-   -mat_use_hash_table <fact> - set hash table factor

   Level: intermediate

   Notes:
   For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either `d_nz` or `d_nnz` (not both). Set
   `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic
   memory allocation. Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          --------------------------
   row 3  |o o o d d d o o o o  o  o
   row 4  |o o o d d d o o o o  o  o
   row 5  |o o o d d d o o o o  o  o
          --------------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix. Note that the d and the o submatrices are
   stored simply in the `MATSEQBAIJ` format for compressed row storage.

   Now `d_nz` should indicate the number of block nonzeros per row in the d matrix,
   and `o_nz` should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects `d_nz` >> `o_nz`.

   You can call `MatGetInfo()` to get information on how effective the preallocation was;
   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
   You can also run with the option `-info` and look for messages with the string
   malloc in them to see if additional memory allocation was needed.

.seealso: `Mat`, `MATMPIBAIJ`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatMPIBAIJSetPreallocationCSR()`, `PetscSplitOwnership()`
@*/
PetscErrorCode MatMPIBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidType(B, 1);
  PetscValidLogicalCollectiveInt(B, bs, 2);
  /* dispatch to the implementation composed on the concrete matrix type; a no-op for other types */
  PetscTryMethod(B, "MatMPIBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, bs, d_nz, d_nnz, o_nz, o_nnz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

// PetscClangLinter pragma disable: -fdoc-section-header-unknown
/*@C
   MatCreateBAIJ - Creates a sparse parallel matrix in `MATBAIJ` format
   (block compressed row).

   Collective

   Input Parameters:
+  comm - MPI communicator
.  bs   - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
          blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
.  m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
          This value should be the same as the local size used in creating the
          y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or `PETSC_DECIDE` to have calculated if N is given)
          This value should be the same as the local size used in creating the
          x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
.  N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
.  d_nz  - number of nonzero blocks per block row in diagonal portion of local
           submatrix  (same for all local rows)
.
d_nnz - array containing the number of nonzero blocks in the various block rows
           in the diagonal portion of the local submatrix (possibly different for each block
           row) or NULL. If you plan to factor the matrix you must leave room for the diagonal entry
           and set it even if it is zero.
.  o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
           submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzero blocks in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or NULL.

   Output Parameter:
.  A - the matrix

   Options Database Keys:
+   -mat_block_size - size of the blocks to use
-   -mat_use_hash_table <fact> - set hash table factor

   Level: intermediate

   Notes:
   It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, `MatSeqBAIJSetPreallocation()`]

   For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   If the *_nnz parameter is given then the *_nz parameter is ignored

   A nonzero block is any block that has 1 or more nonzeros in it

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both). Set
   `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic
   memory allocation. Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          --------------------------
   row 3  |o o o d d d o o o o  o  o
   row 4  |o o o d d d o o o o  o  o
   row 5  |o o o d d d o o o o  o  o
          --------------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix. Note that the d and the o submatrices are
   stored simply in the `MATSEQBAIJ` format for compressed row storage.

   Now `d_nz` should indicate the number of block nonzeros per row in the d matrix,
   and `o_nz` should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects `d_nz` >> `o_nz`.

.seealso: `Mat`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()`
@*/
PetscErrorCode MatCreateBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
{
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCall(MatCreate(comm, A));
  PetscCall(MatSetSizes(*A, m, n, M, N));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  /* a single-process communicator gets the sequential format, otherwise the MPI format */
  if (size > 1) {
    PetscCall(MatSetType(*A, MATMPIBAIJ));
    PetscCall(MatMPIBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz));
  } else {
    PetscCall(MatSetType(*A, MATSEQBAIJ));
    PetscCall(MatSeqBAIJSetPreallocation(*A, bs, d_nz, d_nnz));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Duplicate an MPIBAIJ matrix: copies sizes, layouts, block bookkeeping, the colmap/garray
   lookup tables, the communication objects (lvec, Mvctx) and the two sequential blocks
   (A diagonal, B off-diagonal); per-call scratch state (hash table, MatGetRow buffers) is
   reset rather than copied. Values are copied or not according to cpvalues. */
static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
{
  Mat          mat;
  Mat_MPIBAIJ *a, *oldmat = (Mat_MPIBAIJ *)matin->data;
  PetscInt     len = 0;

  PetscFunctionBegin;
  *newmat = NULL;
  PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
  PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
  PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));

  mat->factortype   = matin->factortype;
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;

  a             = (Mat_MPIBAIJ *)mat->data;
  mat->rmap->bs = matin->rmap->bs;
  a->bs2        = oldmat->bs2;
  a->mbs        = oldmat->mbs;
  a->nbs        = oldmat->nbs;
  a->Mbs        = oldmat->Mbs;
  a->Nbs        = oldmat->Nbs;

  PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
  PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = NULL;
  a->rowvalues    = NULL;
  a->getrowactive = PETSC_FALSE;
  a->barray       = NULL;
  a->rstartbs     = oldmat->rstartbs;
  a->rendbs       = oldmat->rendbs;
  a->cstartbs     = oldmat->cstartbs;
  a->cendbs       = oldmat->cendbs;

  /* hash table stuff */
  a->ht           = NULL;
  a->hd           = NULL;
  a->ht_size      = 0;
  a->ht_flag      = oldmat->ht_flag;
  a->ht_fact      = oldmat->ht_fact;
  a->ht_total_ct  = 0;
  a->ht_insert_ct = 0;

  PetscCall(PetscArraycpy(a->rangebs, oldmat->rangebs, a->size + 1));
  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
#else
    PetscCall(PetscMalloc1(a->Nbs, &a->colmap));
    PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, a->Nbs));
#endif
  } else a->colmap = NULL;

  if (oldmat->garray && (len = ((Mat_SeqBAIJ *)(oldmat->B->data))->nbs)) {
    PetscCall(PetscMalloc1(len, &a->garray));
    PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
  } else a->garray = NULL;

  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin), matin->rmap->bs, &mat->bstash));
  PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
  PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx));

  PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
  PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
  PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
  *newmat = mat;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Used for both MPIBAIJ and MPISBAIJ matrices */
PetscErrorCode MatLoad_MPIBAIJ_Binary(Mat mat, PetscViewer viewer)
{
  PetscInt     header[4], M, N, nz, bs, m, n, mbs, nbs, rows, cols, sum, i, j, k;
  PetscInt    *rowidxs, *colidxs, rs, cs, ce;
  PetscScalar *matvals;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  /* read in matrix header */
  PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
  PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
  M  = header[1];
  N  = header[2];
  nz = header[3];
  PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
  PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
  /* a negative nz marks a special (e.g. dense) on-disk format this loader does not understand */
  PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIBAIJ");

  /* set block sizes from the viewer's .info file */
  PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
  /* set local sizes if not set already */
  if (mat->rmap->n < 0 && M == N) mat->rmap->n = mat->cmap->n;
  if (mat->cmap->n < 0 && M == N) mat->cmap->n = mat->rmap->n;
  /* set global sizes if not set already */
  if (mat->rmap->N < 0) mat->rmap->N = M;
  if (mat->cmap->N < 0) mat->cmap->N = N;
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));

  /* check if the matrix sizes are correct */
  PetscCall(MatGetSize(mat, &rows, &cols));
  PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);
  PetscCall(MatGetBlockSize(mat, &bs));
  PetscCall(MatGetLocalSize(mat, &m, &n));
  PetscCall(PetscLayoutGetRange(mat->rmap, &rs, NULL));
  PetscCall(PetscLayoutGetRange(mat->cmap, &cs, &ce));
  mbs = m / bs;
  nbs = n / bs;

  /* read in row lengths and build row indices (prefix-sum into CSR row pointers) */
  PetscCall(PetscMalloc1(m + 1, &rowidxs));
  PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT));
  rowidxs[0] = 0;
  for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
  PetscCall(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer)));
  PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);

  /* read in column indices and matrix values */
  PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals));
  PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
  PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));

  { /* preallocate matrix storage */
    PetscBT    bt; /* helper bit set to count diagonal nonzeros */
    PetscHSetI ht; /* helper hash set to count off-diagonal nonzeros */
    PetscBool  sbaij, done;
    PetscInt  *d_nnz, *o_nnz;

    PetscCall(PetscBTCreate(nbs, &bt));
    PetscCall(PetscHSetICreate(&ht));
    PetscCall(PetscCalloc2(mbs, &d_nnz, mbs, &o_nnz));
    PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPISBAIJ, &sbaij));
    for (i = 0; i < mbs; i++) {
      PetscCall(PetscBTMemzero(nbs, bt));
      PetscCall(PetscHSetIClear(ht));
      for (k = 0; k < bs; k++) {
        PetscInt row = bs * i + k;
        for (j = rowidxs[row]; j < rowidxs[row + 1]; j++) {
          PetscInt col = colidxs[j];
          /* SBAIJ stores only the upper triangle, so skip entries below the diagonal */
          if (!sbaij || col >= row) {
            if (col >= cs && col < ce) {
              /* count each diagonal-portion block only once per block row */
              if (!PetscBTLookupSet(bt, (col - cs) / bs)) d_nnz[i]++;
            } else {
              PetscCall(PetscHSetIQueryAdd(ht, col / bs, &done));
              if (done) o_nnz[i]++;
            }
          }
        }
      }
    }
    PetscCall(PetscBTDestroy(&bt));
    PetscCall(PetscHSetIDestroy(&ht));
    /* both are PetscTryMethod-based; only the one matching the actual type takes effect */
    PetscCall(MatMPIBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz));
    PetscCall(MatMPISBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz));
    PetscCall(PetscFree2(d_nnz, o_nnz));
  }

  /* store matrix values */
  for (i = 0; i < m; i++) {
    PetscInt row = rs + i, s = rowidxs[i], e = rowidxs[i + 1];
    PetscCall((*mat->ops->setvalues)(mat, 1, &row, e - s, colidxs + s, matvals + s, INSERT_VALUES));
  }

  PetscCall(PetscFree(rowidxs));
  PetscCall(PetscFree2(colidxs, matvals));
  PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatLoad implementation for MATMPIBAIJ: only binary viewers are supported */
PetscErrorCode MatLoad_MPIBAIJ(Mat mat, PetscViewer viewer)
{
  PetscBool isbinary;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name);
  PetscCall(MatLoad_MPIBAIJ_Binary(mat, viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
   MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the matrices hash table

   Input Parameters:
+  mat  - the matrix
-  fact - factor

   Options Database Key:
.
-mat_use_hash_table <fact> - provide the factor

   Level: advanced

.seealso: `Mat`, `MATMPIBAIJ`, `MatSetOption()`
@*/
PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat, PetscReal fact)
{
  PetscFunctionBegin;
  /* dispatch to the type-specific setter; a no-op for types that do not compose it */
  PetscTryMethod(mat, "MatSetHashTableFactor_C", (Mat, PetscReal), (mat, fact));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MPIBAIJ implementation of the hash-table-factor setter: simply records the factor
   in the type-specific data for later use when the hash table is built */
PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat, PetscReal fact)
{
  Mat_MPIBAIJ *baij;

  PetscFunctionBegin;
  baij          = (Mat_MPIBAIJ *)mat->data;
  baij->ht_fact = fact;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Return borrowed references to the diagonal (Ad) and off-diagonal (Ao) sequential blocks
   of a MATMPIBAIJ matrix, plus the garray mapping from Ao's compacted local block columns
   to global block columns; any output pointer may be NULL if not wanted */
PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[])
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
  PetscBool    flg;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIBAIJ, &flg));
  PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIBAIJ matrix as input");
  if (Ad) *Ad = a->A;
  if (Ao) *Ao = a->B;
  if (colmap) *colmap = a->garray;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
    Special version for direct calls from Fortran (to eliminate two function call overheads)
*/
#if defined(PETSC_HAVE_FORTRAN_CAPS)
  #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
  #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
#endif

// PetscClangLinter pragma disable: -fdoc-synopsis-matching-symbol-name
/*@C
  MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to `MatSetValuesBlocked()`

  Collective

  Input Parameters:
+ matin - the matrix
. min - number of input rows
. im - input rows
. nin - number of input columns
. in - input columns
.
v     - numerical values input
- addvin - `INSERT_VALUES` or `ADD_VALUES`

  Level: advanced

  Developer Notes:
  This has a complete copy of `MatSetValuesBlocked_MPIBAIJ()` which is terrible code un-reuse.

.seealso: `Mat`, `MatSetValuesBlocked()`
@*/
PETSC_EXTERN PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin, PetscInt *min, const PetscInt im[], PetscInt *nin, const PetscInt in[], const MatScalar v[], InsertMode *addvin)
{
  /* convert input arguments to C version (Fortran passes everything by reference) */
  Mat        mat  = *matin;
  PetscInt   m    = *min, n = *nin;
  InsertMode addv = *addvin;

  Mat_MPIBAIJ     *baij        = (Mat_MPIBAIJ *)mat->data;
  const MatScalar *value;
  MatScalar       *barray      = baij->barray;
  PetscBool        roworiented = baij->roworiented;
  PetscInt         i, j, ii, jj, row, col, rstart = baij->rstartbs;
  PetscInt         rend = baij->rendbs, cstart = baij->cstartbs, stepval;
  PetscInt         cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;

  PetscFunctionBegin;
  /* tasks normally handled by MatSetValuesBlocked() */
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
  else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values");
  PetscCheck(!mat->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix");
  if (mat->assembled) {
    mat->was_assembled = PETSC_TRUE;
    mat->assembled     = PETSC_FALSE;
  }
  PetscCall(PetscLogEventBegin(MAT_SetValues, mat, 0, 0, 0));

  /* lazily allocate the bs2-sized scratch buffer used when a block must be copied/transposed */
  if (!barray) {
    PetscCall(PetscMalloc1(bs2, &barray));
    baij->barray = barray;
  }

  /* stepval: extra scalar stride skipped after copying each bs-long run out of v
     (see the value += stepval in the copy loop below); depends on orientation */
  if (roworiented) stepval = (n - 1) * bs;
  else stepval = (m - 1) * bs;

  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue; /* negative block-row indices are silently skipped */
    PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large, row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j = 0; j < n; j++) {
        /* If NumCol = 1 then a copy is not required: the block is already contiguous in v */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar *)v + i * bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar *)v + j * bs2;
        } else { /* Here a copy is required: gather the (i,j) block into the scratch buffer */
          if (roworiented) {
            value = v + i * (stepval + bs) * bs + j * bs;
          } else {
            value = v + j * (stepval + bs) * bs + i * bs;
          }
          for (ii = 0; ii < bs; ii++, value += stepval) {
            for (jj = 0; jj < bs; jj++) *barray++ = *value++;
          }
          barray -= bs2; /* rewind to the start of the scratch block */
        }

        if (in[j] >= cstart && in[j] < cend) {
          /* block column owned locally: goes into the diagonal portion A */
          col = in[j] - cstart;
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
        } else if (in[j] < 0) {
          continue;
        } else {
          PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large, col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
          if (mat->was_assembled) {
            /* after a previous assembly, off-diagonal columns are compressed; translate
               the global block column through the colmap built from garray */
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));

#if defined(PETSC_USE_DEBUG)
  #if defined(PETSC_USE_CTABLE)
            {
              PetscInt data;
              PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &data));
              PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
            }
  #else
            PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
  #endif
#endif
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col));
            col = (col - 1) / bs;
#else
            col = (baij->colmap[in[j]] - 1) / bs;
#endif
            /* NOTE(review): when the column is absent the lookup yields 0, so
               col = (0 - 1) / bs truncates to 0 for bs > 1 (only -1 when bs == 1);
               the disassemble branch below then triggers only for bs == 1 —
               confirm this matches MatSetValuesBlocked_MPIBAIJ() intent */
            if (col < 0 && !((Mat_SeqBAIJ *)(baij->A->data))->nonew) {
              PetscCall(MatDisAssemble_MPIBAIJ(mat));
              col = in[j];
            }
          } else col = in[j];
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
        }
      }
    } else {
      /* off-process block row: stash it for communication during assembly (unless disabled) */
      if (!baij->donotstash) {
        if (roworiented) {
          PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        } else {
          PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        }
      }
    }
  }

  /* task normally handled by MatSetValuesBlocked() */
  PetscCall(PetscLogEventEnd(MAT_SetValues, mat, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatCreateMPIBAIJWithArrays - creates a `MATMPIBAIJ` matrix using arrays that contain in standard block
  CSR format the local rows.

  Collective

  Input Parameters:
+ comm - MPI communicator
. bs   - the block size, only a block size of 1 is supported
. m    - number of local rows (Cannot be `PETSC_DECIDE`)
. n    - This value should be the same as the local size used in creating the
         x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
         calculated if N is given) For square matrices n is almost always m.
. M    - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
. N    - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
. i    - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that rowth block row of the matrix
. j    - column indices
- a    - matrix values

  Output Parameter:
. mat - the matrix

  Level: intermediate

  Notes:
  The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc;
  thus you CANNOT change the matrix entries by changing the values of a[] after you have
  called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
  The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
  the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
  block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory
  with column-major ordering within blocks.

  The `i` and `j` indices are 0 based, and i indices are indices corresponding to the local `j` array.

.seealso: `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
          `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`
@*/
PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
{
  PetscFunctionBegin;
  PetscCheck(!i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatSetSizes(*mat, m, n, M, N));
  PetscCall(MatSetType(*mat, MATMPIBAIJ));
  PetscCall(MatSetBlockSize(*mat, bs));
  PetscCall(MatSetUp(*mat));
  /* the input blocks are column-major (see Notes above), so temporarily switch
     to column orientation while the CSR data is copied in */
  PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_FALSE));
  PetscCall(MatMPIBAIJSetPreallocationCSR(*mat, bs, i, j, a));
  PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Concatenates per-process sequential BAIJ matrices (stacked by rows) into one
   parallel MATBAIJ matrix over comm; n is the local column size of the result
   (or PETSC_DECIDE). See MatCreateMPIMatConcatenateSeqMat(). */
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
{
  PetscInt     m, N, i, rstart, nnz, Ii, bs, cbs;
  PetscInt    *indx;
  PetscScalar *values;

  PetscFunctionBegin;
  PetscCall(MatGetSize(inmat, &m, &N));
  if (scall == 
MAT_INITIAL_MATRIX) { /* symbolic phase */ 3607 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)inmat->data; 3608 PetscInt *dnz, *onz, mbs, Nbs, nbs; 3609 PetscInt *bindx, rmax = a->rmax, j; 3610 PetscMPIInt rank, size; 3611 3612 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 3613 mbs = m / bs; 3614 Nbs = N / cbs; 3615 if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnershipBlock(comm, cbs, &n, &N)); 3616 nbs = n / cbs; 3617 3618 PetscCall(PetscMalloc1(rmax, &bindx)); 3619 MatPreallocateBegin(comm, mbs, nbs, dnz, onz); /* inline function, output __end and __rstart are used below */ 3620 3621 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 3622 PetscCallMPI(MPI_Comm_rank(comm, &size)); 3623 if (rank == size - 1) { 3624 /* Check sum(nbs) = Nbs */ 3625 PetscCheck(__end == Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT, __end, Nbs); 3626 } 3627 3628 rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */ 3629 for (i = 0; i < mbs; i++) { 3630 PetscCall(MatGetRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL)); /* non-blocked nnz and indx */ 3631 nnz = nnz / bs; 3632 for (j = 0; j < nnz; j++) bindx[j] = indx[j * bs] / bs; 3633 PetscCall(MatPreallocateSet(i + rstart, nnz, bindx, dnz, onz)); 3634 PetscCall(MatRestoreRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL)); 3635 } 3636 PetscCall(PetscFree(bindx)); 3637 3638 PetscCall(MatCreate(comm, outmat)); 3639 PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 3640 PetscCall(MatSetBlockSizes(*outmat, bs, cbs)); 3641 PetscCall(MatSetType(*outmat, MATBAIJ)); 3642 PetscCall(MatSeqBAIJSetPreallocation(*outmat, bs, 0, dnz)); 3643 PetscCall(MatMPIBAIJSetPreallocation(*outmat, bs, 0, dnz, 0, onz)); 3644 MatPreallocateEnd(dnz, onz); 3645 PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 3646 } 3647 3648 /* numeric phase */ 3649 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 3650 
PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL)); 3651 3652 for (i = 0; i < m; i++) { 3653 PetscCall(MatGetRow_SeqBAIJ(inmat, i, &nnz, &indx, &values)); 3654 Ii = i + rstart; 3655 PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES)); 3656 PetscCall(MatRestoreRow_SeqBAIJ(inmat, i, &nnz, &indx, &values)); 3657 } 3658 PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY)); 3659 PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY)); 3660 PetscFunctionReturn(PETSC_SUCCESS); 3661 } 3662