1 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/ 2 3 #include <petsc/private/hashseti.h> 4 #include <petscblaslapack.h> 5 #include <petscsf.h> 6 7 static PetscErrorCode MatDestroy_MPIBAIJ(Mat mat) 8 { 9 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 10 11 PetscFunctionBegin; 12 PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N)); 13 PetscCall(MatStashDestroy_Private(&mat->stash)); 14 PetscCall(MatStashDestroy_Private(&mat->bstash)); 15 PetscCall(MatDestroy(&baij->A)); 16 PetscCall(MatDestroy(&baij->B)); 17 #if defined(PETSC_USE_CTABLE) 18 PetscCall(PetscHMapIDestroy(&baij->colmap)); 19 #else 20 PetscCall(PetscFree(baij->colmap)); 21 #endif 22 PetscCall(PetscFree(baij->garray)); 23 PetscCall(VecDestroy(&baij->lvec)); 24 PetscCall(VecScatterDestroy(&baij->Mvctx)); 25 PetscCall(PetscFree2(baij->rowvalues, baij->rowindices)); 26 PetscCall(PetscFree(baij->barray)); 27 PetscCall(PetscFree2(baij->hd, baij->ht)); 28 PetscCall(PetscFree(baij->rangebs)); 29 PetscCall(PetscFree(mat->data)); 30 31 PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL)); 32 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL)); 33 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL)); 34 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocation_C", NULL)); 35 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocationCSR_C", NULL)); 36 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL)); 37 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetHashTableFactor_C", NULL)); 38 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpisbaij_C", NULL)); 39 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiadj_C", NULL)); 40 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiaij_C", 
NULL)); 41 #if defined(PETSC_HAVE_HYPRE) 42 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_hypre_C", NULL)); 43 #endif 44 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_is_C", NULL)); 45 PetscFunctionReturn(PETSC_SUCCESS); 46 } 47 48 /* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and MatAssemblyEnd_MPI_Hash() */ 49 #define TYPE BAIJ 50 #include "../src/mat/impls/aij/mpi/mpihashmat.h" 51 #undef TYPE 52 53 #if defined(PETSC_HAVE_HYPRE) 54 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *); 55 #endif 56 57 static PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A, Vec v, PetscInt idx[]) 58 { 59 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 60 PetscInt i, *idxb = NULL, m = A->rmap->n, bs = A->cmap->bs; 61 PetscScalar *va, *vv; 62 Vec vB, vA; 63 const PetscScalar *vb; 64 65 PetscFunctionBegin; 66 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vA)); 67 PetscCall(MatGetRowMaxAbs(a->A, vA, idx)); 68 69 PetscCall(VecGetArrayWrite(vA, &va)); 70 if (idx) { 71 for (i = 0; i < m; i++) { 72 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 73 } 74 } 75 76 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vB)); 77 PetscCall(PetscMalloc1(m, &idxb)); 78 PetscCall(MatGetRowMaxAbs(a->B, vB, idxb)); 79 80 PetscCall(VecGetArrayWrite(v, &vv)); 81 PetscCall(VecGetArrayRead(vB, &vb)); 82 for (i = 0; i < m; i++) { 83 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 84 vv[i] = vb[i]; 85 if (idx) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs); 86 } else { 87 vv[i] = va[i]; 88 if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > bs * a->garray[idxb[i] / bs] + (idxb[i] % bs)) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs); 89 } 90 } 91 PetscCall(VecRestoreArrayWrite(vA, &vv)); 92 PetscCall(VecRestoreArrayWrite(vA, &va)); 93 PetscCall(VecRestoreArrayRead(vB, &vb)); 94 PetscCall(PetscFree(idxb)); 95 PetscCall(VecDestroy(&vA)); 96 PetscCall(VecDestroy(&vB)); 
97 PetscFunctionReturn(PETSC_SUCCESS); 98 } 99 100 static PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat) 101 { 102 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 103 104 PetscFunctionBegin; 105 PetscCall(MatStoreValues(aij->A)); 106 PetscCall(MatStoreValues(aij->B)); 107 PetscFunctionReturn(PETSC_SUCCESS); 108 } 109 110 static PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat) 111 { 112 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 113 114 PetscFunctionBegin; 115 PetscCall(MatRetrieveValues(aij->A)); 116 PetscCall(MatRetrieveValues(aij->B)); 117 PetscFunctionReturn(PETSC_SUCCESS); 118 } 119 120 /* 121 Local utility routine that creates a mapping from the global column 122 number to the local number in the off-diagonal part of the local 123 storage of the matrix. This is done in a non scalable way since the 124 length of colmap equals the global matrix length. 125 */ 126 PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat) 127 { 128 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 129 Mat_SeqBAIJ *B = (Mat_SeqBAIJ *)baij->B->data; 130 PetscInt nbs = B->nbs, i, bs = mat->rmap->bs; 131 132 PetscFunctionBegin; 133 #if defined(PETSC_USE_CTABLE) 134 PetscCall(PetscHMapICreateWithSize(baij->nbs, &baij->colmap)); 135 for (i = 0; i < nbs; i++) PetscCall(PetscHMapISet(baij->colmap, baij->garray[i] + 1, i * bs + 1)); 136 #else 137 PetscCall(PetscCalloc1(baij->Nbs + 1, &baij->colmap)); 138 for (i = 0; i < nbs; i++) baij->colmap[baij->garray[i]] = i * bs + 1; 139 #endif 140 PetscFunctionReturn(PETSC_SUCCESS); 141 } 142 143 #define MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, orow, ocol) \ 144 do { \ 145 brow = row / bs; \ 146 rp = PetscSafePointerPlusOffset(aj, ai[brow]); \ 147 ap = PetscSafePointerPlusOffset(aa, bs2 * ai[brow]); \ 148 rmax = aimax[brow]; \ 149 nrow = ailen[brow]; \ 150 bcol = col / bs; \ 151 ridx = row % bs; \ 152 cidx = col % bs; \ 153 low = 0; \ 154 high = nrow; \ 155 while (high - low > 3) { \ 156 t = (low + high) / 2; \ 157 if (rp[t] > bcol) high = t; 
\ 158 else low = t; \ 159 } \ 160 for (_i = low; _i < high; _i++) { \ 161 if (rp[_i] > bcol) break; \ 162 if (rp[_i] == bcol) { \ 163 bap = ap + bs2 * _i + bs * cidx + ridx; \ 164 if (addv == ADD_VALUES) *bap += value; \ 165 else *bap = value; \ 166 goto a_noinsert; \ 167 } \ 168 } \ 169 if (a->nonew == 1) goto a_noinsert; \ 170 PetscCheck(a->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 171 MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, aimax, a->nonew, MatScalar); \ 172 N = nrow++ - 1; \ 173 /* shift up all the later entries in this row */ \ 174 PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \ 175 PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \ 176 PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \ 177 rp[_i] = bcol; \ 178 ap[bs2 * _i + bs * cidx + ridx] = value; \ 179 a_noinsert:; \ 180 ailen[brow] = nrow; \ 181 } while (0) 182 183 #define MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, orow, ocol) \ 184 do { \ 185 brow = row / bs; \ 186 rp = PetscSafePointerPlusOffset(bj, bi[brow]); \ 187 ap = PetscSafePointerPlusOffset(ba, bs2 * bi[brow]); \ 188 rmax = bimax[brow]; \ 189 nrow = bilen[brow]; \ 190 bcol = col / bs; \ 191 ridx = row % bs; \ 192 cidx = col % bs; \ 193 low = 0; \ 194 high = nrow; \ 195 while (high - low > 3) { \ 196 t = (low + high) / 2; \ 197 if (rp[t] > bcol) high = t; \ 198 else low = t; \ 199 } \ 200 for (_i = low; _i < high; _i++) { \ 201 if (rp[_i] > bcol) break; \ 202 if (rp[_i] == bcol) { \ 203 bap = ap + bs2 * _i + bs * cidx + ridx; \ 204 if (addv == ADD_VALUES) *bap += value; \ 205 else *bap = value; \ 206 goto b_noinsert; \ 207 } \ 208 } \ 209 if (b->nonew == 1) goto b_noinsert; \ 210 PetscCheck(b->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" 
PetscInt_FMT ") into matrix", orow, ocol); \ 211 MatSeqXAIJReallocateAIJ(B, b->mbs, bs2, nrow, brow, bcol, rmax, ba, bi, bj, rp, ap, bimax, b->nonew, MatScalar); \ 212 N = nrow++ - 1; \ 213 /* shift up all the later entries in this row */ \ 214 PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \ 215 PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \ 216 PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \ 217 rp[_i] = bcol; \ 218 ap[bs2 * _i + bs * cidx + ridx] = value; \ 219 b_noinsert:; \ 220 bilen[brow] = nrow; \ 221 } while (0) 222 223 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) 224 { 225 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 226 MatScalar value; 227 PetscBool roworiented = baij->roworiented; 228 PetscInt i, j, row, col; 229 PetscInt rstart_orig = mat->rmap->rstart; 230 PetscInt rend_orig = mat->rmap->rend, cstart_orig = mat->cmap->rstart; 231 PetscInt cend_orig = mat->cmap->rend, bs = mat->rmap->bs; 232 233 /* Some Variables required in the macro */ 234 Mat A = baij->A; 235 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)(A)->data; 236 PetscInt *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j; 237 MatScalar *aa = a->a; 238 239 Mat B = baij->B; 240 Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)(B)->data; 241 PetscInt *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j; 242 MatScalar *ba = b->a; 243 244 PetscInt *rp, ii, nrow, _i, rmax, N, brow, bcol; 245 PetscInt low, high, t, ridx, cidx, bs2 = a->bs2; 246 MatScalar *ap, *bap; 247 248 PetscFunctionBegin; 249 for (i = 0; i < m; i++) { 250 if (im[i] < 0) continue; 251 PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1); 252 if (im[i] >= rstart_orig && im[i] < rend_orig) { 253 row = im[i] - rstart_orig; 254 for (j = 0; j < n; j++) { 255 if (in[j] >= cstart_orig && 
in[j] < cend_orig) { 256 col = in[j] - cstart_orig; 257 if (roworiented) value = v[i * n + j]; 258 else value = v[i + j * m]; 259 MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, im[i], in[j]); 260 } else if (in[j] < 0) { 261 continue; 262 } else { 263 PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1); 264 if (mat->was_assembled) { 265 if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 266 #if defined(PETSC_USE_CTABLE) 267 PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] / bs + 1, 0, &col)); 268 col = col - 1; 269 #else 270 col = baij->colmap[in[j] / bs] - 1; 271 #endif 272 if (col < 0 && !((Mat_SeqBAIJ *)baij->B->data)->nonew) { 273 PetscCall(MatDisAssemble_MPIBAIJ(mat)); 274 col = in[j]; 275 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 276 B = baij->B; 277 b = (Mat_SeqBAIJ *)(B)->data; 278 bimax = b->imax; 279 bi = b->i; 280 bilen = b->ilen; 281 bj = b->j; 282 ba = b->a; 283 } else { 284 PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]); 285 col += in[j] % bs; 286 } 287 } else col = in[j]; 288 if (roworiented) value = v[i * n + j]; 289 else value = v[i + j * m]; 290 MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, im[i], in[j]); 291 /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */ 292 } 293 } 294 } else { 295 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]); 296 if (!baij->donotstash) { 297 mat->assembled = PETSC_FALSE; 298 if (roworiented) { 299 PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE)); 300 } else { 301 PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + 
i, m, PETSC_FALSE)); 302 } 303 } 304 } 305 } 306 PetscFunctionReturn(PETSC_SUCCESS); 307 } 308 309 static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol) 310 { 311 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 312 PetscInt *rp, low, high, t, ii, jj, nrow, i, rmax, N; 313 PetscInt *imax = a->imax, *ai = a->i, *ailen = a->ilen; 314 PetscInt *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs; 315 PetscBool roworiented = a->roworiented; 316 const PetscScalar *value = v; 317 MatScalar *ap, *aa = a->a, *bap; 318 319 PetscFunctionBegin; 320 rp = aj + ai[row]; 321 ap = aa + bs2 * ai[row]; 322 rmax = imax[row]; 323 nrow = ailen[row]; 324 value = v; 325 low = 0; 326 high = nrow; 327 while (high - low > 7) { 328 t = (low + high) / 2; 329 if (rp[t] > col) high = t; 330 else low = t; 331 } 332 for (i = low; i < high; i++) { 333 if (rp[i] > col) break; 334 if (rp[i] == col) { 335 bap = ap + bs2 * i; 336 if (roworiented) { 337 if (is == ADD_VALUES) { 338 for (ii = 0; ii < bs; ii++) { 339 for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++; 340 } 341 } else { 342 for (ii = 0; ii < bs; ii++) { 343 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 344 } 345 } 346 } else { 347 if (is == ADD_VALUES) { 348 for (ii = 0; ii < bs; ii++, value += bs) { 349 for (jj = 0; jj < bs; jj++) bap[jj] += value[jj]; 350 bap += bs; 351 } 352 } else { 353 for (ii = 0; ii < bs; ii++, value += bs) { 354 for (jj = 0; jj < bs; jj++) bap[jj] = value[jj]; 355 bap += bs; 356 } 357 } 358 } 359 goto noinsert2; 360 } 361 } 362 if (nonew == 1) goto noinsert2; 363 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol); 364 MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar); 365 N = nrow++ - 1; 366 high++; 367 
/* shift up all the later entries in this row */ 368 PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1)); 369 PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1))); 370 rp[i] = col; 371 bap = ap + bs2 * i; 372 if (roworiented) { 373 for (ii = 0; ii < bs; ii++) { 374 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 375 } 376 } else { 377 for (ii = 0; ii < bs; ii++) { 378 for (jj = 0; jj < bs; jj++) *bap++ = *value++; 379 } 380 } 381 noinsert2:; 382 ailen[row] = nrow; 383 PetscFunctionReturn(PETSC_SUCCESS); 384 } 385 386 /* 387 This routine should be optimized so that the block copy at ** Here a copy is required ** below is not needed 388 by passing additional stride information into the MatSetValuesBlocked_SeqBAIJ_Inlined() routine 389 */ 390 static PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) 391 { 392 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 393 const PetscScalar *value; 394 MatScalar *barray = baij->barray; 395 PetscBool roworiented = baij->roworiented; 396 PetscInt i, j, ii, jj, row, col, rstart = baij->rstartbs; 397 PetscInt rend = baij->rendbs, cstart = baij->cstartbs, stepval; 398 PetscInt cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2; 399 400 PetscFunctionBegin; 401 if (!barray) { 402 PetscCall(PetscMalloc1(bs2, &barray)); 403 baij->barray = barray; 404 } 405 406 if (roworiented) stepval = (n - 1) * bs; 407 else stepval = (m - 1) * bs; 408 409 for (i = 0; i < m; i++) { 410 if (im[i] < 0) continue; 411 PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1); 412 if (im[i] >= rstart && im[i] < rend) { 413 row = im[i] - rstart; 414 for (j = 0; j < n; j++) { 415 /* If NumCol = 1 then a copy is not required */ 416 if ((roworiented) && (n == 1)) { 417 barray = (MatScalar *)v + i * bs2; 418 } else 
if ((!roworiented) && (m == 1)) { 419 barray = (MatScalar *)v + j * bs2; 420 } else { /* Here a copy is required */ 421 if (roworiented) { 422 value = v + (i * (stepval + bs) + j) * bs; 423 } else { 424 value = v + (j * (stepval + bs) + i) * bs; 425 } 426 for (ii = 0; ii < bs; ii++, value += bs + stepval) { 427 for (jj = 0; jj < bs; jj++) barray[jj] = value[jj]; 428 barray += bs; 429 } 430 barray -= bs2; 431 } 432 433 if (in[j] >= cstart && in[j] < cend) { 434 col = in[j] - cstart; 435 PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j])); 436 } else if (in[j] < 0) { 437 continue; 438 } else { 439 PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1); 440 if (mat->was_assembled) { 441 if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 442 443 #if defined(PETSC_USE_DEBUG) 444 #if defined(PETSC_USE_CTABLE) 445 { 446 PetscInt data; 447 PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &data)); 448 PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap"); 449 } 450 #else 451 PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap"); 452 #endif 453 #endif 454 #if defined(PETSC_USE_CTABLE) 455 PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col)); 456 col = (col - 1) / bs; 457 #else 458 col = (baij->colmap[in[j]] - 1) / bs; 459 #endif 460 if (col < 0 && !((Mat_SeqBAIJ *)baij->B->data)->nonew) { 461 PetscCall(MatDisAssemble_MPIBAIJ(mat)); 462 col = in[j]; 463 } else PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new blocked indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]); 464 } else col = in[j]; 465 PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j])); 466 } 467 } 468 } else { 469 
PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]); 470 if (!baij->donotstash) { 471 if (roworiented) { 472 PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 473 } else { 474 PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 475 } 476 } 477 } 478 } 479 PetscFunctionReturn(PETSC_SUCCESS); 480 } 481 482 #define HASH_KEY 0.6180339887 483 #define HASH(size, key, tmp) (tmp = (key) * HASH_KEY, (PetscInt)((size) * (tmp - (PetscInt)tmp))) 484 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 485 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 486 static PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) 487 { 488 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 489 PetscBool roworiented = baij->roworiented; 490 PetscInt i, j, row, col; 491 PetscInt rstart_orig = mat->rmap->rstart; 492 PetscInt rend_orig = mat->rmap->rend, Nbs = baij->Nbs; 493 PetscInt h1, key, size = baij->ht_size, bs = mat->rmap->bs, *HT = baij->ht, idx; 494 PetscReal tmp; 495 MatScalar **HD = baij->hd, value; 496 PetscInt total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct; 497 498 PetscFunctionBegin; 499 for (i = 0; i < m; i++) { 500 if (PetscDefined(USE_DEBUG)) { 501 PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row"); 502 PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1); 503 } 504 row = im[i]; 505 if (row >= rstart_orig && row < rend_orig) { 506 for (j = 0; j < n; j++) { 507 col = in[j]; 508 if (roworiented) value = v[i * n + j]; 509 else value = v[i + j * m]; 510 /* Look up 
PetscInto the Hash Table */ 511 key = (row / bs) * Nbs + (col / bs) + 1; 512 h1 = HASH(size, key, tmp); 513 514 idx = h1; 515 if (PetscDefined(USE_DEBUG)) { 516 insert_ct++; 517 total_ct++; 518 if (HT[idx] != key) { 519 for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++); 520 if (idx == size) { 521 for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++); 522 PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col); 523 } 524 } 525 } else if (HT[idx] != key) { 526 for (idx = h1; (idx < size) && (HT[idx] != key); idx++); 527 if (idx == size) { 528 for (idx = 0; (idx < h1) && (HT[idx] != key); idx++); 529 PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col); 530 } 531 } 532 /* A HASH table entry is found, so insert the values at the correct address */ 533 if (addv == ADD_VALUES) *(HD[idx] + (col % bs) * bs + (row % bs)) += value; 534 else *(HD[idx] + (col % bs) * bs + (row % bs)) = value; 535 } 536 } else if (!baij->donotstash) { 537 if (roworiented) { 538 PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE)); 539 } else { 540 PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE)); 541 } 542 } 543 } 544 if (PetscDefined(USE_DEBUG)) { 545 baij->ht_total_ct += total_ct; 546 baij->ht_insert_ct += insert_ct; 547 } 548 PetscFunctionReturn(PETSC_SUCCESS); 549 } 550 551 static PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) 552 { 553 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 554 PetscBool roworiented = baij->roworiented; 555 PetscInt i, j, ii, jj, row, col; 556 PetscInt rstart = baij->rstartbs; 557 PetscInt rend = mat->rmap->rend, stepval, bs = mat->rmap->bs, bs2 = baij->bs2, nbs2 = n * 
bs2; 558 PetscInt h1, key, size = baij->ht_size, idx, *HT = baij->ht, Nbs = baij->Nbs; 559 PetscReal tmp; 560 MatScalar **HD = baij->hd, *baij_a; 561 const PetscScalar *v_t, *value; 562 PetscInt total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct; 563 564 PetscFunctionBegin; 565 if (roworiented) stepval = (n - 1) * bs; 566 else stepval = (m - 1) * bs; 567 568 for (i = 0; i < m; i++) { 569 if (PetscDefined(USE_DEBUG)) { 570 PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row: %" PetscInt_FMT, im[i]); 571 PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1); 572 } 573 row = im[i]; 574 v_t = v + i * nbs2; 575 if (row >= rstart && row < rend) { 576 for (j = 0; j < n; j++) { 577 col = in[j]; 578 579 /* Look up into the Hash Table */ 580 key = row * Nbs + col + 1; 581 h1 = HASH(size, key, tmp); 582 583 idx = h1; 584 if (PetscDefined(USE_DEBUG)) { 585 total_ct++; 586 insert_ct++; 587 if (HT[idx] != key) { 588 for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++); 589 if (idx == size) { 590 for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++); 591 PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col); 592 } 593 } 594 } else if (HT[idx] != key) { 595 for (idx = h1; (idx < size) && (HT[idx] != key); idx++); 596 if (idx == size) { 597 for (idx = 0; (idx < h1) && (HT[idx] != key); idx++); 598 PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col); 599 } 600 } 601 baij_a = HD[idx]; 602 if (roworiented) { 603 /*value = v + i*(stepval+bs)*bs + j*bs;*/ 604 /* value = v + (i*(stepval+bs)+j)*bs; */ 605 value = v_t; 606 v_t += bs; 607 if (addv == ADD_VALUES) { 608 for (ii = 0; ii < bs; ii++, value += stepval) { 609 for (jj = ii; jj < bs2; jj 
+= bs) baij_a[jj] += *value++; 610 } 611 } else { 612 for (ii = 0; ii < bs; ii++, value += stepval) { 613 for (jj = ii; jj < bs2; jj += bs) baij_a[jj] = *value++; 614 } 615 } 616 } else { 617 value = v + j * (stepval + bs) * bs + i * bs; 618 if (addv == ADD_VALUES) { 619 for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) { 620 for (jj = 0; jj < bs; jj++) baij_a[jj] += *value++; 621 } 622 } else { 623 for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) { 624 for (jj = 0; jj < bs; jj++) baij_a[jj] = *value++; 625 } 626 } 627 } 628 } 629 } else { 630 if (!baij->donotstash) { 631 if (roworiented) { 632 PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 633 } else { 634 PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 635 } 636 } 637 } 638 } 639 if (PetscDefined(USE_DEBUG)) { 640 baij->ht_total_ct += total_ct; 641 baij->ht_insert_ct += insert_ct; 642 } 643 PetscFunctionReturn(PETSC_SUCCESS); 644 } 645 646 static PetscErrorCode MatGetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[]) 647 { 648 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 649 PetscInt bs = mat->rmap->bs, i, j, bsrstart = mat->rmap->rstart, bsrend = mat->rmap->rend; 650 PetscInt bscstart = mat->cmap->rstart, bscend = mat->cmap->rend, row, col, data; 651 652 PetscFunctionBegin; 653 for (i = 0; i < m; i++) { 654 if (idxm[i] < 0) continue; /* negative row */ 655 PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1); 656 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 657 row = idxm[i] - bsrstart; 658 for (j = 0; j < n; j++) { 659 if (idxn[j] < 0) continue; /* negative column */ 660 PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1); 661 if 
(idxn[j] >= bscstart && idxn[j] < bscend) { 662 col = idxn[j] - bscstart; 663 PetscCall(MatGetValues_SeqBAIJ(baij->A, 1, &row, 1, &col, v + i * n + j)); 664 } else { 665 if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 666 #if defined(PETSC_USE_CTABLE) 667 PetscCall(PetscHMapIGetWithDefault(baij->colmap, idxn[j] / bs + 1, 0, &data)); 668 data--; 669 #else 670 data = baij->colmap[idxn[j] / bs] - 1; 671 #endif 672 if ((data < 0) || (baij->garray[data / bs] != idxn[j] / bs)) *(v + i * n + j) = 0.0; 673 else { 674 col = data + idxn[j] % bs; 675 PetscCall(MatGetValues_SeqBAIJ(baij->B, 1, &row, 1, &col, v + i * n + j)); 676 } 677 } 678 } 679 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported"); 680 } 681 PetscFunctionReturn(PETSC_SUCCESS); 682 } 683 684 static PetscErrorCode MatNorm_MPIBAIJ(Mat mat, NormType type, PetscReal *nrm) 685 { 686 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 687 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ *)baij->A->data, *bmat = (Mat_SeqBAIJ *)baij->B->data; 688 PetscInt i, j, bs2 = baij->bs2, bs = baij->A->rmap->bs, nz, row, col; 689 PetscReal sum = 0.0; 690 MatScalar *v; 691 692 PetscFunctionBegin; 693 if (baij->size == 1) { 694 PetscCall(MatNorm(baij->A, type, nrm)); 695 } else { 696 if (type == NORM_FROBENIUS) { 697 v = amat->a; 698 nz = amat->nz * bs2; 699 for (i = 0; i < nz; i++) { 700 sum += PetscRealPart(PetscConj(*v) * (*v)); 701 v++; 702 } 703 v = bmat->a; 704 nz = bmat->nz * bs2; 705 for (i = 0; i < nz; i++) { 706 sum += PetscRealPart(PetscConj(*v) * (*v)); 707 v++; 708 } 709 PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 710 *nrm = PetscSqrtReal(*nrm); 711 } else if (type == NORM_1) { /* max column sum */ 712 PetscReal *tmp, *tmp2; 713 PetscInt *jj, *garray = baij->garray, cstart = baij->rstartbs; 714 PetscCall(PetscCalloc1(mat->cmap->N, &tmp)); 715 PetscCall(PetscMalloc1(mat->cmap->N, &tmp2)); 716 v = amat->a; 717 jj = amat->j; 718 for (i 
= 0; i < amat->nz; i++) { 719 for (j = 0; j < bs; j++) { 720 col = bs * (cstart + *jj) + j; /* column index */ 721 for (row = 0; row < bs; row++) { 722 tmp[col] += PetscAbsScalar(*v); 723 v++; 724 } 725 } 726 jj++; 727 } 728 v = bmat->a; 729 jj = bmat->j; 730 for (i = 0; i < bmat->nz; i++) { 731 for (j = 0; j < bs; j++) { 732 col = bs * garray[*jj] + j; 733 for (row = 0; row < bs; row++) { 734 tmp[col] += PetscAbsScalar(*v); 735 v++; 736 } 737 } 738 jj++; 739 } 740 PetscCall(MPIU_Allreduce(tmp, tmp2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 741 *nrm = 0.0; 742 for (j = 0; j < mat->cmap->N; j++) { 743 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 744 } 745 PetscCall(PetscFree(tmp)); 746 PetscCall(PetscFree(tmp2)); 747 } else if (type == NORM_INFINITY) { /* max row sum */ 748 PetscReal *sums; 749 PetscCall(PetscMalloc1(bs, &sums)); 750 sum = 0.0; 751 for (j = 0; j < amat->mbs; j++) { 752 for (row = 0; row < bs; row++) sums[row] = 0.0; 753 v = amat->a + bs2 * amat->i[j]; 754 nz = amat->i[j + 1] - amat->i[j]; 755 for (i = 0; i < nz; i++) { 756 for (col = 0; col < bs; col++) { 757 for (row = 0; row < bs; row++) { 758 sums[row] += PetscAbsScalar(*v); 759 v++; 760 } 761 } 762 } 763 v = bmat->a + bs2 * bmat->i[j]; 764 nz = bmat->i[j + 1] - bmat->i[j]; 765 for (i = 0; i < nz; i++) { 766 for (col = 0; col < bs; col++) { 767 for (row = 0; row < bs; row++) { 768 sums[row] += PetscAbsScalar(*v); 769 v++; 770 } 771 } 772 } 773 for (row = 0; row < bs; row++) { 774 if (sums[row] > sum) sum = sums[row]; 775 } 776 } 777 PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat))); 778 PetscCall(PetscFree(sums)); 779 } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for this norm yet"); 780 } 781 PetscFunctionReturn(PETSC_SUCCESS); 782 } 783 784 /* 785 Creates the hash table, and sets the table 786 This table is created only once. 
787 If new entried need to be added to the matrix 788 then the hash table has to be destroyed and 789 recreated. 790 */ 791 static PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat, PetscReal factor) 792 { 793 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 794 Mat A = baij->A, B = baij->B; 795 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data; 796 PetscInt i, j, k, nz = a->nz + b->nz, h1, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j; 797 PetscInt ht_size, bs2 = baij->bs2, rstart = baij->rstartbs; 798 PetscInt cstart = baij->cstartbs, *garray = baij->garray, row, col, Nbs = baij->Nbs; 799 PetscInt *HT, key; 800 MatScalar **HD; 801 PetscReal tmp; 802 #if defined(PETSC_USE_INFO) 803 PetscInt ct = 0, max = 0; 804 #endif 805 806 PetscFunctionBegin; 807 if (baij->ht) PetscFunctionReturn(PETSC_SUCCESS); 808 809 baij->ht_size = (PetscInt)(factor * nz); 810 ht_size = baij->ht_size; 811 812 /* Allocate Memory for Hash Table */ 813 PetscCall(PetscCalloc2(ht_size, &baij->hd, ht_size, &baij->ht)); 814 HD = baij->hd; 815 HT = baij->ht; 816 817 /* Loop Over A */ 818 for (i = 0; i < a->mbs; i++) { 819 for (j = ai[i]; j < ai[i + 1]; j++) { 820 row = i + rstart; 821 col = aj[j] + cstart; 822 823 key = row * Nbs + col + 1; 824 h1 = HASH(ht_size, key, tmp); 825 for (k = 0; k < ht_size; k++) { 826 if (!HT[(h1 + k) % ht_size]) { 827 HT[(h1 + k) % ht_size] = key; 828 HD[(h1 + k) % ht_size] = a->a + j * bs2; 829 break; 830 #if defined(PETSC_USE_INFO) 831 } else { 832 ct++; 833 #endif 834 } 835 } 836 #if defined(PETSC_USE_INFO) 837 if (k > max) max = k; 838 #endif 839 } 840 } 841 /* Loop Over B */ 842 for (i = 0; i < b->mbs; i++) { 843 for (j = bi[i]; j < bi[i + 1]; j++) { 844 row = i + rstart; 845 col = garray[bj[j]]; 846 key = row * Nbs + col + 1; 847 h1 = HASH(ht_size, key, tmp); 848 for (k = 0; k < ht_size; k++) { 849 if (!HT[(h1 + k) % ht_size]) { 850 HT[(h1 + k) % ht_size] = key; 851 HD[(h1 + k) % ht_size] = b->a + j * bs2; 852 break; 853 #if 
defined(PETSC_USE_INFO) 854 } else { 855 ct++; 856 #endif 857 } 858 } 859 #if defined(PETSC_USE_INFO) 860 if (k > max) max = k; 861 #endif 862 } 863 } 864 865 /* Print Summary */ 866 #if defined(PETSC_USE_INFO) 867 for (i = 0, j = 0; i < ht_size; i++) { 868 if (HT[i]) j++; 869 } 870 PetscCall(PetscInfo(mat, "Average Search = %5.2g,max search = %" PetscInt_FMT "\n", (!j) ? (double)0.0 : (double)(((PetscReal)(ct + j)) / (double)j), max)); 871 #endif 872 PetscFunctionReturn(PETSC_SUCCESS); 873 } 874 875 static PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat, MatAssemblyType mode) 876 { 877 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 878 PetscInt nstash, reallocs; 879 880 PetscFunctionBegin; 881 if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS); 882 883 PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range)); 884 PetscCall(MatStashScatterBegin_Private(mat, &mat->bstash, baij->rangebs)); 885 PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs)); 886 PetscCall(PetscInfo(mat, "Stash has %" PetscInt_FMT " entries,uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs)); 887 PetscCall(MatStashGetInfo_Private(&mat->bstash, &nstash, &reallocs)); 888 PetscCall(PetscInfo(mat, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs)); 889 PetscFunctionReturn(PETSC_SUCCESS); 890 } 891 892 static PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat, MatAssemblyType mode) 893 { 894 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 895 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)baij->A->data; 896 PetscInt i, j, rstart, ncols, flg, bs2 = baij->bs2; 897 PetscInt *row, *col; 898 PetscBool r1, r2, r3, other_disassembled; 899 MatScalar *val; 900 PetscMPIInt n; 901 902 PetscFunctionBegin; 903 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 904 if (!baij->donotstash && !mat->nooffprocentries) { 905 while (1) { 906 PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, 
&row, &col, &val, &flg)); 907 if (!flg) break; 908 909 for (i = 0; i < n;) { 910 /* Now identify the consecutive vals belonging to the same row */ 911 for (j = i, rstart = row[j]; j < n; j++) { 912 if (row[j] != rstart) break; 913 } 914 if (j < n) ncols = j - i; 915 else ncols = n - i; 916 /* Now assemble all these values with a single function call */ 917 PetscCall(MatSetValues_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode)); 918 i = j; 919 } 920 } 921 PetscCall(MatStashScatterEnd_Private(&mat->stash)); 922 /* Now process the block-stash. Since the values are stashed column-oriented, 923 set the row-oriented flag to column-oriented, and after MatSetValues() 924 restore the original flags */ 925 r1 = baij->roworiented; 926 r2 = a->roworiented; 927 r3 = ((Mat_SeqBAIJ *)baij->B->data)->roworiented; 928 929 baij->roworiented = PETSC_FALSE; 930 a->roworiented = PETSC_FALSE; 931 ((Mat_SeqBAIJ *)baij->B->data)->roworiented = PETSC_FALSE; 932 while (1) { 933 PetscCall(MatStashScatterGetMesg_Private(&mat->bstash, &n, &row, &col, &val, &flg)); 934 if (!flg) break; 935 936 for (i = 0; i < n;) { 937 /* Now identify the consecutive vals belonging to the same row */ 938 for (j = i, rstart = row[j]; j < n; j++) { 939 if (row[j] != rstart) break; 940 } 941 if (j < n) ncols = j - i; 942 else ncols = n - i; 943 PetscCall(MatSetValuesBlocked_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i * bs2, mat->insertmode)); 944 i = j; 945 } 946 } 947 PetscCall(MatStashScatterEnd_Private(&mat->bstash)); 948 949 baij->roworiented = r1; 950 a->roworiented = r2; 951 ((Mat_SeqBAIJ *)baij->B->data)->roworiented = r3; 952 } 953 954 PetscCall(MatAssemblyBegin(baij->A, mode)); 955 PetscCall(MatAssemblyEnd(baij->A, mode)); 956 957 /* determine if any processor has disassembled, if so we must 958 also disassemble ourselves, in order that we may reassemble. 
*/ 959 /* 960 if nonzero structure of submatrix B cannot change then we know that 961 no processor disassembled thus we can skip this stuff 962 */ 963 if (!((Mat_SeqBAIJ *)baij->B->data)->nonew) { 964 PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 965 if (mat->was_assembled && !other_disassembled) PetscCall(MatDisAssemble_MPIBAIJ(mat)); 966 } 967 968 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIBAIJ(mat)); 969 PetscCall(MatAssemblyBegin(baij->B, mode)); 970 PetscCall(MatAssemblyEnd(baij->B, mode)); 971 972 #if defined(PETSC_USE_INFO) 973 if (baij->ht && mode == MAT_FINAL_ASSEMBLY) { 974 PetscCall(PetscInfo(mat, "Average Hash Table Search in MatSetValues = %5.2f\n", (double)((PetscReal)baij->ht_total_ct) / baij->ht_insert_ct)); 975 976 baij->ht_total_ct = 0; 977 baij->ht_insert_ct = 0; 978 } 979 #endif 980 if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 981 PetscCall(MatCreateHashTable_MPIBAIJ_Private(mat, baij->ht_fact)); 982 983 mat->ops->setvalues = MatSetValues_MPIBAIJ_HT; 984 mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 985 } 986 987 PetscCall(PetscFree2(baij->rowvalues, baij->rowindices)); 988 989 baij->rowvalues = NULL; 990 991 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 992 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ *)baij->A->data)->nonew) { 993 PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate; 994 PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat))); 995 } 996 PetscFunctionReturn(PETSC_SUCCESS); 997 } 998 999 extern PetscErrorCode MatView_SeqBAIJ(Mat, PetscViewer); 1000 #include <petscdraw.h> 1001 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer) 1002 { 1003 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ 
*)mat->data; 1004 PetscMPIInt rank = baij->rank; 1005 PetscInt bs = mat->rmap->bs; 1006 PetscBool iascii, isdraw; 1007 PetscViewer sviewer; 1008 PetscViewerFormat format; 1009 1010 PetscFunctionBegin; 1011 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 1012 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1013 if (iascii) { 1014 PetscCall(PetscViewerGetFormat(viewer, &format)); 1015 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1016 MatInfo info; 1017 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 1018 PetscCall(MatGetInfo(mat, MAT_LOCAL, &info)); 1019 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 1020 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, 1021 mat->rmap->bs, (double)info.memory)); 1022 PetscCall(MatGetInfo(baij->A, MAT_LOCAL, &info)); 1023 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used)); 1024 PetscCall(MatGetInfo(baij->B, MAT_LOCAL, &info)); 1025 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used)); 1026 PetscCall(PetscViewerFlush(viewer)); 1027 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 1028 PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n")); 1029 PetscCall(VecScatterView(baij->Mvctx, viewer)); 1030 PetscFunctionReturn(PETSC_SUCCESS); 1031 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1032 PetscCall(PetscViewerASCIIPrintf(viewer, " block size is %" PetscInt_FMT "\n", bs)); 1033 PetscFunctionReturn(PETSC_SUCCESS); 1034 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1035 PetscFunctionReturn(PETSC_SUCCESS); 1036 } 1037 } 1038 
1039 if (isdraw) { 1040 PetscDraw draw; 1041 PetscBool isnull; 1042 PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); 1043 PetscCall(PetscDrawIsNull(draw, &isnull)); 1044 if (isnull) PetscFunctionReturn(PETSC_SUCCESS); 1045 } 1046 1047 { 1048 /* assemble the entire matrix onto first processor. */ 1049 Mat A; 1050 Mat_SeqBAIJ *Aloc; 1051 PetscInt M = mat->rmap->N, N = mat->cmap->N, *ai, *aj, col, i, j, k, *rvals, mbs = baij->mbs; 1052 MatScalar *a; 1053 const char *matname; 1054 1055 /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */ 1056 /* Perhaps this should be the type of mat? */ 1057 PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A)); 1058 if (rank == 0) { 1059 PetscCall(MatSetSizes(A, M, N, M, N)); 1060 } else { 1061 PetscCall(MatSetSizes(A, 0, 0, M, N)); 1062 } 1063 PetscCall(MatSetType(A, MATMPIBAIJ)); 1064 PetscCall(MatMPIBAIJSetPreallocation(A, mat->rmap->bs, 0, NULL, 0, NULL)); 1065 PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE)); 1066 1067 /* copy over the A part */ 1068 Aloc = (Mat_SeqBAIJ *)baij->A->data; 1069 ai = Aloc->i; 1070 aj = Aloc->j; 1071 a = Aloc->a; 1072 PetscCall(PetscMalloc1(bs, &rvals)); 1073 1074 for (i = 0; i < mbs; i++) { 1075 rvals[0] = bs * (baij->rstartbs + i); 1076 for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1; 1077 for (j = ai[i]; j < ai[i + 1]; j++) { 1078 col = (baij->cstartbs + aj[j]) * bs; 1079 for (k = 0; k < bs; k++) { 1080 PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES)); 1081 col++; 1082 a += bs; 1083 } 1084 } 1085 } 1086 /* copy over the B part */ 1087 Aloc = (Mat_SeqBAIJ *)baij->B->data; 1088 ai = Aloc->i; 1089 aj = Aloc->j; 1090 a = Aloc->a; 1091 for (i = 0; i < mbs; i++) { 1092 rvals[0] = bs * (baij->rstartbs + i); 1093 for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1; 1094 for (j = ai[i]; j < ai[i + 1]; j++) { 1095 col = baij->garray[aj[j]] * bs; 1096 for (k = 0; k < bs; k++) { 1097 PetscCall(MatSetValues_MPIBAIJ(A, 
bs, rvals, 1, &col, a, INSERT_VALUES)); 1098 col++; 1099 a += bs; 1100 } 1101 } 1102 } 1103 PetscCall(PetscFree(rvals)); 1104 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 1105 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 1106 /* 1107 Everyone has to call to draw the matrix since the graphics waits are 1108 synchronized across all processors that share the PetscDraw object 1109 */ 1110 PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 1111 if (((PetscObject)mat)->name) PetscCall(PetscObjectGetName((PetscObject)mat, &matname)); 1112 if (rank == 0) { 1113 if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIBAIJ *)A->data)->A, matname)); 1114 PetscCall(MatView_SeqBAIJ(((Mat_MPIBAIJ *)A->data)->A, sviewer)); 1115 } 1116 PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 1117 PetscCall(MatDestroy(&A)); 1118 } 1119 PetscFunctionReturn(PETSC_SUCCESS); 1120 } 1121 1122 /* Used for both MPIBAIJ and MPISBAIJ matrices */ 1123 PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat, PetscViewer viewer) 1124 { 1125 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 1126 Mat_SeqBAIJ *A = (Mat_SeqBAIJ *)aij->A->data; 1127 Mat_SeqBAIJ *B = (Mat_SeqBAIJ *)aij->B->data; 1128 const PetscInt *garray = aij->garray; 1129 PetscInt header[4], M, N, m, rs, cs, bs, cnt, i, j, ja, jb, k, l; 1130 PetscInt64 nz, hnz; 1131 PetscInt *rowlens, *colidxs; 1132 PetscScalar *matvals; 1133 PetscMPIInt rank; 1134 1135 PetscFunctionBegin; 1136 PetscCall(PetscViewerSetUp(viewer)); 1137 1138 M = mat->rmap->N; 1139 N = mat->cmap->N; 1140 m = mat->rmap->n; 1141 rs = mat->rmap->rstart; 1142 cs = mat->cmap->rstart; 1143 bs = mat->rmap->bs; 1144 nz = bs * bs * (A->nz + B->nz); 1145 1146 /* write matrix header */ 1147 header[0] = MAT_FILE_CLASSID; 1148 header[1] = M; 1149 header[2] = N; 1150 PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_INT64, MPI_SUM, 0, PetscObjectComm((PetscObject)mat))); 1151 
PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 1152 if (rank == 0) PetscCall(PetscIntCast(hnz, &header[3])); 1153 PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT)); 1154 1155 /* fill in and store row lengths */ 1156 PetscCall(PetscMalloc1(m, &rowlens)); 1157 for (cnt = 0, i = 0; i < A->mbs; i++) 1158 for (j = 0; j < bs; j++) rowlens[cnt++] = bs * (A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i]); 1159 PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT)); 1160 PetscCall(PetscFree(rowlens)); 1161 1162 /* fill in and store column indices */ 1163 PetscCall(PetscMalloc1(nz, &colidxs)); 1164 for (cnt = 0, i = 0; i < A->mbs; i++) { 1165 for (k = 0; k < bs; k++) { 1166 for (jb = B->i[i]; jb < B->i[i + 1]; jb++) { 1167 if (garray[B->j[jb]] > cs / bs) break; 1168 for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l; 1169 } 1170 for (ja = A->i[i]; ja < A->i[i + 1]; ja++) 1171 for (l = 0; l < bs; l++) colidxs[cnt++] = bs * A->j[ja] + l + cs; 1172 for (; jb < B->i[i + 1]; jb++) 1173 for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l; 1174 } 1175 } 1176 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz); 1177 PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_INT)); 1178 PetscCall(PetscFree(colidxs)); 1179 1180 /* fill in and store nonzero values */ 1181 PetscCall(PetscMalloc1(nz, &matvals)); 1182 for (cnt = 0, i = 0; i < A->mbs; i++) { 1183 for (k = 0; k < bs; k++) { 1184 for (jb = B->i[i]; jb < B->i[i + 1]; jb++) { 1185 if (garray[B->j[jb]] > cs / bs) break; 1186 for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k]; 1187 } 1188 for (ja = A->i[i]; ja < A->i[i + 1]; ja++) 1189 for (l = 0; l < bs; l++) matvals[cnt++] = A->a[bs * (bs * ja + l) + k]; 1190 for (; jb < B->i[i + 1]; jb++) 1191 for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) 
+ k]; 1192 } 1193 } 1194 PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_SCALAR)); 1195 PetscCall(PetscFree(matvals)); 1196 1197 /* write block size option to the viewer's .info file */ 1198 PetscCall(MatView_Binary_BlockSizes(mat, viewer)); 1199 PetscFunctionReturn(PETSC_SUCCESS); 1200 } 1201 1202 PetscErrorCode MatView_MPIBAIJ(Mat mat, PetscViewer viewer) 1203 { 1204 PetscBool iascii, isdraw, issocket, isbinary; 1205 1206 PetscFunctionBegin; 1207 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 1208 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1209 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket)); 1210 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1211 if (iascii || isdraw || issocket) { 1212 PetscCall(MatView_MPIBAIJ_ASCIIorDraworSocket(mat, viewer)); 1213 } else if (isbinary) PetscCall(MatView_MPIBAIJ_Binary(mat, viewer)); 1214 PetscFunctionReturn(PETSC_SUCCESS); 1215 } 1216 1217 static PetscErrorCode MatMult_MPIBAIJ(Mat A, Vec xx, Vec yy) 1218 { 1219 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1220 PetscInt nt; 1221 1222 PetscFunctionBegin; 1223 PetscCall(VecGetLocalSize(xx, &nt)); 1224 PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A and xx"); 1225 PetscCall(VecGetLocalSize(yy, &nt)); 1226 PetscCheck(nt == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A and yy"); 1227 PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1228 PetscCall((*a->A->ops->mult)(a->A, xx, yy)); 1229 PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1230 PetscCall((*a->B->ops->multadd)(a->B, a->lvec, yy, yy)); 1231 PetscFunctionReturn(PETSC_SUCCESS); 1232 } 1233 1234 static PetscErrorCode MatMultAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz) 1235 { 1236 Mat_MPIBAIJ *a = 
(Mat_MPIBAIJ *)A->data; 1237 1238 PetscFunctionBegin; 1239 PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1240 PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz)); 1241 PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1242 PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz)); 1243 PetscFunctionReturn(PETSC_SUCCESS); 1244 } 1245 1246 static PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A, Vec xx, Vec yy) 1247 { 1248 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1249 1250 PetscFunctionBegin; 1251 /* do nondiagonal part */ 1252 PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec)); 1253 /* do local part */ 1254 PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy)); 1255 /* add partial results together */ 1256 PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE)); 1257 PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE)); 1258 PetscFunctionReturn(PETSC_SUCCESS); 1259 } 1260 1261 static PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz) 1262 { 1263 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1264 1265 PetscFunctionBegin; 1266 /* do nondiagonal part */ 1267 PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec)); 1268 /* do local part */ 1269 PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz)); 1270 /* add partial results together */ 1271 PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE)); 1272 PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE)); 1273 PetscFunctionReturn(PETSC_SUCCESS); 1274 } 1275 1276 /* 1277 This only works correctly for square matrices where the subblock A->A is the 1278 diagonal block 1279 */ 1280 static PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A, Vec v) 1281 { 1282 PetscFunctionBegin; 1283 PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block"); 1284 
PetscCall(MatGetDiagonal(((Mat_MPIBAIJ *)A->data)->A, v)); 1285 PetscFunctionReturn(PETSC_SUCCESS); 1286 } 1287 1288 static PetscErrorCode MatScale_MPIBAIJ(Mat A, PetscScalar aa) 1289 { 1290 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1291 1292 PetscFunctionBegin; 1293 PetscCall(MatScale(a->A, aa)); 1294 PetscCall(MatScale(a->B, aa)); 1295 PetscFunctionReturn(PETSC_SUCCESS); 1296 } 1297 1298 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1299 { 1300 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data; 1301 PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p; 1302 PetscInt bs = matin->rmap->bs, bs2 = mat->bs2, i, *cworkA, *cworkB, **pcA, **pcB; 1303 PetscInt nztot, nzA, nzB, lrow, brstart = matin->rmap->rstart, brend = matin->rmap->rend; 1304 PetscInt *cmap, *idx_p, cstart = mat->cstartbs; 1305 1306 PetscFunctionBegin; 1307 PetscCheck(row >= brstart && row < brend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local rows"); 1308 PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active"); 1309 mat->getrowactive = PETSC_TRUE; 1310 1311 if (!mat->rowvalues && (idx || v)) { 1312 /* 1313 allocate enough space to hold information from the longest row. 
1314 */ 1315 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *)mat->A->data, *Ba = (Mat_SeqBAIJ *)mat->B->data; 1316 PetscInt max = 1, mbs = mat->mbs, tmp; 1317 for (i = 0; i < mbs; i++) { 1318 tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i]; 1319 if (max < tmp) max = tmp; 1320 } 1321 PetscCall(PetscMalloc2(max * bs2, &mat->rowvalues, max * bs2, &mat->rowindices)); 1322 } 1323 lrow = row - brstart; 1324 1325 pvA = &vworkA; 1326 pcA = &cworkA; 1327 pvB = &vworkB; 1328 pcB = &cworkB; 1329 if (!v) { 1330 pvA = NULL; 1331 pvB = NULL; 1332 } 1333 if (!idx) { 1334 pcA = NULL; 1335 if (!v) pcB = NULL; 1336 } 1337 PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA)); 1338 PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB)); 1339 nztot = nzA + nzB; 1340 1341 cmap = mat->garray; 1342 if (v || idx) { 1343 if (nztot) { 1344 /* Sort by increasing column numbers, assuming A and B already sorted */ 1345 PetscInt imark = -1; 1346 if (v) { 1347 *v = v_p = mat->rowvalues; 1348 for (i = 0; i < nzB; i++) { 1349 if (cmap[cworkB[i] / bs] < cstart) v_p[i] = vworkB[i]; 1350 else break; 1351 } 1352 imark = i; 1353 for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i]; 1354 for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i]; 1355 } 1356 if (idx) { 1357 *idx = idx_p = mat->rowindices; 1358 if (imark > -1) { 1359 for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs; 1360 } else { 1361 for (i = 0; i < nzB; i++) { 1362 if (cmap[cworkB[i] / bs] < cstart) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs; 1363 else break; 1364 } 1365 imark = i; 1366 } 1367 for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart * bs + cworkA[i]; 1368 for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs; 1369 } 1370 } else { 1371 if (idx) *idx = NULL; 1372 if (v) *v = NULL; 1373 } 1374 } 1375 *nz = nztot; 1376 PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA)); 1377 PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, 
&nzB, pcB, pvB)); 1378 PetscFunctionReturn(PETSC_SUCCESS); 1379 } 1380 1381 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1382 { 1383 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 1384 1385 PetscFunctionBegin; 1386 PetscCheck(baij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow not called"); 1387 baij->getrowactive = PETSC_FALSE; 1388 PetscFunctionReturn(PETSC_SUCCESS); 1389 } 1390 1391 static PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A) 1392 { 1393 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data; 1394 1395 PetscFunctionBegin; 1396 PetscCall(MatZeroEntries(l->A)); 1397 PetscCall(MatZeroEntries(l->B)); 1398 PetscFunctionReturn(PETSC_SUCCESS); 1399 } 1400 1401 static PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin, MatInfoType flag, MatInfo *info) 1402 { 1403 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)matin->data; 1404 Mat A = a->A, B = a->B; 1405 PetscLogDouble isend[5], irecv[5]; 1406 1407 PetscFunctionBegin; 1408 info->block_size = (PetscReal)matin->rmap->bs; 1409 1410 PetscCall(MatGetInfo(A, MAT_LOCAL, info)); 1411 1412 isend[0] = info->nz_used; 1413 isend[1] = info->nz_allocated; 1414 isend[2] = info->nz_unneeded; 1415 isend[3] = info->memory; 1416 isend[4] = info->mallocs; 1417 1418 PetscCall(MatGetInfo(B, MAT_LOCAL, info)); 1419 1420 isend[0] += info->nz_used; 1421 isend[1] += info->nz_allocated; 1422 isend[2] += info->nz_unneeded; 1423 isend[3] += info->memory; 1424 isend[4] += info->mallocs; 1425 1426 if (flag == MAT_LOCAL) { 1427 info->nz_used = isend[0]; 1428 info->nz_allocated = isend[1]; 1429 info->nz_unneeded = isend[2]; 1430 info->memory = isend[3]; 1431 info->mallocs = isend[4]; 1432 } else if (flag == MAT_GLOBAL_MAX) { 1433 PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin))); 1434 1435 info->nz_used = irecv[0]; 1436 info->nz_allocated = irecv[1]; 1437 info->nz_unneeded = irecv[2]; 1438 info->memory = irecv[3]; 1439 info->mallocs = 
irecv[4]; 1440 } else if (flag == MAT_GLOBAL_SUM) { 1441 PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin))); 1442 1443 info->nz_used = irecv[0]; 1444 info->nz_allocated = irecv[1]; 1445 info->nz_unneeded = irecv[2]; 1446 info->memory = irecv[3]; 1447 info->mallocs = irecv[4]; 1448 } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_ARG_WRONG, "Unknown MatInfoType argument %d", (int)flag); 1449 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1450 info->fill_ratio_needed = 0; 1451 info->factor_mallocs = 0; 1452 PetscFunctionReturn(PETSC_SUCCESS); 1453 } 1454 1455 static PetscErrorCode MatSetOption_MPIBAIJ(Mat A, MatOption op, PetscBool flg) 1456 { 1457 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1458 1459 PetscFunctionBegin; 1460 switch (op) { 1461 case MAT_NEW_NONZERO_LOCATIONS: 1462 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1463 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1464 case MAT_KEEP_NONZERO_PATTERN: 1465 case MAT_NEW_NONZERO_LOCATION_ERR: 1466 MatCheckPreallocated(A, 1); 1467 PetscCall(MatSetOption(a->A, op, flg)); 1468 PetscCall(MatSetOption(a->B, op, flg)); 1469 break; 1470 case MAT_ROW_ORIENTED: 1471 MatCheckPreallocated(A, 1); 1472 a->roworiented = flg; 1473 1474 PetscCall(MatSetOption(a->A, op, flg)); 1475 PetscCall(MatSetOption(a->B, op, flg)); 1476 break; 1477 case MAT_FORCE_DIAGONAL_ENTRIES: 1478 case MAT_SORTED_FULL: 1479 PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); 1480 break; 1481 case MAT_IGNORE_OFF_PROC_ENTRIES: 1482 a->donotstash = flg; 1483 break; 1484 case MAT_USE_HASH_TABLE: 1485 a->ht_flag = flg; 1486 a->ht_fact = 1.39; 1487 break; 1488 case MAT_SYMMETRIC: 1489 case MAT_STRUCTURALLY_SYMMETRIC: 1490 case MAT_HERMITIAN: 1491 case MAT_SUBMAT_SINGLEIS: 1492 case MAT_SYMMETRY_ETERNAL: 1493 case MAT_STRUCTURAL_SYMMETRY_ETERNAL: 1494 case MAT_SPD_ETERNAL: 1495 /* if the diagonal matrix is square it inherits some of the properties above */ 1496 break; 1497 
default: 1498 SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "unknown option %d", op); 1499 } 1500 PetscFunctionReturn(PETSC_SUCCESS); 1501 } 1502 1503 static PetscErrorCode MatTranspose_MPIBAIJ(Mat A, MatReuse reuse, Mat *matout) 1504 { 1505 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)A->data; 1506 Mat_SeqBAIJ *Aloc; 1507 Mat B; 1508 PetscInt M = A->rmap->N, N = A->cmap->N, *ai, *aj, i, *rvals, j, k, col; 1509 PetscInt bs = A->rmap->bs, mbs = baij->mbs; 1510 MatScalar *a; 1511 1512 PetscFunctionBegin; 1513 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout)); 1514 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 1515 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 1516 PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M)); 1517 PetscCall(MatSetType(B, ((PetscObject)A)->type_name)); 1518 /* Do not know preallocation information, but must set block size */ 1519 PetscCall(MatMPIBAIJSetPreallocation(B, A->rmap->bs, PETSC_DECIDE, NULL, PETSC_DECIDE, NULL)); 1520 } else { 1521 B = *matout; 1522 } 1523 1524 /* copy over the A part */ 1525 Aloc = (Mat_SeqBAIJ *)baij->A->data; 1526 ai = Aloc->i; 1527 aj = Aloc->j; 1528 a = Aloc->a; 1529 PetscCall(PetscMalloc1(bs, &rvals)); 1530 1531 for (i = 0; i < mbs; i++) { 1532 rvals[0] = bs * (baij->rstartbs + i); 1533 for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1; 1534 for (j = ai[i]; j < ai[i + 1]; j++) { 1535 col = (baij->cstartbs + aj[j]) * bs; 1536 for (k = 0; k < bs; k++) { 1537 PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES)); 1538 1539 col++; 1540 a += bs; 1541 } 1542 } 1543 } 1544 /* copy over the B part */ 1545 Aloc = (Mat_SeqBAIJ *)baij->B->data; 1546 ai = Aloc->i; 1547 aj = Aloc->j; 1548 a = Aloc->a; 1549 for (i = 0; i < mbs; i++) { 1550 rvals[0] = bs * (baij->rstartbs + i); 1551 for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1; 1552 for (j = ai[i]; j < ai[i + 1]; j++) { 1553 col = baij->garray[aj[j]] * bs; 1554 for (k = 
0; k < bs; k++) { 1555 PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES)); 1556 col++; 1557 a += bs; 1558 } 1559 } 1560 } 1561 PetscCall(PetscFree(rvals)); 1562 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 1563 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 1564 1565 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) *matout = B; 1566 else PetscCall(MatHeaderMerge(A, &B)); 1567 PetscFunctionReturn(PETSC_SUCCESS); 1568 } 1569 1570 static PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat, Vec ll, Vec rr) 1571 { 1572 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 1573 Mat a = baij->A, b = baij->B; 1574 PetscInt s1, s2, s3; 1575 1576 PetscFunctionBegin; 1577 PetscCall(MatGetLocalSize(mat, &s2, &s3)); 1578 if (rr) { 1579 PetscCall(VecGetLocalSize(rr, &s1)); 1580 PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size"); 1581 /* Overlap communication with computation. */ 1582 PetscCall(VecScatterBegin(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1583 } 1584 if (ll) { 1585 PetscCall(VecGetLocalSize(ll, &s1)); 1586 PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size"); 1587 PetscUseTypeMethod(b, diagonalscale, ll, NULL); 1588 } 1589 /* scale the diagonal block */ 1590 PetscUseTypeMethod(a, diagonalscale, ll, rr); 1591 1592 if (rr) { 1593 /* Do a scatter end and then right scale the off-diagonal block */ 1594 PetscCall(VecScatterEnd(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1595 PetscUseTypeMethod(b, diagonalscale, NULL, baij->lvec); 1596 } 1597 PetscFunctionReturn(PETSC_SUCCESS); 1598 } 1599 1600 static PetscErrorCode MatZeroRows_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 1601 { 1602 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data; 1603 PetscInt *lrows; 1604 PetscInt r, len; 1605 PetscBool cong; 1606 1607 PetscFunctionBegin; 1608 /* get locally owned rows */ 1609 
PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows)); 1610 /* fix right-hand side if needed */ 1611 if (x && b) { 1612 const PetscScalar *xx; 1613 PetscScalar *bb; 1614 1615 PetscCall(VecGetArrayRead(x, &xx)); 1616 PetscCall(VecGetArray(b, &bb)); 1617 for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]]; 1618 PetscCall(VecRestoreArrayRead(x, &xx)); 1619 PetscCall(VecRestoreArray(b, &bb)); 1620 } 1621 1622 /* actually zap the local rows */ 1623 /* 1624 Zero the required rows. If the "diagonal block" of the matrix 1625 is square and the user wishes to set the diagonal we use separate 1626 code so that MatSetValues() is not called for each diagonal allocating 1627 new memory, thus calling lots of mallocs and slowing things down. 1628 1629 */ 1630 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1631 PetscCall(MatZeroRows_SeqBAIJ(l->B, len, lrows, 0.0, NULL, NULL)); 1632 PetscCall(MatHasCongruentLayouts(A, &cong)); 1633 if ((diag != 0.0) && cong) { 1634 PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, diag, NULL, NULL)); 1635 } else if (diag != 0.0) { 1636 PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL)); 1637 PetscCheck(!((Mat_SeqBAIJ *)l->A->data)->nonew, PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options MAT_NEW_NONZERO_LOCATIONS, MAT_NEW_NONZERO_LOCATION_ERR, and MAT_NEW_NONZERO_ALLOCATION_ERR"); 1638 for (r = 0; r < len; ++r) { 1639 const PetscInt row = lrows[r] + A->rmap->rstart; 1640 PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES)); 1641 } 1642 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 1643 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 1644 } else { 1645 PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL)); 1646 } 1647 PetscCall(PetscFree(lrows)); 1648 1649 /* only change matrix nonzero state if pattern was allowed to be changed */ 1650 if (!((Mat_SeqBAIJ *)l->A->data)->keepnonzeropattern 
      || !((Mat_SeqBAIJ *)l->A->data)->nonew) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Zero the given global rows AND the matching columns of the parallel BAIJ matrix, placing
  `diag` on the diagonal of the zeroed rows. If `x`/`b` are provided, `b` is adjusted so that
  the solution of the modified system still satisfies the zeroed unknowns (b -= a*x for the
  eliminated columns). Rows may be specified on any rank; a PetscSF routes each requested row
  to its owner.
*/
static PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIBAIJ       *l = (Mat_MPIBAIJ *)A->data;
  PetscMPIInt        n = A->rmap->n, p = 0;
  PetscInt           i, j, k, r, len = 0, row, col, count;
  PetscInt          *lrows, *owners = A->rmap->range;
  PetscSFNode       *rrows;
  PetscSF            sf;
  const PetscScalar *xx;
  PetscScalar       *bb, *mask;
  Vec                xmask, lmask;
  Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ *)l->B->data;
  PetscInt           bs = A->rmap->bs, bs2 = baij->bs2;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  PetscCall(PetscMalloc1(n, &lrows));
  for (r = 0; r < n; ++r) lrows[r] = -1;
  PetscCall(PetscMalloc1(N, &rrows));
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N);
    if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */
      PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p));
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
  /* Collect flags for rows to be zeroed */
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFDestroy(&sf));
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r)
    if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b));
  /* handle off-diagonal part of matrix: build a 0/1 mask of zeroed local rows and scatter it to
     the ghost (column) layout so each rank can tell which off-process columns were eliminated */
  PetscCall(MatCreateVecs(A, &xmask, NULL));
  PetscCall(VecDuplicate(l->lvec, &lmask));
  PetscCall(VecGetArray(xmask, &bb));
  for (i = 0; i < len; i++) bb[lrows[i]] = 1;
  PetscCall(VecRestoreArray(xmask, &bb));
  PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecDestroy(&xmask));
  if (x) {
    PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecGetArrayRead(l->lvec, &xx));
    PetscCall(VecGetArray(b, &bb));
  }
  PetscCall(VecGetArray(lmask, &mask));
  /* remove zeroed rows of off-diagonal matrix (point row `row` lives in block row row/bs,
     at offset row%bs inside each bs x bs block; stride bs walks along the point row) */
  for (i = 0; i < len; ++i) {
    row   = lrows[i];
    count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs;
    aa    = ((MatScalar *)baij->a) + baij->i[row / bs] * bs2 + (row % bs);
    for (k = 0; k < count; ++k) {
      aa[0] = 0.0;
      aa += bs;
    }
  }
  /* loop over all elements of off process part of matrix zeroing removed columns*/
  for (i = 0; i < l->B->rmap->N; ++i) {
    row = i / bs;
    for (j = baij->i[row]; j < baij->i[row + 1]; ++j) {
      for (k = 0; k < bs; ++k) {
        col = bs * baij->j[j] + k;
        if (PetscAbsScalar(mask[col])) {
          aa = ((MatScalar *)baij->a) + j * bs2 + (i % bs) + bs * k;
          if (x) bb[i] -= aa[0] * xx[col]; /* fold eliminated column into rhs */
          aa[0] = 0.0;
        }
      }
    }
  }
  if (x) {
    PetscCall(VecRestoreArray(b, &bb));
    PetscCall(VecRestoreArrayRead(l->lvec, &xx));
  }
  PetscCall(VecRestoreArray(lmask, &mask));
  PetscCall(VecDestroy(&lmask));
  PetscCall(PetscFree(lrows));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqBAIJ *)l->A->data)->nonew) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Mark the diagonal block as unfactored; the off-diagonal block is never factored */
static PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatSetUnfactored(a->A));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatDuplicate_MPIBAIJ(Mat, MatDuplicateOption, Mat *);

/* Compare diagonal and off-diagonal blocks locally, then AND the result across all ranks */
static PetscErrorCode MatEqual_MPIBAIJ(Mat A, Mat B, PetscBool *flag)
{
  Mat_MPIBAIJ *matB = (Mat_MPIBAIJ *)B->data, *matA = (Mat_MPIBAIJ *)A->data;
  Mat          a, b, c, d;
  PetscBool    flg;

  PetscFunctionBegin;
  a = matA->A;
  b = matA->B;
  c = matB->A;
  d = matB->B;

  PetscCall(MatEqual(a, c, &flg));
  if (flg) PetscCall(MatEqual(b, d, &flg));
  PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatCopy_MPIBAIJ(Mat A, Mat B, MatStructure str)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
  Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data;

  PetscFunctionBegin;
  /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy.
*/
  if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
    PetscCall(MatCopy_Basic(A, B, str));
  } else {
    /* same pattern and same implementation: copy the two sequential blocks directly */
    PetscCall(MatCopy(a->A, b->A, str));
    PetscCall(MatCopy(a->B, b->B, str));
  }
  PetscCall(PetscObjectStateIncrease((PetscObject)B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Compute per-row nonzero counts for Y+X when the off-diagonal blocks use (possibly different)
   local-to-global column maps yltog/xltog; result goes into nnz (length = block rows of Y) */
PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz)
{
  PetscInt     bs = Y->rmap->bs, m = Y->rmap->N / bs;
  Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data;
  Mat_SeqBAIJ *y = (Mat_SeqBAIJ *)Y->data;

  PetscFunctionBegin;
  PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Y += a*X. SAME_NONZERO_PATTERN uses BLAS axpy on the raw value arrays; otherwise fall back
   to the generic path, preallocating a merged pattern when the patterns are unrelated. */
static PetscErrorCode MatAXPY_MPIBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
{
  Mat_MPIBAIJ *xx = (Mat_MPIBAIJ *)X->data, *yy = (Mat_MPIBAIJ *)Y->data;
  PetscBLASInt bnz, one = 1;
  Mat_SeqBAIJ *x, *y;
  PetscInt     bs2 = Y->rmap->bs * Y->rmap->bs;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscScalar alpha = a;
    x = (Mat_SeqBAIJ *)xx->A->data;
    y = (Mat_SeqBAIJ *)yy->A->data;
    PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz));
    PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one));
    x = (Mat_SeqBAIJ *)xx->B->data;
    y = (Mat_SeqBAIJ *)yy->B->data;
    PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz));
    PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one));
    PetscCall(PetscObjectStateIncrease((PetscObject)Y));
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    PetscCall(MatAXPY_Basic(Y, a, X, str));
  } else {
    /* unrelated patterns: build a new matrix B preallocated for the union, compute into it,
       then merge B's guts into Y (Y keeps its PetscObject identity) */
    Mat       B;
    PetscInt *nnz_d, *nnz_o, bs = Y->rmap->bs;
    PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
    PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
    PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
    PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
    PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N));
    PetscCall(MatSetBlockSizesFromMats(B, Y, Y));
    PetscCall(MatSetType(B, MATMPIBAIJ));
    PetscCall(MatAXPYGetPreallocation_SeqBAIJ(yy->A, xx->A, nnz_d));
    PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
    PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, nnz_d, 0, nnz_o));
    /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! */
    PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
    PetscCall(MatHeaderMerge(Y, &B));
    PetscCall(PetscFree(nnz_d));
    PetscCall(PetscFree(nnz_o));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Complex-conjugate all entries (no-op for real builds) */
static PetscErrorCode MatConjugate_MPIBAIJ(Mat mat)
{
  PetscFunctionBegin;
  if (PetscDefined(USE_COMPLEX)) {
    Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)mat->data;

    PetscCall(MatConjugate_SeqBAIJ(a->A));
    PetscCall(MatConjugate_SeqBAIJ(a->B));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Keep only the real part of every entry */
static PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatRealPart(a->A));
  PetscCall(MatRealPart(a->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Keep only the imaginary part of every entry */
static PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatImaginaryPart(a->A));
  PetscCall(MatImaginaryPart(a->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Extract the submatrix given by (isrow, iscol); iscol is gathered so every rank sees all
   requested columns. The gathered IS is cached on the result for MAT_REUSE_MATRIX calls. */
static PetscErrorCode MatCreateSubMatrix_MPIBAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
{
  IS       iscol_local;
  PetscInt csize;

  PetscFunctionBegin;
  PetscCall(ISGetLocalSize(iscol, &csize));
  if (call ==
MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
    PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
  } else {
    PetscCall(ISAllGather(iscol, &iscol_local));
  }
  PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, isrow, iscol_local, csize, call, newmat, PETSC_FALSE));
  if (call == MAT_INITIAL_MATRIX) {
    PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
    PetscCall(ISDestroy(&iscol_local));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Not great since it makes two copies of the submatrix, first an SeqBAIJ
  in local and then by concatenating the local matrices the end result.
  Writing it directly would be much like MatCreateSubMatrices_MPIBAIJ().
  This routine is used for BAIJ and SBAIJ matrices (unfortunate dependency).
*/
PetscErrorCode MatCreateSubMatrix_MPIBAIJ_Private(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat, PetscBool sym)
{
  PetscMPIInt  rank, size;
  PetscInt     i, m, n, rstart, row, rend, nz, *cwork, j, bs;
  PetscInt    *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
  Mat          M, Mreuse;
  MatScalar   *vwork, *aa;
  MPI_Comm     comm;
  IS           isrow_new, iscol_new;
  Mat_SeqBAIJ *aij;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  /* The compression and expansion should be avoided. Doesn't point
     out errors, might change the indices, hence buggy */
  /* compress point indices into block indices (bs consecutive points -> one block) */
  PetscCall(ISCompressIndicesGeneral(mat->rmap->N, mat->rmap->n, mat->rmap->bs, 1, &isrow, &isrow_new));
  if (isrow == iscol) {
    iscol_new = isrow_new;
    PetscCall(PetscObjectReference((PetscObject)iscol_new));
  } else PetscCall(ISCompressIndicesGeneral(mat->cmap->N, mat->cmap->n, mat->cmap->bs, 1, &iscol, &iscol_new));

  if (call == MAT_REUSE_MATRIX) {
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse));
    PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
    PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_REUSE_MATRIX, &Mreuse, sym));
  } else {
    PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_INITIAL_MATRIX, &Mreuse, sym));
  }
  PetscCall(ISDestroy(&isrow_new));
  PetscCall(ISDestroy(&iscol_new));
  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetBlockSize(mat, &bs));
  PetscCall(MatGetSize(Mreuse, &m, &n));
  m = m / bs;
  n = n / bs;

  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqBAIJ *)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow, &mglobal));
      if (mglobal == n * bs) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = n / size + ((n % size) > rank);
      }
    } else {
      nlocal = csize / bs;
    }
    PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
    rstart = rend - nlocal;
    PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n);

    /* next, compute all the lengths */
    PetscCall(PetscMalloc2(m + 1, &dlens, m + 1, &olens));
    for (i = 0; i < m; i++) {
      jend = ii[i + 1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j = 0; j < jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    PetscCall(MatCreate(comm, &M));
    PetscCall(MatSetSizes(M, bs * m, bs * nlocal, PETSC_DECIDE, bs * n));
    PetscCall(MatSetType(M, sym ? ((PetscObject)mat)->type_name : MATMPIBAIJ));
    /* only one of the two preallocations applies; the other is a no-op for M's type */
    PetscCall(MatMPIBAIJSetPreallocation(M, bs, 0, dlens, 0, olens));
    PetscCall(MatMPISBAIJSetPreallocation(M, bs, 0, dlens, 0, olens));
    PetscCall(PetscFree2(dlens, olens));
  } else {
    PetscInt ml, nl;

    M = *newmat;
    PetscCall(MatGetLocalSize(M, &ml, &nl));
    PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
        The next two lines are needed so we may call MatSetValuesBlocked_MPIBAIJ() below directly,
        rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  PetscCall(MatSetOption(M, MAT_ROW_ORIENTED, PETSC_FALSE));
  PetscCall(MatGetOwnershipRange(M, &rstart, &rend));
  aij = (Mat_SeqBAIJ *)(Mreuse)->data;
  ii  = aij->i;
  jj  = aij->j;
  aa  = aij->a;
  for (i = 0; i < m; i++) {
    row   = rstart / bs + i;
    nz    = ii[i + 1] - ii[i];
    cwork = jj;
    jj    = PetscSafePointerPlusOffset(jj, nz);
    vwork = aa;
    aa    = PetscSafePointerPlusOffset(aa, nz * bs * bs);
    PetscCall(MatSetValuesBlocked_MPIBAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES));
  }

  PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse));
    PetscCall(PetscObjectDereference((PetscObject)Mreuse));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Permute rows and columns: build collective/local permutation ISs, then extract the
   permuted matrix as a submatrix */
static PetscErrorCode MatPermute_MPIBAIJ(Mat A, IS rowp, IS colp, Mat *B)
{
  MPI_Comm        comm, pcomm;
  PetscInt        clocal_size, nrows;
  const PetscInt *rows;
  PetscMPIInt     size;
  IS              crowp, lcolp;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  /* make a collective version of 'rowp' */
  PetscCall(PetscObjectGetComm((PetscObject)rowp, &pcomm));
  if (pcomm == comm) {
    crowp = rowp;
  } else {
    PetscCall(ISGetSize(rowp, &nrows));
    PetscCall(ISGetIndices(rowp, &rows));
    PetscCall(ISCreateGeneral(comm, nrows, rows, PETSC_COPY_VALUES, &crowp));
    PetscCall(ISRestoreIndices(rowp, &rows));
  }
  PetscCall(ISSetPermutation(crowp));
  /* make a local version of 'colp' */
  PetscCall(PetscObjectGetComm((PetscObject)colp, &pcomm));
  PetscCallMPI(MPI_Comm_size(pcomm, &size));
  if (size == 1) {
    lcolp = colp;
} else {
    PetscCall(ISAllGather(colp, &lcolp));
  }
  PetscCall(ISSetPermutation(lcolp));
  /* now we just get the submatrix */
  PetscCall(MatGetLocalSize(A, NULL, &clocal_size));
  PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(A, crowp, lcolp, clocal_size, MAT_INITIAL_MATRIX, B, PETSC_FALSE));
  /* clean up */
  if (pcomm != comm) PetscCall(ISDestroy(&crowp));
  if (size > 1) PetscCall(ISDestroy(&lcolp));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Return the ghost (off-process) block columns of the off-diagonal part: count and global indices */
static PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[])
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ *B    = (Mat_SeqBAIJ *)baij->B->data;

  PetscFunctionBegin;
  if (nghosts) *nghosts = B->nbs;
  if (ghosts) *ghosts = baij->garray;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Gather the block nonzero structure of the whole parallel matrix onto every rank as a
   sequential AIJ matrix (one entry per block; numerical values are NOT set) */
static PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A, Mat *newmat)
{
  Mat          B;
  Mat_MPIBAIJ *a  = (Mat_MPIBAIJ *)A->data;
  Mat_SeqBAIJ *ad = (Mat_SeqBAIJ *)a->A->data, *bd = (Mat_SeqBAIJ *)a->B->data;
  Mat_SeqAIJ  *b;
  PetscMPIInt  size, rank, *recvcounts = NULL, *displs = NULL;
  PetscInt     sendcount, i, *rstarts = A->rmap->range, n, cnt, j, bs = A->rmap->bs;
  PetscInt     m, *garray = a->garray, *lens, *jsendbuf, *a_jsendbuf, *b_jsendbuf;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));

  /* Tell every processor the number of nonzeros per row */
  PetscCall(PetscMalloc1(A->rmap->N / bs, &lens));
  for (i = A->rmap->rstart / bs; i < A->rmap->rend / bs; i++) lens[i] = ad->i[i - A->rmap->rstart / bs + 1] - ad->i[i - A->rmap->rstart / bs] + bd->i[i - A->rmap->rstart / bs + 1] - bd->i[i - A->rmap->rstart / bs];
  PetscCall(PetscMalloc1(2 * size, &recvcounts));
  displs = recvcounts + size;
  for (i = 0; i < size; i++) {
    recvcounts[i] = A->rmap->range[i + 1] / bs - A->rmap->range[i] / bs;
    displs[i]     = A->rmap->range[i] / bs;
  }
  PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, lens, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A)));
  /* Create the sequential matrix of the same type as the local block diagonal */
  PetscCall(MatCreate(PETSC_COMM_SELF, &B));
  PetscCall(MatSetSizes(B, A->rmap->N / bs, A->cmap->N / bs, PETSC_DETERMINE, PETSC_DETERMINE));
  PetscCall(MatSetType(B, MATSEQAIJ));
  PetscCall(MatSeqAIJSetPreallocation(B, 0, lens));
  b = (Mat_SeqAIJ *)B->data;

  /* Copy my part of matrix column indices over, merging ad (local, shifted to global)
     and bd (ghost, translated via garray) so each row comes out in ascending column order */
  sendcount  = ad->nz + bd->nz;
  jsendbuf   = b->j + b->i[rstarts[rank] / bs];
  a_jsendbuf = ad->j;
  b_jsendbuf = bd->j;
  n          = A->rmap->rend / bs - A->rmap->rstart / bs;
  cnt        = 0;
  for (i = 0; i < n; i++) {
    /* put in lower diagonal portion */
    m = bd->i[i + 1] - bd->i[i];
    while (m > 0) {
      /* is it above diagonal (in bd (compressed) numbering) */
      if (garray[*b_jsendbuf] > A->rmap->rstart / bs + i) break;
      jsendbuf[cnt++] = garray[*b_jsendbuf++];
      m--;
    }

    /* put in diagonal portion */
    for (j = ad->i[i]; j < ad->i[i + 1]; j++) jsendbuf[cnt++] = A->rmap->rstart / bs + *a_jsendbuf++;

    /* put in upper diagonal portion */
    while (m-- > 0) jsendbuf[cnt++] = garray[*b_jsendbuf++];
  }
  PetscCheck(cnt == sendcount, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Corrupted PETSc matrix: nz given %" PetscInt_FMT " actual nz %" PetscInt_FMT, sendcount, cnt);

  /* Gather all column indices to all processors */
  for (i = 0; i < size; i++) {
    recvcounts[i] = 0;
    for (j = A->rmap->range[i] / bs; j < A->rmap->range[i + 1] / bs; j++) recvcounts[i] += lens[j];
  }
  displs[0] = 0;
  for (i = 1; i < size; i++) displs[i] = displs[i - 1] + recvcounts[i - 1];
  PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, b->j, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A)));
  /* Assemble the matrix into usable form (note numerical values not yet set) */
  /* set the b->ilen (length of each row) values */
  PetscCall(PetscArraycpy(b->ilen, lens, A->rmap->N / bs));
  /* set the b->i indices */
  b->i[0] = 0;
  for (i = 1; i <= A->rmap->N / bs; i++) b->i[i] = b->i[i - 1] + lens[i - 1];
  PetscCall(PetscFree(lens));
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  PetscCall(PetscFree(recvcounts));

  PetscCall(MatPropagateSymmetryOptions(A, B));
  *newmat = B;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Block-Jacobi-style SOR: local sweeps on the diagonal block with the off-process coupling
   folded into the right-hand side (bb1 = bb - B*x) each outer iteration */
static PetscErrorCode MatSOR_MPIBAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data;
  Vec          bb1 = NULL;

  PetscFunctionBegin;
  if (flag == SOR_APPLY_UPPER) {
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  /* bb1 only needed when more than one outer iteration, or a nonzero initial guess */
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) PetscCall(VecDuplicate(bb, &bb1));

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }

    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag &
SOR_LOCAL_FORWARD_SWEEP) { 2202 if (flag & SOR_ZERO_INITIAL_GUESS) { 2203 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2204 its--; 2205 } 2206 while (its--) { 2207 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2208 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2209 2210 /* update rhs: bb1 = bb - B*x */ 2211 PetscCall(VecScale(mat->lvec, -1.0)); 2212 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 2213 2214 /* local sweep */ 2215 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx)); 2216 } 2217 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 2218 if (flag & SOR_ZERO_INITIAL_GUESS) { 2219 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2220 its--; 2221 } 2222 while (its--) { 2223 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2224 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2225 2226 /* update rhs: bb1 = bb - B*x */ 2227 PetscCall(VecScale(mat->lvec, -1.0)); 2228 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 2229 2230 /* local sweep */ 2231 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx)); 2232 } 2233 } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel version of SOR requested not supported"); 2234 2235 PetscCall(VecDestroy(&bb1)); 2236 PetscFunctionReturn(PETSC_SUCCESS); 2237 } 2238 2239 static PetscErrorCode MatGetColumnReductions_MPIBAIJ(Mat A, PetscInt type, PetscReal *reductions) 2240 { 2241 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)A->data; 2242 PetscInt m, N, i, *garray = aij->garray; 2243 PetscInt ib, jb, bs = A->rmap->bs; 2244 Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ *)aij->A->data; 2245 MatScalar *a_val = a_aij->a; 2246 Mat_SeqBAIJ *b_aij = (Mat_SeqBAIJ *)aij->B->data; 2247 MatScalar *b_val = b_aij->a; 2248 PetscReal 
*work; 2249 2250 PetscFunctionBegin; 2251 PetscCall(MatGetSize(A, &m, &N)); 2252 PetscCall(PetscCalloc1(N, &work)); 2253 if (type == NORM_2) { 2254 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2255 for (jb = 0; jb < bs; jb++) { 2256 for (ib = 0; ib < bs; ib++) { 2257 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val); 2258 a_val++; 2259 } 2260 } 2261 } 2262 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2263 for (jb = 0; jb < bs; jb++) { 2264 for (ib = 0; ib < bs; ib++) { 2265 work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val); 2266 b_val++; 2267 } 2268 } 2269 } 2270 } else if (type == NORM_1) { 2271 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2272 for (jb = 0; jb < bs; jb++) { 2273 for (ib = 0; ib < bs; ib++) { 2274 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val); 2275 a_val++; 2276 } 2277 } 2278 } 2279 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2280 for (jb = 0; jb < bs; jb++) { 2281 for (ib = 0; ib < bs; ib++) { 2282 work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val); 2283 b_val++; 2284 } 2285 } 2286 } 2287 } else if (type == NORM_INFINITY) { 2288 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2289 for (jb = 0; jb < bs; jb++) { 2290 for (ib = 0; ib < bs; ib++) { 2291 int col = A->cmap->rstart + a_aij->j[i] * bs + jb; 2292 work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]); 2293 a_val++; 2294 } 2295 } 2296 } 2297 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2298 for (jb = 0; jb < bs; jb++) { 2299 for (ib = 0; ib < bs; ib++) { 2300 int col = garray[b_aij->j[i]] * bs + jb; 2301 work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]); 2302 b_val++; 2303 } 2304 } 2305 } 2306 } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) { 2307 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2308 for (jb = 0; jb < bs; jb++) { 2309 
for (ib = 0; ib < bs; ib++) { 2310 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val); 2311 a_val++; 2312 } 2313 } 2314 } 2315 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2316 for (jb = 0; jb < bs; jb++) { 2317 for (ib = 0; ib < bs; ib++) { 2318 work[garray[b_aij->j[i]] * bs + jb] += PetscRealPart(*b_val); 2319 b_val++; 2320 } 2321 } 2322 } 2323 } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) { 2324 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2325 for (jb = 0; jb < bs; jb++) { 2326 for (ib = 0; ib < bs; ib++) { 2327 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val); 2328 a_val++; 2329 } 2330 } 2331 } 2332 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2333 for (jb = 0; jb < bs; jb++) { 2334 for (ib = 0; ib < bs; ib++) { 2335 work[garray[b_aij->j[i]] * bs + jb] += PetscImaginaryPart(*b_val); 2336 b_val++; 2337 } 2338 } 2339 } 2340 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type"); 2341 if (type == NORM_INFINITY) { 2342 PetscCall(MPIU_Allreduce(work, reductions, N, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A))); 2343 } else { 2344 PetscCall(MPIU_Allreduce(work, reductions, N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A))); 2345 } 2346 PetscCall(PetscFree(work)); 2347 if (type == NORM_2) { 2348 for (i = 0; i < N; i++) reductions[i] = PetscSqrtReal(reductions[i]); 2349 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 2350 for (i = 0; i < N; i++) reductions[i] /= m; 2351 } 2352 PetscFunctionReturn(PETSC_SUCCESS); 2353 } 2354 2355 static PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A, const PetscScalar **values) 2356 { 2357 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2358 2359 PetscFunctionBegin; 2360 PetscCall(MatInvertBlockDiagonal(a->A, values)); 2361 A->factorerrortype = a->A->factorerrortype; 2362 
A->factorerror_zeropivot_value = a->A->factorerror_zeropivot_value;
  A->factorerror_zeropivot_row   = a->A->factorerror_zeropivot_row;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Y += a*I. Ensures the diagonal is preallocated (at least one block per row) before shifting */
static PetscErrorCode MatShift_MPIBAIJ(Mat Y, PetscScalar a)
{
  Mat_MPIBAIJ *maij = (Mat_MPIBAIJ *)Y->data;
  Mat_SeqBAIJ *aij  = (Mat_SeqBAIJ *)maij->A->data;

  PetscFunctionBegin;
  if (!Y->preallocated) {
    PetscCall(MatMPIBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL, 0, NULL));
  } else if (!aij->nz) {
    /* diagonal block empty: re-preallocate it but preserve the nonew (new-nonzero policy) flag */
    PetscInt nonew = aij->nonew;
    PetscCall(MatSeqBAIJSetPreallocation(maij->A, Y->rmap->bs, 1, NULL));
    aij->nonew = nonew;
  }
  PetscCall(MatShift_Basic(Y, a));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Report whether any diagonal entry is structurally missing; d (if requested) is shifted
   from local block numbering to global block numbering */
static PetscErrorCode MatMissingDiagonal_MPIBAIJ(Mat A, PetscBool *missing, PetscInt *d)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices");
  PetscCall(MatMissingDiagonal(a->A, missing, d));
  if (d) {
    PetscInt rstart;
    PetscCall(MatGetOwnershipRange(A, &rstart, NULL));
    *d += rstart / A->rmap->bs;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Return (borrow, no reference) the sequential diagonal block */
static PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A, Mat *a)
{
  PetscFunctionBegin;
  *a = ((Mat_MPIBAIJ *)A->data)->A;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Drop stored zeros from both blocks */
static PetscErrorCode MatEliminateZeros_MPIBAIJ(Mat A, PetscBool keep)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatEliminateZeros_SeqBAIJ(a->A, keep));        // possibly keep zero diagonal coefficients
  PetscCall(MatEliminateZeros_SeqBAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Function table for the MATMPIBAIJ type; the /*NN*/ /* markers give the slot index in _MatOps */
static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ,
                                       MatGetRow_MPIBAIJ,
                                       MatRestoreRow_MPIBAIJ,
                                       MatMult_MPIBAIJ,
                                       /* 4*/ MatMultAdd_MPIBAIJ,
                                       MatMultTranspose_MPIBAIJ,
                                       MatMultTransposeAdd_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*10*/ NULL,
                                       NULL,
                                       NULL,
                                       MatSOR_MPIBAIJ,
                                       MatTranspose_MPIBAIJ,
                                       /*15*/ MatGetInfo_MPIBAIJ,
                                       MatEqual_MPIBAIJ,
                                       MatGetDiagonal_MPIBAIJ,
                                       MatDiagonalScale_MPIBAIJ,
                                       MatNorm_MPIBAIJ,
                                       /*20*/ MatAssemblyBegin_MPIBAIJ,
                                       MatAssemblyEnd_MPIBAIJ,
                                       MatSetOption_MPIBAIJ,
                                       MatZeroEntries_MPIBAIJ,
                                       /*24*/ MatZeroRows_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*29*/ MatSetUp_MPI_Hash,
                                       NULL,
                                       NULL,
                                       MatGetDiagonalBlock_MPIBAIJ,
                                       NULL,
                                       /*34*/ MatDuplicate_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*39*/ MatAXPY_MPIBAIJ,
                                       MatCreateSubMatrices_MPIBAIJ,
                                       MatIncreaseOverlap_MPIBAIJ,
                                       MatGetValues_MPIBAIJ,
                                       MatCopy_MPIBAIJ,
                                       /*44*/ NULL,
                                       MatScale_MPIBAIJ,
                                       MatShift_MPIBAIJ,
                                       NULL,
                                       MatZeroRowsColumns_MPIBAIJ,
                                       /*49*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*54*/ MatFDColoringCreate_MPIXAIJ,
                                       NULL,
                                       MatSetUnfactored_MPIBAIJ,
                                       MatPermute_MPIBAIJ,
                                       MatSetValuesBlocked_MPIBAIJ,
                                       /*59*/ MatCreateSubMatrix_MPIBAIJ,
                                       MatDestroy_MPIBAIJ,
                                       MatView_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       /*64*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*69*/ MatGetRowMaxAbs_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*74*/ NULL,
                                       MatFDColoringApply_BAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*79*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatLoad_MPIBAIJ,
                                       /*84*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*89*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*94*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*99*/ NULL,
                                       NULL,
                                       NULL,
                                       MatConjugate_MPIBAIJ,
                                       NULL,
                                       /*104*/ NULL,
                                       MatRealPart_MPIBAIJ,
                                       MatImaginaryPart_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       /*109*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatMissingDiagonal_MPIBAIJ,
                                       /*114*/ MatGetSeqNonzeroStructure_MPIBAIJ,
                                       NULL,
                                       MatGetGhosts_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       /*119*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatGetMultiProcBlock_MPIBAIJ,
                                       /*124*/ NULL,
                                       MatGetColumnReductions_MPIBAIJ,
                                       MatInvertBlockDiagonal_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       /*129*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*139*/ MatSetBlockSizes_Default,
                                       NULL,
                                       NULL,
                                       MatFDColoringSetUp_MPIXAIJ,
                                       NULL,
                                       /*144*/ MatCreateMPIMatConcatenateSeqMat_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*150*/ NULL,
                                       MatEliminateZeros_MPIBAIJ,
                                       NULL};

PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *);
PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);

/* Preallocate and fill from block-CSR input (ii/jj are block indices, V optional values) */
static PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[])
{
  PetscInt        m, rstart, cstart, cend;
  PetscInt        i, j, dlen, olen, nz, nz_max = 0, *d_nnz = NULL, *o_nnz = NULL;
  const PetscInt *JJ = NULL;
  PetscScalar    *values = NULL;
  PetscBool       roworiented = ((Mat_MPIBAIJ *)B->data)->roworiented;
  PetscBool       nooffprocentries;

  PetscFunctionBegin;
  PetscCall(PetscLayoutSetBlockSize(B->rmap, bs));
  PetscCall(PetscLayoutSetBlockSize(B->cmap, bs));
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
  /* all sizes below are in block units */
  m      = B->rmap->n / bs;
  rstart = B->rmap->rstart / bs;
  cstart = B->cmap->rstart / bs;
  cend   = B->cmap->rend / bs;

  PetscCheck(!ii[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]);
  PetscCall(PetscMalloc2(m,
&d_nnz, m, &o_nnz));
  /* count diagonal-block vs off-diagonal-block columns per row for exact preallocation */
  for (i = 0; i < m; i++) {
    nz = ii[i + 1] - ii[i];
    PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz);
    nz_max = PetscMax(nz_max, nz);
    dlen   = 0;
    olen   = 0;
    JJ     = jj + ii[i];
    for (j = 0; j < nz; j++) {
      if (*JJ < cstart || *JJ >= cend) olen++;
      else dlen++;
      JJ++;
    }
    d_nnz[i] = dlen;
    o_nnz[i] = olen;
  }
  PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, d_nnz, 0, o_nnz));
  PetscCall(PetscFree2(d_nnz, o_nnz));

  values = (PetscScalar *)V;
  if (!values) PetscCall(PetscCalloc1(bs * bs * nz_max, &values)); /* zero scratch block row when no values supplied */
  for (i = 0; i < m; i++) {
    PetscInt        row   = i + rstart;
    PetscInt        ncols = ii[i + 1] - ii[i];
    const PetscInt *icols = jj + ii[i];
    if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
      const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0);
      PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, ncols, icols, svals, INSERT_VALUES));
    } else { /* block ordering does not match so we can only insert one block at a time. */
      PetscInt j;
      for (j = 0; j < ncols; j++) {
        const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0);
        PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, 1, &icols[j], svals, INSERT_VALUES));
      }
    }
  }

  if (!V) PetscCall(PetscFree(values));
  /* all entries above are locally owned, so assembly needs no off-process communication */
  nooffprocentries    = B->nooffprocentries;
  B->nooffprocentries = PETSC_TRUE;
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  B->nooffprocentries = nooffprocentries;

  PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatMPIBAIJSetPreallocationCSR - Creates a sparse parallel matrix in `MATBAIJ` format using the given nonzero structure and (optional) numerical values

  Collective

  Input Parameters:
+ B  - the matrix
. bs - the block size
. i  - the indices into `j` for the start of each local row (starts with zero)
. j  - the column indices for each local row (starts with zero) these must be sorted for each row
- v  - optional values in the matrix, use `NULL` if not provided

  Level: advanced

  Notes:
  The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc;
  thus you CANNOT change the matrix entries by changing the values of `v` after you have
  called this routine.

  The order of the entries in values is specified by the `MatOption` `MAT_ROW_ORIENTED`. For example, C programs
  may want to use the default `MAT_ROW_ORIENTED` with value `PETSC_TRUE` and use an array v[nnz][bs][bs] where the second index is
  over rows within a block and the last index is over columns within a block row. Fortran programs will likely set
  `MAT_ROW_ORIENTED` with value `PETSC_FALSE` and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
  block column and the second index is over columns within a block.

  Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well

.seealso: `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MATMPIAIJ`, `MatCreateMPIBAIJWithArrays()`, `MATMPIBAIJ`
@*/
PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidType(B, 1);
  PetscValidLogicalCollectiveInt(B, bs, 2);
  PetscTryMethod(B, "MatMPIBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz)
{
  Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data;
  PetscInt     i;
  PetscMPIInt  size;

  PetscFunctionBegin;
  if (B->hash_active) {
    /* leave hash-based insertion mode: restore the cached op table */
    B->ops[0]      = b->cops;
    B->hash_active = PETSC_FALSE;
  }
  if (!B->preallocated) PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), bs, &B->bstash));
  PetscCall(MatSetBlockSize(B, PetscAbs(bs)));
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));

  if (d_nnz) {
    for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(d_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "d_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, d_nnz[i]);
  }
  if (o_nnz) {
    for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(o_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "o_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, o_nnz[i]);
  }
2704 2705 b->bs2 = bs * bs; 2706 b->mbs = B->rmap->n / bs; 2707 b->nbs = B->cmap->n / bs; 2708 b->Mbs = B->rmap->N / bs; 2709 b->Nbs = B->cmap->N / bs; 2710 2711 for (i = 0; i <= b->size; i++) b->rangebs[i] = B->rmap->range[i] / bs; 2712 b->rstartbs = B->rmap->rstart / bs; 2713 b->rendbs = B->rmap->rend / bs; 2714 b->cstartbs = B->cmap->rstart / bs; 2715 b->cendbs = B->cmap->rend / bs; 2716 2717 #if defined(PETSC_USE_CTABLE) 2718 PetscCall(PetscHMapIDestroy(&b->colmap)); 2719 #else 2720 PetscCall(PetscFree(b->colmap)); 2721 #endif 2722 PetscCall(PetscFree(b->garray)); 2723 PetscCall(VecDestroy(&b->lvec)); 2724 PetscCall(VecScatterDestroy(&b->Mvctx)); 2725 2726 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 2727 2728 MatSeqXAIJGetOptions_Private(b->B); 2729 PetscCall(MatDestroy(&b->B)); 2730 PetscCall(MatCreate(PETSC_COMM_SELF, &b->B)); 2731 PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0)); 2732 PetscCall(MatSetType(b->B, MATSEQBAIJ)); 2733 MatSeqXAIJRestoreOptions_Private(b->B); 2734 2735 MatSeqXAIJGetOptions_Private(b->A); 2736 PetscCall(MatDestroy(&b->A)); 2737 PetscCall(MatCreate(PETSC_COMM_SELF, &b->A)); 2738 PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n)); 2739 PetscCall(MatSetType(b->A, MATSEQBAIJ)); 2740 MatSeqXAIJRestoreOptions_Private(b->A); 2741 2742 PetscCall(MatSeqBAIJSetPreallocation(b->A, bs, d_nz, d_nnz)); 2743 PetscCall(MatSeqBAIJSetPreallocation(b->B, bs, o_nz, o_nnz)); 2744 B->preallocated = PETSC_TRUE; 2745 B->was_assembled = PETSC_FALSE; 2746 B->assembled = PETSC_FALSE; 2747 PetscFunctionReturn(PETSC_SUCCESS); 2748 } 2749 2750 extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat, Vec); 2751 extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat, PetscReal); 2752 2753 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype, MatReuse reuse, Mat *adj) 2754 { 2755 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 2756 
  /* continuation of MatConvert_MPIBAIJ_MPIAdj(): build a block-row adjacency
     graph (one vertex per block row/column) with diagonal entries removed */
  Mat_SeqBAIJ    *d = (Mat_SeqBAIJ *)b->A->data, *o = (Mat_SeqBAIJ *)b->B->data;
  PetscInt        M = B->rmap->n / B->rmap->bs, i, *ii, *jj, cnt, j, k, rstart = B->rmap->rstart / B->rmap->bs;
  const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;

  PetscFunctionBegin;
  /* count merged (diagonal + off-diagonal) entries of each block row */
  PetscCall(PetscMalloc1(M + 1, &ii));
  ii[0] = 0;
  for (i = 0; i < M; i++) {
    PetscCheck((id[i + 1] - id[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, id[i], id[i + 1]);
    PetscCheck((io[i + 1] - io[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, io[i], io[i + 1]);
    ii[i + 1] = ii[i] + id[i + 1] - id[i] + io[i + 1] - io[i];
    /* remove one from the count if this block row stores its diagonal entry (MPIAdj stores no diagonal) */
    for (j = id[i]; j < id[i + 1]; j++) {
      if (jd[j] == i) {
        ii[i + 1]--;
        break;
      }
    }
  }
  /* fill the column lists in globally sorted order: off-diagonal columns before
     the local range, then local (diagonal-block) columns, then the rest */
  PetscCall(PetscMalloc1(ii[M], &jj));
  cnt = 0;
  for (i = 0; i < M; i++) {
    for (j = io[i]; j < io[i + 1]; j++) {
      if (garray[jo[j]] > rstart) break;
      jj[cnt++] = garray[jo[j]];
    }
    for (k = id[i]; k < id[i + 1]; k++) {
      if (jd[k] != i) jj[cnt++] = rstart + jd[k]; /* shift local block column to global numbering, skip diagonal */
    }
    for (; j < io[i + 1]; j++) jj[cnt++] = garray[jo[j]];
  }
  /* ii and jj are not freed here; presumably MatCreateMPIAdj() assumes ownership — verify against MATMPIADJ conventions */
  PetscCall(MatCreateMPIAdj(PetscObjectComm((PetscObject)B), M, B->cmap->N / B->rmap->bs, ii, jj, NULL, adj));
  PetscFunctionReturn(PETSC_SUCCESS);
}

#include <../src/mat/impls/aij/mpi/mpiaij.h>

PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType, MatReuse, Mat *);

/* Convert an assembled MPIBAIJ matrix to MPIAIJ by converting the sequential
   diagonal/off-diagonal blocks and renumbering the off-diagonal columns globally */
PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
  Mat_MPIAIJ  *b;
  Mat          B;

  PetscFunctionBegin;
  PetscCheck(A->assembled, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Matrix must be assembled");

  if (reuse == MAT_REUSE_MATRIX) {
    B = *newmat;
  } else {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetType(B, MATMPIAIJ));
    PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
    PetscCall(MatSetBlockSizes(B, A->rmap->bs, A->cmap->bs));
    PetscCall(MatSeqAIJSetPreallocation(B, 0, NULL));
    PetscCall(MatMPIAIJSetPreallocation(B, 0, NULL, 0, NULL));
  }
  b = (Mat_MPIAIJ *)B->data;

  if (reuse == MAT_REUSE_MATRIX) {
    /* reuse the existing sequential blocks of B */
    PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_REUSE_MATRIX, &b->A));
    PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_REUSE_MATRIX, &b->B));
  } else {
    PetscInt   *garray = a->garray;
    Mat_SeqAIJ *bB;
    PetscInt    bs, nnz;
    PetscCall(MatDestroy(&b->A));
    PetscCall(MatDestroy(&b->B));
    /* just clear out the data structure */
    PetscCall(MatDisAssemble_MPIAIJ(B));
    PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A));
    PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B));

    /* Global numbering for b->B columns: each scalar column k belongs to block
       column k/bs with offset k%bs; map the block through garray */
    bB  = (Mat_SeqAIJ *)b->B->data;
    bs  = A->rmap->bs;
    nnz = bB->i[A->rmap->n];
    for (PetscInt k = 0; k < nnz; k++) {
      PetscInt bj = bB->j[k] / bs;
      PetscInt br = bB->j[k] % bs;
      bB->j[k]    = garray[bj] * bs + br;
    }
  }
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));

  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &B));
  } else {
    *newmat = B;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
   MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.

   Options Database Keys:
+ -mat_type mpibaij - sets the matrix type to `MATMPIBAIJ` during a call to `MatSetFromOptions()`
.
-mat_block_size <bs> - set the blocksize used to store the matrix
. -mat_baij_mult_version version - indicate the version of the matrix-vector product to use (0 often indicates using BLAS)
- -mat_use_hash_table <fact> - set hash table factor

   Level: beginner

   Note:
   `MatSetOption(A, MAT_STRUCTURE_ONLY, PETSC_TRUE)` may be called for this matrix type. In this case no
   space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored

.seealso: `Mat`, `MATBAIJ`, `MATSEQBAIJ`, `MatCreateBAIJ`
M*/

PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat, MatType, MatReuse, Mat *);

/* Type constructor for MATMPIBAIJ: allocates and zero-initializes the
   implementation struct, installs the ops table, and composes the
   type-specific methods used by the generic Mat interface */
PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B)
{
  Mat_MPIBAIJ *b;
  PetscBool    flg = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PetscNew(&b));
  B->data      = (void *)b;
  B->ops[0]    = MatOps_Values; /* install the MPIBAIJ function table */
  B->assembled = PETSC_FALSE;

  B->insertmode = NOT_SET_VALUES;
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &b->size));

  /* build local table of row and column ownerships */
  PetscCall(PetscMalloc1(b->size + 1, &b->rangebs));

  /* build cache for off array entries formed */
  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL;
  b->garray      = NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used in block assembly */
  b->barray = NULL;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = NULL;
  b->rowvalues    = NULL;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = NULL;
  b->hd           = NULL;
  b->ht_size      = 0;
  b->ht_flag      = PETSC_FALSE;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
  b->ijonly = PETSC_FALSE;

  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiadj_C", MatConvert_MPIBAIJ_MPIAdj));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiaij_C", MatConvert_MPIBAIJ_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpisbaij_C", MatConvert_MPIBAIJ_MPISBAIJ));
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_hypre_C", MatConvert_AIJ_HYPRE));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocation_C", MatMPIBAIJSetPreallocation_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocationCSR_C", MatMPIBAIJSetPreallocationCSR_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetHashTableFactor_C", MatSetHashTableFactor_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_is_C", MatConvert_XAIJ_IS));
  PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIBAIJ));

  PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Options for loading MPIBAIJ matrix 1", "Mat");
  PetscCall(PetscOptionsName("-mat_use_hash_table", "Use hash table to save time in constructing matrix", "MatSetOption", &flg));
  if (flg) {
    PetscReal fact = 1.39;
    PetscCall(MatSetOption(B, MAT_USE_HASH_TABLE, PETSC_TRUE));
    PetscCall(PetscOptionsReal("-mat_use_hash_table", "Use hash table factor", "MatMPIBAIJSetHashTableFactor", fact, &fact, NULL));
    if (fact <= 1.0) fact = 1.39; /* factors <= 1 would make the hash table too small; fall back to the default */
    PetscCall(MatMPIBAIJSetHashTableFactor(B, fact));
    PetscCall(PetscInfo(B, "Hash table Factor used %5.2g\n", (double)fact));
  }
  PetscOptionsEnd();
  PetscFunctionReturn(PETSC_SUCCESS);
}

// PetscClangLinter pragma disable: -fdoc-section-header-unknown
/*MC
   MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.

   This matrix type is identical to `MATSEQBAIJ` when constructed with a single process communicator,
   and `MATMPIBAIJ` otherwise.

   Options Database Keys:
. -mat_type baij - sets the matrix type to `MATBAIJ` during a call to `MatSetFromOptions()`

   Level: beginner

.seealso: `Mat`, `MatCreateBAIJ()`, `MATSEQBAIJ`, `MATMPIBAIJ`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()`
M*/

/*@C
   MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in `MATMPIBAIJ` format
   (block compressed row).

   Collective

   Input Parameters:
+  B - the matrix
.  bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
       blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
.  d_nz - number of block nonzeros per block row in diagonal portion of local
          submatrix (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           of the diagonal portion of the local submatrix (possibly different for each block
           row) or `NULL`. If you plan to factor the matrix you must leave room for the diagonal entry and
           set it even if it is zero.
.  o_nz - number of block nonzeros per block row in the off-diagonal portion of local
          submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or `NULL`.

   If the *_nnz parameter is given then the *_nz parameter is ignored

   Options Database Keys:
+   -mat_block_size - size of the blocks to use
-   -mat_use_hash_table <fact> - set hash table factor

   Level: intermediate

   Notes:
   For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either `d_nz` or `d_nnz` (not both). Set
   `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic
   memory allocation. Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          --------------------------
   row 3  |o o o d d d o o o o  o  o
   row 4  |o o o d d d o o o o  o  o
   row 5  |o o o d d d o o o o  o  o
          --------------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix. Note that the d and the o submatrices are
   stored simply in the `MATSEQBAIJ` format for compressed row storage.

   Now `d_nz` should indicate the number of block nonzeros per row in the d matrix,
   and `o_nz` should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects `d_nz` >> `o_nz`.

   You can call `MatGetInfo()` to get information on how effective the preallocation was;
   for example the fields mallocs, nz_allocated, nz_used, nz_unneeded;
   You can also run with the option `-info` and look for messages with the string
   malloc in them to see if additional memory allocation was needed.
.seealso: `Mat`, `MATMPIBAIJ`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatMPIBAIJSetPreallocationCSR()`, `PetscSplitOwnership()`
@*/
PetscErrorCode MatMPIBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidType(B, 1);
  PetscValidLogicalCollectiveInt(B, bs, 2);
  /* dispatch to the type-specific implementation; a no-op for types that do not compose it */
  PetscTryMethod(B, "MatMPIBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, bs, d_nz, d_nnz, o_nz, o_nnz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

// PetscClangLinter pragma disable: -fdoc-section-header-unknown
/*@C
   MatCreateBAIJ - Creates a sparse parallel matrix in `MATBAIJ` format
   (block compressed row).

   Collective

   Input Parameters:
+  comm - MPI communicator
.  bs   - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
          blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
.  m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
      This value should be the same as the local size used in creating the
      y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or `PETSC_DECIDE` to have calculated if N is given)
      This value should be the same as the local size used in creating the
      x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
.  N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
.  d_nz - number of nonzero blocks per block row in diagonal portion of local
          submatrix (same for all local rows)
.  d_nnz - array containing the number of nonzero blocks in the various block rows
           of the diagonal portion of the local submatrix (possibly different for each block
           row) or NULL. If you plan to factor the matrix you must leave room for the diagonal entry
           and set it even if it is zero.
.  o_nz - number of nonzero blocks per block row in the off-diagonal portion of local
          submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzero blocks in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or NULL.

   Output Parameter:
.  A - the matrix

   Options Database Keys:
+   -mat_block_size - size of the blocks to use
-   -mat_use_hash_table <fact> - set hash table factor

   Level: intermediate

   Notes:
   It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, `MatSeqBAIJSetPreallocation()`]

   For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   If the *_nnz parameter is given then the *_nz parameter is ignored

   A nonzero block is any block that has 1 or more nonzeros in it

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both). Set
   `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic
   memory allocation. Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          --------------------------
   row 3  |o o o d d d o o o o  o  o
   row 4  |o o o d d d o o o o  o  o
   row 5  |o o o d d d o o o o  o  o
          --------------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix. Note that the d and the o submatrices are
   stored simply in the `MATSEQBAIJ` format for compressed row storage.

   Now `d_nz` should indicate the number of block nonzeros per row in the d matrix,
   and `o_nz` should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects `d_nz` >> `o_nz`.
.seealso: `Mat`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()`
@*/
PetscErrorCode MatCreateBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
{
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCall(MatCreate(comm, A));
  PetscCall(MatSetSizes(*A, m, n, M, N));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (size > 1) { /* parallel: MPIBAIJ with separate diagonal/off-diagonal preallocation */
    PetscCall(MatSetType(*A, MATMPIBAIJ));
    PetscCall(MatMPIBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz));
  } else { /* single process: plain SEQBAIJ; o_nz/o_nnz are not used */
    PetscCall(MatSetType(*A, MATSEQBAIJ));
    PetscCall(MatSeqBAIJSetPreallocation(*A, bs, d_nz, d_nnz));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Duplicate an MPIBAIJ matrix: share the layouts, copy the block-layout
   bookkeeping and hash-table settings, then duplicate the sequential
   diagonal/off-diagonal blocks (values copied according to cpvalues) */
static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
{
  Mat          mat;
  Mat_MPIBAIJ *a, *oldmat = (Mat_MPIBAIJ *)matin->data;
  PetscInt     len = 0;

  PetscFunctionBegin;
  *newmat = NULL;
  PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
  PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
  PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));

  PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
  PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));
  if (matin->hash_active) { /* source was never preallocated; just set up the copy */
    PetscCall(MatSetUp(mat));
  } else {
    mat->factortype   = matin->factortype;
    mat->preallocated = PETSC_TRUE;
    mat->assembled    = PETSC_TRUE;
    mat->insertmode   = NOT_SET_VALUES;

    a             = (Mat_MPIBAIJ *)mat->data;
    mat->rmap->bs = matin->rmap->bs;
    a->bs2        = oldmat->bs2;
    a->mbs        = oldmat->mbs;
    a->nbs        = oldmat->nbs;
    a->Mbs        = oldmat->Mbs;
    a->Nbs        = oldmat->Nbs;

    a->size = oldmat->size;
    a->rank = oldmat->rank;
    a->donotstash   = oldmat->donotstash;
    a->roworiented  = oldmat->roworiented;
    /* per-instance scratch state starts empty in the copy */
    a->rowindices   = NULL;
    a->rowvalues    = NULL;
    a->getrowactive = PETSC_FALSE;
    a->barray       = NULL;
    a->rstartbs     = oldmat->rstartbs;
    a->rendbs       = oldmat->rendbs;
    a->cstartbs     = oldmat->cstartbs;
    a->cendbs       = oldmat->cendbs;

    /* hash table stuff: keep the settings but not the table itself */
    a->ht           = NULL;
    a->hd           = NULL;
    a->ht_size      = 0;
    a->ht_flag      = oldmat->ht_flag;
    a->ht_fact      = oldmat->ht_fact;
    a->ht_total_ct  = 0;
    a->ht_insert_ct = 0;

    PetscCall(PetscArraycpy(a->rangebs, oldmat->rangebs, a->size + 1));
    if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
      PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
#else
      PetscCall(PetscMalloc1(a->Nbs, &a->colmap));
      PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, a->Nbs));
#endif
    } else a->colmap = NULL;

    if (oldmat->garray && (len = ((Mat_SeqBAIJ *)oldmat->B->data)->nbs)) {
      PetscCall(PetscMalloc1(len, &a->garray));
      PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
    } else a->garray = NULL;

    PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin), matin->rmap->bs, &mat->bstash));
    PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
    PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx));

    PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
    PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
  }
  PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
  *newmat = mat;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Used for both MPIBAIJ and MPISBAIJ matrices */
PetscErrorCode MatLoad_MPIBAIJ_Binary(Mat mat, PetscViewer viewer)
{
  PetscInt     header[4], M, N, nz, bs, m, n, mbs, nbs, rows, cols, sum, i, j, k;
  PetscInt    *rowidxs, *colidxs, rs, cs, ce;
  PetscScalar *matvals;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  /* read in matrix header */
  PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
  PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
  M  = header[1];
  N  = header[2];
  nz = header[3];
  PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
  PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
  /* a negative nz marks a special on-disk format this loader does not handle */
  PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIBAIJ");

  /* set block sizes from the viewer's .info file */
  PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
  /* set local sizes if not set already */
  if (mat->rmap->n < 0 && M == N) mat->rmap->n = mat->cmap->n;
  if (mat->cmap->n < 0 && M == N) mat->cmap->n = mat->rmap->n;
  /* set global sizes if not set already */
  if (mat->rmap->N < 0) mat->rmap->N = M;
  if (mat->cmap->N < 0) mat->cmap->N = N;
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));

  /* check if the matrix sizes are correct */
  PetscCall(MatGetSize(mat, &rows, &cols));
  PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);
  PetscCall(MatGetBlockSize(mat, &bs));
  PetscCall(MatGetLocalSize(mat, &m, &n));
  PetscCall(PetscLayoutGetRange(mat->rmap, &rs, NULL));
  PetscCall(PetscLayoutGetRange(mat->cmap, &cs, &ce));
  mbs = m / bs;
  nbs = n / bs;

  /* read in row lengths and build row indices (prefix sum turns lengths into CSR offsets) */
  PetscCall(PetscMalloc1(m + 1, &rowidxs));
  PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT));
  rowidxs[0] = 0;
  for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
  PetscCall(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer)));
  PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);

  /* read in column indices and matrix values */
  PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals));
  PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
  PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));

  { /* preallocate matrix storage */
    PetscBT    bt; /* helper bit set to count diagonal nonzeros */
    PetscHSetI ht; /* helper hash set to count off-diagonal nonzeros */
    PetscBool  sbaij, done;
    PetscInt  *d_nnz, *o_nnz;

    PetscCall(PetscBTCreate(nbs, &bt));
    PetscCall(PetscHSetICreate(&ht));
    PetscCall(PetscCalloc2(mbs, &d_nnz, mbs, &o_nnz));
    PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPISBAIJ, &sbaij));
    /* count, per block row, the distinct diagonal and off-diagonal block columns */
    for (i = 0; i < mbs; i++) {
      PetscCall(PetscBTMemzero(nbs, bt));
      PetscCall(PetscHSetIClear(ht));
      for (k = 0; k < bs; k++) {
        PetscInt row = bs * i + k;
        for (j = rowidxs[row]; j < rowidxs[row + 1]; j++) {
          PetscInt col = colidxs[j];
          if (!sbaij || col >= row) { /* SBAIJ stores only the upper triangle */
            if (col >= cs && col < ce) {
              if (!PetscBTLookupSet(bt, (col - cs) / bs)) d_nnz[i]++;
            } else {
              PetscCall(PetscHSetIQueryAdd(ht, col / bs, &done));
              if (done) o_nnz[i]++;
            }
          }
        }
      }
    }
    PetscCall(PetscBTDestroy(&bt));
    PetscCall(PetscHSetIDestroy(&ht));
    /* only one of these takes effect, depending on the actual type (PetscTryMethod dispatch) */
    PetscCall(MatMPIBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz));
    PetscCall(MatMPISBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz));
    PetscCall(PetscFree2(d_nnz, o_nnz));
  }

  /* store matrix values */
  for (i = 0; i < m; i++) {
    PetscInt row = rs + i, s = rowidxs[i], e = rowidxs[i + 1];
    PetscUseTypeMethod(mat, setvalues, 1, &row, e - s, colidxs + s, matvals + s, INSERT_VALUES);
  }

  PetscCall(PetscFree(rowidxs));
  PetscCall(PetscFree2(colidxs, matvals));
  PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MatLoad dispatcher for MATMPIBAIJ: only binary viewers are supported */
PetscErrorCode MatLoad_MPIBAIJ(Mat mat, PetscViewer viewer)
{
  PetscBool isbinary;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name);
  PetscCall(MatLoad_MPIBAIJ_Binary(mat, viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
   MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the matrices hash table

   Input Parameters:
+  mat - the matrix
-  fact - factor

   Options Database Key:
.
-mat_use_hash_table <fact> - provide the factor 3370 3371 Level: advanced 3372 3373 .seealso: `Mat`, `MATMPIBAIJ`, `MatSetOption()` 3374 @*/ 3375 PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat, PetscReal fact) 3376 { 3377 PetscFunctionBegin; 3378 PetscTryMethod(mat, "MatSetHashTableFactor_C", (Mat, PetscReal), (mat, fact)); 3379 PetscFunctionReturn(PETSC_SUCCESS); 3380 } 3381 3382 PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat, PetscReal fact) 3383 { 3384 Mat_MPIBAIJ *baij; 3385 3386 PetscFunctionBegin; 3387 baij = (Mat_MPIBAIJ *)mat->data; 3388 baij->ht_fact = fact; 3389 PetscFunctionReturn(PETSC_SUCCESS); 3390 } 3391 3392 PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[]) 3393 { 3394 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 3395 PetscBool flg; 3396 3397 PetscFunctionBegin; 3398 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIBAIJ, &flg)); 3399 PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIBAIJ matrix as input"); 3400 if (Ad) *Ad = a->A; 3401 if (Ao) *Ao = a->B; 3402 if (colmap) *colmap = a->garray; 3403 PetscFunctionReturn(PETSC_SUCCESS); 3404 } 3405 3406 /* 3407 Special version for direct calls from Fortran (to eliminate two function call overheads 3408 */ 3409 #if defined(PETSC_HAVE_FORTRAN_CAPS) 3410 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED 3411 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 3412 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked 3413 #endif 3414 3415 // PetscClangLinter pragma disable: -fdoc-synopsis-matching-symbol-name 3416 /*@C 3417 MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to `MatSetValuesBlocked()` 3418 3419 Collective 3420 3421 Input Parameters: 3422 + matin - the matrix 3423 . min - number of input rows 3424 . im - input rows 3425 . nin - number of input columns 3426 . in - input columns 3427 . 
v - numerical values input
- addvin - `INSERT_VALUES` or `ADD_VALUES`

  Level: advanced

  Developer Notes:
  This has a complete copy of `MatSetValuesBlocked_MPIBAIJ()` which is terrible code un-reuse.

.seealso: `Mat`, `MatSetValuesBlocked()`
@*/
PETSC_EXTERN PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin, PetscInt *min, const PetscInt im[], PetscInt *nin, const PetscInt in[], const MatScalar v[], InsertMode *addvin)
{
  /* convert input arguments to C version */
  Mat        mat  = *matin;
  PetscInt   m    = *min, n = *nin;
  InsertMode addv = *addvin;

  Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ *)mat->data;
  const MatScalar *value;
  MatScalar       *barray      = baij->barray;
  PetscBool        roworiented = baij->roworiented;
  PetscInt         i, j, ii, jj, row, col, rstart = baij->rstartbs;
  PetscInt         rend = baij->rendbs, cstart = baij->cstartbs, stepval;
  PetscInt         cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;

  PetscFunctionBegin;
  /* tasks normally handled by MatSetValuesBlocked() */
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
  else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values");
  PetscCheck(!mat->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix");
  if (mat->assembled) {
    mat->was_assembled = PETSC_TRUE;
    mat->assembled     = PETSC_FALSE;
  }
  PetscCall(PetscLogEventBegin(MAT_SetValues, mat, 0, 0, 0));

  /* lazily allocate the bs2-sized scratch block used when values must be reordered below */
  if (!barray) {
    PetscCall(PetscMalloc1(bs2, &barray));
    baij->barray = barray;
  }

  /* stride (in scalars) used to step between rows/columns of one block inside v */
  if (roworiented) stepval = (n - 1) * bs;
  else stepval = (m - 1) * bs;

  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue; /* negative block-row indices are silently ignored */
    PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large, row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
    if (im[i] >= rstart && im[i] < rend) { /* block row owned by this process */
      row = im[i] - rstart;
      for (j = 0; j < n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar *)v + i * bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar *)v + j * bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i * (stepval + bs) * bs + j * bs;
          } else {
            value = v + j * (stepval + bs) * bs + i * bs;
          }
          /* gather one bs x bs block from v into the contiguous scratch buffer */
          for (ii = 0; ii < bs; ii++, value += stepval) {
            for (jj = 0; jj < bs; jj++) *barray++ = *value++;
          }
          barray -= bs2; /* rewind to the start of the copied block */
        }

        if (in[j] >= cstart && in[j] < cend) {
          /* block column in the diagonal portion: insert into baij->A with local indices */
          col = in[j] - cstart;
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
        } else if (in[j] < 0) {
          continue; /* negative block-column indices are silently ignored */
        } else {
          PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large, col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
          if (mat->was_assembled) {
            /* translate the global block column to baij->B's compressed local numbering */
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));

#if defined(PETSC_USE_DEBUG)
#if defined(PETSC_USE_CTABLE)
            {
              PetscInt data;
              PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &data));
              PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
            }
#else
            PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
#endif
#endif
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col));
            col = (col - 1) / bs;
#else
            col = (baij->colmap[in[j]] - 1) / bs;
#endif
            /* col < 0: block column not present in the assembled off-diagonal part;
               unless new nonzeros are forbidden, disassemble and fall back to global indexing */
            if (col < 0 && !((Mat_SeqBAIJ *)baij->A->data)->nonew) {
              PetscCall(MatDisAssemble_MPIBAIJ(mat));
              col = in[j];
            }
          } else col = in[j];
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
        }
      }
    } else {
      /* block row owned by another process: stash values for exchange during assembly */
      if (!baij->donotstash) {
        if (roworiented) {
          PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        } else {
          PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        }
      }
    }
  }

  /* task normally handled by MatSetValuesBlocked() */
  PetscCall(PetscLogEventEnd(MAT_SetValues, mat, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatCreateMPIBAIJWithArrays - creates a `MATMPIBAIJ` matrix using arrays that contain in standard block CSR format for the local rows.

  Collective

  Input Parameters:
+ comm - MPI communicator
. bs - the block size, only a block size of 1 is supported
. m - number of local rows (Cannot be `PETSC_DECIDE`)
. n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product $ y = Ax $. (or `PETSC_DECIDE` to have
       calculated if `N` is given) For square matrices `n` is almost always `m`.
. M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
. N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
. i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that rowth block row of the matrix
. j - column indices
- a - matrix values

  Output Parameter:
. mat - the matrix

  Level: intermediate

  Notes:
  The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc;
  thus you CANNOT change the matrix entries by changing the values of a[] after you have
  called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
3572 3573 The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 3574 the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 3575 block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory 3576 with column-major ordering within blocks. 3577 3578 The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array. 3579 3580 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 3581 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()` 3582 @*/ 3583 PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat) 3584 { 3585 PetscFunctionBegin; 3586 PetscCheck(!i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 3587 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3588 PetscCall(MatCreate(comm, mat)); 3589 PetscCall(MatSetSizes(*mat, m, n, M, N)); 3590 PetscCall(MatSetType(*mat, MATMPIBAIJ)); 3591 PetscCall(MatSetBlockSize(*mat, bs)); 3592 PetscCall(MatSetUp(*mat)); 3593 PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_FALSE)); 3594 PetscCall(MatMPIBAIJSetPreallocationCSR(*mat, bs, i, j, a)); 3595 PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_TRUE)); 3596 PetscFunctionReturn(PETSC_SUCCESS); 3597 } 3598 3599 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 3600 { 3601 PetscInt m, N, i, rstart, nnz, Ii, bs, cbs; 3602 PetscInt *indx; 3603 PetscScalar *values; 3604 3605 PetscFunctionBegin; 3606 PetscCall(MatGetSize(inmat, &m, &N)); 3607 if (scall == 
MAT_INITIAL_MATRIX) { /* symbolic phase */ 3608 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)inmat->data; 3609 PetscInt *dnz, *onz, mbs, Nbs, nbs; 3610 PetscInt *bindx, rmax = a->rmax, j; 3611 PetscMPIInt rank, size; 3612 3613 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 3614 mbs = m / bs; 3615 Nbs = N / cbs; 3616 if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnershipBlock(comm, cbs, &n, &N)); 3617 nbs = n / cbs; 3618 3619 PetscCall(PetscMalloc1(rmax, &bindx)); 3620 MatPreallocateBegin(comm, mbs, nbs, dnz, onz); /* inline function, output __end and __rstart are used below */ 3621 3622 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 3623 PetscCallMPI(MPI_Comm_rank(comm, &size)); 3624 if (rank == size - 1) { 3625 /* Check sum(nbs) = Nbs */ 3626 PetscCheck(__end == Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT, __end, Nbs); 3627 } 3628 3629 rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */ 3630 for (i = 0; i < mbs; i++) { 3631 PetscCall(MatGetRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL)); /* non-blocked nnz and indx */ 3632 nnz = nnz / bs; 3633 for (j = 0; j < nnz; j++) bindx[j] = indx[j * bs] / bs; 3634 PetscCall(MatPreallocateSet(i + rstart, nnz, bindx, dnz, onz)); 3635 PetscCall(MatRestoreRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL)); 3636 } 3637 PetscCall(PetscFree(bindx)); 3638 3639 PetscCall(MatCreate(comm, outmat)); 3640 PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 3641 PetscCall(MatSetBlockSizes(*outmat, bs, cbs)); 3642 PetscCall(MatSetType(*outmat, MATBAIJ)); 3643 PetscCall(MatSeqBAIJSetPreallocation(*outmat, bs, 0, dnz)); 3644 PetscCall(MatMPIBAIJSetPreallocation(*outmat, bs, 0, dnz, 0, onz)); 3645 MatPreallocateEnd(dnz, onz); 3646 PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 3647 } 3648 3649 /* numeric phase */ 3650 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 3651 
PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL)); 3652 3653 for (i = 0; i < m; i++) { 3654 PetscCall(MatGetRow_SeqBAIJ(inmat, i, &nnz, &indx, &values)); 3655 Ii = i + rstart; 3656 PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES)); 3657 PetscCall(MatRestoreRow_SeqBAIJ(inmat, i, &nnz, &indx, &values)); 3658 } 3659 PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY)); 3660 PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY)); 3661 PetscFunctionReturn(PETSC_SUCCESS); 3662 } 3663