1 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/ 2 3 #include <petsc/private/hashseti.h> 4 #include <petscblaslapack.h> 5 #include <petscsf.h> 6 7 static PetscErrorCode MatDestroy_MPIBAIJ(Mat mat) 8 { 9 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 10 11 PetscFunctionBegin; 12 PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N)); 13 PetscCall(MatStashDestroy_Private(&mat->stash)); 14 PetscCall(MatStashDestroy_Private(&mat->bstash)); 15 PetscCall(MatDestroy(&baij->A)); 16 PetscCall(MatDestroy(&baij->B)); 17 #if defined(PETSC_USE_CTABLE) 18 PetscCall(PetscHMapIDestroy(&baij->colmap)); 19 #else 20 PetscCall(PetscFree(baij->colmap)); 21 #endif 22 PetscCall(PetscFree(baij->garray)); 23 PetscCall(VecDestroy(&baij->lvec)); 24 PetscCall(VecScatterDestroy(&baij->Mvctx)); 25 PetscCall(PetscFree2(baij->rowvalues, baij->rowindices)); 26 PetscCall(PetscFree(baij->barray)); 27 PetscCall(PetscFree2(baij->hd, baij->ht)); 28 PetscCall(PetscFree(baij->rangebs)); 29 PetscCall(PetscFree(mat->data)); 30 31 PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL)); 32 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL)); 33 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL)); 34 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocation_C", NULL)); 35 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocationCSR_C", NULL)); 36 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL)); 37 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetHashTableFactor_C", NULL)); 38 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpisbaij_C", NULL)); 39 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiadj_C", NULL)); 40 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiaij_C", NULL)); 41 #if defined(PETSC_HAVE_HYPRE) 42 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_hypre_C", NULL)); 43 #endif 44 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_is_C", NULL)); 45 PetscFunctionReturn(PETSC_SUCCESS); 46 } 47 48 /* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and MatAssemblyEnd_MPI_Hash() */ 49 #define TYPE BAIJ 50 #include "../src/mat/impls/aij/mpi/mpihashmat.h" 51 #undef TYPE 52 53 #if defined(PETSC_HAVE_HYPRE) 54 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *); 55 #endif 56 57 static PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A, Vec v, PetscInt idx[]) 58 { 59 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 60 PetscInt i, *idxb = NULL, m = A->rmap->n, bs = A->cmap->bs; 61 PetscScalar *vv; 62 Vec vB, vA; 63 const PetscScalar *va, *vb; 64 65 PetscFunctionBegin; 66 PetscCall(MatCreateVecs(a->A, NULL, &vA)); 67 PetscCall(MatGetRowMaxAbs(a->A, vA, idx)); 68 69 PetscCall(VecGetArrayRead(vA, &va)); 70 if (idx) { 71 for (i = 0; i < m; i++) { 72 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 73 } 74 } 75 76 PetscCall(MatCreateVecs(a->B, NULL, &vB)); 77 PetscCall(PetscMalloc1(m, &idxb)); 78 PetscCall(MatGetRowMaxAbs(a->B, vB, idxb)); 79 80 PetscCall(VecGetArrayWrite(v, &vv)); 81 PetscCall(VecGetArrayRead(vB, &vb)); 82 for (i = 0; i < m; i++) { 83 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 84 vv[i] = vb[i]; 85 if (idx) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs); 86 } else { 87 vv[i] = va[i]; 
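/* if the off-diagonal part (B) ties the diagonal part (A) in magnitude, the branch below reports the smaller global column index */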
88 if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > bs * a->garray[idxb[i] / bs] + (idxb[i] % bs)) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs); 89 } 90 } 91 PetscCall(VecRestoreArrayWrite(v, &vv)); 92 PetscCall(VecRestoreArrayRead(vA, &va)); 93 PetscCall(VecRestoreArrayRead(vB, &vb)); 94 PetscCall(PetscFree(idxb)); 95 PetscCall(VecDestroy(&vA)); 96 PetscCall(VecDestroy(&vB)); 97 PetscFunctionReturn(PETSC_SUCCESS); 98 } 99 100 static PetscErrorCode MatGetRowSumAbs_MPIBAIJ(Mat A, Vec v) 101 { 102 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 103 Vec vB, vA; 104 105 PetscFunctionBegin; 106 PetscCall(MatCreateVecs(a->A, NULL, &vA)); 107 PetscCall(MatGetRowSumAbs(a->A, vA)); 108 PetscCall(MatCreateVecs(a->B, NULL, &vB)); 109 PetscCall(MatGetRowSumAbs(a->B, vB)); 110 PetscCall(VecAXPY(vA, 1.0, vB)); 111 PetscCall(VecDestroy(&vB)); 112 PetscCall(VecCopy(vA, v)); 113 PetscCall(VecDestroy(&vA)); 114 PetscFunctionReturn(PETSC_SUCCESS); 115 } 116 117 static PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat) 118 { 119 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 120 121 PetscFunctionBegin; 122 PetscCall(MatStoreValues(aij->A)); 123 PetscCall(MatStoreValues(aij->B)); 124 PetscFunctionReturn(PETSC_SUCCESS); 125 } 126 127 static PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat) 128 { 129 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 130 131 PetscFunctionBegin; 132 PetscCall(MatRetrieveValues(aij->A)); 133 PetscCall(MatRetrieveValues(aij->B)); 134 PetscFunctionReturn(PETSC_SUCCESS); 135 } 136 137 /* 138 Local utility routine that creates a mapping from the global column 139 number to the local number in the off-diagonal part of the local 140 storage of the matrix. This is done in a non scalable way since the 141 length of colmap equals the global matrix length. 
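Illustrative example (values made up): with bs = 2 and garray = {3, 7}, the loop below sets
colmap[3] = 1 and colmap[7] = 3; in general colmap[g] = 1 + bs*(local block column of global
block column g in B), and 0 means that block column is not present. The CTABLE variant stores
the same values keyed by g + 1.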
142 */ 143 PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat) 144 { 145 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 146 Mat_SeqBAIJ *B = (Mat_SeqBAIJ *)baij->B->data; 147 PetscInt nbs = B->nbs, i, bs = mat->rmap->bs; 148 149 PetscFunctionBegin; 150 #if defined(PETSC_USE_CTABLE) 151 PetscCall(PetscHMapICreateWithSize(baij->nbs, &baij->colmap)); 152 for (i = 0; i < nbs; i++) PetscCall(PetscHMapISet(baij->colmap, baij->garray[i] + 1, i * bs + 1)); 153 #else 154 PetscCall(PetscCalloc1(baij->Nbs + 1, &baij->colmap)); 155 for (i = 0; i < nbs; i++) baij->colmap[baij->garray[i]] = i * bs + 1; 156 #endif 157 PetscFunctionReturn(PETSC_SUCCESS); 158 } 159 160 #define MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, orow, ocol) \ 161 do { \ 162 brow = row / bs; \ 163 rp = PetscSafePointerPlusOffset(aj, ai[brow]); \ 164 ap = PetscSafePointerPlusOffset(aa, bs2 * ai[brow]); \ 165 rmax = aimax[brow]; \ 166 nrow = ailen[brow]; \ 167 bcol = col / bs; \ 168 ridx = row % bs; \ 169 cidx = col % bs; \ 170 low = 0; \ 171 high = nrow; \ 172 while (high - low > 3) { \ 173 t = (low + high) / 2; \ 174 if (rp[t] > bcol) high = t; \ 175 else low = t; \ 176 } \ 177 for (_i = low; _i < high; _i++) { \ 178 if (rp[_i] > bcol) break; \ 179 if (rp[_i] == bcol) { \ 180 bap = ap + bs2 * _i + bs * cidx + ridx; \ 181 if (addv == ADD_VALUES) *bap += value; \ 182 else *bap = value; \ 183 goto a_noinsert; \ 184 } \ 185 } \ 186 if (a->nonew == 1) goto a_noinsert; \ 187 PetscCheck(a->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 188 MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, aimax, a->nonew, MatScalar); \ 189 N = nrow++ - 1; \ 190 /* shift up all the later entries in this row */ \ 191 PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \ 192 PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \ 193 PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \ 194 rp[_i] = bcol; \ 195 ap[bs2 * _i + bs * cidx + ridx] = value; \ 196 a_noinsert:; \ 197 ailen[brow] = nrow; \ 198 } while (0) 199 200 #define MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, orow, ocol) \ 201 do { \ 202 brow = row / bs; \ 203 rp = PetscSafePointerPlusOffset(bj, bi[brow]); \ 204 ap = PetscSafePointerPlusOffset(ba, bs2 * bi[brow]); \ 205 rmax = bimax[brow]; \ 206 nrow = bilen[brow]; \ 207 bcol = col / bs; \ 208 ridx = row % bs; \ 209 cidx = col % bs; \ 210 low = 0; \ 211 high = nrow; \ 212 while (high - low > 3) { \ 213 t = (low + high) / 2; \ 214 if (rp[t] > bcol) high = t; \ 215 else low = t; \ 216 } \ 217 for (_i = low; _i < high; _i++) { \ 218 if (rp[_i] > bcol) break; \ 219 if (rp[_i] == bcol) { \ 220 bap = ap + bs2 * _i + bs * cidx + ridx; \ 221 if (addv == ADD_VALUES) *bap += value; \ 222 else *bap = value; \ 223 goto b_noinsert; \ 224 } \ 225 } \ 226 if (b->nonew == 1) goto b_noinsert; \ 227 PetscCheck(b->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 228 MatSeqXAIJReallocateAIJ(B, b->mbs, bs2, nrow, brow, bcol, rmax, ba, bi, bj, rp, ap, bimax, b->nonew, MatScalar); \ 229 N = nrow++ - 1; \ 230 /* shift up all the later entries in this row */ \ 231 PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \ 232 PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \ 233 
PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \ 234 rp[_i] = bcol; \ 235 ap[bs2 * _i + bs * cidx + ridx] = value; \ 236 b_noinsert:; \ 237 bilen[brow] = nrow; \ 238 } while (0) 239 240 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) 241 { 242 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 243 MatScalar value; 244 PetscBool roworiented = baij->roworiented; 245 PetscInt i, j, row, col; 246 PetscInt rstart_orig = mat->rmap->rstart; 247 PetscInt rend_orig = mat->rmap->rend, cstart_orig = mat->cmap->rstart; 248 PetscInt cend_orig = mat->cmap->rend, bs = mat->rmap->bs; 249 250 /* Some Variables required in the macro */ 251 Mat A = baij->A; 252 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 253 PetscInt *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j; 254 MatScalar *aa = a->a; 255 256 Mat B = baij->B; 257 Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)B->data; 258 PetscInt *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j; 259 MatScalar *ba = b->a; 260 261 PetscInt *rp, ii, nrow, _i, rmax, N, brow, bcol; 262 PetscInt low, high, t, ridx, cidx, bs2 = a->bs2; 263 MatScalar *ap, *bap; 264 265 PetscFunctionBegin; 266 for (i = 0; i < m; i++) { 267 if (im[i] < 0) continue; 268 PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1); 269 if (im[i] >= rstart_orig && im[i] < rend_orig) { 270 row = im[i] - rstart_orig; 271 for (j = 0; j < n; j++) { 272 if (in[j] >= cstart_orig && in[j] < cend_orig) { 273 col = in[j] - cstart_orig; 274 if (roworiented) value = v[i * n + j]; 275 else value = v[i + j * m]; 276 MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, im[i], in[j]); 277 } else if (in[j] < 0) { 278 continue; 279 } else { 280 PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1); 281 if (mat->was_assembled) { 282 if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 283 #if defined(PETSC_USE_CTABLE) 284 PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] / bs + 1, 0, &col)); 285 col = col - 1; 286 #else 287 col = baij->colmap[in[j] / bs] - 1; 288 #endif 289 if (col < 0 && !((Mat_SeqBAIJ *)baij->B->data)->nonew) { 290 PetscCall(MatDisAssemble_MPIBAIJ(mat)); 291 col = in[j]; 292 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 293 B = baij->B; 294 b = (Mat_SeqBAIJ *)B->data; 295 bimax = b->imax; 296 bi = b->i; 297 bilen = b->ilen; 298 bj = b->j; 299 ba = b->a; 300 } else { 301 PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]); 302 col += in[j] % bs; 303 } 304 } else col = in[j]; 305 if (roworiented) value = v[i * n + j]; 306 else value = v[i + j * m]; 307 MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, im[i], in[j]); 308 /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */ 309 } 310 } 311 } else { 312 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]); 313 if (!baij->donotstash) { 314 mat->assembled = PETSC_FALSE; 315 if (roworiented) { 316 PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE)); 317 } else { 318 
PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE)); 319 } 320 } 321 } 322 } 323 PetscFunctionReturn(PETSC_SUCCESS); 324 } 325 326 static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol) 327 { 328 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 329 PetscInt *rp, low, high, t, ii, jj, nrow, i, rmax, N; 330 PetscInt *imax = a->imax, *ai = a->i, *ailen = a->ilen; 331 PetscInt *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs; 332 PetscBool roworiented = a->roworiented; 333 const PetscScalar *value = v; 334 MatScalar *ap, *aa = a->a, *bap; 335 336 PetscFunctionBegin; 337 rp = aj + ai[row]; 338 ap = aa + bs2 * ai[row]; 339 rmax = imax[row]; 340 nrow = ailen[row]; 341 value = v; 342 low = 0; 343 high = nrow; 344 while (high - low > 7) { 345 t = (low + high) / 2; 346 if (rp[t] > col) high = t; 347 else low = t; 348 } 349 for (i = low; i < high; i++) { 350 if (rp[i] > col) break; 351 if (rp[i] == col) { 352 bap = ap + bs2 * i; 353 if (roworiented) { 354 if (is == ADD_VALUES) { 355 for (ii = 0; ii < bs; ii++) { 356 for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++; 357 } 358 } else { 359 for (ii = 0; ii < bs; ii++) { 360 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 361 } 362 } 363 } else { 364 if (is == ADD_VALUES) { 365 for (ii = 0; ii < bs; ii++, value += bs) { 366 for (jj = 0; jj < bs; jj++) bap[jj] += value[jj]; 367 bap += bs; 368 } 369 } else { 370 for (ii = 0; ii < bs; ii++, value += bs) { 371 for (jj = 0; jj < bs; jj++) bap[jj] = value[jj]; 372 bap += bs; 373 } 374 } 375 } 376 goto noinsert2; 377 } 378 } 379 if (nonew == 1) goto noinsert2; 380 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol); 381 MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar); 382 N = nrow++ - 1; 383 high++; 384 /* shift up all the later entries in this row */ 385 PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1)); 386 PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1))); 387 rp[i] = col; 388 bap = ap + bs2 * i; 389 if (roworiented) { 390 for (ii = 0; ii < bs; ii++) { 391 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 392 } 393 } else { 394 for (ii = 0; ii < bs; ii++) { 395 for (jj = 0; jj < bs; jj++) *bap++ = *value++; 396 } 397 } 398 noinsert2:; 399 ailen[row] = nrow; 400 PetscFunctionReturn(PETSC_SUCCESS); 401 } 402 403 /* 404 This routine should be optimized so that the block copy at ** Here a copy is required ** below is not needed 405 by passing additional stride information into the MatSetValuesBlocked_SeqBAIJ_Inlined() routine 406 */ 407 static PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) 408 { 409 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 410 const PetscScalar *value; 411 MatScalar *barray = baij->barray; 412 PetscBool roworiented = baij->roworiented; 413 PetscInt i, j, ii, jj, row, col, rstart = baij->rstartbs; 414 PetscInt rend = baij->rendbs, cstart = baij->cstartbs, stepval; 415 PetscInt cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2; 416 417 PetscFunctionBegin; 418 if (!barray) { 419 PetscCall(PetscMalloc1(bs2, &barray)); 420 baij->barray = barray; 421 } 422 423 if (roworiented) stepval = (n - 
1) * bs; 424 else stepval = (m - 1) * bs; 425 426 for (i = 0; i < m; i++) { 427 if (im[i] < 0) continue; 428 PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1); 429 if (im[i] >= rstart && im[i] < rend) { 430 row = im[i] - rstart; 431 for (j = 0; j < n; j++) { 432 /* If NumCol = 1 then a copy is not required */ 433 if ((roworiented) && (n == 1)) { 434 barray = (MatScalar *)v + i * bs2; 435 } else if ((!roworiented) && (m == 1)) { 436 barray = (MatScalar *)v + j * bs2; 437 } else { /* Here a copy is required */ 438 if (roworiented) { 439 value = v + (i * (stepval + bs) + j) * bs; 440 } else { 441 value = v + (j * (stepval + bs) + i) * bs; 442 } 443 for (ii = 0; ii < bs; ii++, value += bs + stepval) { 444 for (jj = 0; jj < bs; jj++) barray[jj] = value[jj]; 445 barray += bs; 446 } 447 barray -= bs2; 448 } 449 450 if (in[j] >= cstart && in[j] < cend) { 451 col = in[j] - cstart; 452 PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j])); 453 } else if (in[j] < 0) { 454 continue; 455 } else { 456 PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1); 457 if (mat->was_assembled) { 458 if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 459 460 #if defined(PETSC_USE_CTABLE) 461 PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col)); 462 col = col < 1 ? -1 : (col - 1) / bs; 463 #else 464 col = baij->colmap[in[j]] < 1 ? -1 : (baij->colmap[in[j]] - 1) / bs; 465 #endif 466 if (col < 0 && !((Mat_SeqBAIJ *)baij->B->data)->nonew) { 467 PetscCall(MatDisAssemble_MPIBAIJ(mat)); 468 col = in[j]; 469 } else PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new blocked indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]); 470 } else col = in[j]; 471 PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j])); 472 } 473 } 474 } else { 475 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]); 476 if (!baij->donotstash) { 477 if (roworiented) { 478 PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 479 } else { 480 PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 481 } 482 } 483 } 484 } 485 PetscFunctionReturn(PETSC_SUCCESS); 486 } 487 488 #define HASH_KEY 0.6180339887 489 #define HASH(size, key, tmp) (tmp = (key) * HASH_KEY, (PetscInt)((size) * (tmp - (PetscInt)tmp))) 490 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 491 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 492 static PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) 493 { 494 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 495 PetscBool roworiented = baij->roworiented; 496 PetscInt i, j, row, col; 497 PetscInt rstart_orig = mat->rmap->rstart; 498 PetscInt rend_orig = mat->rmap->rend, Nbs = baij->Nbs; 499 PetscInt h1, key, size = baij->ht_size, bs = mat->rmap->bs, *HT = baij->ht, idx; 500 PetscReal tmp; 501 MatScalar **HD = baij->hd, value; 502 PetscInt total_ct = baij->ht_total_ct, 
insert_ct = baij->ht_insert_ct; 503 504 PetscFunctionBegin; 505 for (i = 0; i < m; i++) { 506 if (PetscDefined(USE_DEBUG)) { 507 PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row"); 508 PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1); 509 } 510 row = im[i]; 511 if (row >= rstart_orig && row < rend_orig) { 512 for (j = 0; j < n; j++) { 513 col = in[j]; 514 if (roworiented) value = v[i * n + j]; 515 else value = v[i + j * m]; 516 /* Look up PetscInto the Hash Table */ 517 key = (row / bs) * Nbs + (col / bs) + 1; 518 h1 = HASH(size, key, tmp); 519 520 idx = h1; 521 if (PetscDefined(USE_DEBUG)) { 522 insert_ct++; 523 total_ct++; 524 if (HT[idx] != key) { 525 for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++); 526 if (idx == size) { 527 for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++); 528 PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col); 529 } 530 } 531 } else if (HT[idx] != key) { 532 for (idx = h1; (idx < size) && (HT[idx] != key); idx++); 533 if (idx == size) { 534 for (idx = 0; (idx < h1) && (HT[idx] != key); idx++); 535 PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col); 536 } 537 } 538 /* A HASH table entry is found, so insert the values at the correct address */ 539 if (addv == ADD_VALUES) *(HD[idx] + (col % bs) * bs + (row % bs)) += value; 540 else *(HD[idx] + (col % bs) * bs + (row % bs)) = value; 541 } 542 } else if (!baij->donotstash) { 543 if (roworiented) { 544 PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE)); 545 } else { 546 PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE)); 547 } 548 } 549 } 550 if (PetscDefined(USE_DEBUG)) { 551 baij->ht_total_ct += total_ct; 552 baij->ht_insert_ct += insert_ct; 553 } 554 PetscFunctionReturn(PETSC_SUCCESS); 555 } 556 557 static PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) 558 { 559 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 560 PetscBool roworiented = baij->roworiented; 561 PetscInt i, j, ii, jj, row, col; 562 PetscInt rstart = baij->rstartbs; 563 PetscInt rend = mat->rmap->rend, stepval, bs = mat->rmap->bs, bs2 = baij->bs2, nbs2 = n * bs2; 564 PetscInt h1, key, size = baij->ht_size, idx, *HT = baij->ht, Nbs = baij->Nbs; 565 PetscReal tmp; 566 MatScalar **HD = baij->hd, *baij_a; 567 const PetscScalar *v_t, *value; 568 PetscInt total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct; 569 570 PetscFunctionBegin; 571 if (roworiented) stepval = (n - 1) * bs; 572 else stepval = (m - 1) * bs; 573 574 for (i = 0; i < m; i++) { 575 if (PetscDefined(USE_DEBUG)) { 576 PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row: %" PetscInt_FMT, im[i]); 577 PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1); 578 } 579 row = im[i]; 580 v_t = v + i * nbs2; 581 if (row >= rstart && row < rend) { 582 for (j = 0; j < n; j++) { 583 col = in[j]; 584 585 /* Look up into the Hash Table */ 586 key = row * Nbs + col + 1; 587 h1 = HASH(size, key, tmp); 588 589 idx = h1; 590 if 
(PetscDefined(USE_DEBUG)) { 591 total_ct++; 592 insert_ct++; 593 if (HT[idx] != key) { 594 for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++); 595 if (idx == size) { 596 for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++); 597 PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col); 598 } 599 } 600 } else if (HT[idx] != key) { 601 for (idx = h1; (idx < size) && (HT[idx] != key); idx++); 602 if (idx == size) { 603 for (idx = 0; (idx < h1) && (HT[idx] != key); idx++); 604 PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col); 605 } 606 } 607 baij_a = HD[idx]; 608 if (roworiented) { 609 /*value = v + i*(stepval+bs)*bs + j*bs;*/ 610 /* value = v + (i*(stepval+bs)+j)*bs; */ 611 value = v_t; 612 v_t += bs; 613 if (addv == ADD_VALUES) { 614 for (ii = 0; ii < bs; ii++, value += stepval) { 615 for (jj = ii; jj < bs2; jj += bs) baij_a[jj] += *value++; 616 } 617 } else { 618 for (ii = 0; ii < bs; ii++, value += stepval) { 619 for (jj = ii; jj < bs2; jj += bs) baij_a[jj] = *value++; 620 } 621 } 622 } else { 623 value = v + j * (stepval + bs) * bs + i * bs; 624 if (addv == ADD_VALUES) { 625 for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) { 626 for (jj = 0; jj < bs; jj++) baij_a[jj] += *value++; 627 } 628 } else { 629 for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) { 630 for (jj = 0; jj < bs; jj++) baij_a[jj] = *value++; 631 } 632 } 633 } 634 } 635 } else { 636 if (!baij->donotstash) { 637 if (roworiented) { 638 PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 639 } else { 640 PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 641 } 642 } 643 } 644 } 645 if (PetscDefined(USE_DEBUG)) { 646 baij->ht_total_ct += total_ct; 647 baij->ht_insert_ct += insert_ct; 648 } 649 PetscFunctionReturn(PETSC_SUCCESS); 650 } 651 652 static PetscErrorCode MatGetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[]) 653 { 654 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 655 PetscInt bs = mat->rmap->bs, i, j, bsrstart = mat->rmap->rstart, bsrend = mat->rmap->rend; 656 PetscInt bscstart = mat->cmap->rstart, bscend = mat->cmap->rend, row, col, data; 657 658 PetscFunctionBegin; 659 for (i = 0; i < m; i++) { 660 if (idxm[i] < 0) continue; /* negative row */ 661 PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1); 662 PetscCheck(idxm[i] >= bsrstart && idxm[i] < bsrend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported"); 663 row = idxm[i] - bsrstart; 664 for (j = 0; j < n; j++) { 665 if (idxn[j] < 0) continue; /* negative column */ 666 PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1); 667 if (idxn[j] >= bscstart && idxn[j] < bscend) { 668 col = idxn[j] - bscstart; 669 PetscCall(MatGetValues_SeqBAIJ(baij->A, 1, &row, 1, &col, v + i * n + j)); 670 } else { 671 if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 672 #if defined(PETSC_USE_CTABLE) 673 PetscCall(PetscHMapIGetWithDefault(baij->colmap, idxn[j] / bs + 1, 0, &data)); 674 data--; 675 #else 676 data = baij->colmap[idxn[j] / bs] - 1; 677 #endif 678 if 
(data < 0 || baij->garray[data / bs] != idxn[j] / bs) *(v + i * n + j) = 0.0; 679 else { 680 col = data + idxn[j] % bs; 681 PetscCall(MatGetValues_SeqBAIJ(baij->B, 1, &row, 1, &col, v + i * n + j)); 682 } 683 } 684 } 685 } 686 PetscFunctionReturn(PETSC_SUCCESS); 687 } 688 689 static PetscErrorCode MatNorm_MPIBAIJ(Mat mat, NormType type, PetscReal *nrm) 690 { 691 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 692 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ *)baij->A->data, *bmat = (Mat_SeqBAIJ *)baij->B->data; 693 PetscInt i, j, bs2 = baij->bs2, bs = baij->A->rmap->bs, nz, row, col; 694 PetscReal sum = 0.0; 695 MatScalar *v; 696 697 PetscFunctionBegin; 698 if (baij->size == 1) { 699 PetscCall(MatNorm(baij->A, type, nrm)); 700 } else { 701 if (type == NORM_FROBENIUS) { 702 v = amat->a; 703 nz = amat->nz * bs2; 704 for (i = 0; i < nz; i++) { 705 sum += PetscRealPart(PetscConj(*v) * (*v)); 706 v++; 707 } 708 v = bmat->a; 709 nz = bmat->nz * bs2; 710 for (i = 0; i < nz; i++) { 711 sum += PetscRealPart(PetscConj(*v) * (*v)); 712 v++; 713 } 714 PetscCallMPI(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 715 *nrm = PetscSqrtReal(*nrm); 716 } else if (type == NORM_1) { /* max column sum */ 717 Vec col, bcol; 718 PetscScalar *array; 719 PetscInt *jj, *garray = baij->garray; 720 721 PetscCall(MatCreateVecs(mat, &col, NULL)); 722 PetscCall(VecSet(col, 0.0)); 723 PetscCall(VecGetArrayWrite(col, &array)); 724 v = amat->a; 725 jj = amat->j; 726 for (i = 0; i < amat->nz; i++) { 727 for (j = 0; j < bs; j++) { 728 PetscInt col = bs * *jj + j; /* column index */ 729 730 for (row = 0; row < bs; row++) array[col] += PetscAbsScalar(*v++); 731 } 732 jj++; 733 } 734 PetscCall(VecRestoreArrayWrite(col, &array)); 735 PetscCall(MatCreateVecs(baij->B, &bcol, NULL)); 736 PetscCall(VecSet(bcol, 0.0)); 737 PetscCall(VecGetArrayWrite(bcol, &array)); 738 v = bmat->a; 739 jj = bmat->j; 740 for (i = 0; i < bmat->nz; i++) { 741 for (j = 0; j < bs; j++) { 742 PetscInt col = bs * *jj + j; /* column index */ 743 744 for (row = 0; row < bs; row++) array[col] += PetscAbsScalar(*v++); 745 } 746 jj++; 747 } 748 PetscCall(VecSetValuesBlocked(col, bmat->nbs, garray, array, ADD_VALUES)); 749 PetscCall(VecRestoreArrayWrite(bcol, &array)); 750 PetscCall(VecDestroy(&bcol)); 751 PetscCall(VecAssemblyBegin(col)); 752 PetscCall(VecAssemblyEnd(col)); 753 PetscCall(VecNorm(col, NORM_INFINITY, nrm)); 754 PetscCall(VecDestroy(&col)); 755 } else if (type == NORM_INFINITY) { /* max row sum */ 756 PetscReal *sums; 757 PetscCall(PetscMalloc1(bs, &sums)); 758 sum = 0.0; 759 for (j = 0; j < amat->mbs; j++) { 760 for (row = 0; row < bs; row++) sums[row] = 0.0; 761 v = amat->a + bs2 * amat->i[j]; 762 nz = amat->i[j + 1] - amat->i[j]; 763 for (i = 0; i < nz; i++) { 764 for (col = 0; col < bs; col++) { 765 for (row = 0; row < bs; row++) { 766 sums[row] += PetscAbsScalar(*v); 767 v++; 768 } 769 } 770 } 771 v = bmat->a + bs2 * bmat->i[j]; 772 nz = bmat->i[j + 1] - bmat->i[j]; 773 for (i = 0; i < nz; i++) { 774 for (col = 0; col < bs; col++) { 775 for (row = 0; row < bs; row++) { 776 sums[row] += PetscAbsScalar(*v); 777 v++; 778 } 779 } 780 } 781 for (row = 0; row < bs; row++) { 782 if (sums[row] > sum) sum = sums[row]; 783 } 784 } 785 PetscCallMPI(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat))); 786 PetscCall(PetscFree(sums)); 787 } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for this norm yet"); 788 } 789 PetscFunctionReturn(PETSC_SUCCESS); 790 } 791 792 /* 
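Background, with illustrative numbers that are not from the source: keys are the 1-based
global block indices key = row*Nbs + col + 1, the HASH macro defined earlier does
multiplicative hashing (multiply the key by 0.6180339887, keep the fractional part, scale
by the table size), and collisions are resolved by linear probing. For example, ht_size = 10
and key = 7 give tmp = 4.326..., hence starting slot (PetscInt)(10 * 0.326...) = 3.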
793 Creates the hash table, and sets the table 794 This table is created only once. 795 If new entries need to be added to the matrix 796 then the hash table has to be destroyed and 797 recreated. 798 */ 799 static PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat, PetscReal factor) 800 { 801 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 802 Mat A = baij->A, B = baij->B; 803 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data; 804 PetscInt i, j, k, nz = a->nz + b->nz, h1, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j; 805 PetscInt ht_size, bs2 = baij->bs2, rstart = baij->rstartbs; 806 PetscInt cstart = baij->cstartbs, *garray = baij->garray, row, col, Nbs = baij->Nbs; 807 PetscInt *HT, key; 808 MatScalar **HD; 809 PetscReal tmp; 810 #if defined(PETSC_USE_INFO) 811 PetscInt ct = 0, max = 0; 812 #endif 813 814 PetscFunctionBegin; 815 if (baij->ht) PetscFunctionReturn(PETSC_SUCCESS); 816 817 baij->ht_size = (PetscInt)(factor * nz); 818 ht_size = baij->ht_size; 819 820 /* Allocate Memory for Hash Table */ 821 PetscCall(PetscCalloc2(ht_size, &baij->hd, ht_size, &baij->ht)); 822 HD = baij->hd; 823 HT = baij->ht; 824 825 /* Loop Over A */ 826 for (i = 0; i < a->mbs; i++) { 827 for (j = ai[i]; j < ai[i + 1]; j++) { 828 row = i + rstart; 829 col = aj[j] + cstart; 830 831 key = row * Nbs + col + 1; 832 h1 = HASH(ht_size, key, tmp); 833 for (k = 0; k < ht_size; k++) { 834 if (!HT[(h1 + k) % ht_size]) { 835 HT[(h1 + k) % ht_size] = key; 836 HD[(h1 + k) % ht_size] = a->a + j * bs2; 837 break; 838 #if defined(PETSC_USE_INFO) 839 } else { 840 ct++; 841 #endif 842 } 843 } 844 #if defined(PETSC_USE_INFO) 845 if (k > max) max = k; 846 #endif 847 } 848 } 849 /* Loop Over B */ 850 for (i = 0; i < b->mbs; i++) { 851 for (j = bi[i]; j < bi[i + 1]; j++) { 852 row = i + rstart; 853 col = garray[bj[j]]; 854 key = row * Nbs + col + 1; 855 h1 = HASH(ht_size, key, tmp); 856 for (k = 0; k < ht_size; k++) { 857 if (!HT[(h1 + k) % ht_size]) { 858 HT[(h1 + k) % ht_size] = key; 859 HD[(h1 + k) % ht_size] = b->a + j * bs2; 860 break; 861 #if defined(PETSC_USE_INFO) 862 } else { 863 ct++; 864 #endif 865 } 866 } 867 #if defined(PETSC_USE_INFO) 868 if (k > max) max = k; 869 #endif 870 } 871 } 872 873 /* Print Summary */ 874 #if defined(PETSC_USE_INFO) 875 for (i = 0, j = 0; i < ht_size; i++) { 876 if (HT[i]) j++; 877 } 878 PetscCall(PetscInfo(mat, "Average Search = %5.2g,max search = %" PetscInt_FMT "\n", (!j) ? 
0.0 : (double)(((PetscReal)(ct + j)) / j), max)); 879 #endif 880 PetscFunctionReturn(PETSC_SUCCESS); 881 } 882 883 static PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat, MatAssemblyType mode) 884 { 885 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 886 PetscInt nstash, reallocs; 887 888 PetscFunctionBegin; 889 if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS); 890 891 PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range)); 892 PetscCall(MatStashScatterBegin_Private(mat, &mat->bstash, baij->rangebs)); 893 PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs)); 894 PetscCall(PetscInfo(mat, "Stash has %" PetscInt_FMT " entries,uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs)); 895 PetscCall(MatStashGetInfo_Private(&mat->bstash, &nstash, &reallocs)); 896 PetscCall(PetscInfo(mat, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs)); 897 PetscFunctionReturn(PETSC_SUCCESS); 898 } 899 900 static PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat, MatAssemblyType mode) 901 { 902 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 903 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)baij->A->data; 904 PetscInt i, j, rstart, ncols, flg, bs2 = baij->bs2; 905 PetscInt *row, *col; 906 PetscBool r1, r2, r3, all_assembled; 907 MatScalar *val; 908 PetscMPIInt n; 909 910 PetscFunctionBegin; 911 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 912 if (!baij->donotstash && !mat->nooffprocentries) { 913 while (1) { 914 PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg)); 915 if (!flg) break; 916 917 for (i = 0; i < n;) { 918 /* Now identify the consecutive vals belonging to the same row */ 919 for (j = i, rstart = row[j]; j < n; j++) { 920 if (row[j] != rstart) break; 921 } 922 if (j < n) ncols = j - i; 923 else ncols = n - i; 924 /* Now assemble all these values with a single function call */ 925 PetscCall(MatSetValues_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode)); 926 i = j; 927 } 928 } 929 PetscCall(MatStashScatterEnd_Private(&mat->stash)); 930 /* Now process the block-stash. Since the values are stashed column-oriented, 931 set the row-oriented flag to column-oriented, and after MatSetValues() 932 restore the original flags */ 933 r1 = baij->roworiented; 934 r2 = a->roworiented; 935 r3 = ((Mat_SeqBAIJ *)baij->B->data)->roworiented; 936 937 baij->roworiented = PETSC_FALSE; 938 a->roworiented = PETSC_FALSE; 939 ((Mat_SeqBAIJ *)baij->B->data)->roworiented = PETSC_FALSE; 940 while (1) { 941 PetscCall(MatStashScatterGetMesg_Private(&mat->bstash, &n, &row, &col, &val, &flg)); 942 if (!flg) break; 943 944 for (i = 0; i < n;) { 945 /* Now identify the consecutive vals belonging to the same row */ 946 for (j = i, rstart = row[j]; j < n; j++) { 947 if (row[j] != rstart) break; 948 } 949 if (j < n) ncols = j - i; 950 else ncols = n - i; 951 PetscCall(MatSetValuesBlocked_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i * bs2, mat->insertmode)); 952 i = j; 953 } 954 } 955 PetscCall(MatStashScatterEnd_Private(&mat->bstash)); 956 957 baij->roworiented = r1; 958 a->roworiented = r2; 959 ((Mat_SeqBAIJ *)baij->B->data)->roworiented = r3; 960 } 961 962 PetscCall(MatAssemblyBegin(baij->A, mode)); 963 PetscCall(MatAssemblyEnd(baij->A, mode)); 964 965 /* determine if any process has disassembled, if so we must 966 also disassemble ourselves, in order that we may reassemble. 
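(a rank that inserted an entry in a previously unknown off-process block column has already
called MatDisAssemble_MPIBAIJ(); rebuilding the off-diagonal column map and the Mvctx scatter
during reassembly is collective, so the remaining ranks must disassemble too)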
*/ 967 /* 968 if nonzero structure of submatrix B cannot change then we know that 969 no process disassembled thus we can skip this stuff 970 */ 971 if (!((Mat_SeqBAIJ *)baij->B->data)->nonew) { 972 PetscCallMPI(MPIU_Allreduce(&mat->was_assembled, &all_assembled, 1, MPI_C_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 973 if (mat->was_assembled && !all_assembled) PetscCall(MatDisAssemble_MPIBAIJ(mat)); 974 } 975 976 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIBAIJ(mat)); 977 PetscCall(MatAssemblyBegin(baij->B, mode)); 978 PetscCall(MatAssemblyEnd(baij->B, mode)); 979 980 #if defined(PETSC_USE_INFO) 981 if (baij->ht && mode == MAT_FINAL_ASSEMBLY) { 982 PetscCall(PetscInfo(mat, "Average Hash Table Search in MatSetValues = %5.2f\n", (double)((PetscReal)baij->ht_total_ct) / baij->ht_insert_ct)); 983 984 baij->ht_total_ct = 0; 985 baij->ht_insert_ct = 0; 986 } 987 #endif 988 if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 989 PetscCall(MatCreateHashTable_MPIBAIJ_Private(mat, baij->ht_fact)); 990 991 mat->ops->setvalues = MatSetValues_MPIBAIJ_HT; 992 mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 993 } 994 995 PetscCall(PetscFree2(baij->rowvalues, baij->rowindices)); 996 997 baij->rowvalues = NULL; 998 999 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 1000 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ *)baij->A->data)->nonew) { 1001 PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate; 1002 PetscCallMPI(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat))); 1003 } 1004 PetscFunctionReturn(PETSC_SUCCESS); 1005 } 1006 1007 #include <petscdraw.h> 1008 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer) 1009 { 1010 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 1011 PetscMPIInt rank = baij->rank; 1012 PetscInt bs = mat->rmap->bs; 1013 PetscBool isascii, isdraw; 1014 PetscViewer sviewer; 1015 PetscViewerFormat format; 1016 1017 PetscFunctionBegin; 1018 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii)); 1019 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1020 if (isascii) { 1021 PetscCall(PetscViewerGetFormat(viewer, &format)); 1022 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1023 MatInfo info; 1024 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 1025 PetscCall(MatGetInfo(mat, MAT_LOCAL, &info)); 1026 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 1027 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, 1028 mat->rmap->bs, info.memory)); 1029 PetscCall(MatGetInfo(baij->A, MAT_LOCAL, &info)); 1030 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used)); 1031 PetscCall(MatGetInfo(baij->B, MAT_LOCAL, &info)); 1032 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used)); 1033 PetscCall(PetscViewerFlush(viewer)); 1034 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 1035 PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n")); 1036 
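/* baij->Mvctx is the scatter that gathers the off-process entries of x needed by the off-diagonal block B in MatMult() */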
PetscCall(VecScatterView(baij->Mvctx, viewer)); 1037 PetscFunctionReturn(PETSC_SUCCESS); 1038 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1039 PetscCall(PetscViewerASCIIPrintf(viewer, " block size is %" PetscInt_FMT "\n", bs)); 1040 PetscFunctionReturn(PETSC_SUCCESS); 1041 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1042 PetscFunctionReturn(PETSC_SUCCESS); 1043 } 1044 } 1045 1046 if (isdraw) { 1047 PetscDraw draw; 1048 PetscBool isnull; 1049 PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); 1050 PetscCall(PetscDrawIsNull(draw, &isnull)); 1051 if (isnull) PetscFunctionReturn(PETSC_SUCCESS); 1052 } 1053 1054 { 1055 /* assemble the entire matrix onto first processor. */ 1056 Mat A; 1057 Mat_SeqBAIJ *Aloc; 1058 PetscInt M = mat->rmap->N, N = mat->cmap->N, *ai, *aj, col, i, j, k, *rvals, mbs = baij->mbs; 1059 MatScalar *a; 1060 const char *matname; 1061 1062 /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */ 1063 /* Perhaps this should be the type of mat? */ 1064 PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A)); 1065 if (rank == 0) { 1066 PetscCall(MatSetSizes(A, M, N, M, N)); 1067 } else { 1068 PetscCall(MatSetSizes(A, 0, 0, M, N)); 1069 } 1070 PetscCall(MatSetType(A, MATMPIBAIJ)); 1071 PetscCall(MatMPIBAIJSetPreallocation(A, mat->rmap->bs, 0, NULL, 0, NULL)); 1072 PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE)); 1073 1074 /* copy over the A part */ 1075 Aloc = (Mat_SeqBAIJ *)baij->A->data; 1076 ai = Aloc->i; 1077 aj = Aloc->j; 1078 a = Aloc->a; 1079 PetscCall(PetscMalloc1(bs, &rvals)); 1080 1081 for (i = 0; i < mbs; i++) { 1082 rvals[0] = bs * (baij->rstartbs + i); 1083 for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1; 1084 for (j = ai[i]; j < ai[i + 1]; j++) { 1085 col = (baij->cstartbs + aj[j]) * bs; 1086 for (k = 0; k < bs; k++) { 1087 PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES)); 1088 col++; 1089 a += bs; 1090 } 1091 } 1092 } 1093 /* copy over the B part */ 1094 Aloc = (Mat_SeqBAIJ *)baij->B->data; 1095 ai = Aloc->i; 1096 aj = Aloc->j; 1097 a = Aloc->a; 1098 for (i = 0; i < mbs; i++) { 1099 rvals[0] = bs * (baij->rstartbs + i); 1100 for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1; 1101 for (j = ai[i]; j < ai[i + 1]; j++) { 1102 col = baij->garray[aj[j]] * bs; 1103 for (k = 0; k < bs; k++) { 1104 PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES)); 1105 col++; 1106 a += bs; 1107 } 1108 } 1109 } 1110 PetscCall(PetscFree(rvals)); 1111 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 1112 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 1113 /* 1114 Everyone has to call to draw the matrix since the graphics waits are 1115 synchronized across all processors that share the PetscDraw object 1116 */ 1117 PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 1118 if (((PetscObject)mat)->name) PetscCall(PetscObjectGetName((PetscObject)mat, &matname)); 1119 if (rank == 0) { 1120 if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIBAIJ *)A->data)->A, matname)); 1121 PetscCall(MatView_SeqBAIJ(((Mat_MPIBAIJ *)A->data)->A, sviewer)); 1122 } 1123 PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 1124 PetscCall(MatDestroy(&A)); 1125 } 1126 PetscFunctionReturn(PETSC_SUCCESS); 1127 } 1128 1129 /* Used for both MPIBAIJ and MPISBAIJ matrices */ 1130 PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat, PetscViewer viewer) 1131 { 1132 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 1133 Mat_SeqBAIJ *A = 
(Mat_SeqBAIJ *)aij->A->data; 1134 Mat_SeqBAIJ *B = (Mat_SeqBAIJ *)aij->B->data; 1135 const PetscInt *garray = aij->garray; 1136 PetscInt header[4], M, N, m, rs, cs, bs, cnt, i, j, ja, jb, k, l; 1137 PetscCount nz, hnz; 1138 PetscInt *rowlens, *colidxs; 1139 PetscScalar *matvals; 1140 PetscMPIInt rank; 1141 1142 PetscFunctionBegin; 1143 PetscCall(PetscViewerSetUp(viewer)); 1144 1145 M = mat->rmap->N; 1146 N = mat->cmap->N; 1147 m = mat->rmap->n; 1148 rs = mat->rmap->rstart; 1149 cs = mat->cmap->rstart; 1150 bs = mat->rmap->bs; 1151 nz = bs * bs * (A->nz + B->nz); 1152 1153 /* write matrix header */ 1154 header[0] = MAT_FILE_CLASSID; 1155 header[1] = M; 1156 header[2] = N; 1157 PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_COUNT, MPI_SUM, 0, PetscObjectComm((PetscObject)mat))); 1158 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 1159 if (rank == 0) PetscCall(PetscIntCast(hnz, &header[3])); 1160 PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT)); 1161 1162 /* fill in and store row lengths */ 1163 PetscCall(PetscMalloc1(m, &rowlens)); 1164 for (cnt = 0, i = 0; i < A->mbs; i++) 1165 for (j = 0; j < bs; j++) rowlens[cnt++] = bs * (A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i]); 1166 PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT)); 1167 PetscCall(PetscFree(rowlens)); 1168 1169 /* fill in and store column indices */ 1170 PetscCall(PetscMalloc1(nz, &colidxs)); 1171 for (cnt = 0, i = 0; i < A->mbs; i++) { 1172 for (k = 0; k < bs; k++) { 1173 for (jb = B->i[i]; jb < B->i[i + 1]; jb++) { 1174 if (garray[B->j[jb]] > cs / bs) break; 1175 for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l; 1176 } 1177 for (ja = A->i[i]; ja < A->i[i + 1]; ja++) 1178 for (l = 0; l < bs; l++) colidxs[cnt++] = bs * A->j[ja] + l + cs; 1179 for (; jb < B->i[i + 1]; jb++) 1180 for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l; 1181 } 1182 } 1183 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscCount_FMT, cnt, nz); 1184 PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_INT)); 1185 PetscCall(PetscFree(colidxs)); 1186 1187 /* fill in and store nonzero values */ 1188 PetscCall(PetscMalloc1(nz, &matvals)); 1189 for (cnt = 0, i = 0; i < A->mbs; i++) { 1190 for (k = 0; k < bs; k++) { 1191 for (jb = B->i[i]; jb < B->i[i + 1]; jb++) { 1192 if (garray[B->j[jb]] > cs / bs) break; 1193 for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k]; 1194 } 1195 for (ja = A->i[i]; ja < A->i[i + 1]; ja++) 1196 for (l = 0; l < bs; l++) matvals[cnt++] = A->a[bs * (bs * ja + l) + k]; 1197 for (; jb < B->i[i + 1]; jb++) 1198 for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k]; 1199 } 1200 } 1201 PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_SCALAR)); 1202 PetscCall(PetscFree(matvals)); 1203 1204 /* write block size option to the viewer's .info file */ 1205 PetscCall(MatView_Binary_BlockSizes(mat, viewer)); 1206 PetscFunctionReturn(PETSC_SUCCESS); 1207 } 1208 1209 PetscErrorCode MatView_MPIBAIJ(Mat mat, PetscViewer viewer) 1210 { 1211 PetscBool isascii, isdraw, issocket, isbinary; 1212 1213 PetscFunctionBegin; 1214 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &isascii)); 1215 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1216 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket)); 1217 
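/* Minimal usage sketch for writing this matrix with a binary viewer (illustrative; the variable and file names are arbitrary):
     PetscViewer bv;
     PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, "mat.dat", FILE_MODE_WRITE, &bv));
     PetscCall(MatView(mat, bv));   // dispatched below to MatView_MPIBAIJ_Binary()
     PetscCall(PetscViewerDestroy(&bv));
   or, from the command line, -mat_view binary:mat.dat */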
PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1218 if (isascii || isdraw || issocket) PetscCall(MatView_MPIBAIJ_ASCIIorDraworSocket(mat, viewer)); 1219 else if (isbinary) PetscCall(MatView_MPIBAIJ_Binary(mat, viewer)); 1220 PetscFunctionReturn(PETSC_SUCCESS); 1221 } 1222 1223 static PetscErrorCode MatMult_MPIBAIJ(Mat A, Vec xx, Vec yy) 1224 { 1225 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1226 PetscInt nt; 1227 1228 PetscFunctionBegin; 1229 PetscCall(VecGetLocalSize(xx, &nt)); 1230 PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A and xx"); 1231 PetscCall(VecGetLocalSize(yy, &nt)); 1232 PetscCheck(nt == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A and yy"); 1233 PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1234 PetscCall((*a->A->ops->mult)(a->A, xx, yy)); 1235 PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1236 PetscCall((*a->B->ops->multadd)(a->B, a->lvec, yy, yy)); 1237 PetscFunctionReturn(PETSC_SUCCESS); 1238 } 1239 1240 static PetscErrorCode MatMultAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz) 1241 { 1242 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1243 1244 PetscFunctionBegin; 1245 PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1246 PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz)); 1247 PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1248 PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz)); 1249 PetscFunctionReturn(PETSC_SUCCESS); 1250 } 1251 1252 static PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A, Vec xx, Vec yy) 1253 { 1254 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1255 1256 PetscFunctionBegin; 1257 /* do nondiagonal part */ 1258 PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec)); 1259 /* do local part */ 1260 PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy)); 1261 /* add partial results together */ 1262 PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE)); 1263 PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE)); 1264 PetscFunctionReturn(PETSC_SUCCESS); 1265 } 1266 1267 static PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz) 1268 { 1269 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1270 1271 PetscFunctionBegin; 1272 /* do nondiagonal part */ 1273 PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec)); 1274 /* do local part */ 1275 PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz)); 1276 /* add partial results together */ 1277 PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE)); 1278 PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE)); 1279 PetscFunctionReturn(PETSC_SUCCESS); 1280 } 1281 1282 /* 1283 This only works correctly for square matrices where the subblock A->A is the 1284 diagonal block 1285 */ 1286 static PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A, Vec v) 1287 { 1288 PetscFunctionBegin; 1289 PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block"); 1290 PetscCall(MatGetDiagonal(((Mat_MPIBAIJ *)A->data)->A, v)); 1291 PetscFunctionReturn(PETSC_SUCCESS); 1292 } 1293 1294 static PetscErrorCode MatScale_MPIBAIJ(Mat A, PetscScalar aa) 1295 { 1296 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1297 1298 PetscFunctionBegin; 1299 PetscCall(MatScale(a->A, aa)); 1300 PetscCall(MatScale(a->B, aa)); 1301 
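/* both the diagonal block A and the off-diagonal block B are stored locally, so scaling by a constant needs no communication */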
PetscFunctionReturn(PETSC_SUCCESS); 1302 } 1303 1304 static PetscErrorCode MatGetRow_MPIBAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1305 { 1306 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data; 1307 PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p; 1308 PetscInt bs = matin->rmap->bs, bs2 = mat->bs2, i, *cworkA, *cworkB, **pcA, **pcB; 1309 PetscInt nztot, nzA, nzB, lrow, brstart = matin->rmap->rstart, brend = matin->rmap->rend; 1310 PetscInt *cmap, *idx_p, cstart = mat->cstartbs; 1311 1312 PetscFunctionBegin; 1313 PetscCheck(row >= brstart && row < brend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local rows"); 1314 PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active"); 1315 mat->getrowactive = PETSC_TRUE; 1316 1317 if (!mat->rowvalues && (idx || v)) { 1318 /* 1319 allocate enough space to hold information from the longest row. 1320 */ 1321 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *)mat->A->data, *Ba = (Mat_SeqBAIJ *)mat->B->data; 1322 PetscInt max = 1, mbs = mat->mbs, tmp; 1323 for (i = 0; i < mbs; i++) { 1324 tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i]; 1325 if (max < tmp) max = tmp; 1326 } 1327 PetscCall(PetscMalloc2(max * bs2, &mat->rowvalues, max * bs2, &mat->rowindices)); 1328 } 1329 lrow = row - brstart; 1330 1331 pvA = &vworkA; 1332 pcA = &cworkA; 1333 pvB = &vworkB; 1334 pcB = &cworkB; 1335 if (!v) { 1336 pvA = NULL; 1337 pvB = NULL; 1338 } 1339 if (!idx) { 1340 pcA = NULL; 1341 if (!v) pcB = NULL; 1342 } 1343 PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA)); 1344 PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB)); 1345 nztot = nzA + nzB; 1346 1347 cmap = mat->garray; 1348 if (v || idx) { 1349 if (nztot) { 1350 /* Sort by increasing column numbers, assuming A and B already sorted */ 1351 PetscInt imark = -1; 1352 if (v) { 1353 *v = v_p = mat->rowvalues; 1354 for (i = 0; i < nzB; i++) { 1355 if (cmap[cworkB[i] / bs] < cstart) v_p[i] = vworkB[i]; 1356 else break; 1357 } 1358 imark = i; 1359 for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i]; 1360 for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i]; 1361 } 1362 if (idx) { 1363 *idx = idx_p = mat->rowindices; 1364 if (imark > -1) { 1365 for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs; 1366 } else { 1367 for (i = 0; i < nzB; i++) { 1368 if (cmap[cworkB[i] / bs] < cstart) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs; 1369 else break; 1370 } 1371 imark = i; 1372 } 1373 for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart * bs + cworkA[i]; 1374 for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs; 1375 } 1376 } else { 1377 if (idx) *idx = NULL; 1378 if (v) *v = NULL; 1379 } 1380 } 1381 *nz = nztot; 1382 PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA)); 1383 PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB)); 1384 PetscFunctionReturn(PETSC_SUCCESS); 1385 } 1386 1387 static PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1388 { 1389 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 1390 1391 PetscFunctionBegin; 1392 PetscCheck(baij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow not called"); 1393 baij->getrowactive = PETSC_FALSE; 1394 PetscFunctionReturn(PETSC_SUCCESS); 1395 } 1396 1397 static PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A) 1398 { 1399 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data; 1400 1401 PetscFunctionBegin; 1402 PetscCall(MatZeroEntries(l->A)); 
1403 PetscCall(MatZeroEntries(l->B)); 1404 PetscFunctionReturn(PETSC_SUCCESS); 1405 } 1406 1407 static PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin, MatInfoType flag, MatInfo *info) 1408 { 1409 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)matin->data; 1410 Mat A = a->A, B = a->B; 1411 PetscLogDouble isend[5], irecv[5]; 1412 1413 PetscFunctionBegin; 1414 info->block_size = (PetscReal)matin->rmap->bs; 1415 1416 PetscCall(MatGetInfo(A, MAT_LOCAL, info)); 1417 1418 isend[0] = info->nz_used; 1419 isend[1] = info->nz_allocated; 1420 isend[2] = info->nz_unneeded; 1421 isend[3] = info->memory; 1422 isend[4] = info->mallocs; 1423 1424 PetscCall(MatGetInfo(B, MAT_LOCAL, info)); 1425 1426 isend[0] += info->nz_used; 1427 isend[1] += info->nz_allocated; 1428 isend[2] += info->nz_unneeded; 1429 isend[3] += info->memory; 1430 isend[4] += info->mallocs; 1431 1432 if (flag == MAT_LOCAL) { 1433 info->nz_used = isend[0]; 1434 info->nz_allocated = isend[1]; 1435 info->nz_unneeded = isend[2]; 1436 info->memory = isend[3]; 1437 info->mallocs = isend[4]; 1438 } else if (flag == MAT_GLOBAL_MAX) { 1439 PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin))); 1440 1441 info->nz_used = irecv[0]; 1442 info->nz_allocated = irecv[1]; 1443 info->nz_unneeded = irecv[2]; 1444 info->memory = irecv[3]; 1445 info->mallocs = irecv[4]; 1446 } else if (flag == MAT_GLOBAL_SUM) { 1447 PetscCallMPI(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin))); 1448 1449 info->nz_used = irecv[0]; 1450 info->nz_allocated = irecv[1]; 1451 info->nz_unneeded = irecv[2]; 1452 info->memory = irecv[3]; 1453 info->mallocs = irecv[4]; 1454 } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_ARG_WRONG, "Unknown MatInfoType argument %d", (int)flag); 1455 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1456 info->fill_ratio_needed = 0; 1457 info->factor_mallocs = 0; 1458 PetscFunctionReturn(PETSC_SUCCESS); 1459 } 1460 1461 static PetscErrorCode MatSetOption_MPIBAIJ(Mat A, MatOption op, PetscBool flg) 1462 { 1463 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1464 1465 PetscFunctionBegin; 1466 switch (op) { 1467 case MAT_NEW_NONZERO_LOCATIONS: 1468 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1469 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1470 case MAT_KEEP_NONZERO_PATTERN: 1471 case MAT_NEW_NONZERO_LOCATION_ERR: 1472 MatCheckPreallocated(A, 1); 1473 PetscCall(MatSetOption(a->A, op, flg)); 1474 PetscCall(MatSetOption(a->B, op, flg)); 1475 break; 1476 case MAT_ROW_ORIENTED: 1477 MatCheckPreallocated(A, 1); 1478 a->roworiented = flg; 1479 1480 PetscCall(MatSetOption(a->A, op, flg)); 1481 PetscCall(MatSetOption(a->B, op, flg)); 1482 break; 1483 case MAT_IGNORE_OFF_PROC_ENTRIES: 1484 a->donotstash = flg; 1485 break; 1486 case MAT_USE_HASH_TABLE: 1487 a->ht_flag = flg; 1488 a->ht_fact = 1.39; 1489 break; 1490 case MAT_SYMMETRIC: 1491 case MAT_STRUCTURALLY_SYMMETRIC: 1492 case MAT_HERMITIAN: 1493 case MAT_SYMMETRY_ETERNAL: 1494 case MAT_STRUCTURAL_SYMMETRY_ETERNAL: 1495 case MAT_SPD_ETERNAL: 1496 /* if the diagonal matrix is square it inherits some of the properties above */ 1497 if (a->A && A->rmap->n == A->cmap->n) PetscCall(MatSetOption(a->A, op, flg)); 1498 break; 1499 default: 1500 break; 1501 } 1502 PetscFunctionReturn(PETSC_SUCCESS); 1503 } 1504 1505 static PetscErrorCode MatTranspose_MPIBAIJ(Mat A, MatReuse reuse, Mat *matout) 1506 { 1507 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)A->data; 1508 Mat_SeqBAIJ *Aloc; 1509 Mat B; 1510 PetscInt M = A->rmap->N, N = 
A->cmap->N, *ai, *aj, i, *rvals, j, k, col; 1511 PetscInt bs = A->rmap->bs, mbs = baij->mbs; 1512 MatScalar *a; 1513 1514 PetscFunctionBegin; 1515 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout)); 1516 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 1517 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 1518 PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M)); 1519 PetscCall(MatSetType(B, ((PetscObject)A)->type_name)); 1520 /* Do not know preallocation information, but must set block size */ 1521 PetscCall(MatMPIBAIJSetPreallocation(B, A->rmap->bs, PETSC_DECIDE, NULL, PETSC_DECIDE, NULL)); 1522 } else { 1523 B = *matout; 1524 } 1525 1526 /* copy over the A part */ 1527 Aloc = (Mat_SeqBAIJ *)baij->A->data; 1528 ai = Aloc->i; 1529 aj = Aloc->j; 1530 a = Aloc->a; 1531 PetscCall(PetscMalloc1(bs, &rvals)); 1532 1533 for (i = 0; i < mbs; i++) { 1534 rvals[0] = bs * (baij->rstartbs + i); 1535 for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1; 1536 for (j = ai[i]; j < ai[i + 1]; j++) { 1537 col = (baij->cstartbs + aj[j]) * bs; 1538 for (k = 0; k < bs; k++) { 1539 PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES)); 1540 1541 col++; 1542 a += bs; 1543 } 1544 } 1545 } 1546 /* copy over the B part */ 1547 Aloc = (Mat_SeqBAIJ *)baij->B->data; 1548 ai = Aloc->i; 1549 aj = Aloc->j; 1550 a = Aloc->a; 1551 for (i = 0; i < mbs; i++) { 1552 rvals[0] = bs * (baij->rstartbs + i); 1553 for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1; 1554 for (j = ai[i]; j < ai[i + 1]; j++) { 1555 col = baij->garray[aj[j]] * bs; 1556 for (k = 0; k < bs; k++) { 1557 PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES)); 1558 col++; 1559 a += bs; 1560 } 1561 } 1562 } 1563 PetscCall(PetscFree(rvals)); 1564 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 1565 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 1566 1567 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) *matout = B; 1568 else PetscCall(MatHeaderMerge(A, &B)); 1569 PetscFunctionReturn(PETSC_SUCCESS); 1570 } 1571 1572 static PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat, Vec ll, Vec rr) 1573 { 1574 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 1575 Mat a = baij->A, b = baij->B; 1576 PetscInt s1, s2, s3; 1577 1578 PetscFunctionBegin; 1579 PetscCall(MatGetLocalSize(mat, &s2, &s3)); 1580 if (rr) { 1581 PetscCall(VecGetLocalSize(rr, &s1)); 1582 PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size"); 1583 /* Overlap communication with computation. 
*/ 1584 PetscCall(VecScatterBegin(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1585 } 1586 if (ll) { 1587 PetscCall(VecGetLocalSize(ll, &s1)); 1588 PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size"); 1589 PetscUseTypeMethod(b, diagonalscale, ll, NULL); 1590 } 1591 /* scale the diagonal block */ 1592 PetscUseTypeMethod(a, diagonalscale, ll, rr); 1593 1594 if (rr) { 1595 /* Do a scatter end and then right scale the off-diagonal block */ 1596 PetscCall(VecScatterEnd(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1597 PetscUseTypeMethod(b, diagonalscale, NULL, baij->lvec); 1598 } 1599 PetscFunctionReturn(PETSC_SUCCESS); 1600 } 1601 1602 static PetscErrorCode MatZeroRows_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 1603 { 1604 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data; 1605 PetscInt *lrows; 1606 PetscInt r, len; 1607 PetscBool cong; 1608 1609 PetscFunctionBegin; 1610 /* get locally owned rows */ 1611 PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows)); 1612 /* fix right-hand side if needed */ 1613 if (x && b) { 1614 const PetscScalar *xx; 1615 PetscScalar *bb; 1616 1617 PetscCall(VecGetArrayRead(x, &xx)); 1618 PetscCall(VecGetArray(b, &bb)); 1619 for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]]; 1620 PetscCall(VecRestoreArrayRead(x, &xx)); 1621 PetscCall(VecRestoreArray(b, &bb)); 1622 } 1623 1624 /* actually zap the local rows */ 1625 /* 1626 Zero the required rows. If the "diagonal block" of the matrix 1627 is square and the user wishes to set the diagonal we use separate 1628 code so that MatSetValues() is not called for each diagonal allocating 1629 new memory, thus calling lots of mallocs and slowing things down. 
1630 1631 */ 1632 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1633 PetscCall(MatZeroRows_SeqBAIJ(l->B, len, lrows, 0.0, NULL, NULL)); 1634 PetscCall(MatHasCongruentLayouts(A, &cong)); 1635 if ((diag != 0.0) && cong) { 1636 PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, diag, NULL, NULL)); 1637 } else if (diag != 0.0) { 1638 PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL)); 1639 PetscCheck(!((Mat_SeqBAIJ *)l->A->data)->nonew, PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options MAT_NEW_NONZERO_LOCATIONS, MAT_NEW_NONZERO_LOCATION_ERR, and MAT_NEW_NONZERO_ALLOCATION_ERR"); 1640 for (r = 0; r < len; ++r) { 1641 const PetscInt row = lrows[r] + A->rmap->rstart; 1642 PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES)); 1643 } 1644 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 1645 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 1646 } else { 1647 PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL)); 1648 } 1649 PetscCall(PetscFree(lrows)); 1650 1651 /* only change matrix nonzero state if pattern was allowed to be changed */ 1652 if (!((Mat_SeqBAIJ *)l->A->data)->keepnonzeropattern || !((Mat_SeqBAIJ *)l->A->data)->nonew) { 1653 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1654 PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A))); 1655 } 1656 PetscFunctionReturn(PETSC_SUCCESS); 1657 } 1658 1659 static PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 1660 { 1661 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data; 1662 PetscMPIInt n, p = 0; 1663 PetscInt i, j, k, r, len = 0, row, col, count; 1664 PetscInt *lrows, *owners = A->rmap->range; 1665 PetscSFNode *rrows; 1666 PetscSF sf; 1667 const PetscScalar *xx; 1668 PetscScalar *bb, *mask; 1669 Vec xmask, lmask; 1670 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)l->B->data; 1671 PetscInt bs = A->rmap->bs, bs2 = baij->bs2; 1672 PetscScalar *aa; 1673 1674 PetscFunctionBegin; 1675 PetscCall(PetscMPIIntCast(A->rmap->n, &n)); 1676 /* Create SF where leaves are input rows and roots are owned rows */ 1677 PetscCall(PetscMalloc1(n, &lrows)); 1678 for (r = 0; r < n; ++r) lrows[r] = -1; 1679 PetscCall(PetscMalloc1(N, &rrows)); 1680 for (r = 0; r < N; ++r) { 1681 const PetscInt idx = rows[r]; 1682 PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N); 1683 if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */ 1684 PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p)); 1685 } 1686 rrows[r].rank = p; 1687 rrows[r].index = rows[r] - owners[p]; 1688 } 1689 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1690 PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER)); 1691 /* Collect flags for rows to be zeroed */ 1692 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR)); 1693 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR)); 1694 PetscCall(PetscSFDestroy(&sf)); 1695 /* Compress and put in row numbers */ 1696 for (r = 0; r < n; ++r) 1697 if (lrows[r] >= 0) lrows[len++] = r; 1698 /* zero diagonal part of matrix */ 1699 PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b)); 1700 /* handle off-diagonal part of matrix */ 1701 
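/* Strategy for the off-diagonal block B below: mark the locally eliminated rows (equivalently columns, since the row and column layouts must be congruent here) with ones in a mask vector, scatter the mask to the ghost-column layout of B, zero those rows of B, then sweep B zeroing every entry whose column is masked, updating b with the known values of x when both vectors are provided. */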
PetscCall(MatCreateVecs(A, &xmask, NULL)); 1702 PetscCall(VecDuplicate(l->lvec, &lmask)); 1703 PetscCall(VecGetArray(xmask, &bb)); 1704 for (i = 0; i < len; i++) bb[lrows[i]] = 1; 1705 PetscCall(VecRestoreArray(xmask, &bb)); 1706 PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD)); 1707 PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD)); 1708 PetscCall(VecDestroy(&xmask)); 1709 if (x) { 1710 PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1711 PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1712 PetscCall(VecGetArrayRead(l->lvec, &xx)); 1713 PetscCall(VecGetArray(b, &bb)); 1714 } 1715 PetscCall(VecGetArray(lmask, &mask)); 1716 /* remove zeroed rows of off-diagonal matrix */ 1717 for (i = 0; i < len; ++i) { 1718 row = lrows[i]; 1719 count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs; 1720 aa = baij->a + baij->i[row / bs] * bs2 + (row % bs); 1721 for (k = 0; k < count; ++k) { 1722 aa[0] = 0.0; 1723 aa += bs; 1724 } 1725 } 1726 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1727 for (i = 0; i < l->B->rmap->N; ++i) { 1728 row = i / bs; 1729 for (j = baij->i[row]; j < baij->i[row + 1]; ++j) { 1730 for (k = 0; k < bs; ++k) { 1731 col = bs * baij->j[j] + k; 1732 if (PetscAbsScalar(mask[col])) { 1733 aa = baij->a + j * bs2 + (i % bs) + bs * k; 1734 if (x) bb[i] -= aa[0] * xx[col]; 1735 aa[0] = 0.0; 1736 } 1737 } 1738 } 1739 } 1740 if (x) { 1741 PetscCall(VecRestoreArray(b, &bb)); 1742 PetscCall(VecRestoreArrayRead(l->lvec, &xx)); 1743 } 1744 PetscCall(VecRestoreArray(lmask, &mask)); 1745 PetscCall(VecDestroy(&lmask)); 1746 PetscCall(PetscFree(lrows)); 1747 1748 /* only change matrix nonzero state if pattern was allowed to be changed */ 1749 if (!((Mat_SeqBAIJ *)l->A->data)->nonew) { 1750 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1751 PetscCallMPI(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A))); 1752 } 1753 PetscFunctionReturn(PETSC_SUCCESS); 1754 } 1755 1756 static PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1757 { 1758 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1759 1760 PetscFunctionBegin; 1761 PetscCall(MatSetUnfactored(a->A)); 1762 PetscFunctionReturn(PETSC_SUCCESS); 1763 } 1764 1765 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat, MatDuplicateOption, Mat *); 1766 1767 static PetscErrorCode MatEqual_MPIBAIJ(Mat A, Mat B, PetscBool *flag) 1768 { 1769 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ *)B->data, *matA = (Mat_MPIBAIJ *)A->data; 1770 Mat a, b, c, d; 1771 PetscBool flg; 1772 1773 PetscFunctionBegin; 1774 a = matA->A; 1775 b = matA->B; 1776 c = matB->A; 1777 d = matB->B; 1778 1779 PetscCall(MatEqual(a, c, &flg)); 1780 if (flg) PetscCall(MatEqual(b, d, &flg)); 1781 PetscCallMPI(MPIU_Allreduce(&flg, flag, 1, MPI_C_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A))); 1782 PetscFunctionReturn(PETSC_SUCCESS); 1783 } 1784 1785 static PetscErrorCode MatCopy_MPIBAIJ(Mat A, Mat B, MatStructure str) 1786 { 1787 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1788 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 1789 1790 PetscFunctionBegin; 1791 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
*/ 1792 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1793 PetscCall(MatCopy_Basic(A, B, str)); 1794 } else { 1795 PetscCall(MatCopy(a->A, b->A, str)); 1796 PetscCall(MatCopy(a->B, b->B, str)); 1797 } 1798 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 1799 PetscFunctionReturn(PETSC_SUCCESS); 1800 } 1801 1802 PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz) 1803 { 1804 PetscInt bs = Y->rmap->bs, m = Y->rmap->N / bs; 1805 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data; 1806 Mat_SeqBAIJ *y = (Mat_SeqBAIJ *)Y->data; 1807 1808 PetscFunctionBegin; 1809 PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz)); 1810 PetscFunctionReturn(PETSC_SUCCESS); 1811 } 1812 1813 static PetscErrorCode MatAXPY_MPIBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str) 1814 { 1815 Mat_MPIBAIJ *xx = (Mat_MPIBAIJ *)X->data, *yy = (Mat_MPIBAIJ *)Y->data; 1816 PetscBLASInt bnz, one = 1; 1817 Mat_SeqBAIJ *x, *y; 1818 PetscInt bs2 = Y->rmap->bs * Y->rmap->bs; 1819 1820 PetscFunctionBegin; 1821 if (str == SAME_NONZERO_PATTERN) { 1822 PetscScalar alpha = a; 1823 x = (Mat_SeqBAIJ *)xx->A->data; 1824 y = (Mat_SeqBAIJ *)yy->A->data; 1825 PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz)); 1826 PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one)); 1827 x = (Mat_SeqBAIJ *)xx->B->data; 1828 y = (Mat_SeqBAIJ *)yy->B->data; 1829 PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz)); 1830 PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one)); 1831 PetscCall(PetscObjectStateIncrease((PetscObject)Y)); 1832 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1833 PetscCall(MatAXPY_Basic(Y, a, X, str)); 1834 } else { 1835 Mat B; 1836 PetscInt *nnz_d, *nnz_o, bs = Y->rmap->bs; 1837 PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d)); 1838 PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o)); 1839 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B)); 1840 PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name)); 1841 PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N)); 1842 PetscCall(MatSetBlockSizesFromMats(B, Y, Y)); 1843 PetscCall(MatSetType(B, MATMPIBAIJ)); 1844 PetscCall(MatAXPYGetPreallocation_SeqBAIJ(yy->A, xx->A, nnz_d)); 1845 PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o)); 1846 PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, nnz_d, 0, nnz_o)); 1847 /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! 
*/ 1848 PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str)); 1849 PetscCall(MatHeaderMerge(Y, &B)); 1850 PetscCall(PetscFree(nnz_d)); 1851 PetscCall(PetscFree(nnz_o)); 1852 } 1853 PetscFunctionReturn(PETSC_SUCCESS); 1854 } 1855 1856 static PetscErrorCode MatConjugate_MPIBAIJ(Mat mat) 1857 { 1858 PetscFunctionBegin; 1859 if (PetscDefined(USE_COMPLEX)) { 1860 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)mat->data; 1861 1862 PetscCall(MatConjugate_SeqBAIJ(a->A)); 1863 PetscCall(MatConjugate_SeqBAIJ(a->B)); 1864 } 1865 PetscFunctionReturn(PETSC_SUCCESS); 1866 } 1867 1868 static PetscErrorCode MatRealPart_MPIBAIJ(Mat A) 1869 { 1870 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1871 1872 PetscFunctionBegin; 1873 PetscCall(MatRealPart(a->A)); 1874 PetscCall(MatRealPart(a->B)); 1875 PetscFunctionReturn(PETSC_SUCCESS); 1876 } 1877 1878 static PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A) 1879 { 1880 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1881 1882 PetscFunctionBegin; 1883 PetscCall(MatImaginaryPart(a->A)); 1884 PetscCall(MatImaginaryPart(a->B)); 1885 PetscFunctionReturn(PETSC_SUCCESS); 1886 } 1887 1888 static PetscErrorCode MatCreateSubMatrix_MPIBAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat) 1889 { 1890 IS iscol_local; 1891 PetscInt csize; 1892 1893 PetscFunctionBegin; 1894 PetscCall(ISGetLocalSize(iscol, &csize)); 1895 if (call == MAT_REUSE_MATRIX) { 1896 PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local)); 1897 PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 1898 } else { 1899 PetscCall(ISAllGather(iscol, &iscol_local)); 1900 } 1901 PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, isrow, iscol_local, csize, call, newmat, PETSC_FALSE)); 1902 if (call == MAT_INITIAL_MATRIX) { 1903 PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local)); 1904 PetscCall(ISDestroy(&iscol_local)); 1905 } 1906 PetscFunctionReturn(PETSC_SUCCESS); 1907 } 1908 1909 /* 1910 Not great since it makes two copies of the submatrix, first an SeqBAIJ 1911 in local and then by concatenating the local matrices the end result. 1912 Writing it directly would be much like MatCreateSubMatrices_MPIBAIJ(). 1913 This routine is used for BAIJ and SBAIJ matrices (unfortunate dependency). 1914 */ 1915 PetscErrorCode MatCreateSubMatrix_MPIBAIJ_Private(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat, PetscBool sym) 1916 { 1917 PetscMPIInt rank, size; 1918 PetscInt i, m, n, rstart, row, rend, nz, *cwork, j, bs; 1919 PetscInt *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal; 1920 Mat M, Mreuse; 1921 MatScalar *vwork, *aa; 1922 MPI_Comm comm; 1923 IS isrow_new, iscol_new; 1924 Mat_SeqBAIJ *aij; 1925 1926 PetscFunctionBegin; 1927 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 1928 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 1929 PetscCallMPI(MPI_Comm_size(comm, &size)); 1930 /* The compression and expansion should be avoided. 
Doesn't point 1931 out errors, might change the indices, hence buggy */ 1932 PetscCall(ISCompressIndicesGeneral(mat->rmap->N, mat->rmap->n, mat->rmap->bs, 1, &isrow, &isrow_new)); 1933 if (isrow == iscol) { 1934 iscol_new = isrow_new; 1935 PetscCall(PetscObjectReference((PetscObject)iscol_new)); 1936 } else PetscCall(ISCompressIndicesGeneral(mat->cmap->N, mat->cmap->n, mat->cmap->bs, 1, &iscol, &iscol_new)); 1937 1938 if (call == MAT_REUSE_MATRIX) { 1939 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse)); 1940 PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 1941 PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_REUSE_MATRIX, &Mreuse, sym)); 1942 } else { 1943 PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_INITIAL_MATRIX, &Mreuse, sym)); 1944 } 1945 PetscCall(ISDestroy(&isrow_new)); 1946 PetscCall(ISDestroy(&iscol_new)); 1947 /* 1948 m - number of local rows 1949 n - number of columns (same on all processors) 1950 rstart - first row in new global matrix generated 1951 */ 1952 PetscCall(MatGetBlockSize(mat, &bs)); 1953 PetscCall(MatGetSize(Mreuse, &m, &n)); 1954 m = m / bs; 1955 n = n / bs; 1956 1957 if (call == MAT_INITIAL_MATRIX) { 1958 aij = (Mat_SeqBAIJ *)Mreuse->data; 1959 ii = aij->i; 1960 jj = aij->j; 1961 1962 /* 1963 Determine the number of non-zeros in the diagonal and off-diagonal 1964 portions of the matrix in order to do correct preallocation 1965 */ 1966 1967 /* first get start and end of "diagonal" columns */ 1968 if (csize == PETSC_DECIDE) { 1969 PetscCall(ISGetSize(isrow, &mglobal)); 1970 if (mglobal == n * bs) { /* square matrix */ 1971 nlocal = m; 1972 } else { 1973 nlocal = n / size + ((n % size) > rank); 1974 } 1975 } else { 1976 nlocal = csize / bs; 1977 } 1978 PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm)); 1979 rstart = rend - nlocal; 1980 PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n); 1981 1982 /* next, compute all the lengths */ 1983 PetscCall(PetscMalloc2(m + 1, &dlens, m + 1, &olens)); 1984 for (i = 0; i < m; i++) { 1985 jend = ii[i + 1] - ii[i]; 1986 olen = 0; 1987 dlen = 0; 1988 for (j = 0; j < jend; j++) { 1989 if (*jj < rstart || *jj >= rend) olen++; 1990 else dlen++; 1991 jj++; 1992 } 1993 olens[i] = olen; 1994 dlens[i] = dlen; 1995 } 1996 PetscCall(MatCreate(comm, &M)); 1997 PetscCall(MatSetSizes(M, bs * m, bs * nlocal, PETSC_DECIDE, bs * n)); 1998 PetscCall(MatSetType(M, sym ? ((PetscObject)mat)->type_name : MATMPIBAIJ)); 1999 PetscCall(MatMPIBAIJSetPreallocation(M, bs, 0, dlens, 0, olens)); 2000 PetscCall(MatMPISBAIJSetPreallocation(M, bs, 0, dlens, 0, olens)); 2001 PetscCall(PetscFree2(dlens, olens)); 2002 } else { 2003 PetscInt ml, nl; 2004 2005 M = *newmat; 2006 PetscCall(MatGetLocalSize(M, &ml, &nl)); 2007 PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request"); 2008 PetscCall(MatZeroEntries(M)); 2009 /* 2010 The next two lines are needed so we may call MatSetValuesBlocked_MPIBAIJ() below directly, 2011 rather than the slower MatSetValuesBlocked().
2012 */ 2013 M->was_assembled = PETSC_TRUE; 2014 M->assembled = PETSC_FALSE; 2015 } 2016 PetscCall(MatSetOption(M, MAT_ROW_ORIENTED, PETSC_FALSE)); 2017 PetscCall(MatGetOwnershipRange(M, &rstart, &rend)); 2018 aij = (Mat_SeqBAIJ *)Mreuse->data; 2019 ii = aij->i; 2020 jj = aij->j; 2021 aa = aij->a; 2022 for (i = 0; i < m; i++) { 2023 row = rstart / bs + i; 2024 nz = ii[i + 1] - ii[i]; 2025 cwork = jj; 2026 jj = PetscSafePointerPlusOffset(jj, nz); 2027 vwork = aa; 2028 aa = PetscSafePointerPlusOffset(aa, nz * bs * bs); 2029 PetscCall(MatSetValuesBlocked_MPIBAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES)); 2030 } 2031 2032 PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY)); 2033 PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY)); 2034 *newmat = M; 2035 2036 /* save submatrix used in processor for next request */ 2037 if (call == MAT_INITIAL_MATRIX) { 2038 PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse)); 2039 PetscCall(PetscObjectDereference((PetscObject)Mreuse)); 2040 } 2041 PetscFunctionReturn(PETSC_SUCCESS); 2042 } 2043 2044 static PetscErrorCode MatPermute_MPIBAIJ(Mat A, IS rowp, IS colp, Mat *B) 2045 { 2046 MPI_Comm comm, pcomm; 2047 PetscInt clocal_size, nrows; 2048 const PetscInt *rows; 2049 PetscMPIInt size; 2050 IS crowp, lcolp; 2051 2052 PetscFunctionBegin; 2053 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 2054 /* make a collective version of 'rowp' */ 2055 PetscCall(PetscObjectGetComm((PetscObject)rowp, &pcomm)); 2056 if (pcomm == comm) { 2057 crowp = rowp; 2058 } else { 2059 PetscCall(ISGetSize(rowp, &nrows)); 2060 PetscCall(ISGetIndices(rowp, &rows)); 2061 PetscCall(ISCreateGeneral(comm, nrows, rows, PETSC_COPY_VALUES, &crowp)); 2062 PetscCall(ISRestoreIndices(rowp, &rows)); 2063 } 2064 PetscCall(ISSetPermutation(crowp)); 2065 /* make a local version of 'colp' */ 2066 PetscCall(PetscObjectGetComm((PetscObject)colp, &pcomm)); 2067 PetscCallMPI(MPI_Comm_size(pcomm, &size)); 2068 if (size == 1) { 2069 lcolp = colp; 2070 } else { 2071 PetscCall(ISAllGather(colp, &lcolp)); 2072 } 2073 PetscCall(ISSetPermutation(lcolp)); 2074 /* now we just get the submatrix */ 2075 PetscCall(MatGetLocalSize(A, NULL, &clocal_size)); 2076 PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(A, crowp, lcolp, clocal_size, MAT_INITIAL_MATRIX, B, PETSC_FALSE)); 2077 /* clean up */ 2078 if (pcomm != comm) PetscCall(ISDestroy(&crowp)); 2079 if (size > 1) PetscCall(ISDestroy(&lcolp)); 2080 PetscFunctionReturn(PETSC_SUCCESS); 2081 } 2082 2083 static PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[]) 2084 { 2085 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 2086 Mat_SeqBAIJ *B = (Mat_SeqBAIJ *)baij->B->data; 2087 2088 PetscFunctionBegin; 2089 if (nghosts) *nghosts = B->nbs; 2090 if (ghosts) *ghosts = baij->garray; 2091 PetscFunctionReturn(PETSC_SUCCESS); 2092 } 2093 2094 static PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A, Mat *newmat) 2095 { 2096 Mat B; 2097 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2098 Mat_SeqBAIJ *ad = (Mat_SeqBAIJ *)a->A->data, *bd = (Mat_SeqBAIJ *)a->B->data; 2099 Mat_SeqAIJ *b; 2100 PetscMPIInt size, rank, *recvcounts = NULL, *displs = NULL; 2101 PetscInt sendcount, i, *rstarts = A->rmap->range, n, cnt, j, bs = A->rmap->bs; 2102 PetscInt m, *garray = a->garray, *lens, *jsendbuf, *a_jsendbuf, *b_jsendbuf; 2103 2104 PetscFunctionBegin; 2105 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 2106 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank)); 2107 2108 /* Tell every 
processor the number of nonzeros per row */ 2109 PetscCall(PetscMalloc1(A->rmap->N / bs, &lens)); 2110 for (i = A->rmap->rstart / bs; i < A->rmap->rend / bs; i++) lens[i] = ad->i[i - A->rmap->rstart / bs + 1] - ad->i[i - A->rmap->rstart / bs] + bd->i[i - A->rmap->rstart / bs + 1] - bd->i[i - A->rmap->rstart / bs]; 2111 PetscCall(PetscMalloc1(2 * size, &recvcounts)); 2112 displs = recvcounts + size; 2113 for (i = 0; i < size; i++) { 2114 PetscCall(PetscMPIIntCast(A->rmap->range[i + 1] / bs - A->rmap->range[i] / bs, &recvcounts[i])); 2115 PetscCall(PetscMPIIntCast(A->rmap->range[i] / bs, &displs[i])); 2116 } 2117 PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, lens, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A))); 2118 /* Create the sequential matrix of the same type as the local block diagonal */ 2119 PetscCall(MatCreate(PETSC_COMM_SELF, &B)); 2120 PetscCall(MatSetSizes(B, A->rmap->N / bs, A->cmap->N / bs, PETSC_DETERMINE, PETSC_DETERMINE)); 2121 PetscCall(MatSetType(B, MATSEQAIJ)); 2122 PetscCall(MatSeqAIJSetPreallocation(B, 0, lens)); 2123 b = (Mat_SeqAIJ *)B->data; 2124 2125 /* Copy my part of matrix column indices over */ 2126 sendcount = ad->nz + bd->nz; 2127 jsendbuf = b->j + b->i[rstarts[rank] / bs]; 2128 a_jsendbuf = ad->j; 2129 b_jsendbuf = bd->j; 2130 n = A->rmap->rend / bs - A->rmap->rstart / bs; 2131 cnt = 0; 2132 for (i = 0; i < n; i++) { 2133 /* put in lower diagonal portion */ 2134 m = bd->i[i + 1] - bd->i[i]; 2135 while (m > 0) { 2136 /* is it above diagonal (in bd (compressed) numbering) */ 2137 if (garray[*b_jsendbuf] > A->rmap->rstart / bs + i) break; 2138 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2139 m--; 2140 } 2141 2142 /* put in diagonal portion */ 2143 for (j = ad->i[i]; j < ad->i[i + 1]; j++) jsendbuf[cnt++] = A->rmap->rstart / bs + *a_jsendbuf++; 2144 2145 /* put in upper diagonal portion */ 2146 while (m-- > 0) jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2147 } 2148 PetscCheck(cnt == sendcount, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Corrupted PETSc matrix: nz given %" PetscInt_FMT " actual nz %" PetscInt_FMT, sendcount, cnt); 2149 2150 /* Gather all column indices to all processors */ 2151 for (i = 0; i < size; i++) { 2152 recvcounts[i] = 0; 2153 for (j = A->rmap->range[i] / bs; j < A->rmap->range[i + 1] / bs; j++) recvcounts[i] += lens[j]; 2154 } 2155 displs[0] = 0; 2156 for (i = 1; i < size; i++) displs[i] = displs[i - 1] + recvcounts[i - 1]; 2157 PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, b->j, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A))); 2158 /* Assemble the matrix into usable form (note numerical values not yet set) */ 2159 /* set the b->ilen (length of each row) values */ 2160 PetscCall(PetscArraycpy(b->ilen, lens, A->rmap->N / bs)); 2161 /* set the b->i indices */ 2162 b->i[0] = 0; 2163 for (i = 1; i <= A->rmap->N / bs; i++) b->i[i] = b->i[i - 1] + lens[i - 1]; 2164 PetscCall(PetscFree(lens)); 2165 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 2166 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 2167 PetscCall(PetscFree(recvcounts)); 2168 2169 PetscCall(MatPropagateSymmetryOptions(A, B)); 2170 *newmat = B; 2171 PetscFunctionReturn(PETSC_SUCCESS); 2172 } 2173 2174 static PetscErrorCode MatSOR_MPIBAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) 2175 { 2176 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data; 2177 Vec bb1 = NULL; 2178 2179 PetscFunctionBegin; 2180 if (flag == SOR_APPLY_UPPER) { 2181 
PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2182 PetscFunctionReturn(PETSC_SUCCESS); 2183 } 2184 2185 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) PetscCall(VecDuplicate(bb, &bb1)); 2186 2187 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2188 if (flag & SOR_ZERO_INITIAL_GUESS) { 2189 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2190 its--; 2191 } 2192 2193 while (its--) { 2194 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2195 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2196 2197 /* update rhs: bb1 = bb - B*x */ 2198 PetscCall(VecScale(mat->lvec, -1.0)); 2199 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 2200 2201 /* local sweep */ 2202 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx)); 2203 } 2204 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 2205 if (flag & SOR_ZERO_INITIAL_GUESS) { 2206 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2207 its--; 2208 } 2209 while (its--) { 2210 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2211 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2212 2213 /* update rhs: bb1 = bb - B*x */ 2214 PetscCall(VecScale(mat->lvec, -1.0)); 2215 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 2216 2217 /* local sweep */ 2218 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx)); 2219 } 2220 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 2221 if (flag & SOR_ZERO_INITIAL_GUESS) { 2222 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2223 its--; 2224 } 2225 while (its--) { 2226 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2227 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2228 2229 /* update rhs: bb1 = bb - B*x */ 2230 PetscCall(VecScale(mat->lvec, -1.0)); 2231 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 2232 2233 /* local sweep */ 2234 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx)); 2235 } 2236 } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel version of SOR requested not supported"); 2237 2238 PetscCall(VecDestroy(&bb1)); 2239 PetscFunctionReturn(PETSC_SUCCESS); 2240 } 2241 2242 static PetscErrorCode MatGetColumnReductions_MPIBAIJ(Mat A, PetscInt type, PetscReal *reductions) 2243 { 2244 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)A->data; 2245 PetscInt m, N, i, *garray = aij->garray; 2246 PetscInt ib, jb, bs = A->rmap->bs; 2247 Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ *)aij->A->data; 2248 MatScalar *a_val = a_aij->a; 2249 Mat_SeqBAIJ *b_aij = (Mat_SeqBAIJ *)aij->B->data; 2250 MatScalar *b_val = b_aij->a; 2251 PetscReal *work; 2252 2253 PetscFunctionBegin; 2254 PetscCall(MatGetSize(A, &m, &N)); 2255 PetscCall(PetscCalloc1(N, &work)); 2256 if (type == NORM_2) { 2257 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2258 for (jb = 0; jb < bs; jb++) { 2259 for (ib = 0; ib < bs; ib++) { 2260 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val); 2261 a_val++; 2262 } 2263 } 2264 } 2265 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2266 for (jb = 0; jb < bs; jb++) { 2267 for (ib = 0; ib < bs; ib++) { 2268 work[garray[b_aij->j[i]] * bs + jb] += 
PetscAbsScalar(*b_val * *b_val); 2269 b_val++; 2270 } 2271 } 2272 } 2273 } else if (type == NORM_1) { 2274 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2275 for (jb = 0; jb < bs; jb++) { 2276 for (ib = 0; ib < bs; ib++) { 2277 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val); 2278 a_val++; 2279 } 2280 } 2281 } 2282 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2283 for (jb = 0; jb < bs; jb++) { 2284 for (ib = 0; ib < bs; ib++) { 2285 work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val); 2286 b_val++; 2287 } 2288 } 2289 } 2290 } else if (type == NORM_INFINITY) { 2291 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2292 for (jb = 0; jb < bs; jb++) { 2293 for (ib = 0; ib < bs; ib++) { 2294 PetscInt col = A->cmap->rstart + a_aij->j[i] * bs + jb; 2295 work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]); 2296 a_val++; 2297 } 2298 } 2299 } 2300 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2301 for (jb = 0; jb < bs; jb++) { 2302 for (ib = 0; ib < bs; ib++) { 2303 PetscInt col = garray[b_aij->j[i]] * bs + jb; 2304 work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]); 2305 b_val++; 2306 } 2307 } 2308 } 2309 } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) { 2310 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2311 for (jb = 0; jb < bs; jb++) { 2312 for (ib = 0; ib < bs; ib++) { 2313 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val); 2314 a_val++; 2315 } 2316 } 2317 } 2318 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2319 for (jb = 0; jb < bs; jb++) { 2320 for (ib = 0; ib < bs; ib++) { 2321 work[garray[b_aij->j[i]] * bs + jb] += PetscRealPart(*b_val); 2322 b_val++; 2323 } 2324 } 2325 } 2326 } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) { 2327 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2328 for (jb = 0; jb < bs; jb++) { 2329 for (ib = 0; ib < bs; ib++) { 2330 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val); 2331 a_val++; 2332 } 2333 } 2334 } 2335 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2336 for (jb = 0; jb < bs; jb++) { 2337 for (ib = 0; ib < bs; ib++) { 2338 work[garray[b_aij->j[i]] * bs + jb] += PetscImaginaryPart(*b_val); 2339 b_val++; 2340 } 2341 } 2342 } 2343 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type"); 2344 if (type == NORM_INFINITY) { 2345 PetscCallMPI(MPIU_Allreduce(work, reductions, N, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A))); 2346 } else { 2347 PetscCallMPI(MPIU_Allreduce(work, reductions, N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A))); 2348 } 2349 PetscCall(PetscFree(work)); 2350 if (type == NORM_2) { 2351 for (i = 0; i < N; i++) reductions[i] = PetscSqrtReal(reductions[i]); 2352 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 2353 for (i = 0; i < N; i++) reductions[i] /= m; 2354 } 2355 PetscFunctionReturn(PETSC_SUCCESS); 2356 } 2357 2358 static PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A, const PetscScalar **values) 2359 { 2360 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2361 2362 PetscFunctionBegin; 2363 PetscCall(MatInvertBlockDiagonal(a->A, values)); 2364 A->factorerrortype = a->A->factorerrortype; 2365 A->factorerror_zeropivot_value = a->A->factorerror_zeropivot_value; 2366 A->factorerror_zeropivot_row = a->A->factorerror_zeropivot_row; 2367 
PetscFunctionReturn(PETSC_SUCCESS); 2368 } 2369 2370 static PetscErrorCode MatShift_MPIBAIJ(Mat Y, PetscScalar a) 2371 { 2372 Mat_MPIBAIJ *maij = (Mat_MPIBAIJ *)Y->data; 2373 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)maij->A->data; 2374 2375 PetscFunctionBegin; 2376 if (!Y->preallocated) { 2377 PetscCall(MatMPIBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL, 0, NULL)); 2378 } else if (!aij->nz) { 2379 PetscInt nonew = aij->nonew; 2380 PetscCall(MatSeqBAIJSetPreallocation(maij->A, Y->rmap->bs, 1, NULL)); 2381 aij->nonew = nonew; 2382 } 2383 PetscCall(MatShift_Basic(Y, a)); 2384 PetscFunctionReturn(PETSC_SUCCESS); 2385 } 2386 2387 static PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A, Mat *a) 2388 { 2389 PetscFunctionBegin; 2390 *a = ((Mat_MPIBAIJ *)A->data)->A; 2391 PetscFunctionReturn(PETSC_SUCCESS); 2392 } 2393 2394 static PetscErrorCode MatEliminateZeros_MPIBAIJ(Mat A, PetscBool keep) 2395 { 2396 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2397 2398 PetscFunctionBegin; 2399 PetscCall(MatEliminateZeros_SeqBAIJ(a->A, keep)); // possibly keep zero diagonal coefficients 2400 PetscCall(MatEliminateZeros_SeqBAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients 2401 PetscFunctionReturn(PETSC_SUCCESS); 2402 } 2403 2404 static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ, 2405 MatGetRow_MPIBAIJ, 2406 MatRestoreRow_MPIBAIJ, 2407 MatMult_MPIBAIJ, 2408 /* 4*/ MatMultAdd_MPIBAIJ, 2409 MatMultTranspose_MPIBAIJ, 2410 MatMultTransposeAdd_MPIBAIJ, 2411 NULL, 2412 NULL, 2413 NULL, 2414 /*10*/ NULL, 2415 NULL, 2416 NULL, 2417 MatSOR_MPIBAIJ, 2418 MatTranspose_MPIBAIJ, 2419 /*15*/ MatGetInfo_MPIBAIJ, 2420 MatEqual_MPIBAIJ, 2421 MatGetDiagonal_MPIBAIJ, 2422 MatDiagonalScale_MPIBAIJ, 2423 MatNorm_MPIBAIJ, 2424 /*20*/ MatAssemblyBegin_MPIBAIJ, 2425 MatAssemblyEnd_MPIBAIJ, 2426 MatSetOption_MPIBAIJ, 2427 MatZeroEntries_MPIBAIJ, 2428 /*24*/ MatZeroRows_MPIBAIJ, 2429 NULL, 2430 NULL, 2431 NULL, 2432 NULL, 2433 /*29*/ MatSetUp_MPI_Hash, 2434 NULL, 2435 NULL, 2436 MatGetDiagonalBlock_MPIBAIJ, 2437 NULL, 2438 /*34*/ MatDuplicate_MPIBAIJ, 2439 NULL, 2440 NULL, 2441 NULL, 2442 NULL, 2443 /*39*/ MatAXPY_MPIBAIJ, 2444 MatCreateSubMatrices_MPIBAIJ, 2445 MatIncreaseOverlap_MPIBAIJ, 2446 MatGetValues_MPIBAIJ, 2447 MatCopy_MPIBAIJ, 2448 /*44*/ NULL, 2449 MatScale_MPIBAIJ, 2450 MatShift_MPIBAIJ, 2451 NULL, 2452 MatZeroRowsColumns_MPIBAIJ, 2453 /*49*/ NULL, 2454 NULL, 2455 NULL, 2456 NULL, 2457 NULL, 2458 /*54*/ MatFDColoringCreate_MPIXAIJ, 2459 NULL, 2460 MatSetUnfactored_MPIBAIJ, 2461 MatPermute_MPIBAIJ, 2462 MatSetValuesBlocked_MPIBAIJ, 2463 /*59*/ MatCreateSubMatrix_MPIBAIJ, 2464 MatDestroy_MPIBAIJ, 2465 MatView_MPIBAIJ, 2466 NULL, 2467 NULL, 2468 /*64*/ NULL, 2469 NULL, 2470 NULL, 2471 NULL, 2472 MatGetRowMaxAbs_MPIBAIJ, 2473 /*69*/ NULL, 2474 NULL, 2475 NULL, 2476 MatFDColoringApply_BAIJ, 2477 NULL, 2478 /*74*/ NULL, 2479 NULL, 2480 NULL, 2481 NULL, 2482 MatLoad_MPIBAIJ, 2483 /*79*/ NULL, 2484 NULL, 2485 NULL, 2486 NULL, 2487 NULL, 2488 /*84*/ NULL, 2489 NULL, 2490 NULL, 2491 NULL, 2492 NULL, 2493 /*89*/ NULL, 2494 NULL, 2495 NULL, 2496 NULL, 2497 MatConjugate_MPIBAIJ, 2498 /*94*/ NULL, 2499 NULL, 2500 MatRealPart_MPIBAIJ, 2501 MatImaginaryPart_MPIBAIJ, 2502 NULL, 2503 /*99*/ NULL, 2504 NULL, 2505 NULL, 2506 NULL, 2507 NULL, 2508 /*104*/ MatGetSeqNonzeroStructure_MPIBAIJ, 2509 NULL, 2510 MatGetGhosts_MPIBAIJ, 2511 NULL, 2512 NULL, 2513 /*109*/ NULL, 2514 NULL, 2515 NULL, 2516 NULL, 2517 MatGetMultiProcBlock_MPIBAIJ, 2518 /*114*/ NULL, 2519 MatGetColumnReductions_MPIBAIJ, 2520 MatInvertBlockDiagonal_MPIBAIJ, 
2521 NULL, 2522 NULL, 2523 /*119*/ NULL, 2524 NULL, 2525 NULL, 2526 NULL, 2527 NULL, 2528 /*124*/ NULL, 2529 NULL, 2530 MatSetBlockSizes_Default, 2531 NULL, 2532 MatFDColoringSetUp_MPIXAIJ, 2533 /*129*/ NULL, 2534 MatCreateMPIMatConcatenateSeqMat_MPIBAIJ, 2535 NULL, 2536 NULL, 2537 NULL, 2538 /*134*/ NULL, 2539 NULL, 2540 MatEliminateZeros_MPIBAIJ, 2541 MatGetRowSumAbs_MPIBAIJ, 2542 NULL, 2543 /*139*/ NULL, 2544 NULL, 2545 MatCopyHashToXAIJ_MPI_Hash, 2546 NULL, 2547 NULL}; 2548 2549 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *); 2550 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *); 2551 2552 static PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[]) 2553 { 2554 PetscInt m, rstart, cstart, cend; 2555 PetscInt i, j, dlen, olen, nz, nz_max = 0, *d_nnz = NULL, *o_nnz = NULL; 2556 const PetscInt *JJ = NULL; 2557 PetscScalar *values = NULL; 2558 PetscBool roworiented = ((Mat_MPIBAIJ *)B->data)->roworiented; 2559 PetscBool nooffprocentries; 2560 2561 PetscFunctionBegin; 2562 PetscCall(PetscLayoutSetBlockSize(B->rmap, bs)); 2563 PetscCall(PetscLayoutSetBlockSize(B->cmap, bs)); 2564 PetscCall(PetscLayoutSetUp(B->rmap)); 2565 PetscCall(PetscLayoutSetUp(B->cmap)); 2566 PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 2567 m = B->rmap->n / bs; 2568 rstart = B->rmap->rstart / bs; 2569 cstart = B->cmap->rstart / bs; 2570 cend = B->cmap->rend / bs; 2571 2572 PetscCheck(!ii[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]); 2573 PetscCall(PetscMalloc2(m, &d_nnz, m, &o_nnz)); 2574 for (i = 0; i < m; i++) { 2575 nz = ii[i + 1] - ii[i]; 2576 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz); 2577 nz_max = PetscMax(nz_max, nz); 2578 dlen = 0; 2579 olen = 0; 2580 JJ = jj + ii[i]; 2581 for (j = 0; j < nz; j++) { 2582 if (*JJ < cstart || *JJ >= cend) olen++; 2583 else dlen++; 2584 JJ++; 2585 } 2586 d_nnz[i] = dlen; 2587 o_nnz[i] = olen; 2588 } 2589 PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, d_nnz, 0, o_nnz)); 2590 PetscCall(PetscFree2(d_nnz, o_nnz)); 2591 2592 values = (PetscScalar *)V; 2593 if (!values) PetscCall(PetscCalloc1(bs * bs * nz_max, &values)); 2594 for (i = 0; i < m; i++) { 2595 PetscInt row = i + rstart; 2596 PetscInt ncols = ii[i + 1] - ii[i]; 2597 const PetscInt *icols = jj + ii[i]; 2598 if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */ 2599 const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0); 2600 PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, ncols, icols, svals, INSERT_VALUES)); 2601 } else { /* block ordering does not match so we can only insert one block at a time. */ 2602 PetscInt j; 2603 for (j = 0; j < ncols; j++) { 2604 const PetscScalar *svals = values + (V ? 
(bs * bs * (ii[i] + j)) : 0); 2605 PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, 1, &icols[j], svals, INSERT_VALUES)); 2606 } 2607 } 2608 } 2609 2610 if (!V) PetscCall(PetscFree(values)); 2611 nooffprocentries = B->nooffprocentries; 2612 B->nooffprocentries = PETSC_TRUE; 2613 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 2614 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 2615 B->nooffprocentries = nooffprocentries; 2616 2617 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 2618 PetscFunctionReturn(PETSC_SUCCESS); 2619 } 2620 2621 /*@C 2622 MatMPIBAIJSetPreallocationCSR - Creates a sparse parallel matrix in `MATBAIJ` format using the given nonzero structure and (optional) numerical values 2623 2624 Collective 2625 2626 Input Parameters: 2627 + B - the matrix 2628 . bs - the block size 2629 . i - the indices into `j` for the start of each local row (starts with zero) 2630 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2631 - v - optional values in the matrix, use `NULL` if not provided 2632 2633 Level: advanced 2634 2635 Notes: 2636 The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc; 2637 thus you CANNOT change the matrix entries by changing the values of `v` after you have 2638 called this routine. 2639 2640 The order of the entries in values is specified by the `MatOption` `MAT_ROW_ORIENTED`. For example, C programs 2641 may want to use the default `MAT_ROW_ORIENTED` with value `PETSC_TRUE` and use an array v[nnz][bs][bs] where the second index is 2642 over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 2643 `MAT_ROW_ORIENTED` with value `PETSC_FALSE` and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 2644 block column and the second index is over columns within a block. 
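   For instance, a minimal sketch of typical usage from C (the communicator `comm`, the local sizes `m` and `n` (each divisible by `bs`), and the CSR arrays `ii`, `jj`, and `vals` are illustrative placeholders, not part of this API):
.vb
   Mat B;

   PetscCall(MatCreate(comm, &B));
   PetscCall(MatSetSizes(B, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
   PetscCall(MatSetType(B, MATMPIBAIJ));
   PetscCall(MatMPIBAIJSetPreallocationCSR(B, bs, ii, jj, vals)); /* pass NULL for vals to set only the nonzero structure */
.ve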
2645 2646 Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well 2647 2648 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MATMPIAIJ`, `MatCreateMPIBAIJWithArrays()`, `MATMPIBAIJ` 2649 @*/ 2650 PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[]) 2651 { 2652 PetscFunctionBegin; 2653 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 2654 PetscValidType(B, 1); 2655 PetscValidLogicalCollectiveInt(B, bs, 2); 2656 PetscTryMethod(B, "MatMPIBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v)); 2657 PetscFunctionReturn(PETSC_SUCCESS); 2658 } 2659 2660 PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz) 2661 { 2662 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 2663 PetscInt i; 2664 PetscMPIInt size; 2665 2666 PetscFunctionBegin; 2667 if (B->hash_active) { 2668 B->ops[0] = b->cops; 2669 B->hash_active = PETSC_FALSE; 2670 } 2671 if (!B->preallocated) PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), bs, &B->bstash)); 2672 PetscCall(MatSetBlockSize(B, bs)); 2673 PetscCall(PetscLayoutSetUp(B->rmap)); 2674 PetscCall(PetscLayoutSetUp(B->cmap)); 2675 PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 2676 2677 if (d_nnz) { 2678 for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(d_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "d_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, d_nnz[i]); 2679 } 2680 if (o_nnz) { 2681 for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(o_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "o_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, o_nnz[i]); 2682 } 2683 2684 b->bs2 = bs * bs; 2685 b->mbs = B->rmap->n / bs; 2686 b->nbs = B->cmap->n / bs; 2687 b->Mbs = B->rmap->N / bs; 2688 b->Nbs = B->cmap->N / bs; 2689 2690 for (i = 0; i <= b->size; i++) b->rangebs[i] = B->rmap->range[i] / bs; 2691 b->rstartbs = B->rmap->rstart / bs; 2692 b->rendbs = B->rmap->rend / bs; 2693 b->cstartbs = B->cmap->rstart / bs; 2694 b->cendbs = B->cmap->rend / bs; 2695 2696 #if defined(PETSC_USE_CTABLE) 2697 PetscCall(PetscHMapIDestroy(&b->colmap)); 2698 #else 2699 PetscCall(PetscFree(b->colmap)); 2700 #endif 2701 PetscCall(PetscFree(b->garray)); 2702 PetscCall(VecDestroy(&b->lvec)); 2703 PetscCall(VecScatterDestroy(&b->Mvctx)); 2704 2705 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 2706 2707 MatSeqXAIJGetOptions_Private(b->B); 2708 PetscCall(MatDestroy(&b->B)); 2709 PetscCall(MatCreate(PETSC_COMM_SELF, &b->B)); 2710 PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? 
B->cmap->N : 0)); 2711 PetscCall(MatSetType(b->B, MATSEQBAIJ)); 2712 MatSeqXAIJRestoreOptions_Private(b->B); 2713 2714 MatSeqXAIJGetOptions_Private(b->A); 2715 PetscCall(MatDestroy(&b->A)); 2716 PetscCall(MatCreate(PETSC_COMM_SELF, &b->A)); 2717 PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n)); 2718 PetscCall(MatSetType(b->A, MATSEQBAIJ)); 2719 MatSeqXAIJRestoreOptions_Private(b->A); 2720 2721 PetscCall(MatSeqBAIJSetPreallocation(b->A, bs, d_nz, d_nnz)); 2722 PetscCall(MatSeqBAIJSetPreallocation(b->B, bs, o_nz, o_nnz)); 2723 B->preallocated = PETSC_TRUE; 2724 B->was_assembled = PETSC_FALSE; 2725 B->assembled = PETSC_FALSE; 2726 PetscFunctionReturn(PETSC_SUCCESS); 2727 } 2728 2729 extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat, Vec); 2730 extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat, PetscReal); 2731 2732 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype, MatReuse reuse, Mat *adj) 2733 { 2734 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 2735 Mat_SeqBAIJ *d = (Mat_SeqBAIJ *)b->A->data, *o = (Mat_SeqBAIJ *)b->B->data; 2736 PetscInt M = B->rmap->n / B->rmap->bs, i, *ii, *jj, cnt, j, k, rstart = B->rmap->rstart / B->rmap->bs; 2737 const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray; 2738 2739 PetscFunctionBegin; 2740 PetscCall(PetscMalloc1(M + 1, &ii)); 2741 ii[0] = 0; 2742 for (i = 0; i < M; i++) { 2743 PetscCheck((id[i + 1] - id[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, id[i], id[i + 1]); 2744 PetscCheck((io[i + 1] - io[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, io[i], io[i + 1]); 2745 ii[i + 1] = ii[i] + id[i + 1] - id[i] + io[i + 1] - io[i]; 2746 /* remove one from count of matrix has diagonal */ 2747 for (j = id[i]; j < id[i + 1]; j++) { 2748 if (jd[j] == i) { 2749 ii[i + 1]--; 2750 break; 2751 } 2752 } 2753 } 2754 PetscCall(PetscMalloc1(ii[M], &jj)); 2755 cnt = 0; 2756 for (i = 0; i < M; i++) { 2757 for (j = io[i]; j < io[i + 1]; j++) { 2758 if (garray[jo[j]] > rstart) break; 2759 jj[cnt++] = garray[jo[j]]; 2760 } 2761 for (k = id[i]; k < id[i + 1]; k++) { 2762 if (jd[k] != i) jj[cnt++] = rstart + jd[k]; 2763 } 2764 for (; j < io[i + 1]; j++) jj[cnt++] = garray[jo[j]]; 2765 } 2766 PetscCall(MatCreateMPIAdj(PetscObjectComm((PetscObject)B), M, B->cmap->N / B->rmap->bs, ii, jj, NULL, adj)); 2767 PetscFunctionReturn(PETSC_SUCCESS); 2768 } 2769 2770 #include <../src/mat/impls/aij/mpi/mpiaij.h> 2771 2772 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType, MatReuse, Mat *); 2773 2774 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 2775 { 2776 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2777 Mat_MPIAIJ *b; 2778 Mat B; 2779 2780 PetscFunctionBegin; 2781 PetscCheck(A->assembled, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Matrix must be assembled"); 2782 2783 if (reuse == MAT_REUSE_MATRIX) { 2784 B = *newmat; 2785 } else { 2786 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 2787 PetscCall(MatSetType(B, MATMPIAIJ)); 2788 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 2789 PetscCall(MatSetBlockSizes(B, A->rmap->bs, A->cmap->bs)); 2790 PetscCall(MatSeqAIJSetPreallocation(B, 0, NULL)); 2791 PetscCall(MatMPIAIJSetPreallocation(B, 0, NULL, 0, NULL)); 2792 } 2793 b = (Mat_MPIAIJ *)B->data; 2794 2795 if (reuse == MAT_REUSE_MATRIX) { 2796 
PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_REUSE_MATRIX, &b->A)); 2797 PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_REUSE_MATRIX, &b->B)); 2798 } else { 2799 PetscInt *garray = a->garray; 2800 Mat_SeqAIJ *bB; 2801 PetscInt bs, nnz; 2802 PetscCall(MatDestroy(&b->A)); 2803 PetscCall(MatDestroy(&b->B)); 2804 /* just clear out the data structure */ 2805 PetscCall(MatDisAssemble_MPIAIJ(B, PETSC_FALSE)); 2806 PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A)); 2807 PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B)); 2808 2809 /* Global numbering for b->B columns */ 2810 bB = (Mat_SeqAIJ *)b->B->data; 2811 bs = A->rmap->bs; 2812 nnz = bB->i[A->rmap->n]; 2813 for (PetscInt k = 0; k < nnz; k++) { 2814 PetscInt bj = bB->j[k] / bs; 2815 PetscInt br = bB->j[k] % bs; 2816 bB->j[k] = garray[bj] * bs + br; 2817 } 2818 } 2819 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 2820 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 2821 2822 if (reuse == MAT_INPLACE_MATRIX) { 2823 PetscCall(MatHeaderReplace(A, &B)); 2824 } else { 2825 *newmat = B; 2826 } 2827 PetscFunctionReturn(PETSC_SUCCESS); 2828 } 2829 2830 /*MC 2831 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 2832 2833 Options Database Keys: 2834 + -mat_type mpibaij - sets the matrix type to `MATMPIBAIJ` during a call to `MatSetFromOptions()` 2835 . -mat_block_size <bs> - set the blocksize used to store the matrix 2836 . -mat_baij_mult_version version - indicate the version of the matrix-vector product to use (0 often indicates using BLAS) 2837 - -mat_use_hash_table <fact> - set hash table factor 2838 2839 Level: beginner 2840 2841 Note: 2842 `MatSetOption(A, MAT_STRUCTURE_ONLY, PETSC_TRUE)` may be called for this matrix type. 
In this no 2843 space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored 2844 2845 .seealso: `Mat`, `MATBAIJ`, `MATSEQBAIJ`, `MatCreateBAIJ` 2846 M*/ 2847 2848 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat, MatType, MatReuse, Mat *); 2849 2850 PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B) 2851 { 2852 Mat_MPIBAIJ *b; 2853 PetscBool flg = PETSC_FALSE; 2854 2855 PetscFunctionBegin; 2856 PetscCall(PetscNew(&b)); 2857 B->data = (void *)b; 2858 B->ops[0] = MatOps_Values; 2859 B->assembled = PETSC_FALSE; 2860 2861 B->insertmode = NOT_SET_VALUES; 2862 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank)); 2863 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &b->size)); 2864 2865 /* build local table of row and column ownerships */ 2866 PetscCall(PetscMalloc1(b->size + 1, &b->rangebs)); 2867 2868 /* build cache for off array entries formed */ 2869 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash)); 2870 2871 b->donotstash = PETSC_FALSE; 2872 b->colmap = NULL; 2873 b->garray = NULL; 2874 b->roworiented = PETSC_TRUE; 2875 2876 /* stuff used in block assembly */ 2877 b->barray = NULL; 2878 2879 /* stuff used for matrix vector multiply */ 2880 b->lvec = NULL; 2881 b->Mvctx = NULL; 2882 2883 /* stuff for MatGetRow() */ 2884 b->rowindices = NULL; 2885 b->rowvalues = NULL; 2886 b->getrowactive = PETSC_FALSE; 2887 2888 /* hash table stuff */ 2889 b->ht = NULL; 2890 b->hd = NULL; 2891 b->ht_size = 0; 2892 b->ht_flag = PETSC_FALSE; 2893 b->ht_fact = 0; 2894 b->ht_total_ct = 0; 2895 b->ht_insert_ct = 0; 2896 2897 /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */ 2898 b->ijonly = PETSC_FALSE; 2899 2900 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiadj_C", MatConvert_MPIBAIJ_MPIAdj)); 2901 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiaij_C", MatConvert_MPIBAIJ_MPIAIJ)); 2902 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpisbaij_C", MatConvert_MPIBAIJ_MPISBAIJ)); 2903 #if defined(PETSC_HAVE_HYPRE) 2904 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_hypre_C", MatConvert_AIJ_HYPRE)); 2905 #endif 2906 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIBAIJ)); 2907 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIBAIJ)); 2908 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocation_C", MatMPIBAIJSetPreallocation_MPIBAIJ)); 2909 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocationCSR_C", MatMPIBAIJSetPreallocationCSR_MPIBAIJ)); 2910 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIBAIJ)); 2911 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetHashTableFactor_C", MatSetHashTableFactor_MPIBAIJ)); 2912 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_is_C", MatConvert_XAIJ_IS)); 2913 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIBAIJ)); 2914 2915 PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Options for loading MPIBAIJ matrix 1", "Mat"); 2916 PetscCall(PetscOptionsName("-mat_use_hash_table", "Use hash table to save time in constructing matrix", "MatSetOption", &flg)); 2917 if (flg) { 2918 PetscReal fact = 1.39; 2919 PetscCall(MatSetOption(B, MAT_USE_HASH_TABLE, PETSC_TRUE)); 2920 PetscCall(PetscOptionsReal("-mat_use_hash_table", "Use 
hash table factor", "MatMPIBAIJSetHashTableFactor", fact, &fact, NULL)); 2921 if (fact <= 1.0) fact = 1.39; 2922 PetscCall(MatMPIBAIJSetHashTableFactor(B, fact)); 2923 PetscCall(PetscInfo(B, "Hash table Factor used %5.2g\n", (double)fact)); 2924 } 2925 PetscOptionsEnd(); 2926 PetscFunctionReturn(PETSC_SUCCESS); 2927 } 2928 2929 // PetscClangLinter pragma disable: -fdoc-section-header-unknown 2930 /*MC 2931 MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 2932 2933 This matrix type is identical to `MATSEQBAIJ` when constructed with a single process communicator, 2934 and `MATMPIBAIJ` otherwise. 2935 2936 Options Database Keys: 2937 . -mat_type baij - sets the matrix type to `MATBAIJ` during a call to `MatSetFromOptions()` 2938 2939 Level: beginner 2940 2941 .seealso: `Mat`, `MatCreateBAIJ()`, `MATSEQBAIJ`, `MATMPIBAIJ`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()` 2942 M*/ 2943 2944 /*@ 2945 MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in `MATMPIBAIJ` format 2946 (block compressed row). 2947 2948 Collective 2949 2950 Input Parameters: 2951 + B - the matrix 2952 . bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row 2953 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()` 2954 . d_nz - number of block nonzeros per block row in diagonal portion of local 2955 submatrix (same for all local rows) 2956 . d_nnz - array containing the number of block nonzeros in the various block rows 2957 in the diagonal portion of the local submatrix (possibly different for each block 2958 row) or `NULL`. If you plan to factor the matrix you must leave room for the diagonal entry and 2959 set it even if it is zero. 2960 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2961 submatrix (same for all local rows). 2962 - o_nnz - array containing the number of block nonzeros in the various block rows of the 2963 off-diagonal portion of the local submatrix (possibly different for 2964 each block row) or `NULL`. 2965 2966 If the *_nnz parameter is given then the *_nz parameter is ignored 2967 2968 Options Database Keys: 2969 + -mat_block_size - size of the blocks to use 2970 - -mat_use_hash_table <fact> - set hash table factor 2971 2972 Level: intermediate 2973 2974 Notes: 2975 For good matrix assembly performance 2976 the user should preallocate the matrix storage by setting the parameters 2977 `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). By setting these parameters accurately, 2978 performance can be increased by more than a factor of 50. 2979 2980 If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor 2981 then it must be used on all processors that share the object for that argument. 2982 2983 Storage Information: 2984 For a square global matrix we define each processor's diagonal portion 2985 to be its local rows and the corresponding columns (a square submatrix); 2986 each processor's off-diagonal portion encompasses the remainder of the 2987 local matrix (a rectangular submatrix). 2988 2989 The user can specify preallocated storage for the diagonal part of 2990 the local submatrix with either `d_nz` or `d_nnz` (not both). Set 2991 `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic 
.seealso: `Mat`, `MATMPIBAIJ`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatMPIBAIJSetPreallocationCSR()`, `PetscSplitOwnership()`
@*/
PetscErrorCode MatMPIBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidType(B, 1);
  PetscValidLogicalCollectiveInt(B, bs, 2);
  PetscTryMethod(B, "MatMPIBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, bs, d_nz, d_nnz, o_nz, o_nnz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

// PetscClangLinter pragma disable: -fdoc-section-header-unknown
/*@
  MatCreateBAIJ - Creates a sparse parallel matrix in `MATBAIJ` format
  (block compressed row).

  Collective

  Input Parameters:
+ comm  - MPI communicator
. bs    - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
          blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
. m     - number of local rows (or `PETSC_DECIDE` to have calculated if `M` is given)
          This value should be the same as the local size used in creating the
          y vector for the matrix-vector product y = Ax.
. n     - number of local columns (or `PETSC_DECIDE` to have calculated if `N` is given)
          This value should be the same as the local size used in creating the
          x vector for the matrix-vector product y = Ax.
. M     - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
. N     - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
. d_nz  - number of nonzero blocks per block row in the diagonal portion of the local
          submatrix (same for all local rows)
. d_nnz - array containing the number of nonzero blocks in the various block rows
          in the diagonal portion of the local submatrix (possibly different for each block
          row) or `NULL`. If you plan to factor the matrix you must leave room for the diagonal entry
          and set it even if it is zero.
. o_nz  - number of nonzero blocks per block row in the off-diagonal portion of the local
          submatrix (same for all local rows).
- o_nnz - array containing the number of nonzero blocks in the various block rows of the
          off-diagonal portion of the local submatrix (possibly different for
          each block row) or `NULL`.

  Output Parameter:
. A - the matrix

  Options Database Keys:
+ -mat_block_size            - size of the blocks to use
- -mat_use_hash_table <fact> - set hash table factor

  Level: intermediate

  Notes:
  It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
  MatXXXXSetPreallocation() paradigm instead of this routine directly.
  [MatXXXXSetPreallocation() is, for example, `MatSeqBAIJSetPreallocation()`]

  For good matrix assembly performance
  the user should preallocate the matrix storage by setting the parameters
  `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). By setting these parameters accurately,
  performance can be increased by more than a factor of 50.

  If an `*_nnz` parameter is given then the corresponding `*_nz` parameter is ignored.

  A nonzero block is any block that has 1 or more nonzeros in it.

  The user MUST specify either the local or global matrix dimensions
  (possibly both).

  If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor
  then it must be used on all processors that share the object for that argument.

  If `m` and `n` are not `PETSC_DECIDE`, then the values determine the `PetscLayout` of the matrix and the ranges returned by
  `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, and `MatGetOwnershipRangesColumn()`.

  Storage Information:
  For a square global matrix we define each processor's diagonal portion
  to be its local rows and the corresponding columns (a square submatrix);
  each processor's off-diagonal portion encompasses the remainder of the
  local matrix (a rectangular submatrix).

  The user can specify preallocated storage for the diagonal part of
  the local submatrix with either `d_nz` or `d_nnz` (not both). Set
  `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic
  memory allocation. Likewise, specify preallocated storage for the
  off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both).

  Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
  the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          --------------------------
   row 3  |o o o d d d o o o o o o
   row 4  |o o o d d d o o o o o o
   row 5  |o o o d d d o o o o o o
          --------------------------
.ve

  Thus, any entries in the d locations are stored in the d (diagonal)
  submatrix, and any entries in the o locations are stored in the
  o (off-diagonal) submatrix. Note that the d and the o submatrices are
  stored simply in the `MATSEQBAIJ` format for compressed row storage.
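  As a purely illustrative sketch (the figure above does not fix a block size), if the block size were 3 and
  this process owned only rows 3-5 of a 12 x 12 matrix, those rows form one block row whose diagonal block
  sits in block column 1 and whose off-diagonal blocks sit in block columns 0, 2 and 3, so this process
  could pass

.vb
  PetscInt d_nnz[1] = {1}; /* one diagonal block                             */
  PetscInt o_nnz[1] = {3}; /* off-diagonal blocks in block columns 0, 2, 3   */

  MatCreateBAIJ(comm, 3, 3, 3, 12, 12, 0, d_nnz, 0, o_nnz, &A);
.ve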
3125 3126 Now `d_nz` should indicate the number of block nonzeros per row in the d matrix, 3127 and `o_nz` should indicate the number of block nonzeros per row in the o matrix. 3128 In general, for PDE problems in which most nonzeros are near the diagonal, 3129 one expects `d_nz` >> `o_nz`. 3130 3131 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()`, 3132 `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, `MatGetOwnershipRangesColumn()`, `PetscLayout` 3133 @*/ 3134 PetscErrorCode MatCreateBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A) 3135 { 3136 PetscMPIInt size; 3137 3138 PetscFunctionBegin; 3139 PetscCall(MatCreate(comm, A)); 3140 PetscCall(MatSetSizes(*A, m, n, M, N)); 3141 PetscCallMPI(MPI_Comm_size(comm, &size)); 3142 if (size > 1) { 3143 PetscCall(MatSetType(*A, MATMPIBAIJ)); 3144 PetscCall(MatMPIBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz)); 3145 } else { 3146 PetscCall(MatSetType(*A, MATSEQBAIJ)); 3147 PetscCall(MatSeqBAIJSetPreallocation(*A, bs, d_nz, d_nnz)); 3148 } 3149 PetscFunctionReturn(PETSC_SUCCESS); 3150 } 3151 3152 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat) 3153 { 3154 Mat mat; 3155 Mat_MPIBAIJ *a, *oldmat = (Mat_MPIBAIJ *)matin->data; 3156 PetscInt len = 0; 3157 3158 PetscFunctionBegin; 3159 *newmat = NULL; 3160 PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat)); 3161 PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N)); 3162 PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name)); 3163 3164 PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap)); 3165 PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap)); 3166 if (matin->hash_active) { 3167 PetscCall(MatSetUp(mat)); 3168 } else { 3169 mat->factortype = matin->factortype; 3170 mat->preallocated = PETSC_TRUE; 3171 mat->assembled = PETSC_TRUE; 3172 mat->insertmode = NOT_SET_VALUES; 3173 3174 a = (Mat_MPIBAIJ *)mat->data; 3175 mat->rmap->bs = matin->rmap->bs; 3176 a->bs2 = oldmat->bs2; 3177 a->mbs = oldmat->mbs; 3178 a->nbs = oldmat->nbs; 3179 a->Mbs = oldmat->Mbs; 3180 a->Nbs = oldmat->Nbs; 3181 3182 a->size = oldmat->size; 3183 a->rank = oldmat->rank; 3184 a->donotstash = oldmat->donotstash; 3185 a->roworiented = oldmat->roworiented; 3186 a->rowindices = NULL; 3187 a->rowvalues = NULL; 3188 a->getrowactive = PETSC_FALSE; 3189 a->barray = NULL; 3190 a->rstartbs = oldmat->rstartbs; 3191 a->rendbs = oldmat->rendbs; 3192 a->cstartbs = oldmat->cstartbs; 3193 a->cendbs = oldmat->cendbs; 3194 3195 /* hash table stuff */ 3196 a->ht = NULL; 3197 a->hd = NULL; 3198 a->ht_size = 0; 3199 a->ht_flag = oldmat->ht_flag; 3200 a->ht_fact = oldmat->ht_fact; 3201 a->ht_total_ct = 0; 3202 a->ht_insert_ct = 0; 3203 3204 PetscCall(PetscArraycpy(a->rangebs, oldmat->rangebs, a->size + 1)); 3205 if (oldmat->colmap) { 3206 #if defined(PETSC_USE_CTABLE) 3207 PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap)); 3208 #else 3209 PetscCall(PetscMalloc1(a->Nbs, &a->colmap)); 3210 PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, a->Nbs)); 3211 #endif 3212 } else a->colmap = NULL; 3213 3214 if (oldmat->garray && (len = ((Mat_SeqBAIJ *)oldmat->B->data)->nbs)) { 3215 PetscCall(PetscMalloc1(len, &a->garray)); 3216 PetscCall(PetscArraycpy(a->garray, oldmat->garray, len)); 3217 } else 
a->garray = NULL; 3218 3219 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin), matin->rmap->bs, &mat->bstash)); 3220 PetscCall(VecDuplicate(oldmat->lvec, &a->lvec)); 3221 PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx)); 3222 3223 PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A)); 3224 PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B)); 3225 } 3226 PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist)); 3227 *newmat = mat; 3228 PetscFunctionReturn(PETSC_SUCCESS); 3229 } 3230 3231 /* Used for both MPIBAIJ and MPISBAIJ matrices */ 3232 PetscErrorCode MatLoad_MPIBAIJ_Binary(Mat mat, PetscViewer viewer) 3233 { 3234 PetscInt header[4], M, N, nz, bs, m, n, mbs, nbs, rows, cols, sum, i, j, k; 3235 PetscInt *rowidxs, *colidxs, rs, cs, ce; 3236 PetscScalar *matvals; 3237 3238 PetscFunctionBegin; 3239 PetscCall(PetscViewerSetUp(viewer)); 3240 3241 /* read in matrix header */ 3242 PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT)); 3243 PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file"); 3244 M = header[1]; 3245 N = header[2]; 3246 nz = header[3]; 3247 PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M); 3248 PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N); 3249 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIBAIJ"); 3250 3251 /* set block sizes from the viewer's .info file */ 3252 PetscCall(MatLoad_Binary_BlockSizes(mat, viewer)); 3253 /* set local sizes if not set already */ 3254 if (mat->rmap->n < 0 && M == N) mat->rmap->n = mat->cmap->n; 3255 if (mat->cmap->n < 0 && M == N) mat->cmap->n = mat->rmap->n; 3256 /* set global sizes if not set already */ 3257 if (mat->rmap->N < 0) mat->rmap->N = M; 3258 if (mat->cmap->N < 0) mat->cmap->N = N; 3259 PetscCall(PetscLayoutSetUp(mat->rmap)); 3260 PetscCall(PetscLayoutSetUp(mat->cmap)); 3261 3262 /* check if the matrix sizes are correct */ 3263 PetscCall(MatGetSize(mat, &rows, &cols)); 3264 PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols); 3265 PetscCall(MatGetBlockSize(mat, &bs)); 3266 PetscCall(MatGetLocalSize(mat, &m, &n)); 3267 PetscCall(PetscLayoutGetRange(mat->rmap, &rs, NULL)); 3268 PetscCall(PetscLayoutGetRange(mat->cmap, &cs, &ce)); 3269 mbs = m / bs; 3270 nbs = n / bs; 3271 3272 /* read in row lengths and build row indices */ 3273 PetscCall(PetscMalloc1(m + 1, &rowidxs)); 3274 PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT)); 3275 rowidxs[0] = 0; 3276 for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i]; 3277 PetscCallMPI(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer))); 3278 PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum); 3279 3280 /* read in column indices and matrix values */ 3281 PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals)); 3282 PetscCall(PetscViewerBinaryReadAll(viewer, 
colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 3283 PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 3284 3285 { /* preallocate matrix storage */ 3286 PetscBT bt; /* helper bit set to count diagonal nonzeros */ 3287 PetscHSetI ht; /* helper hash set to count off-diagonal nonzeros */ 3288 PetscBool sbaij, done; 3289 PetscInt *d_nnz, *o_nnz; 3290 3291 PetscCall(PetscBTCreate(nbs, &bt)); 3292 PetscCall(PetscHSetICreate(&ht)); 3293 PetscCall(PetscCalloc2(mbs, &d_nnz, mbs, &o_nnz)); 3294 PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPISBAIJ, &sbaij)); 3295 for (i = 0; i < mbs; i++) { 3296 PetscCall(PetscBTMemzero(nbs, bt)); 3297 PetscCall(PetscHSetIClear(ht)); 3298 for (k = 0; k < bs; k++) { 3299 PetscInt row = bs * i + k; 3300 for (j = rowidxs[row]; j < rowidxs[row + 1]; j++) { 3301 PetscInt col = colidxs[j]; 3302 if (!sbaij || col >= row) { 3303 if (col >= cs && col < ce) { 3304 if (!PetscBTLookupSet(bt, (col - cs) / bs)) d_nnz[i]++; 3305 } else { 3306 PetscCall(PetscHSetIQueryAdd(ht, col / bs, &done)); 3307 if (done) o_nnz[i]++; 3308 } 3309 } 3310 } 3311 } 3312 } 3313 PetscCall(PetscBTDestroy(&bt)); 3314 PetscCall(PetscHSetIDestroy(&ht)); 3315 PetscCall(MatMPIBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz)); 3316 PetscCall(MatMPISBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz)); 3317 PetscCall(PetscFree2(d_nnz, o_nnz)); 3318 } 3319 3320 /* store matrix values */ 3321 for (i = 0; i < m; i++) { 3322 PetscInt row = rs + i, s = rowidxs[i], e = rowidxs[i + 1]; 3323 PetscUseTypeMethod(mat, setvalues, 1, &row, e - s, colidxs + s, matvals + s, INSERT_VALUES); 3324 } 3325 3326 PetscCall(PetscFree(rowidxs)); 3327 PetscCall(PetscFree2(colidxs, matvals)); 3328 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 3329 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 3330 PetscFunctionReturn(PETSC_SUCCESS); 3331 } 3332 3333 PetscErrorCode MatLoad_MPIBAIJ(Mat mat, PetscViewer viewer) 3334 { 3335 PetscBool isbinary; 3336 3337 PetscFunctionBegin; 3338 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 3339 PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name); 3340 PetscCall(MatLoad_MPIBAIJ_Binary(mat, viewer)); 3341 PetscFunctionReturn(PETSC_SUCCESS); 3342 } 3343 3344 /*@ 3345 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the matrices hash table 3346 3347 Input Parameters: 3348 + mat - the matrix 3349 - fact - factor 3350 3351 Options Database Key: 3352 . 
-mat_use_hash_table <fact> - provide the factor 3353 3354 Level: advanced 3355 3356 .seealso: `Mat`, `MATMPIBAIJ`, `MatSetOption()` 3357 @*/ 3358 PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat, PetscReal fact) 3359 { 3360 PetscFunctionBegin; 3361 PetscTryMethod(mat, "MatSetHashTableFactor_C", (Mat, PetscReal), (mat, fact)); 3362 PetscFunctionReturn(PETSC_SUCCESS); 3363 } 3364 3365 PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat, PetscReal fact) 3366 { 3367 Mat_MPIBAIJ *baij; 3368 3369 PetscFunctionBegin; 3370 baij = (Mat_MPIBAIJ *)mat->data; 3371 baij->ht_fact = fact; 3372 PetscFunctionReturn(PETSC_SUCCESS); 3373 } 3374 3375 PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[]) 3376 { 3377 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 3378 PetscBool flg; 3379 3380 PetscFunctionBegin; 3381 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIBAIJ, &flg)); 3382 PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIBAIJ matrix as input"); 3383 if (Ad) *Ad = a->A; 3384 if (Ao) *Ao = a->B; 3385 if (colmap) *colmap = a->garray; 3386 PetscFunctionReturn(PETSC_SUCCESS); 3387 } 3388 3389 /* 3390 Special version for direct calls from Fortran (to eliminate two function call overheads 3391 */ 3392 #if defined(PETSC_HAVE_FORTRAN_CAPS) 3393 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED 3394 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 3395 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked 3396 #endif 3397 3398 // PetscClangLinter pragma disable: -fdoc-synopsis-matching-symbol-name 3399 /*@C 3400 MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to `MatSetValuesBlocked()` 3401 3402 Collective 3403 3404 Input Parameters: 3405 + matin - the matrix 3406 . min - number of input rows 3407 . im - input rows 3408 . nin - number of input columns 3409 . in - input columns 3410 . v - numerical values input 3411 - addvin - `INSERT_VALUES` or `ADD_VALUES` 3412 3413 Level: advanced 3414 3415 Developer Notes: 3416 This has a complete copy of `MatSetValuesBlocked_MPIBAIJ()` which is terrible code un-reuse. 
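  From C one would simply call `MatSetValuesBlocked()`; below is a rough, purely illustrative sketch of the
  equivalent C-level call that this Fortran entry point short-circuits (the block indices and values are made up):

.vb
  PetscInt    row     = 0, col = 1;           /* block row and block column indices  */
  PetscScalar vals[4] = {1.0, 2.0, 3.0, 4.0}; /* one bs x bs block, here with bs = 2 */

  MatSetValuesBlocked(mat, 1, &row, 1, &col, vals, INSERT_VALUES);
.ve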
3417 3418 .seealso: `Mat`, `MatSetValuesBlocked()` 3419 @*/ 3420 PETSC_EXTERN PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin, PetscInt *min, const PetscInt im[], PetscInt *nin, const PetscInt in[], const MatScalar v[], InsertMode *addvin) 3421 { 3422 /* convert input arguments to C version */ 3423 Mat mat = *matin; 3424 PetscInt m = *min, n = *nin; 3425 InsertMode addv = *addvin; 3426 3427 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 3428 const MatScalar *value; 3429 MatScalar *barray = baij->barray; 3430 PetscBool roworiented = baij->roworiented; 3431 PetscInt i, j, ii, jj, row, col, rstart = baij->rstartbs; 3432 PetscInt rend = baij->rendbs, cstart = baij->cstartbs, stepval; 3433 PetscInt cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2; 3434 3435 PetscFunctionBegin; 3436 /* tasks normally handled by MatSetValuesBlocked() */ 3437 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 3438 else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values"); 3439 PetscCheck(!mat->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix"); 3440 if (mat->assembled) { 3441 mat->was_assembled = PETSC_TRUE; 3442 mat->assembled = PETSC_FALSE; 3443 } 3444 PetscCall(PetscLogEventBegin(MAT_SetValues, mat, 0, 0, 0)); 3445 3446 if (!barray) { 3447 PetscCall(PetscMalloc1(bs2, &barray)); 3448 baij->barray = barray; 3449 } 3450 3451 if (roworiented) stepval = (n - 1) * bs; 3452 else stepval = (m - 1) * bs; 3453 3454 for (i = 0; i < m; i++) { 3455 if (im[i] < 0) continue; 3456 PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large, row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1); 3457 if (im[i] >= rstart && im[i] < rend) { 3458 row = im[i] - rstart; 3459 for (j = 0; j < n; j++) { 3460 /* If NumCol = 1 then a copy is not required */ 3461 if ((roworiented) && (n == 1)) { 3462 barray = (MatScalar *)v + i * bs2; 3463 } else if ((!roworiented) && (m == 1)) { 3464 barray = (MatScalar *)v + j * bs2; 3465 } else { /* Here a copy is required */ 3466 if (roworiented) { 3467 value = v + i * (stepval + bs) * bs + j * bs; 3468 } else { 3469 value = v + j * (stepval + bs) * bs + i * bs; 3470 } 3471 for (ii = 0; ii < bs; ii++, value += stepval) { 3472 for (jj = 0; jj < bs; jj++) *barray++ = *value++; 3473 } 3474 barray -= bs2; 3475 } 3476 3477 if (in[j] >= cstart && in[j] < cend) { 3478 col = in[j] - cstart; 3479 PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j])); 3480 } else if (in[j] < 0) { 3481 continue; 3482 } else { 3483 PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large, col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1); 3484 if (mat->was_assembled) { 3485 if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 3486 3487 #if defined(PETSC_USE_DEBUG) 3488 #if defined(PETSC_USE_CTABLE) 3489 { 3490 PetscInt data; 3491 PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &data)); 3492 PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap"); 3493 } 3494 #else 3495 PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap"); 3496 #endif 3497 #endif 3498 #if defined(PETSC_USE_CTABLE) 3499 PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col)); 3500 col = (col - 1) / bs; 3501 #else 3502 col = (baij->colmap[in[j]] - 1) / bs; 3503 #endif 3504 if (col < 0 && 
              !((Mat_SeqBAIJ *)baij->A->data)->nonew) {
            PetscCall(MatDisAssemble_MPIBAIJ(mat));
            col = in[j];
          }
        } else col = in[j];
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        } else {
          PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        }
      }
    }
  }

  /* task normally handled by MatSetValuesBlocked() */
  PetscCall(PetscLogEventEnd(MAT_SetValues, mat, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatCreateMPIBAIJWithArrays - creates a `MATMPIBAIJ` matrix using arrays that contain the matrix, in standard block CSR format, for the local rows.

  Collective

  Input Parameters:
+ comm - MPI communicator
. bs   - the block size, only a block size of 1 is supported
. m    - number of local rows (cannot be `PETSC_DECIDE`)
. n    - number of local columns (or `PETSC_DECIDE` to have calculated if `N` is given).
         This value should be the same as the local size used in creating the
         x vector for the matrix-vector product $ y = Ax $. For square matrices `n` is almost always `m`.
. M    - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given)
. N    - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given)
. i    - row indices; that is, i[0] = 0 and i[row] = i[row-1] + the number of block entries in block row `row-1` of the matrix
. j    - column indices
- a    - matrix values

  Output Parameter:
. mat - the matrix

  Level: intermediate

  Notes:
  The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc;
  thus you CANNOT change the matrix entries by changing the values of a[] after you have
  called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.

  The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
  the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
  block, followed by the second column of the first block, and so on. That is, the blocks are contiguous in memory
  with column-major ordering within blocks.

  The `i` and `j` indices are 0 based, and the `i` indices are offsets into the local `j` (and `a`) arrays.
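  Example usage (a purely illustrative single-rank sketch with bs = 1, two local rows, and two nonzeros per
  row; the numerical values are made up):

.vb
  PetscInt    i[3] = {0, 2, 4};
  PetscInt    j[4] = {0, 1, 0, 1};
  PetscScalar a[4] = {1.0, 2.0, 3.0, 4.0};
  Mat         A;

  MatCreateMPIBAIJWithArrays(PETSC_COMM_WORLD, 1, 2, 2, PETSC_DETERMINE, PETSC_DETERMINE, i, j, a, &A);
.ve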
.seealso: `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
          `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`
@*/
PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
{
  PetscFunctionBegin;
  PetscCheck(!i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatSetSizes(*mat, m, n, M, N));
  PetscCall(MatSetType(*mat, MATMPIBAIJ));
  PetscCall(MatSetBlockSize(*mat, bs));
  PetscCall(MatSetUp(*mat));
  PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_FALSE));
  PetscCall(MatMPIBAIJSetPreallocationCSR(*mat, bs, i, j, a));
  PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
{
  PetscInt     m, N, i, rstart, nnz, Ii, bs, cbs;
  PetscInt    *indx;
  PetscScalar *values;

  PetscFunctionBegin;
  PetscCall(MatGetSize(inmat, &m, &N));
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)inmat->data;
    PetscInt    *dnz, *onz, mbs, Nbs, nbs;
    PetscInt    *bindx, rmax = a->rmax, j;
    PetscMPIInt  rank, size;

    PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
    mbs = m / bs;
    Nbs = N / cbs;
    if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnershipBlock(comm, cbs, &n, &N));
    nbs = n / cbs;

    PetscCall(PetscMalloc1(rmax, &bindx));
    MatPreallocateBegin(comm, mbs, nbs, dnz, onz); /* inline function, output __end and __rstart are used below */

    PetscCallMPI(MPI_Comm_rank(comm, &rank));
    PetscCallMPI(MPI_Comm_size(comm, &size));
    if (rank == size - 1) {
      /* Check sum(nbs) = Nbs */
      PetscCheck(__end == Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT, __end, Nbs);
    }

    rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */
    for (i = 0; i < mbs; i++) {
      PetscCall(MatGetRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL)); /* non-blocked nnz and indx */
      nnz = nnz / bs;
      for (j = 0; j < nnz; j++) bindx[j] = indx[j * bs] / bs;
      PetscCall(MatPreallocateSet(i + rstart, nnz, bindx, dnz, onz));
      PetscCall(MatRestoreRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL));
    }
    PetscCall(PetscFree(bindx));

    PetscCall(MatCreate(comm, outmat));
    PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
    PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
    PetscCall(MatSetType(*outmat, MATBAIJ));
    PetscCall(MatSeqBAIJSetPreallocation(*outmat, bs, 0, dnz));
    PetscCall(MatMPIBAIJSetPreallocation(*outmat, bs, 0, dnz, 0, onz));
    MatPreallocateEnd(dnz, onz);
    PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  }

  /* numeric phase */
  PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
  PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));

  for (i = 0; i < m; i++) {
PetscCall(MatGetRow_SeqBAIJ(inmat, i, &nnz, &indx, &values)); 3638 Ii = i + rstart; 3639 PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES)); 3640 PetscCall(MatRestoreRow_SeqBAIJ(inmat, i, &nnz, &indx, &values)); 3641 } 3642 PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY)); 3643 PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY)); 3644 PetscFunctionReturn(PETSC_SUCCESS); 3645 } 3646