1 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/ 2 3 #include <petsc/private/hashseti.h> 4 #include <petscblaslapack.h> 5 #include <petscsf.h> 6 7 static PetscErrorCode MatDestroy_MPIBAIJ(Mat mat) 8 { 9 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 10 11 PetscFunctionBegin; 12 PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N)); 13 PetscCall(MatStashDestroy_Private(&mat->stash)); 14 PetscCall(MatStashDestroy_Private(&mat->bstash)); 15 PetscCall(MatDestroy(&baij->A)); 16 PetscCall(MatDestroy(&baij->B)); 17 #if defined(PETSC_USE_CTABLE) 18 PetscCall(PetscHMapIDestroy(&baij->colmap)); 19 #else 20 PetscCall(PetscFree(baij->colmap)); 21 #endif 22 PetscCall(PetscFree(baij->garray)); 23 PetscCall(VecDestroy(&baij->lvec)); 24 PetscCall(VecScatterDestroy(&baij->Mvctx)); 25 PetscCall(PetscFree2(baij->rowvalues, baij->rowindices)); 26 PetscCall(PetscFree(baij->barray)); 27 PetscCall(PetscFree2(baij->hd, baij->ht)); 28 PetscCall(PetscFree(baij->rangebs)); 29 PetscCall(PetscFree(mat->data)); 30 31 PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL)); 32 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL)); 33 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL)); 34 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocation_C", NULL)); 35 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocationCSR_C", NULL)); 36 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL)); 37 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetHashTableFactor_C", NULL)); 38 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpisbaij_C", NULL)); 39 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiadj_C", NULL)); 40 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiaij_C", NULL)); 41 #if defined(PETSC_HAVE_HYPRE) 42 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_hypre_C", NULL)); 43 #endif 44 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_is_C", NULL)); 45 PetscFunctionReturn(PETSC_SUCCESS); 46 } 47 48 /* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and MatAssemblyEnd_MPI_Hash() */ 49 #define TYPE BAIJ 50 #include "../src/mat/impls/aij/mpi/mpihashmat.h" 51 #undef TYPE 52 53 #if defined(PETSC_HAVE_HYPRE) 54 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *); 55 #endif 56 57 static PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A, Vec v, PetscInt idx[]) 58 { 59 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 60 PetscInt i, *idxb = NULL, m = A->rmap->n, bs = A->cmap->bs; 61 PetscScalar *va, *vv; 62 Vec vB, vA; 63 const PetscScalar *vb; 64 65 PetscFunctionBegin; 66 PetscCall(MatCreateVecs(a->A, NULL, &vA)); 67 PetscCall(MatGetRowMaxAbs(a->A, vA, idx)); 68 69 PetscCall(VecGetArrayWrite(vA, &va)); 70 if (idx) { 71 for (i = 0; i < m; i++) { 72 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 73 } 74 } 75 76 PetscCall(MatCreateVecs(a->B, NULL, &vB)); 77 PetscCall(PetscMalloc1(m, &idxb)); 78 PetscCall(MatGetRowMaxAbs(a->B, vB, idxb)); 79 80 PetscCall(VecGetArrayWrite(v, &vv)); 81 PetscCall(VecGetArrayRead(vB, &vb)); 82 for (i = 0; i < m; i++) { 83 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 84 vv[i] = vb[i]; 85 if (idx) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs); 86 } else { 87 vv[i] = va[i]; 88 if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > bs * a->garray[idxb[i] / bs] + (idxb[i] % bs)) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs); 89 } 90 } 91 PetscCall(VecRestoreArrayWrite(vA, &vv)); 92 PetscCall(VecRestoreArrayWrite(vA, &va)); 93 PetscCall(VecRestoreArrayRead(vB, &vb)); 94 PetscCall(PetscFree(idxb)); 95 PetscCall(VecDestroy(&vA)); 96 PetscCall(VecDestroy(&vB)); 97 PetscFunctionReturn(PETSC_SUCCESS); 98 } 99 100 static PetscErrorCode MatGetRowSumAbs_MPIBAIJ(Mat A, Vec v) 101 { 102 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 103 Vec vB, vA; 104 105 PetscFunctionBegin; 106 PetscCall(MatCreateVecs(a->A, NULL, &vA)); 107 PetscCall(MatGetRowSumAbs(a->A, vA)); 108 PetscCall(MatCreateVecs(a->B, NULL, &vB)); 109 PetscCall(MatGetRowSumAbs(a->B, vB)); 110 PetscCall(VecAXPY(vA, 1.0, vB)); 111 PetscCall(VecDestroy(&vB)); 112 PetscCall(VecCopy(vA, v)); 113 PetscCall(VecDestroy(&vA)); 114 PetscFunctionReturn(PETSC_SUCCESS); 115 } 116 117 static PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat) 118 { 119 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 120 121 PetscFunctionBegin; 122 PetscCall(MatStoreValues(aij->A)); 123 PetscCall(MatStoreValues(aij->B)); 124 PetscFunctionReturn(PETSC_SUCCESS); 125 } 126 127 static PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat) 128 { 129 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 130 131 PetscFunctionBegin; 132 PetscCall(MatRetrieveValues(aij->A)); 133 PetscCall(MatRetrieveValues(aij->B)); 134 PetscFunctionReturn(PETSC_SUCCESS); 135 } 136 137 /* 138 Local utility routine that creates a mapping from the global column 139 number to the local number in the off-diagonal part of the local 140 storage of the matrix. This is done in a non scalable way since the 141 length of colmap equals the global matrix length. 142 */ 143 PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat) 144 { 145 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 146 Mat_SeqBAIJ *B = (Mat_SeqBAIJ *)baij->B->data; 147 PetscInt nbs = B->nbs, i, bs = mat->rmap->bs; 148 149 PetscFunctionBegin; 150 #if defined(PETSC_USE_CTABLE) 151 PetscCall(PetscHMapICreateWithSize(baij->nbs, &baij->colmap)); 152 for (i = 0; i < nbs; i++) PetscCall(PetscHMapISet(baij->colmap, baij->garray[i] + 1, i * bs + 1)); 153 #else 154 PetscCall(PetscCalloc1(baij->Nbs + 1, &baij->colmap)); 155 for (i = 0; i < nbs; i++) baij->colmap[baij->garray[i]] = i * bs + 1; 156 #endif 157 PetscFunctionReturn(PETSC_SUCCESS); 158 } 159 160 #define MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, orow, ocol) \ 161 do { \ 162 brow = row / bs; \ 163 rp = PetscSafePointerPlusOffset(aj, ai[brow]); \ 164 ap = PetscSafePointerPlusOffset(aa, bs2 * ai[brow]); \ 165 rmax = aimax[brow]; \ 166 nrow = ailen[brow]; \ 167 bcol = col / bs; \ 168 ridx = row % bs; \ 169 cidx = col % bs; \ 170 low = 0; \ 171 high = nrow; \ 172 while (high - low > 3) { \ 173 t = (low + high) / 2; \ 174 if (rp[t] > bcol) high = t; \ 175 else low = t; \ 176 } \ 177 for (_i = low; _i < high; _i++) { \ 178 if (rp[_i] > bcol) break; \ 179 if (rp[_i] == bcol) { \ 180 bap = ap + bs2 * _i + bs * cidx + ridx; \ 181 if (addv == ADD_VALUES) *bap += value; \ 182 else *bap = value; \ 183 goto a_noinsert; \ 184 } \ 185 } \ 186 if (a->nonew == 1) goto a_noinsert; \ 187 PetscCheck(a->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 188 MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, aimax, a->nonew, MatScalar); \ 189 N = nrow++ - 1; \ 190 /* shift up all the later entries in this row */ \ 191 PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \ 192 PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \ 193 PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \ 194 rp[_i] = bcol; \ 195 ap[bs2 * _i + bs * cidx + ridx] = value; \ 196 a_noinsert:; \ 197 ailen[brow] = nrow; \ 198 } while (0) 199 200 #define MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, orow, ocol) \ 201 do { \ 202 brow = row / bs; \ 203 rp = PetscSafePointerPlusOffset(bj, bi[brow]); \ 204 ap = PetscSafePointerPlusOffset(ba, bs2 * bi[brow]); \ 205 rmax = bimax[brow]; \ 206 nrow = bilen[brow]; \ 207 bcol = col / bs; \ 208 ridx = row % bs; \ 209 cidx = col % bs; \ 210 low = 0; \ 211 high = nrow; \ 212 while (high - low > 3) { \ 213 t = (low + high) / 2; \ 214 if (rp[t] > bcol) high = t; \ 215 else low = t; \ 216 } \ 217 for (_i = low; _i < high; _i++) { \ 218 if (rp[_i] > bcol) break; \ 219 if (rp[_i] == bcol) { \ 220 bap = ap + bs2 * _i + bs * cidx + ridx; \ 221 if (addv == ADD_VALUES) *bap += value; \ 222 else *bap = value; \ 223 goto b_noinsert; \ 224 } \ 225 } \ 226 if (b->nonew == 1) goto b_noinsert; \ 227 PetscCheck(b->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 228 MatSeqXAIJReallocateAIJ(B, b->mbs, bs2, nrow, brow, bcol, rmax, ba, bi, bj, rp, ap, bimax, b->nonew, MatScalar); \ 229 N = nrow++ - 1; \ 230 /* shift up all the later entries in this row */ \ 231 PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \ 232 PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \ 233 PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \ 234 rp[_i] = bcol; \ 235 ap[bs2 * _i + bs * cidx + ridx] = value; \ 236 b_noinsert:; \ 237 bilen[brow] = nrow; \ 238 } while (0) 239 240 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) 241 { 242 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 243 MatScalar value; 244 PetscBool roworiented = baij->roworiented; 245 PetscInt i, j, row, col; 246 PetscInt rstart_orig = mat->rmap->rstart; 247 PetscInt rend_orig = mat->rmap->rend, cstart_orig = mat->cmap->rstart; 248 PetscInt cend_orig = mat->cmap->rend, bs = mat->rmap->bs; 249 250 /* Some Variables required in the macro */ 251 Mat A = baij->A; 252 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)(A)->data; 253 PetscInt *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j; 254 MatScalar *aa = a->a; 255 256 Mat B = baij->B; 257 Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)(B)->data; 258 PetscInt *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j; 259 MatScalar *ba = b->a; 260 261 PetscInt *rp, ii, nrow, _i, rmax, N, brow, bcol; 262 PetscInt low, high, t, ridx, cidx, bs2 = a->bs2; 263 MatScalar *ap, *bap; 264 265 PetscFunctionBegin; 266 for (i = 0; i < m; i++) { 267 if (im[i] < 0) continue; 268 PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1); 269 if (im[i] >= rstart_orig && im[i] < rend_orig) { 270 row = im[i] - rstart_orig; 271 for (j = 0; j < n; j++) { 272 if (in[j] >= cstart_orig && in[j] < cend_orig) { 273 col = in[j] - cstart_orig; 274 if (roworiented) value = v[i * n + j]; 275 else value = v[i + j * m]; 276 MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, im[i], in[j]); 277 } else if (in[j] < 0) { 278 continue; 279 } else { 280 PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1); 281 if (mat->was_assembled) { 282 if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 283 #if defined(PETSC_USE_CTABLE) 284 PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] / bs + 1, 0, &col)); 285 col = col - 1; 286 #else 287 col = baij->colmap[in[j] / bs] - 1; 288 #endif 289 if (col < 0 && !((Mat_SeqBAIJ *)baij->B->data)->nonew) { 290 PetscCall(MatDisAssemble_MPIBAIJ(mat)); 291 col = in[j]; 292 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 293 B = baij->B; 294 b = (Mat_SeqBAIJ *)(B)->data; 295 bimax = b->imax; 296 bi = b->i; 297 bilen = b->ilen; 298 bj = b->j; 299 ba = b->a; 300 } else { 301 PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]); 302 col += in[j] % bs; 303 } 304 } else col = in[j]; 305 if (roworiented) value = v[i * n + j]; 306 else value = v[i + j * m]; 307 MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, im[i], in[j]); 308 /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */ 309 } 310 } 311 } else { 312 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]); 313 if (!baij->donotstash) { 314 mat->assembled = PETSC_FALSE; 315 if (roworiented) { 316 PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE)); 317 } else { 318 PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE)); 319 } 320 } 321 } 322 } 323 PetscFunctionReturn(PETSC_SUCCESS); 324 } 325 326 static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol) 327 { 328 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 329 PetscInt *rp, low, high, t, ii, jj, nrow, i, rmax, N; 330 PetscInt *imax = a->imax, *ai = a->i, *ailen = a->ilen; 331 PetscInt *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs; 332 PetscBool roworiented = a->roworiented; 333 const PetscScalar *value = v; 334 MatScalar *ap, *aa = a->a, *bap; 335 336 PetscFunctionBegin; 337 rp = aj + ai[row]; 338 ap = aa + bs2 * ai[row]; 339 rmax = imax[row]; 340 nrow = ailen[row]; 341 value = v; 342 low = 0; 343 high = nrow; 344 while (high - low > 7) { 345 t = (low + high) / 2; 346 if (rp[t] > col) high = t; 347 else low = t; 348 } 349 for (i = low; i < high; i++) { 350 if (rp[i] > col) break; 351 if (rp[i] == col) { 352 bap = ap + bs2 * i; 353 if (roworiented) { 354 if (is == ADD_VALUES) { 355 for (ii = 0; ii < bs; ii++) { 356 for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++; 357 } 358 } else { 359 for (ii = 0; ii < bs; ii++) { 360 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 361 } 362 } 363 } else { 364 if (is == ADD_VALUES) { 365 for (ii = 0; ii < bs; ii++, value += bs) { 366 for (jj = 0; jj < bs; jj++) bap[jj] += value[jj]; 367 bap += bs; 368 } 369 } else { 370 for (ii = 0; ii < bs; ii++, value += bs) { 371 for (jj = 0; jj < bs; jj++) bap[jj] = value[jj]; 372 bap += bs; 373 } 374 } 375 } 376 goto noinsert2; 377 } 378 } 379 if (nonew == 1) goto noinsert2; 380 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol); 381 MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar); 382 N = nrow++ - 1; 383 high++; 384 /* shift up all the later entries in this row */ 385 PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1)); 386 PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1))); 387 rp[i] = col; 388 bap = ap + bs2 * i; 389 if (roworiented) { 390 for (ii = 0; ii < bs; ii++) { 391 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 392 } 393 } else { 394 for (ii = 0; ii < bs; ii++) { 395 for (jj = 0; jj < bs; jj++) *bap++ = *value++; 396 } 397 } 398 noinsert2:; 399 ailen[row] = nrow; 400 PetscFunctionReturn(PETSC_SUCCESS); 401 } 402 403 /* 404 This routine should be optimized so that the block copy at ** Here a copy is required ** below is not needed 405 by passing additional stride information into the MatSetValuesBlocked_SeqBAIJ_Inlined() routine 406 */ 407 static PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) 408 { 409 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 410 const PetscScalar *value; 411 MatScalar *barray = baij->barray; 412 PetscBool roworiented = baij->roworiented; 413 PetscInt i, j, ii, jj, row, col, rstart = baij->rstartbs; 414 PetscInt rend = baij->rendbs, cstart = baij->cstartbs, stepval; 415 PetscInt cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2; 416 417 PetscFunctionBegin; 418 if (!barray) { 419 PetscCall(PetscMalloc1(bs2, &barray)); 420 baij->barray = barray; 421 } 422 423 if (roworiented) stepval = (n - 1) * bs; 424 else stepval = (m - 1) * bs; 425 426 for (i = 0; i < m; i++) { 427 if (im[i] < 0) continue; 428 PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1); 429 if (im[i] >= rstart && im[i] < rend) { 430 row = im[i] - rstart; 431 for (j = 0; j < n; j++) { 432 /* If NumCol = 1 then a copy is not required */ 433 if ((roworiented) && (n == 1)) { 434 barray = (MatScalar *)v + i * bs2; 435 } else if ((!roworiented) && (m == 1)) { 436 barray = (MatScalar *)v + j * bs2; 437 } else { /* Here a copy is required */ 438 if (roworiented) { 439 value = v + (i * (stepval + bs) + j) * bs; 440 } else { 441 value = v + (j * (stepval + bs) + i) * bs; 442 } 443 for (ii = 0; ii < bs; ii++, value += bs + stepval) { 444 for (jj = 0; jj < bs; jj++) barray[jj] = value[jj]; 445 barray += bs; 446 } 447 barray -= bs2; 448 } 449 450 if (in[j] >= cstart && in[j] < cend) { 451 col = in[j] - cstart; 452 PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j])); 453 } else if (in[j] < 0) { 454 continue; 455 } else { 456 PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1); 457 if (mat->was_assembled) { 458 if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 459 460 #if defined(PETSC_USE_CTABLE) 461 PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col)); 462 col = col < 1 ? -1 : (col - 1) / bs; 463 #else 464 col = baij->colmap[in[j]] < 1 ? -1 : (baij->colmap[in[j]] - 1) / bs; 465 #endif 466 if (col < 0 && !((Mat_SeqBAIJ *)baij->B->data)->nonew) { 467 PetscCall(MatDisAssemble_MPIBAIJ(mat)); 468 col = in[j]; 469 } else PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new blocked indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]); 470 } else col = in[j]; 471 PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j])); 472 } 473 } 474 } else { 475 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]); 476 if (!baij->donotstash) { 477 if (roworiented) { 478 PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 479 } else { 480 PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 481 } 482 } 483 } 484 } 485 PetscFunctionReturn(PETSC_SUCCESS); 486 } 487 488 #define HASH_KEY 0.6180339887 489 #define HASH(size, key, tmp) (tmp = (key) * HASH_KEY, (PetscInt)((size) * (tmp - (PetscInt)tmp))) 490 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 491 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 492 static PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) 493 { 494 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 495 PetscBool roworiented = baij->roworiented; 496 PetscInt i, j, row, col; 497 PetscInt rstart_orig = mat->rmap->rstart; 498 PetscInt rend_orig = mat->rmap->rend, Nbs = baij->Nbs; 499 PetscInt h1, key, size = baij->ht_size, bs = mat->rmap->bs, *HT = baij->ht, idx; 500 PetscReal tmp; 501 MatScalar **HD = baij->hd, value; 502 PetscInt total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct; 503 504 PetscFunctionBegin; 505 for (i = 0; i < m; i++) { 506 if (PetscDefined(USE_DEBUG)) { 507 PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row"); 508 PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1); 509 } 510 row = im[i]; 511 if (row >= rstart_orig && row < rend_orig) { 512 for (j = 0; j < n; j++) { 513 col = in[j]; 514 if (roworiented) value = v[i * n + j]; 515 else value = v[i + j * m]; 516 /* Look up PetscInto the Hash Table */ 517 key = (row / bs) * Nbs + (col / bs) + 1; 518 h1 = HASH(size, key, tmp); 519 520 idx = h1; 521 if (PetscDefined(USE_DEBUG)) { 522 insert_ct++; 523 total_ct++; 524 if (HT[idx] != key) { 525 for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++); 526 if (idx == size) { 527 for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++); 528 PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col); 529 } 530 } 531 } else if (HT[idx] != key) { 532 for (idx = h1; (idx < size) && (HT[idx] != key); idx++); 533 if (idx == size) { 534 for (idx = 0; (idx < h1) && (HT[idx] != key); idx++); 535 PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col); 536 } 537 } 538 /* A HASH table entry is found, so insert the values at the correct address */ 539 if (addv == ADD_VALUES) *(HD[idx] + (col % bs) * bs + (row % bs)) += value; 540 else *(HD[idx] + (col % bs) * bs + (row % bs)) = value; 541 } 542 } else if (!baij->donotstash) { 543 if (roworiented) { 544 PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE)); 545 } else { 546 PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE)); 547 } 548 } 549 } 550 if (PetscDefined(USE_DEBUG)) { 551 baij->ht_total_ct += total_ct; 552 baij->ht_insert_ct += insert_ct; 553 } 554 PetscFunctionReturn(PETSC_SUCCESS); 555 } 556 557 static PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) 558 { 559 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 560 PetscBool roworiented = baij->roworiented; 561 PetscInt i, j, ii, jj, row, col; 562 PetscInt rstart = baij->rstartbs; 563 PetscInt rend = mat->rmap->rend, stepval, bs = mat->rmap->bs, bs2 = baij->bs2, nbs2 = n * bs2; 564 PetscInt h1, key, size = baij->ht_size, idx, *HT = baij->ht, Nbs = baij->Nbs; 565 PetscReal tmp; 566 MatScalar **HD = baij->hd, *baij_a; 567 const PetscScalar *v_t, *value; 568 PetscInt total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct; 569 570 PetscFunctionBegin; 571 if (roworiented) stepval = (n - 1) * bs; 572 else stepval = (m - 1) * bs; 573 574 for (i = 0; i < m; i++) { 575 if (PetscDefined(USE_DEBUG)) { 576 PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row: %" PetscInt_FMT, im[i]); 577 PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1); 578 } 579 row = im[i]; 580 v_t = v + i * nbs2; 581 if (row >= rstart && row < rend) { 582 for (j = 0; j < n; j++) { 583 col = in[j]; 584 585 /* Look up into the Hash Table */ 586 key = row * Nbs + col + 1; 587 h1 = HASH(size, key, tmp); 588 589 idx = h1; 590 if (PetscDefined(USE_DEBUG)) { 591 total_ct++; 592 insert_ct++; 593 if (HT[idx] != key) { 594 for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++); 595 if (idx == size) { 596 for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++); 597 PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col); 598 } 599 } 600 } else if (HT[idx] != key) { 601 for (idx = h1; (idx < size) && (HT[idx] != key); idx++); 602 if (idx == size) { 603 for (idx = 0; (idx < h1) && (HT[idx] != key); idx++); 604 PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col); 605 } 606 } 607 baij_a = HD[idx]; 608 if (roworiented) { 609 /*value = v + i*(stepval+bs)*bs + j*bs;*/ 610 /* value = v + (i*(stepval+bs)+j)*bs; */ 611 value = v_t; 612 v_t += bs; 613 if (addv == ADD_VALUES) { 614 for (ii = 0; ii < bs; ii++, value += stepval) { 615 for (jj = ii; jj < bs2; jj += bs) baij_a[jj] += *value++; 616 } 617 } else { 618 for (ii = 0; ii < bs; ii++, value += stepval) { 619 for (jj = ii; jj < bs2; jj += bs) baij_a[jj] = *value++; 620 } 621 } 622 } else { 623 value = v + j * (stepval + bs) * bs + i * bs; 624 if (addv == ADD_VALUES) { 625 for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) { 626 for (jj = 0; jj < bs; jj++) baij_a[jj] += *value++; 627 } 628 } else { 629 for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) { 630 for (jj = 0; jj < bs; jj++) baij_a[jj] = *value++; 631 } 632 } 633 } 634 } 635 } else { 636 if (!baij->donotstash) { 637 if (roworiented) { 638 PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 639 } else { 640 PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 641 } 642 } 643 } 644 } 645 if (PetscDefined(USE_DEBUG)) { 646 baij->ht_total_ct += total_ct; 647 baij->ht_insert_ct += insert_ct; 648 } 649 PetscFunctionReturn(PETSC_SUCCESS); 650 } 651 652 static PetscErrorCode MatGetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[]) 653 { 654 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 655 PetscInt bs = mat->rmap->bs, i, j, bsrstart = mat->rmap->rstart, bsrend = mat->rmap->rend; 656 PetscInt bscstart = mat->cmap->rstart, bscend = mat->cmap->rend, row, col, data; 657 658 PetscFunctionBegin; 659 for (i = 0; i < m; i++) { 660 if (idxm[i] < 0) continue; /* negative row */ 661 PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1); 662 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 663 row = idxm[i] - bsrstart; 664 for (j = 0; j < n; j++) { 665 if (idxn[j] < 0) continue; /* negative column */ 666 PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1); 667 if (idxn[j] >= bscstart && idxn[j] < bscend) { 668 col = idxn[j] - bscstart; 669 PetscCall(MatGetValues_SeqBAIJ(baij->A, 1, &row, 1, &col, v + i * n + j)); 670 } else { 671 if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 672 #if defined(PETSC_USE_CTABLE) 673 PetscCall(PetscHMapIGetWithDefault(baij->colmap, idxn[j] / bs + 1, 0, &data)); 674 data--; 675 #else 676 data = baij->colmap[idxn[j] / bs] - 1; 677 #endif 678 if ((data < 0) || (baij->garray[data / bs] != idxn[j] / bs)) *(v + i * n + j) = 0.0; 679 else { 680 col = data + idxn[j] % bs; 681 PetscCall(MatGetValues_SeqBAIJ(baij->B, 1, &row, 1, &col, v + i * n + j)); 682 } 683 } 684 } 685 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported"); 686 } 687 PetscFunctionReturn(PETSC_SUCCESS); 688 } 689 690 static PetscErrorCode MatNorm_MPIBAIJ(Mat mat, NormType type, PetscReal *nrm) 691 { 692 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 693 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ *)baij->A->data, *bmat = (Mat_SeqBAIJ *)baij->B->data; 694 PetscInt i, j, bs2 = baij->bs2, bs = baij->A->rmap->bs, nz, row, col; 695 PetscReal sum = 0.0; 696 MatScalar *v; 697 698 PetscFunctionBegin; 699 if (baij->size == 1) { 700 PetscCall(MatNorm(baij->A, type, nrm)); 701 } else { 702 if (type == NORM_FROBENIUS) { 703 v = amat->a; 704 nz = amat->nz * bs2; 705 for (i = 0; i < nz; i++) { 706 sum += PetscRealPart(PetscConj(*v) * (*v)); 707 v++; 708 } 709 v = bmat->a; 710 nz = bmat->nz * bs2; 711 for (i = 0; i < nz; i++) { 712 sum += PetscRealPart(PetscConj(*v) * (*v)); 713 v++; 714 } 715 PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 716 *nrm = PetscSqrtReal(*nrm); 717 } else if (type == NORM_1) { /* max column sum */ 718 PetscReal *tmp, *tmp2; 719 PetscInt *jj, *garray = baij->garray, cstart = baij->rstartbs; 720 PetscMPIInt iN; 721 722 PetscCall(PetscCalloc1(mat->cmap->N, &tmp)); 723 PetscCall(PetscMalloc1(mat->cmap->N, &tmp2)); 724 v = amat->a; 725 jj = amat->j; 726 for (i = 0; i < amat->nz; i++) { 727 for (j = 0; j < bs; j++) { 728 col = bs * (cstart + *jj) + j; /* column index */ 729 for (row = 0; row < bs; row++) { 730 tmp[col] += PetscAbsScalar(*v); 731 v++; 732 } 733 } 734 jj++; 735 } 736 v = bmat->a; 737 jj = bmat->j; 738 for (i = 0; i < bmat->nz; i++) { 739 for (j = 0; j < bs; j++) { 740 col = bs * garray[*jj] + j; 741 for (row = 0; row < bs; row++) { 742 tmp[col] += PetscAbsScalar(*v); 743 v++; 744 } 745 } 746 jj++; 747 } 748 PetscCall(PetscMPIIntCast(mat->cmap->N, &iN)); 749 PetscCall(MPIU_Allreduce(tmp, tmp2, iN, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 750 *nrm = 0.0; 751 for (j = 0; j < mat->cmap->N; j++) { 752 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 753 } 754 PetscCall(PetscFree(tmp)); 755 PetscCall(PetscFree(tmp2)); 756 } else if (type == NORM_INFINITY) { /* max row sum */ 757 PetscReal *sums; 758 PetscCall(PetscMalloc1(bs, &sums)); 759 sum = 0.0; 760 for (j = 0; j < amat->mbs; j++) { 761 for (row = 0; row < bs; row++) sums[row] = 0.0; 762 v = amat->a + bs2 * amat->i[j]; 763 nz = amat->i[j + 1] - amat->i[j]; 764 for (i = 0; i < nz; i++) { 765 for (col = 0; col < bs; col++) { 766 for (row = 0; row < bs; row++) { 767 sums[row] += PetscAbsScalar(*v); 768 v++; 769 } 770 } 771 } 772 v = bmat->a + bs2 * bmat->i[j]; 773 nz = bmat->i[j + 1] - bmat->i[j]; 774 for (i = 0; i < nz; i++) { 775 for (col = 0; col < bs; col++) { 776 for (row = 0; row < bs; row++) { 777 sums[row] += PetscAbsScalar(*v); 778 v++; 779 } 780 } 781 } 782 for (row = 0; row < bs; row++) { 783 if (sums[row] > sum) sum = sums[row]; 784 } 785 } 786 PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat))); 787 PetscCall(PetscFree(sums)); 788 } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for this norm yet"); 789 } 790 PetscFunctionReturn(PETSC_SUCCESS); 791 } 792 793 /* 794 Creates the hash table, and sets the table 795 This table is created only once. 796 If new entried need to be added to the matrix 797 then the hash table has to be destroyed and 798 recreated. 799 */ 800 static PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat, PetscReal factor) 801 { 802 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 803 Mat A = baij->A, B = baij->B; 804 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data; 805 PetscInt i, j, k, nz = a->nz + b->nz, h1, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j; 806 PetscInt ht_size, bs2 = baij->bs2, rstart = baij->rstartbs; 807 PetscInt cstart = baij->cstartbs, *garray = baij->garray, row, col, Nbs = baij->Nbs; 808 PetscInt *HT, key; 809 MatScalar **HD; 810 PetscReal tmp; 811 #if defined(PETSC_USE_INFO) 812 PetscInt ct = 0, max = 0; 813 #endif 814 815 PetscFunctionBegin; 816 if (baij->ht) PetscFunctionReturn(PETSC_SUCCESS); 817 818 baij->ht_size = (PetscInt)(factor * nz); 819 ht_size = baij->ht_size; 820 821 /* Allocate Memory for Hash Table */ 822 PetscCall(PetscCalloc2(ht_size, &baij->hd, ht_size, &baij->ht)); 823 HD = baij->hd; 824 HT = baij->ht; 825 826 /* Loop Over A */ 827 for (i = 0; i < a->mbs; i++) { 828 for (j = ai[i]; j < ai[i + 1]; j++) { 829 row = i + rstart; 830 col = aj[j] + cstart; 831 832 key = row * Nbs + col + 1; 833 h1 = HASH(ht_size, key, tmp); 834 for (k = 0; k < ht_size; k++) { 835 if (!HT[(h1 + k) % ht_size]) { 836 HT[(h1 + k) % ht_size] = key; 837 HD[(h1 + k) % ht_size] = a->a + j * bs2; 838 break; 839 #if defined(PETSC_USE_INFO) 840 } else { 841 ct++; 842 #endif 843 } 844 } 845 #if defined(PETSC_USE_INFO) 846 if (k > max) max = k; 847 #endif 848 } 849 } 850 /* Loop Over B */ 851 for (i = 0; i < b->mbs; i++) { 852 for (j = bi[i]; j < bi[i + 1]; j++) { 853 row = i + rstart; 854 col = garray[bj[j]]; 855 key = row * Nbs + col + 1; 856 h1 = HASH(ht_size, key, tmp); 857 for (k = 0; k < ht_size; k++) { 858 if (!HT[(h1 + k) % ht_size]) { 859 HT[(h1 + k) % ht_size] = key; 860 HD[(h1 + k) % ht_size] = b->a + j * bs2; 861 break; 862 #if defined(PETSC_USE_INFO) 863 } else { 864 ct++; 865 #endif 866 } 867 } 868 #if defined(PETSC_USE_INFO) 869 if (k > max) max = k; 870 #endif 871 } 872 } 873 874 /* Print Summary */ 875 #if defined(PETSC_USE_INFO) 876 for (i = 0, j = 0; i < ht_size; i++) { 877 if (HT[i]) j++; 878 } 879 PetscCall(PetscInfo(mat, "Average Search = %5.2g,max search = %" PetscInt_FMT "\n", (!j) ? (double)0.0 : (double)(((PetscReal)(ct + j)) / (double)j), max)); 880 #endif 881 PetscFunctionReturn(PETSC_SUCCESS); 882 } 883 884 static PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat, MatAssemblyType mode) 885 { 886 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 887 PetscInt nstash, reallocs; 888 889 PetscFunctionBegin; 890 if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS); 891 892 PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range)); 893 PetscCall(MatStashScatterBegin_Private(mat, &mat->bstash, baij->rangebs)); 894 PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs)); 895 PetscCall(PetscInfo(mat, "Stash has %" PetscInt_FMT " entries,uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs)); 896 PetscCall(MatStashGetInfo_Private(&mat->bstash, &nstash, &reallocs)); 897 PetscCall(PetscInfo(mat, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs)); 898 PetscFunctionReturn(PETSC_SUCCESS); 899 } 900 901 static PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat, MatAssemblyType mode) 902 { 903 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 904 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)baij->A->data; 905 PetscInt i, j, rstart, ncols, flg, bs2 = baij->bs2; 906 PetscInt *row, *col; 907 PetscBool r1, r2, r3, other_disassembled; 908 MatScalar *val; 909 PetscMPIInt n; 910 911 PetscFunctionBegin; 912 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 913 if (!baij->donotstash && !mat->nooffprocentries) { 914 while (1) { 915 PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg)); 916 if (!flg) break; 917 918 for (i = 0; i < n;) { 919 /* Now identify the consecutive vals belonging to the same row */ 920 for (j = i, rstart = row[j]; j < n; j++) { 921 if (row[j] != rstart) break; 922 } 923 if (j < n) ncols = j - i; 924 else ncols = n - i; 925 /* Now assemble all these values with a single function call */ 926 PetscCall(MatSetValues_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode)); 927 i = j; 928 } 929 } 930 PetscCall(MatStashScatterEnd_Private(&mat->stash)); 931 /* Now process the block-stash. Since the values are stashed column-oriented, 932 set the row-oriented flag to column-oriented, and after MatSetValues() 933 restore the original flags */ 934 r1 = baij->roworiented; 935 r2 = a->roworiented; 936 r3 = ((Mat_SeqBAIJ *)baij->B->data)->roworiented; 937 938 baij->roworiented = PETSC_FALSE; 939 a->roworiented = PETSC_FALSE; 940 ((Mat_SeqBAIJ *)baij->B->data)->roworiented = PETSC_FALSE; 941 while (1) { 942 PetscCall(MatStashScatterGetMesg_Private(&mat->bstash, &n, &row, &col, &val, &flg)); 943 if (!flg) break; 944 945 for (i = 0; i < n;) { 946 /* Now identify the consecutive vals belonging to the same row */ 947 for (j = i, rstart = row[j]; j < n; j++) { 948 if (row[j] != rstart) break; 949 } 950 if (j < n) ncols = j - i; 951 else ncols = n - i; 952 PetscCall(MatSetValuesBlocked_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i * bs2, mat->insertmode)); 953 i = j; 954 } 955 } 956 PetscCall(MatStashScatterEnd_Private(&mat->bstash)); 957 958 baij->roworiented = r1; 959 a->roworiented = r2; 960 ((Mat_SeqBAIJ *)baij->B->data)->roworiented = r3; 961 } 962 963 PetscCall(MatAssemblyBegin(baij->A, mode)); 964 PetscCall(MatAssemblyEnd(baij->A, mode)); 965 966 /* determine if any processor has disassembled, if so we must 967 also disassemble ourselves, in order that we may reassemble. */ 968 /* 969 if nonzero structure of submatrix B cannot change then we know that 970 no processor disassembled thus we can skip this stuff 971 */ 972 if (!((Mat_SeqBAIJ *)baij->B->data)->nonew) { 973 PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 974 if (mat->was_assembled && !other_disassembled) PetscCall(MatDisAssemble_MPIBAIJ(mat)); 975 } 976 977 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIBAIJ(mat)); 978 PetscCall(MatAssemblyBegin(baij->B, mode)); 979 PetscCall(MatAssemblyEnd(baij->B, mode)); 980 981 #if defined(PETSC_USE_INFO) 982 if (baij->ht && mode == MAT_FINAL_ASSEMBLY) { 983 PetscCall(PetscInfo(mat, "Average Hash Table Search in MatSetValues = %5.2f\n", (double)((PetscReal)baij->ht_total_ct) / baij->ht_insert_ct)); 984 985 baij->ht_total_ct = 0; 986 baij->ht_insert_ct = 0; 987 } 988 #endif 989 if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 990 PetscCall(MatCreateHashTable_MPIBAIJ_Private(mat, baij->ht_fact)); 991 992 mat->ops->setvalues = MatSetValues_MPIBAIJ_HT; 993 mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 994 } 995 996 PetscCall(PetscFree2(baij->rowvalues, baij->rowindices)); 997 998 baij->rowvalues = NULL; 999 1000 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 1001 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ *)baij->A->data)->nonew) { 1002 PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate; 1003 PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat))); 1004 } 1005 PetscFunctionReturn(PETSC_SUCCESS); 1006 } 1007 1008 extern PetscErrorCode MatView_SeqBAIJ(Mat, PetscViewer); 1009 #include <petscdraw.h> 1010 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer) 1011 { 1012 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 1013 PetscMPIInt rank = baij->rank; 1014 PetscInt bs = mat->rmap->bs; 1015 PetscBool iascii, isdraw; 1016 PetscViewer sviewer; 1017 PetscViewerFormat format; 1018 1019 PetscFunctionBegin; 1020 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 1021 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1022 if (iascii) { 1023 PetscCall(PetscViewerGetFormat(viewer, &format)); 1024 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1025 MatInfo info; 1026 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 1027 PetscCall(MatGetInfo(mat, MAT_LOCAL, &info)); 1028 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 1029 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, 1030 mat->rmap->bs, (double)info.memory)); 1031 PetscCall(MatGetInfo(baij->A, MAT_LOCAL, &info)); 1032 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used)); 1033 PetscCall(MatGetInfo(baij->B, MAT_LOCAL, &info)); 1034 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used)); 1035 PetscCall(PetscViewerFlush(viewer)); 1036 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 1037 PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n")); 1038 PetscCall(VecScatterView(baij->Mvctx, viewer)); 1039 PetscFunctionReturn(PETSC_SUCCESS); 1040 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1041 PetscCall(PetscViewerASCIIPrintf(viewer, " block size is %" PetscInt_FMT "\n", bs)); 1042 PetscFunctionReturn(PETSC_SUCCESS); 1043 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1044 PetscFunctionReturn(PETSC_SUCCESS); 1045 } 1046 } 1047 1048 if (isdraw) { 1049 PetscDraw draw; 1050 PetscBool isnull; 1051 PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); 1052 PetscCall(PetscDrawIsNull(draw, &isnull)); 1053 if (isnull) PetscFunctionReturn(PETSC_SUCCESS); 1054 } 1055 1056 { 1057 /* assemble the entire matrix onto first processor. */ 1058 Mat A; 1059 Mat_SeqBAIJ *Aloc; 1060 PetscInt M = mat->rmap->N, N = mat->cmap->N, *ai, *aj, col, i, j, k, *rvals, mbs = baij->mbs; 1061 MatScalar *a; 1062 const char *matname; 1063 1064 /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */ 1065 /* Perhaps this should be the type of mat? */ 1066 PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A)); 1067 if (rank == 0) { 1068 PetscCall(MatSetSizes(A, M, N, M, N)); 1069 } else { 1070 PetscCall(MatSetSizes(A, 0, 0, M, N)); 1071 } 1072 PetscCall(MatSetType(A, MATMPIBAIJ)); 1073 PetscCall(MatMPIBAIJSetPreallocation(A, mat->rmap->bs, 0, NULL, 0, NULL)); 1074 PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE)); 1075 1076 /* copy over the A part */ 1077 Aloc = (Mat_SeqBAIJ *)baij->A->data; 1078 ai = Aloc->i; 1079 aj = Aloc->j; 1080 a = Aloc->a; 1081 PetscCall(PetscMalloc1(bs, &rvals)); 1082 1083 for (i = 0; i < mbs; i++) { 1084 rvals[0] = bs * (baij->rstartbs + i); 1085 for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1; 1086 for (j = ai[i]; j < ai[i + 1]; j++) { 1087 col = (baij->cstartbs + aj[j]) * bs; 1088 for (k = 0; k < bs; k++) { 1089 PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES)); 1090 col++; 1091 a += bs; 1092 } 1093 } 1094 } 1095 /* copy over the B part */ 1096 Aloc = (Mat_SeqBAIJ *)baij->B->data; 1097 ai = Aloc->i; 1098 aj = Aloc->j; 1099 a = Aloc->a; 1100 for (i = 0; i < mbs; i++) { 1101 rvals[0] = bs * (baij->rstartbs + i); 1102 for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1; 1103 for (j = ai[i]; j < ai[i + 1]; j++) { 1104 col = baij->garray[aj[j]] * bs; 1105 for (k = 0; k < bs; k++) { 1106 PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES)); 1107 col++; 1108 a += bs; 1109 } 1110 } 1111 } 1112 PetscCall(PetscFree(rvals)); 1113 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 1114 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 1115 /* 1116 Everyone has to call to draw the matrix since the graphics waits are 1117 synchronized across all processors that share the PetscDraw object 1118 */ 1119 PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 1120 if (((PetscObject)mat)->name) PetscCall(PetscObjectGetName((PetscObject)mat, &matname)); 1121 if (rank == 0) { 1122 if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIBAIJ *)A->data)->A, matname)); 1123 PetscCall(MatView_SeqBAIJ(((Mat_MPIBAIJ *)A->data)->A, sviewer)); 1124 } 1125 PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 1126 PetscCall(MatDestroy(&A)); 1127 } 1128 PetscFunctionReturn(PETSC_SUCCESS); 1129 } 1130 1131 /* Used for both MPIBAIJ and MPISBAIJ matrices */ 1132 PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat, PetscViewer viewer) 1133 { 1134 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 1135 Mat_SeqBAIJ *A = (Mat_SeqBAIJ *)aij->A->data; 1136 Mat_SeqBAIJ *B = (Mat_SeqBAIJ *)aij->B->data; 1137 const PetscInt *garray = aij->garray; 1138 PetscInt header[4], M, N, m, rs, cs, bs, cnt, i, j, ja, jb, k, l; 1139 PetscCount nz, hnz; 1140 PetscInt *rowlens, *colidxs; 1141 PetscScalar *matvals; 1142 PetscMPIInt rank; 1143 1144 PetscFunctionBegin; 1145 PetscCall(PetscViewerSetUp(viewer)); 1146 1147 M = mat->rmap->N; 1148 N = mat->cmap->N; 1149 m = mat->rmap->n; 1150 rs = mat->rmap->rstart; 1151 cs = mat->cmap->rstart; 1152 bs = mat->rmap->bs; 1153 nz = bs * bs * (A->nz + B->nz); 1154 1155 /* write matrix header */ 1156 header[0] = MAT_FILE_CLASSID; 1157 header[1] = M; 1158 header[2] = N; 1159 PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_COUNT, MPI_SUM, 0, PetscObjectComm((PetscObject)mat))); 1160 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 1161 if (rank == 0) PetscCall(PetscIntCast(hnz, &header[3])); 1162 PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT)); 1163 1164 /* fill in and store row lengths */ 1165 PetscCall(PetscMalloc1(m, &rowlens)); 1166 for (cnt = 0, i = 0; i < A->mbs; i++) 1167 for (j = 0; j < bs; j++) rowlens[cnt++] = bs * (A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i]); 1168 PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT)); 1169 PetscCall(PetscFree(rowlens)); 1170 1171 /* fill in and store column indices */ 1172 PetscCall(PetscMalloc1(nz, &colidxs)); 1173 for (cnt = 0, i = 0; i < A->mbs; i++) { 1174 for (k = 0; k < bs; k++) { 1175 for (jb = B->i[i]; jb < B->i[i + 1]; jb++) { 1176 if (garray[B->j[jb]] > cs / bs) break; 1177 for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l; 1178 } 1179 for (ja = A->i[i]; ja < A->i[i + 1]; ja++) 1180 for (l = 0; l < bs; l++) colidxs[cnt++] = bs * A->j[ja] + l + cs; 1181 for (; jb < B->i[i + 1]; jb++) 1182 for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l; 1183 } 1184 } 1185 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscCount_FMT, cnt, nz); 1186 PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_INT)); 1187 PetscCall(PetscFree(colidxs)); 1188 1189 /* fill in and store nonzero values */ 1190 PetscCall(PetscMalloc1(nz, &matvals)); 1191 for (cnt = 0, i = 0; i < A->mbs; i++) { 1192 for (k = 0; k < bs; k++) { 1193 for (jb = B->i[i]; jb < B->i[i + 1]; jb++) { 1194 if (garray[B->j[jb]] > cs / bs) break; 1195 for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k]; 1196 } 1197 for (ja = A->i[i]; ja < A->i[i + 1]; ja++) 1198 for (l = 0; l < bs; l++) matvals[cnt++] = A->a[bs * (bs * ja + l) + k]; 1199 for (; jb < B->i[i + 1]; jb++) 1200 for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k]; 1201 } 1202 } 1203 PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_SCALAR)); 1204 PetscCall(PetscFree(matvals)); 1205 1206 /* write block size option to the viewer's .info file */ 1207 PetscCall(MatView_Binary_BlockSizes(mat, viewer)); 1208 PetscFunctionReturn(PETSC_SUCCESS); 1209 } 1210 1211 PetscErrorCode MatView_MPIBAIJ(Mat mat, PetscViewer viewer) 1212 { 1213 PetscBool iascii, isdraw, issocket, isbinary; 1214 1215 PetscFunctionBegin; 1216 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 1217 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1218 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket)); 1219 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1220 if (iascii || isdraw || issocket) { 1221 PetscCall(MatView_MPIBAIJ_ASCIIorDraworSocket(mat, viewer)); 1222 } else if (isbinary) PetscCall(MatView_MPIBAIJ_Binary(mat, viewer)); 1223 PetscFunctionReturn(PETSC_SUCCESS); 1224 } 1225 1226 static PetscErrorCode MatMult_MPIBAIJ(Mat A, Vec xx, Vec yy) 1227 { 1228 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1229 PetscInt nt; 1230 1231 PetscFunctionBegin; 1232 PetscCall(VecGetLocalSize(xx, &nt)); 1233 PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A and xx"); 1234 PetscCall(VecGetLocalSize(yy, &nt)); 1235 PetscCheck(nt == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A and yy"); 1236 PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1237 PetscCall((*a->A->ops->mult)(a->A, xx, yy)); 1238 PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1239 PetscCall((*a->B->ops->multadd)(a->B, a->lvec, yy, yy)); 1240 PetscFunctionReturn(PETSC_SUCCESS); 1241 } 1242 1243 static PetscErrorCode MatMultAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz) 1244 { 1245 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1246 1247 PetscFunctionBegin; 1248 PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1249 PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz)); 1250 PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1251 PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz)); 1252 PetscFunctionReturn(PETSC_SUCCESS); 1253 } 1254 1255 static PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A, Vec xx, Vec yy) 1256 { 1257 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1258 1259 PetscFunctionBegin; 1260 /* do nondiagonal part */ 1261 PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec)); 1262 /* do local part */ 1263 PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy)); 1264 /* add partial results together */ 1265 PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE)); 1266 PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE)); 1267 PetscFunctionReturn(PETSC_SUCCESS); 1268 } 1269 1270 static PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz) 1271 { 1272 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1273 1274 PetscFunctionBegin; 1275 /* do nondiagonal part */ 1276 PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec)); 1277 /* do local part */ 1278 PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz)); 1279 /* add partial results together */ 1280 PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE)); 1281 PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE)); 1282 PetscFunctionReturn(PETSC_SUCCESS); 1283 } 1284 1285 /* 1286 This only works correctly for square matrices where the subblock A->A is the 1287 diagonal block 1288 */ 1289 static PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A, Vec v) 1290 { 1291 PetscFunctionBegin; 1292 PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block"); 1293 PetscCall(MatGetDiagonal(((Mat_MPIBAIJ *)A->data)->A, v)); 1294 PetscFunctionReturn(PETSC_SUCCESS); 1295 } 1296 1297 static PetscErrorCode MatScale_MPIBAIJ(Mat A, PetscScalar aa) 1298 { 1299 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1300 1301 PetscFunctionBegin; 1302 PetscCall(MatScale(a->A, aa)); 1303 PetscCall(MatScale(a->B, aa)); 1304 PetscFunctionReturn(PETSC_SUCCESS); 1305 } 1306 1307 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1308 { 1309 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data; 1310 PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p; 1311 PetscInt bs = matin->rmap->bs, bs2 = mat->bs2, i, *cworkA, *cworkB, **pcA, **pcB; 1312 PetscInt nztot, nzA, nzB, lrow, brstart = matin->rmap->rstart, brend = matin->rmap->rend; 1313 PetscInt *cmap, *idx_p, cstart = mat->cstartbs; 1314 1315 PetscFunctionBegin; 1316 PetscCheck(row >= brstart && row < brend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local rows"); 1317 PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active"); 1318 mat->getrowactive = PETSC_TRUE; 1319 1320 if (!mat->rowvalues && (idx || v)) { 1321 /* 1322 allocate enough space to hold information from the longest row. 1323 */ 1324 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *)mat->A->data, *Ba = (Mat_SeqBAIJ *)mat->B->data; 1325 PetscInt max = 1, mbs = mat->mbs, tmp; 1326 for (i = 0; i < mbs; i++) { 1327 tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i]; 1328 if (max < tmp) max = tmp; 1329 } 1330 PetscCall(PetscMalloc2(max * bs2, &mat->rowvalues, max * bs2, &mat->rowindices)); 1331 } 1332 lrow = row - brstart; 1333 1334 pvA = &vworkA; 1335 pcA = &cworkA; 1336 pvB = &vworkB; 1337 pcB = &cworkB; 1338 if (!v) { 1339 pvA = NULL; 1340 pvB = NULL; 1341 } 1342 if (!idx) { 1343 pcA = NULL; 1344 if (!v) pcB = NULL; 1345 } 1346 PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA)); 1347 PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB)); 1348 nztot = nzA + nzB; 1349 1350 cmap = mat->garray; 1351 if (v || idx) { 1352 if (nztot) { 1353 /* Sort by increasing column numbers, assuming A and B already sorted */ 1354 PetscInt imark = -1; 1355 if (v) { 1356 *v = v_p = mat->rowvalues; 1357 for (i = 0; i < nzB; i++) { 1358 if (cmap[cworkB[i] / bs] < cstart) v_p[i] = vworkB[i]; 1359 else break; 1360 } 1361 imark = i; 1362 for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i]; 1363 for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i]; 1364 } 1365 if (idx) { 1366 *idx = idx_p = mat->rowindices; 1367 if (imark > -1) { 1368 for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs; 1369 } else { 1370 for (i = 0; i < nzB; i++) { 1371 if (cmap[cworkB[i] / bs] < cstart) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs; 1372 else break; 1373 } 1374 imark = i; 1375 } 1376 for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart * bs + cworkA[i]; 1377 for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs; 1378 } 1379 } else { 1380 if (idx) *idx = NULL; 1381 if (v) *v = NULL; 1382 } 1383 } 1384 *nz = nztot; 1385 PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA)); 1386 PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB)); 1387 PetscFunctionReturn(PETSC_SUCCESS); 1388 } 1389 1390 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1391 { 1392 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 1393 1394 PetscFunctionBegin; 1395 PetscCheck(baij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow not called"); 1396 baij->getrowactive = PETSC_FALSE; 1397 PetscFunctionReturn(PETSC_SUCCESS); 1398 } 1399 1400 static PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A) 1401 { 1402 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data; 1403 1404 PetscFunctionBegin; 1405 PetscCall(MatZeroEntries(l->A)); 1406 PetscCall(MatZeroEntries(l->B)); 1407 PetscFunctionReturn(PETSC_SUCCESS); 1408 } 1409 1410 static PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin, MatInfoType flag, MatInfo *info) 1411 { 1412 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)matin->data; 1413 Mat A = a->A, B = a->B; 1414 PetscLogDouble isend[5], irecv[5]; 1415 1416 PetscFunctionBegin; 1417 info->block_size = (PetscReal)matin->rmap->bs; 1418 1419 PetscCall(MatGetInfo(A, MAT_LOCAL, info)); 1420 1421 isend[0] = info->nz_used; 1422 isend[1] = info->nz_allocated; 1423 isend[2] = info->nz_unneeded; 1424 isend[3] = info->memory; 1425 isend[4] = info->mallocs; 1426 1427 PetscCall(MatGetInfo(B, MAT_LOCAL, info)); 1428 1429 isend[0] += info->nz_used; 1430 isend[1] += info->nz_allocated; 1431 isend[2] += info->nz_unneeded; 1432 isend[3] += info->memory; 1433 isend[4] += info->mallocs; 1434 1435 if (flag == MAT_LOCAL) { 1436 info->nz_used = isend[0]; 1437 info->nz_allocated = isend[1]; 1438 info->nz_unneeded = isend[2]; 1439 info->memory = isend[3]; 1440 info->mallocs = isend[4]; 1441 } else if (flag == MAT_GLOBAL_MAX) { 1442 PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin))); 1443 1444 info->nz_used = irecv[0]; 1445 info->nz_allocated = irecv[1]; 1446 info->nz_unneeded = irecv[2]; 1447 info->memory = irecv[3]; 1448 info->mallocs = irecv[4]; 1449 } else if (flag == MAT_GLOBAL_SUM) { 1450 PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin))); 1451 1452 info->nz_used = irecv[0]; 1453 info->nz_allocated = irecv[1]; 1454 info->nz_unneeded = irecv[2]; 1455 info->memory = irecv[3]; 1456 info->mallocs = irecv[4]; 1457 } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_ARG_WRONG, "Unknown MatInfoType argument %d", (int)flag); 1458 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1459 info->fill_ratio_needed = 0; 1460 info->factor_mallocs = 0; 1461 PetscFunctionReturn(PETSC_SUCCESS); 1462 } 1463 1464 static PetscErrorCode MatSetOption_MPIBAIJ(Mat A, MatOption op, PetscBool flg) 1465 { 1466 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1467 1468 PetscFunctionBegin; 1469 switch (op) { 1470 case MAT_NEW_NONZERO_LOCATIONS: 1471 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1472 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1473 case MAT_KEEP_NONZERO_PATTERN: 1474 case MAT_NEW_NONZERO_LOCATION_ERR: 1475 MatCheckPreallocated(A, 1); 1476 PetscCall(MatSetOption(a->A, op, flg)); 1477 PetscCall(MatSetOption(a->B, op, flg)); 1478 break; 1479 case MAT_ROW_ORIENTED: 1480 MatCheckPreallocated(A, 1); 1481 a->roworiented = flg; 1482 1483 PetscCall(MatSetOption(a->A, op, flg)); 1484 PetscCall(MatSetOption(a->B, op, flg)); 1485 break; 1486 case MAT_FORCE_DIAGONAL_ENTRIES: 1487 case MAT_SORTED_FULL: 1488 PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); 1489 break; 1490 case MAT_IGNORE_OFF_PROC_ENTRIES: 1491 a->donotstash = flg; 1492 break; 1493 case MAT_USE_HASH_TABLE: 1494 a->ht_flag = flg; 1495 a->ht_fact = 1.39; 1496 break; 1497 case MAT_SYMMETRIC: 1498 case MAT_STRUCTURALLY_SYMMETRIC: 1499 case MAT_HERMITIAN: 1500 case MAT_SUBMAT_SINGLEIS: 1501 case MAT_SYMMETRY_ETERNAL: 1502 case MAT_STRUCTURAL_SYMMETRY_ETERNAL: 1503 case MAT_SPD_ETERNAL: 1504 /* if the diagonal matrix is square it inherits some of the properties above */ 1505 break; 1506 default: 1507 SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "unknown option %d", op); 1508 } 1509 PetscFunctionReturn(PETSC_SUCCESS); 1510 } 1511 1512 static PetscErrorCode MatTranspose_MPIBAIJ(Mat A, MatReuse reuse, Mat *matout) 1513 { 1514 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)A->data; 1515 Mat_SeqBAIJ *Aloc; 1516 Mat B; 1517 PetscInt M = A->rmap->N, N = A->cmap->N, *ai, *aj, i, *rvals, j, k, col; 1518 PetscInt bs = A->rmap->bs, mbs = baij->mbs; 1519 MatScalar *a; 1520 1521 PetscFunctionBegin; 1522 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout)); 1523 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 1524 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 1525 PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M)); 1526 PetscCall(MatSetType(B, ((PetscObject)A)->type_name)); 1527 /* Do not know preallocation information, but must set block size */ 1528 PetscCall(MatMPIBAIJSetPreallocation(B, A->rmap->bs, PETSC_DECIDE, NULL, PETSC_DECIDE, NULL)); 1529 } else { 1530 B = *matout; 1531 } 1532 1533 /* copy over the A part */ 1534 Aloc = (Mat_SeqBAIJ *)baij->A->data; 1535 ai = Aloc->i; 1536 aj = Aloc->j; 1537 a = Aloc->a; 1538 PetscCall(PetscMalloc1(bs, &rvals)); 1539 1540 for (i = 0; i < mbs; i++) { 1541 rvals[0] = bs * (baij->rstartbs + i); 1542 for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1; 1543 for (j = ai[i]; j < ai[i + 1]; j++) { 1544 col = (baij->cstartbs + aj[j]) * bs; 1545 for (k = 0; k < bs; k++) { 1546 PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES)); 1547 1548 col++; 1549 a += bs; 1550 } 1551 } 1552 } 1553 /* copy over the B part */ 1554 Aloc = (Mat_SeqBAIJ *)baij->B->data; 1555 ai = Aloc->i; 1556 aj = Aloc->j; 1557 a = Aloc->a; 1558 for (i = 0; i < mbs; i++) { 1559 rvals[0] = bs * (baij->rstartbs + i); 1560 for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1; 1561 for (j = ai[i]; j < ai[i + 1]; j++) { 1562 col = baij->garray[aj[j]] * bs; 1563 for (k = 0; k < bs; k++) { 1564 PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES)); 1565 col++; 1566 a += bs; 1567 } 1568 } 1569 } 1570 PetscCall(PetscFree(rvals)); 1571 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 1572 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 1573 1574 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) *matout = B; 1575 else PetscCall(MatHeaderMerge(A, &B)); 1576 PetscFunctionReturn(PETSC_SUCCESS); 1577 } 1578 1579 static PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat, Vec ll, Vec rr) 1580 { 1581 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 1582 Mat a = baij->A, b = baij->B; 1583 PetscInt s1, s2, s3; 1584 1585 PetscFunctionBegin; 1586 PetscCall(MatGetLocalSize(mat, &s2, &s3)); 1587 if (rr) { 1588 PetscCall(VecGetLocalSize(rr, &s1)); 1589 PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size"); 1590 /* Overlap communication with computation. */ 1591 PetscCall(VecScatterBegin(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1592 } 1593 if (ll) { 1594 PetscCall(VecGetLocalSize(ll, &s1)); 1595 PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size"); 1596 PetscUseTypeMethod(b, diagonalscale, ll, NULL); 1597 } 1598 /* scale the diagonal block */ 1599 PetscUseTypeMethod(a, diagonalscale, ll, rr); 1600 1601 if (rr) { 1602 /* Do a scatter end and then right scale the off-diagonal block */ 1603 PetscCall(VecScatterEnd(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1604 PetscUseTypeMethod(b, diagonalscale, NULL, baij->lvec); 1605 } 1606 PetscFunctionReturn(PETSC_SUCCESS); 1607 } 1608 1609 static PetscErrorCode MatZeroRows_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 1610 { 1611 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data; 1612 PetscInt *lrows; 1613 PetscInt r, len; 1614 PetscBool cong; 1615 1616 PetscFunctionBegin; 1617 /* get locally owned rows */ 1618 PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows)); 1619 /* fix right-hand side if needed */ 1620 if (x && b) { 1621 const PetscScalar *xx; 1622 PetscScalar *bb; 1623 1624 PetscCall(VecGetArrayRead(x, &xx)); 1625 PetscCall(VecGetArray(b, &bb)); 1626 for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]]; 1627 PetscCall(VecRestoreArrayRead(x, &xx)); 1628 PetscCall(VecRestoreArray(b, &bb)); 1629 } 1630 1631 /* actually zap the local rows */ 1632 /* 1633 Zero the required rows. If the "diagonal block" of the matrix 1634 is square and the user wishes to set the diagonal we use separate 1635 code so that MatSetValues() is not called for each diagonal allocating 1636 new memory, thus calling lots of mallocs and slowing things down. 1637 1638 */ 1639 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1640 PetscCall(MatZeroRows_SeqBAIJ(l->B, len, lrows, 0.0, NULL, NULL)); 1641 PetscCall(MatHasCongruentLayouts(A, &cong)); 1642 if ((diag != 0.0) && cong) { 1643 PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, diag, NULL, NULL)); 1644 } else if (diag != 0.0) { 1645 PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL)); 1646 PetscCheck(!((Mat_SeqBAIJ *)l->A->data)->nonew, PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options MAT_NEW_NONZERO_LOCATIONS, MAT_NEW_NONZERO_LOCATION_ERR, and MAT_NEW_NONZERO_ALLOCATION_ERR"); 1647 for (r = 0; r < len; ++r) { 1648 const PetscInt row = lrows[r] + A->rmap->rstart; 1649 PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES)); 1650 } 1651 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 1652 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 1653 } else { 1654 PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL)); 1655 } 1656 PetscCall(PetscFree(lrows)); 1657 1658 /* only change matrix nonzero state if pattern was allowed to be changed */ 1659 if (!((Mat_SeqBAIJ *)l->A->data)->keepnonzeropattern || !((Mat_SeqBAIJ *)l->A->data)->nonew) { 1660 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1661 PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A))); 1662 } 1663 PetscFunctionReturn(PETSC_SUCCESS); 1664 } 1665 1666 static PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 1667 { 1668 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data; 1669 PetscMPIInt n, p = 0; 1670 PetscInt i, j, k, r, len = 0, row, col, count; 1671 PetscInt *lrows, *owners = A->rmap->range; 1672 PetscSFNode *rrows; 1673 PetscSF sf; 1674 const PetscScalar *xx; 1675 PetscScalar *bb, *mask; 1676 Vec xmask, lmask; 1677 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)l->B->data; 1678 PetscInt bs = A->rmap->bs, bs2 = baij->bs2; 1679 PetscScalar *aa; 1680 1681 PetscFunctionBegin; 1682 PetscCall(PetscMPIIntCast(A->rmap->n, &n)); 1683 /* Create SF where leaves are input rows and roots are owned rows */ 1684 PetscCall(PetscMalloc1(n, &lrows)); 1685 for (r = 0; r < n; ++r) lrows[r] = -1; 1686 PetscCall(PetscMalloc1(N, &rrows)); 1687 for (r = 0; r < N; ++r) { 1688 const PetscInt idx = rows[r]; 1689 PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N); 1690 if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */ 1691 PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p)); 1692 } 1693 rrows[r].rank = p; 1694 rrows[r].index = rows[r] - owners[p]; 1695 } 1696 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1697 PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER)); 1698 /* Collect flags for rows to be zeroed */ 1699 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR)); 1700 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR)); 1701 PetscCall(PetscSFDestroy(&sf)); 1702 /* Compress and put in row numbers */ 1703 for (r = 0; r < n; ++r) 1704 if (lrows[r] >= 0) lrows[len++] = r; 1705 /* zero diagonal part of matrix */ 1706 PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b)); 1707 /* handle off-diagonal part of matrix */ 1708 PetscCall(MatCreateVecs(A, &xmask, NULL)); 1709 PetscCall(VecDuplicate(l->lvec, &lmask)); 1710 PetscCall(VecGetArray(xmask, &bb)); 1711 for (i = 0; i < len; i++) bb[lrows[i]] = 1; 1712 PetscCall(VecRestoreArray(xmask, &bb)); 1713 PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD)); 1714 PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD)); 1715 PetscCall(VecDestroy(&xmask)); 1716 if (x) { 1717 PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1718 PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1719 PetscCall(VecGetArrayRead(l->lvec, &xx)); 1720 PetscCall(VecGetArray(b, &bb)); 1721 } 1722 PetscCall(VecGetArray(lmask, &mask)); 1723 /* remove zeroed rows of off-diagonal matrix */ 1724 for (i = 0; i < len; ++i) { 1725 row = lrows[i]; 1726 count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs; 1727 aa = ((MatScalar *)baij->a) + baij->i[row / bs] * bs2 + (row % bs); 1728 for (k = 0; k < count; ++k) { 1729 aa[0] = 0.0; 1730 aa += bs; 1731 } 1732 } 1733 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1734 for (i = 0; i < l->B->rmap->N; ++i) { 1735 row = i / bs; 1736 for (j = baij->i[row]; j < baij->i[row + 1]; ++j) { 1737 for (k = 0; k < bs; ++k) { 1738 col = bs * baij->j[j] + k; 1739 if (PetscAbsScalar(mask[col])) { 1740 aa = ((MatScalar *)baij->a) + j * bs2 + (i % bs) + bs * k; 1741 if (x) bb[i] -= aa[0] * xx[col]; 1742 aa[0] = 0.0; 1743 } 1744 } 1745 } 1746 } 1747 if (x) { 1748 PetscCall(VecRestoreArray(b, &bb)); 1749 PetscCall(VecRestoreArrayRead(l->lvec, &xx)); 1750 } 1751 PetscCall(VecRestoreArray(lmask, &mask)); 1752 PetscCall(VecDestroy(&lmask)); 1753 PetscCall(PetscFree(lrows)); 1754 1755 /* only change matrix nonzero state if pattern was allowed to be changed */ 1756 if (!((Mat_SeqBAIJ *)l->A->data)->nonew) { 1757 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1758 PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A))); 1759 } 1760 PetscFunctionReturn(PETSC_SUCCESS); 1761 } 1762 1763 static PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1764 { 1765 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1766 1767 PetscFunctionBegin; 1768 PetscCall(MatSetUnfactored(a->A)); 1769 PetscFunctionReturn(PETSC_SUCCESS); 1770 } 1771 1772 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat, MatDuplicateOption, Mat *); 1773 1774 static PetscErrorCode MatEqual_MPIBAIJ(Mat A, Mat B, PetscBool *flag) 1775 { 1776 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ *)B->data, *matA = (Mat_MPIBAIJ *)A->data; 1777 Mat a, b, c, d; 1778 PetscBool flg; 1779 1780 PetscFunctionBegin; 1781 a = matA->A; 1782 b = matA->B; 1783 c = matB->A; 1784 d = matB->B; 1785 1786 PetscCall(MatEqual(a, c, &flg)); 1787 if (flg) PetscCall(MatEqual(b, d, &flg)); 1788 PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A))); 1789 PetscFunctionReturn(PETSC_SUCCESS); 1790 } 1791 1792 static PetscErrorCode MatCopy_MPIBAIJ(Mat A, Mat B, MatStructure str) 1793 { 1794 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1795 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 1796 1797 PetscFunctionBegin; 1798 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1799 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1800 PetscCall(MatCopy_Basic(A, B, str)); 1801 } else { 1802 PetscCall(MatCopy(a->A, b->A, str)); 1803 PetscCall(MatCopy(a->B, b->B, str)); 1804 } 1805 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 1806 PetscFunctionReturn(PETSC_SUCCESS); 1807 } 1808 1809 PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz) 1810 { 1811 PetscInt bs = Y->rmap->bs, m = Y->rmap->N / bs; 1812 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data; 1813 Mat_SeqBAIJ *y = (Mat_SeqBAIJ *)Y->data; 1814 1815 PetscFunctionBegin; 1816 PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz)); 1817 PetscFunctionReturn(PETSC_SUCCESS); 1818 } 1819 1820 static PetscErrorCode MatAXPY_MPIBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str) 1821 { 1822 Mat_MPIBAIJ *xx = (Mat_MPIBAIJ *)X->data, *yy = (Mat_MPIBAIJ *)Y->data; 1823 PetscBLASInt bnz, one = 1; 1824 Mat_SeqBAIJ *x, *y; 1825 PetscInt bs2 = Y->rmap->bs * Y->rmap->bs; 1826 1827 PetscFunctionBegin; 1828 if (str == SAME_NONZERO_PATTERN) { 1829 PetscScalar alpha = a; 1830 x = (Mat_SeqBAIJ *)xx->A->data; 1831 y = (Mat_SeqBAIJ *)yy->A->data; 1832 PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz)); 1833 PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one)); 1834 x = (Mat_SeqBAIJ *)xx->B->data; 1835 y = (Mat_SeqBAIJ *)yy->B->data; 1836 PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz)); 1837 PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one)); 1838 PetscCall(PetscObjectStateIncrease((PetscObject)Y)); 1839 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1840 PetscCall(MatAXPY_Basic(Y, a, X, str)); 1841 } else { 1842 Mat B; 1843 PetscInt *nnz_d, *nnz_o, bs = Y->rmap->bs; 1844 PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d)); 1845 PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o)); 1846 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B)); 1847 PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name)); 1848 PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N)); 1849 PetscCall(MatSetBlockSizesFromMats(B, Y, Y)); 1850 PetscCall(MatSetType(B, MATMPIBAIJ)); 1851 PetscCall(MatAXPYGetPreallocation_SeqBAIJ(yy->A, xx->A, nnz_d)); 1852 PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o)); 1853 PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, nnz_d, 0, nnz_o)); 1854 /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! */ 1855 PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str)); 1856 PetscCall(MatHeaderMerge(Y, &B)); 1857 PetscCall(PetscFree(nnz_d)); 1858 PetscCall(PetscFree(nnz_o)); 1859 } 1860 PetscFunctionReturn(PETSC_SUCCESS); 1861 } 1862 1863 static PetscErrorCode MatConjugate_MPIBAIJ(Mat mat) 1864 { 1865 PetscFunctionBegin; 1866 if (PetscDefined(USE_COMPLEX)) { 1867 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)mat->data; 1868 1869 PetscCall(MatConjugate_SeqBAIJ(a->A)); 1870 PetscCall(MatConjugate_SeqBAIJ(a->B)); 1871 } 1872 PetscFunctionReturn(PETSC_SUCCESS); 1873 } 1874 1875 static PetscErrorCode MatRealPart_MPIBAIJ(Mat A) 1876 { 1877 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1878 1879 PetscFunctionBegin; 1880 PetscCall(MatRealPart(a->A)); 1881 PetscCall(MatRealPart(a->B)); 1882 PetscFunctionReturn(PETSC_SUCCESS); 1883 } 1884 1885 static PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A) 1886 { 1887 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1888 1889 PetscFunctionBegin; 1890 PetscCall(MatImaginaryPart(a->A)); 1891 PetscCall(MatImaginaryPart(a->B)); 1892 PetscFunctionReturn(PETSC_SUCCESS); 1893 } 1894 1895 static PetscErrorCode MatCreateSubMatrix_MPIBAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat) 1896 { 1897 IS iscol_local; 1898 PetscInt csize; 1899 1900 PetscFunctionBegin; 1901 PetscCall(ISGetLocalSize(iscol, &csize)); 1902 if (call == MAT_REUSE_MATRIX) { 1903 PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local)); 1904 PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 1905 } else { 1906 PetscCall(ISAllGather(iscol, &iscol_local)); 1907 } 1908 PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, isrow, iscol_local, csize, call, newmat, PETSC_FALSE)); 1909 if (call == MAT_INITIAL_MATRIX) { 1910 PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local)); 1911 PetscCall(ISDestroy(&iscol_local)); 1912 } 1913 PetscFunctionReturn(PETSC_SUCCESS); 1914 } 1915 1916 /* 1917 Not great since it makes two copies of the submatrix, first an SeqBAIJ 1918 in local and then by concatenating the local matrices the end result. 1919 Writing it directly would be much like MatCreateSubMatrices_MPIBAIJ(). 1920 This routine is used for BAIJ and SBAIJ matrices (unfortunate dependency). 1921 */ 1922 PetscErrorCode MatCreateSubMatrix_MPIBAIJ_Private(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat, PetscBool sym) 1923 { 1924 PetscMPIInt rank, size; 1925 PetscInt i, m, n, rstart, row, rend, nz, *cwork, j, bs; 1926 PetscInt *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal; 1927 Mat M, Mreuse; 1928 MatScalar *vwork, *aa; 1929 MPI_Comm comm; 1930 IS isrow_new, iscol_new; 1931 Mat_SeqBAIJ *aij; 1932 1933 PetscFunctionBegin; 1934 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 1935 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 1936 PetscCallMPI(MPI_Comm_size(comm, &size)); 1937 /* The compression and expansion should be avoided. Doesn't point 1938 out errors, might change the indices, hence buggey */ 1939 PetscCall(ISCompressIndicesGeneral(mat->rmap->N, mat->rmap->n, mat->rmap->bs, 1, &isrow, &isrow_new)); 1940 if (isrow == iscol) { 1941 iscol_new = isrow_new; 1942 PetscCall(PetscObjectReference((PetscObject)iscol_new)); 1943 } else PetscCall(ISCompressIndicesGeneral(mat->cmap->N, mat->cmap->n, mat->cmap->bs, 1, &iscol, &iscol_new)); 1944 1945 if (call == MAT_REUSE_MATRIX) { 1946 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse)); 1947 PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 1948 PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_REUSE_MATRIX, &Mreuse, sym)); 1949 } else { 1950 PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_INITIAL_MATRIX, &Mreuse, sym)); 1951 } 1952 PetscCall(ISDestroy(&isrow_new)); 1953 PetscCall(ISDestroy(&iscol_new)); 1954 /* 1955 m - number of local rows 1956 n - number of columns (same on all processors) 1957 rstart - first row in new global matrix generated 1958 */ 1959 PetscCall(MatGetBlockSize(mat, &bs)); 1960 PetscCall(MatGetSize(Mreuse, &m, &n)); 1961 m = m / bs; 1962 n = n / bs; 1963 1964 if (call == MAT_INITIAL_MATRIX) { 1965 aij = (Mat_SeqBAIJ *)(Mreuse)->data; 1966 ii = aij->i; 1967 jj = aij->j; 1968 1969 /* 1970 Determine the number of non-zeros in the diagonal and off-diagonal 1971 portions of the matrix in order to do correct preallocation 1972 */ 1973 1974 /* first get start and end of "diagonal" columns */ 1975 if (csize == PETSC_DECIDE) { 1976 PetscCall(ISGetSize(isrow, &mglobal)); 1977 if (mglobal == n * bs) { /* square matrix */ 1978 nlocal = m; 1979 } else { 1980 nlocal = n / size + ((n % size) > rank); 1981 } 1982 } else { 1983 nlocal = csize / bs; 1984 } 1985 PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm)); 1986 rstart = rend - nlocal; 1987 PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n); 1988 1989 /* next, compute all the lengths */ 1990 PetscCall(PetscMalloc2(m + 1, &dlens, m + 1, &olens)); 1991 for (i = 0; i < m; i++) { 1992 jend = ii[i + 1] - ii[i]; 1993 olen = 0; 1994 dlen = 0; 1995 for (j = 0; j < jend; j++) { 1996 if (*jj < rstart || *jj >= rend) olen++; 1997 else dlen++; 1998 jj++; 1999 } 2000 olens[i] = olen; 2001 dlens[i] = dlen; 2002 } 2003 PetscCall(MatCreate(comm, &M)); 2004 PetscCall(MatSetSizes(M, bs * m, bs * nlocal, PETSC_DECIDE, bs * n)); 2005 PetscCall(MatSetType(M, sym ? ((PetscObject)mat)->type_name : MATMPIBAIJ)); 2006 PetscCall(MatMPIBAIJSetPreallocation(M, bs, 0, dlens, 0, olens)); 2007 PetscCall(MatMPISBAIJSetPreallocation(M, bs, 0, dlens, 0, olens)); 2008 PetscCall(PetscFree2(dlens, olens)); 2009 } else { 2010 PetscInt ml, nl; 2011 2012 M = *newmat; 2013 PetscCall(MatGetLocalSize(M, &ml, &nl)); 2014 PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request"); 2015 PetscCall(MatZeroEntries(M)); 2016 /* 2017 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2018 rather than the slower MatSetValues(). 2019 */ 2020 M->was_assembled = PETSC_TRUE; 2021 M->assembled = PETSC_FALSE; 2022 } 2023 PetscCall(MatSetOption(M, MAT_ROW_ORIENTED, PETSC_FALSE)); 2024 PetscCall(MatGetOwnershipRange(M, &rstart, &rend)); 2025 aij = (Mat_SeqBAIJ *)(Mreuse)->data; 2026 ii = aij->i; 2027 jj = aij->j; 2028 aa = aij->a; 2029 for (i = 0; i < m; i++) { 2030 row = rstart / bs + i; 2031 nz = ii[i + 1] - ii[i]; 2032 cwork = jj; 2033 jj = PetscSafePointerPlusOffset(jj, nz); 2034 vwork = aa; 2035 aa = PetscSafePointerPlusOffset(aa, nz * bs * bs); 2036 PetscCall(MatSetValuesBlocked_MPIBAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES)); 2037 } 2038 2039 PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY)); 2040 PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY)); 2041 *newmat = M; 2042 2043 /* save submatrix used in processor for next request */ 2044 if (call == MAT_INITIAL_MATRIX) { 2045 PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse)); 2046 PetscCall(PetscObjectDereference((PetscObject)Mreuse)); 2047 } 2048 PetscFunctionReturn(PETSC_SUCCESS); 2049 } 2050 2051 static PetscErrorCode MatPermute_MPIBAIJ(Mat A, IS rowp, IS colp, Mat *B) 2052 { 2053 MPI_Comm comm, pcomm; 2054 PetscInt clocal_size, nrows; 2055 const PetscInt *rows; 2056 PetscMPIInt size; 2057 IS crowp, lcolp; 2058 2059 PetscFunctionBegin; 2060 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 2061 /* make a collective version of 'rowp' */ 2062 PetscCall(PetscObjectGetComm((PetscObject)rowp, &pcomm)); 2063 if (pcomm == comm) { 2064 crowp = rowp; 2065 } else { 2066 PetscCall(ISGetSize(rowp, &nrows)); 2067 PetscCall(ISGetIndices(rowp, &rows)); 2068 PetscCall(ISCreateGeneral(comm, nrows, rows, PETSC_COPY_VALUES, &crowp)); 2069 PetscCall(ISRestoreIndices(rowp, &rows)); 2070 } 2071 PetscCall(ISSetPermutation(crowp)); 2072 /* make a local version of 'colp' */ 2073 PetscCall(PetscObjectGetComm((PetscObject)colp, &pcomm)); 2074 PetscCallMPI(MPI_Comm_size(pcomm, &size)); 2075 if (size == 1) { 2076 lcolp = colp; 2077 } else { 2078 PetscCall(ISAllGather(colp, &lcolp)); 2079 } 2080 PetscCall(ISSetPermutation(lcolp)); 2081 /* now we just get the submatrix */ 2082 PetscCall(MatGetLocalSize(A, NULL, &clocal_size)); 2083 PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(A, crowp, lcolp, clocal_size, MAT_INITIAL_MATRIX, B, PETSC_FALSE)); 2084 /* clean up */ 2085 if (pcomm != comm) PetscCall(ISDestroy(&crowp)); 2086 if (size > 1) PetscCall(ISDestroy(&lcolp)); 2087 PetscFunctionReturn(PETSC_SUCCESS); 2088 } 2089 2090 static PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[]) 2091 { 2092 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 2093 Mat_SeqBAIJ *B = (Mat_SeqBAIJ *)baij->B->data; 2094 2095 PetscFunctionBegin; 2096 if (nghosts) *nghosts = B->nbs; 2097 if (ghosts) *ghosts = baij->garray; 2098 PetscFunctionReturn(PETSC_SUCCESS); 2099 } 2100 2101 static PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A, Mat *newmat) 2102 { 2103 Mat B; 2104 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2105 Mat_SeqBAIJ *ad = (Mat_SeqBAIJ *)a->A->data, *bd = (Mat_SeqBAIJ *)a->B->data; 2106 Mat_SeqAIJ *b; 2107 PetscMPIInt size, rank, *recvcounts = NULL, *displs = NULL; 2108 PetscInt sendcount, i, *rstarts = A->rmap->range, n, cnt, j, bs = A->rmap->bs; 2109 PetscInt m, *garray = a->garray, *lens, *jsendbuf, *a_jsendbuf, *b_jsendbuf; 2110 2111 PetscFunctionBegin; 2112 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 2113 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank)); 2114 2115 /* Tell every processor the number of nonzeros per row */ 2116 PetscCall(PetscMalloc1(A->rmap->N / bs, &lens)); 2117 for (i = A->rmap->rstart / bs; i < A->rmap->rend / bs; i++) lens[i] = ad->i[i - A->rmap->rstart / bs + 1] - ad->i[i - A->rmap->rstart / bs] + bd->i[i - A->rmap->rstart / bs + 1] - bd->i[i - A->rmap->rstart / bs]; 2118 PetscCall(PetscMalloc1(2 * size, &recvcounts)); 2119 displs = recvcounts + size; 2120 for (i = 0; i < size; i++) { 2121 PetscCall(PetscMPIIntCast(A->rmap->range[i + 1] / bs - A->rmap->range[i] / bs, &recvcounts[i])); 2122 PetscCall(PetscMPIIntCast(A->rmap->range[i] / bs, &displs[i])); 2123 } 2124 PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, lens, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A))); 2125 /* Create the sequential matrix of the same type as the local block diagonal */ 2126 PetscCall(MatCreate(PETSC_COMM_SELF, &B)); 2127 PetscCall(MatSetSizes(B, A->rmap->N / bs, A->cmap->N / bs, PETSC_DETERMINE, PETSC_DETERMINE)); 2128 PetscCall(MatSetType(B, MATSEQAIJ)); 2129 PetscCall(MatSeqAIJSetPreallocation(B, 0, lens)); 2130 b = (Mat_SeqAIJ *)B->data; 2131 2132 /* Copy my part of matrix column indices over */ 2133 sendcount = ad->nz + bd->nz; 2134 jsendbuf = b->j + b->i[rstarts[rank] / bs]; 2135 a_jsendbuf = ad->j; 2136 b_jsendbuf = bd->j; 2137 n = A->rmap->rend / bs - A->rmap->rstart / bs; 2138 cnt = 0; 2139 for (i = 0; i < n; i++) { 2140 /* put in lower diagonal portion */ 2141 m = bd->i[i + 1] - bd->i[i]; 2142 while (m > 0) { 2143 /* is it above diagonal (in bd (compressed) numbering) */ 2144 if (garray[*b_jsendbuf] > A->rmap->rstart / bs + i) break; 2145 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2146 m--; 2147 } 2148 2149 /* put in diagonal portion */ 2150 for (j = ad->i[i]; j < ad->i[i + 1]; j++) jsendbuf[cnt++] = A->rmap->rstart / bs + *a_jsendbuf++; 2151 2152 /* put in upper diagonal portion */ 2153 while (m-- > 0) jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2154 } 2155 PetscCheck(cnt == sendcount, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Corrupted PETSc matrix: nz given %" PetscInt_FMT " actual nz %" PetscInt_FMT, sendcount, cnt); 2156 2157 /* Gather all column indices to all processors */ 2158 for (i = 0; i < size; i++) { 2159 recvcounts[i] = 0; 2160 for (j = A->rmap->range[i] / bs; j < A->rmap->range[i + 1] / bs; j++) recvcounts[i] += lens[j]; 2161 } 2162 displs[0] = 0; 2163 for (i = 1; i < size; i++) displs[i] = displs[i - 1] + recvcounts[i - 1]; 2164 PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, b->j, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A))); 2165 /* Assemble the matrix into usable form (note numerical values not yet set) */ 2166 /* set the b->ilen (length of each row) values */ 2167 PetscCall(PetscArraycpy(b->ilen, lens, A->rmap->N / bs)); 2168 /* set the b->i indices */ 2169 b->i[0] = 0; 2170 for (i = 1; i <= A->rmap->N / bs; i++) b->i[i] = b->i[i - 1] + lens[i - 1]; 2171 PetscCall(PetscFree(lens)); 2172 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 2173 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 2174 PetscCall(PetscFree(recvcounts)); 2175 2176 PetscCall(MatPropagateSymmetryOptions(A, B)); 2177 *newmat = B; 2178 PetscFunctionReturn(PETSC_SUCCESS); 2179 } 2180 2181 static PetscErrorCode MatSOR_MPIBAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) 2182 { 2183 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data; 2184 Vec bb1 = NULL; 2185 2186 PetscFunctionBegin; 2187 if (flag == SOR_APPLY_UPPER) { 2188 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2189 PetscFunctionReturn(PETSC_SUCCESS); 2190 } 2191 2192 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) PetscCall(VecDuplicate(bb, &bb1)); 2193 2194 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2195 if (flag & SOR_ZERO_INITIAL_GUESS) { 2196 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2197 its--; 2198 } 2199 2200 while (its--) { 2201 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2202 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2203 2204 /* update rhs: bb1 = bb - B*x */ 2205 PetscCall(VecScale(mat->lvec, -1.0)); 2206 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 2207 2208 /* local sweep */ 2209 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx)); 2210 } 2211 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 2212 if (flag & SOR_ZERO_INITIAL_GUESS) { 2213 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2214 its--; 2215 } 2216 while (its--) { 2217 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2218 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2219 2220 /* update rhs: bb1 = bb - B*x */ 2221 PetscCall(VecScale(mat->lvec, -1.0)); 2222 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 2223 2224 /* local sweep */ 2225 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx)); 2226 } 2227 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 2228 if (flag & SOR_ZERO_INITIAL_GUESS) { 2229 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2230 its--; 2231 } 2232 while (its--) { 2233 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2234 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2235 2236 /* update rhs: bb1 = bb - B*x */ 2237 PetscCall(VecScale(mat->lvec, -1.0)); 2238 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 2239 2240 /* local sweep */ 2241 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx)); 2242 } 2243 } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel version of SOR requested not supported"); 2244 2245 PetscCall(VecDestroy(&bb1)); 2246 PetscFunctionReturn(PETSC_SUCCESS); 2247 } 2248 2249 static PetscErrorCode MatGetColumnReductions_MPIBAIJ(Mat A, PetscInt type, PetscReal *reductions) 2250 { 2251 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)A->data; 2252 PetscInt m, N, i, *garray = aij->garray; 2253 PetscInt ib, jb, bs = A->rmap->bs; 2254 Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ *)aij->A->data; 2255 MatScalar *a_val = a_aij->a; 2256 Mat_SeqBAIJ *b_aij = (Mat_SeqBAIJ *)aij->B->data; 2257 MatScalar *b_val = b_aij->a; 2258 PetscReal *work; 2259 PetscMPIInt iN; 2260 2261 PetscFunctionBegin; 2262 PetscCall(MatGetSize(A, &m, &N)); 2263 PetscCall(PetscCalloc1(N, &work)); 2264 if (type == NORM_2) { 2265 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2266 for (jb = 0; jb < bs; jb++) { 2267 for (ib = 0; ib < bs; ib++) { 2268 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val); 2269 a_val++; 2270 } 2271 } 2272 } 2273 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2274 for (jb = 0; jb < bs; jb++) { 2275 for (ib = 0; ib < bs; ib++) { 2276 work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val); 2277 b_val++; 2278 } 2279 } 2280 } 2281 } else if (type == NORM_1) { 2282 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2283 for (jb = 0; jb < bs; jb++) { 2284 for (ib = 0; ib < bs; ib++) { 2285 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val); 2286 a_val++; 2287 } 2288 } 2289 } 2290 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2291 for (jb = 0; jb < bs; jb++) { 2292 for (ib = 0; ib < bs; ib++) { 2293 work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val); 2294 b_val++; 2295 } 2296 } 2297 } 2298 } else if (type == NORM_INFINITY) { 2299 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2300 for (jb = 0; jb < bs; jb++) { 2301 for (ib = 0; ib < bs; ib++) { 2302 PetscInt col = A->cmap->rstart + a_aij->j[i] * bs + jb; 2303 work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]); 2304 a_val++; 2305 } 2306 } 2307 } 2308 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2309 for (jb = 0; jb < bs; jb++) { 2310 for (ib = 0; ib < bs; ib++) { 2311 PetscInt col = garray[b_aij->j[i]] * bs + jb; 2312 work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]); 2313 b_val++; 2314 } 2315 } 2316 } 2317 } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) { 2318 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2319 for (jb = 0; jb < bs; jb++) { 2320 for (ib = 0; ib < bs; ib++) { 2321 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val); 2322 a_val++; 2323 } 2324 } 2325 } 2326 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2327 for (jb = 0; jb < bs; jb++) { 2328 for (ib = 0; ib < bs; ib++) { 2329 work[garray[b_aij->j[i]] * bs + jb] += PetscRealPart(*b_val); 2330 b_val++; 2331 } 2332 } 2333 } 2334 } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) { 2335 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2336 for (jb = 0; jb < bs; jb++) { 2337 for (ib = 0; ib < bs; ib++) { 2338 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val); 2339 a_val++; 2340 } 2341 } 2342 } 2343 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2344 for (jb = 0; jb < bs; jb++) { 2345 for (ib = 0; ib < bs; ib++) { 2346 work[garray[b_aij->j[i]] * bs + jb] += PetscImaginaryPart(*b_val); 2347 b_val++; 2348 } 2349 } 2350 } 2351 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type"); 2352 PetscCall(PetscMPIIntCast(N, &iN)); 2353 if (type == NORM_INFINITY) { 2354 PetscCall(MPIU_Allreduce(work, reductions, iN, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A))); 2355 } else { 2356 PetscCall(MPIU_Allreduce(work, reductions, iN, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A))); 2357 } 2358 PetscCall(PetscFree(work)); 2359 if (type == NORM_2) { 2360 for (i = 0; i < N; i++) reductions[i] = PetscSqrtReal(reductions[i]); 2361 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 2362 for (i = 0; i < N; i++) reductions[i] /= m; 2363 } 2364 PetscFunctionReturn(PETSC_SUCCESS); 2365 } 2366 2367 static PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A, const PetscScalar **values) 2368 { 2369 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2370 2371 PetscFunctionBegin; 2372 PetscCall(MatInvertBlockDiagonal(a->A, values)); 2373 A->factorerrortype = a->A->factorerrortype; 2374 A->factorerror_zeropivot_value = a->A->factorerror_zeropivot_value; 2375 A->factorerror_zeropivot_row = a->A->factorerror_zeropivot_row; 2376 PetscFunctionReturn(PETSC_SUCCESS); 2377 } 2378 2379 static PetscErrorCode MatShift_MPIBAIJ(Mat Y, PetscScalar a) 2380 { 2381 Mat_MPIBAIJ *maij = (Mat_MPIBAIJ *)Y->data; 2382 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)maij->A->data; 2383 2384 PetscFunctionBegin; 2385 if (!Y->preallocated) { 2386 PetscCall(MatMPIBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL, 0, NULL)); 2387 } else if (!aij->nz) { 2388 PetscInt nonew = aij->nonew; 2389 PetscCall(MatSeqBAIJSetPreallocation(maij->A, Y->rmap->bs, 1, NULL)); 2390 aij->nonew = nonew; 2391 } 2392 PetscCall(MatShift_Basic(Y, a)); 2393 PetscFunctionReturn(PETSC_SUCCESS); 2394 } 2395 2396 static PetscErrorCode MatMissingDiagonal_MPIBAIJ(Mat A, PetscBool *missing, PetscInt *d) 2397 { 2398 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2399 2400 PetscFunctionBegin; 2401 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices"); 2402 PetscCall(MatMissingDiagonal(a->A, missing, d)); 2403 if (d) { 2404 PetscInt rstart; 2405 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 2406 *d += rstart / A->rmap->bs; 2407 } 2408 PetscFunctionReturn(PETSC_SUCCESS); 2409 } 2410 2411 static PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A, Mat *a) 2412 { 2413 PetscFunctionBegin; 2414 *a = ((Mat_MPIBAIJ *)A->data)->A; 2415 PetscFunctionReturn(PETSC_SUCCESS); 2416 } 2417 2418 static PetscErrorCode MatEliminateZeros_MPIBAIJ(Mat A, PetscBool keep) 2419 { 2420 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2421 2422 PetscFunctionBegin; 2423 PetscCall(MatEliminateZeros_SeqBAIJ(a->A, keep)); // possibly keep zero diagonal coefficients 2424 PetscCall(MatEliminateZeros_SeqBAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients 2425 PetscFunctionReturn(PETSC_SUCCESS); 2426 } 2427 2428 static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ, 2429 MatGetRow_MPIBAIJ, 2430 MatRestoreRow_MPIBAIJ, 2431 MatMult_MPIBAIJ, 2432 /* 4*/ MatMultAdd_MPIBAIJ, 2433 MatMultTranspose_MPIBAIJ, 2434 MatMultTransposeAdd_MPIBAIJ, 2435 NULL, 2436 NULL, 2437 NULL, 2438 /*10*/ NULL, 2439 NULL, 2440 NULL, 2441 MatSOR_MPIBAIJ, 2442 MatTranspose_MPIBAIJ, 2443 /*15*/ MatGetInfo_MPIBAIJ, 2444 MatEqual_MPIBAIJ, 2445 MatGetDiagonal_MPIBAIJ, 2446 MatDiagonalScale_MPIBAIJ, 2447 MatNorm_MPIBAIJ, 2448 /*20*/ MatAssemblyBegin_MPIBAIJ, 2449 MatAssemblyEnd_MPIBAIJ, 2450 MatSetOption_MPIBAIJ, 2451 MatZeroEntries_MPIBAIJ, 2452 /*24*/ MatZeroRows_MPIBAIJ, 2453 NULL, 2454 NULL, 2455 NULL, 2456 NULL, 2457 /*29*/ MatSetUp_MPI_Hash, 2458 NULL, 2459 NULL, 2460 MatGetDiagonalBlock_MPIBAIJ, 2461 NULL, 2462 /*34*/ MatDuplicate_MPIBAIJ, 2463 NULL, 2464 NULL, 2465 NULL, 2466 NULL, 2467 /*39*/ MatAXPY_MPIBAIJ, 2468 MatCreateSubMatrices_MPIBAIJ, 2469 MatIncreaseOverlap_MPIBAIJ, 2470 MatGetValues_MPIBAIJ, 2471 MatCopy_MPIBAIJ, 2472 /*44*/ NULL, 2473 MatScale_MPIBAIJ, 2474 MatShift_MPIBAIJ, 2475 NULL, 2476 MatZeroRowsColumns_MPIBAIJ, 2477 /*49*/ NULL, 2478 NULL, 2479 NULL, 2480 NULL, 2481 NULL, 2482 /*54*/ MatFDColoringCreate_MPIXAIJ, 2483 NULL, 2484 MatSetUnfactored_MPIBAIJ, 2485 MatPermute_MPIBAIJ, 2486 MatSetValuesBlocked_MPIBAIJ, 2487 /*59*/ MatCreateSubMatrix_MPIBAIJ, 2488 MatDestroy_MPIBAIJ, 2489 MatView_MPIBAIJ, 2490 NULL, 2491 NULL, 2492 /*64*/ NULL, 2493 NULL, 2494 NULL, 2495 NULL, 2496 NULL, 2497 /*69*/ MatGetRowMaxAbs_MPIBAIJ, 2498 NULL, 2499 NULL, 2500 NULL, 2501 NULL, 2502 /*74*/ NULL, 2503 MatFDColoringApply_BAIJ, 2504 NULL, 2505 NULL, 2506 NULL, 2507 /*79*/ NULL, 2508 NULL, 2509 NULL, 2510 NULL, 2511 MatLoad_MPIBAIJ, 2512 /*84*/ NULL, 2513 NULL, 2514 NULL, 2515 NULL, 2516 NULL, 2517 /*89*/ NULL, 2518 NULL, 2519 NULL, 2520 NULL, 2521 NULL, 2522 /*94*/ NULL, 2523 NULL, 2524 NULL, 2525 NULL, 2526 NULL, 2527 /*99*/ NULL, 2528 NULL, 2529 NULL, 2530 MatConjugate_MPIBAIJ, 2531 NULL, 2532 /*104*/ NULL, 2533 MatRealPart_MPIBAIJ, 2534 MatImaginaryPart_MPIBAIJ, 2535 NULL, 2536 NULL, 2537 /*109*/ NULL, 2538 NULL, 2539 NULL, 2540 NULL, 2541 MatMissingDiagonal_MPIBAIJ, 2542 /*114*/ MatGetSeqNonzeroStructure_MPIBAIJ, 2543 NULL, 2544 MatGetGhosts_MPIBAIJ, 2545 NULL, 2546 NULL, 2547 /*119*/ NULL, 2548 NULL, 2549 NULL, 2550 NULL, 2551 MatGetMultiProcBlock_MPIBAIJ, 2552 /*124*/ NULL, 2553 MatGetColumnReductions_MPIBAIJ, 2554 MatInvertBlockDiagonal_MPIBAIJ, 2555 NULL, 2556 NULL, 2557 /*129*/ NULL, 2558 NULL, 2559 NULL, 2560 NULL, 2561 NULL, 2562 /*134*/ NULL, 2563 NULL, 2564 NULL, 2565 NULL, 2566 NULL, 2567 /*139*/ MatSetBlockSizes_Default, 2568 NULL, 2569 NULL, 2570 MatFDColoringSetUp_MPIXAIJ, 2571 NULL, 2572 /*144*/ MatCreateMPIMatConcatenateSeqMat_MPIBAIJ, 2573 NULL, 2574 NULL, 2575 NULL, 2576 NULL, 2577 NULL, 2578 /*150*/ NULL, 2579 MatEliminateZeros_MPIBAIJ, 2580 MatGetRowSumAbs_MPIBAIJ, 2581 NULL}; 2582 2583 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *); 2584 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *); 2585 2586 static PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[]) 2587 { 2588 PetscInt m, rstart, cstart, cend; 2589 PetscInt i, j, dlen, olen, nz, nz_max = 0, *d_nnz = NULL, *o_nnz = NULL; 2590 const PetscInt *JJ = NULL; 2591 PetscScalar *values = NULL; 2592 PetscBool roworiented = ((Mat_MPIBAIJ *)B->data)->roworiented; 2593 PetscBool nooffprocentries; 2594 2595 PetscFunctionBegin; 2596 PetscCall(PetscLayoutSetBlockSize(B->rmap, bs)); 2597 PetscCall(PetscLayoutSetBlockSize(B->cmap, bs)); 2598 PetscCall(PetscLayoutSetUp(B->rmap)); 2599 PetscCall(PetscLayoutSetUp(B->cmap)); 2600 PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 2601 m = B->rmap->n / bs; 2602 rstart = B->rmap->rstart / bs; 2603 cstart = B->cmap->rstart / bs; 2604 cend = B->cmap->rend / bs; 2605 2606 PetscCheck(!ii[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]); 2607 PetscCall(PetscMalloc2(m, &d_nnz, m, &o_nnz)); 2608 for (i = 0; i < m; i++) { 2609 nz = ii[i + 1] - ii[i]; 2610 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz); 2611 nz_max = PetscMax(nz_max, nz); 2612 dlen = 0; 2613 olen = 0; 2614 JJ = jj + ii[i]; 2615 for (j = 0; j < nz; j++) { 2616 if (*JJ < cstart || *JJ >= cend) olen++; 2617 else dlen++; 2618 JJ++; 2619 } 2620 d_nnz[i] = dlen; 2621 o_nnz[i] = olen; 2622 } 2623 PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, d_nnz, 0, o_nnz)); 2624 PetscCall(PetscFree2(d_nnz, o_nnz)); 2625 2626 values = (PetscScalar *)V; 2627 if (!values) PetscCall(PetscCalloc1(bs * bs * nz_max, &values)); 2628 for (i = 0; i < m; i++) { 2629 PetscInt row = i + rstart; 2630 PetscInt ncols = ii[i + 1] - ii[i]; 2631 const PetscInt *icols = jj + ii[i]; 2632 if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */ 2633 const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0); 2634 PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, ncols, icols, svals, INSERT_VALUES)); 2635 } else { /* block ordering does not match so we can only insert one block at a time. */ 2636 PetscInt j; 2637 for (j = 0; j < ncols; j++) { 2638 const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0); 2639 PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, 1, &icols[j], svals, INSERT_VALUES)); 2640 } 2641 } 2642 } 2643 2644 if (!V) PetscCall(PetscFree(values)); 2645 nooffprocentries = B->nooffprocentries; 2646 B->nooffprocentries = PETSC_TRUE; 2647 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 2648 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 2649 B->nooffprocentries = nooffprocentries; 2650 2651 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 2652 PetscFunctionReturn(PETSC_SUCCESS); 2653 } 2654 2655 /*@C 2656 MatMPIBAIJSetPreallocationCSR - Creates a sparse parallel matrix in `MATBAIJ` format using the given nonzero structure and (optional) numerical values 2657 2658 Collective 2659 2660 Input Parameters: 2661 + B - the matrix 2662 . bs - the block size 2663 . i - the indices into `j` for the start of each local row (starts with zero) 2664 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2665 - v - optional values in the matrix, use `NULL` if not provided 2666 2667 Level: advanced 2668 2669 Notes: 2670 The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc; 2671 thus you CANNOT change the matrix entries by changing the values of `v` after you have 2672 called this routine. 2673 2674 The order of the entries in values is specified by the `MatOption` `MAT_ROW_ORIENTED`. For example, C programs 2675 may want to use the default `MAT_ROW_ORIENTED` with value `PETSC_TRUE` and use an array v[nnz][bs][bs] where the second index is 2676 over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 2677 `MAT_ROW_ORIENTED` with value `PETSC_FALSE` and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 2678 block column and the second index is over columns within a block. 2679 2680 Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well 2681 2682 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MATMPIAIJ`, `MatCreateMPIBAIJWithArrays()`, `MATMPIBAIJ` 2683 @*/ 2684 PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[]) 2685 { 2686 PetscFunctionBegin; 2687 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 2688 PetscValidType(B, 1); 2689 PetscValidLogicalCollectiveInt(B, bs, 2); 2690 PetscTryMethod(B, "MatMPIBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v)); 2691 PetscFunctionReturn(PETSC_SUCCESS); 2692 } 2693 2694 PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz) 2695 { 2696 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 2697 PetscInt i; 2698 PetscMPIInt size; 2699 2700 PetscFunctionBegin; 2701 if (B->hash_active) { 2702 B->ops[0] = b->cops; 2703 B->hash_active = PETSC_FALSE; 2704 } 2705 if (!B->preallocated) PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), bs, &B->bstash)); 2706 PetscCall(MatSetBlockSize(B, PetscAbs(bs))); 2707 PetscCall(PetscLayoutSetUp(B->rmap)); 2708 PetscCall(PetscLayoutSetUp(B->cmap)); 2709 PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 2710 2711 if (d_nnz) { 2712 for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(d_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "d_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, d_nnz[i]); 2713 } 2714 if (o_nnz) { 2715 for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(o_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "o_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, o_nnz[i]); 2716 } 2717 2718 b->bs2 = bs * bs; 2719 b->mbs = B->rmap->n / bs; 2720 b->nbs = B->cmap->n / bs; 2721 b->Mbs = B->rmap->N / bs; 2722 b->Nbs = B->cmap->N / bs; 2723 2724 for (i = 0; i <= b->size; i++) b->rangebs[i] = B->rmap->range[i] / bs; 2725 b->rstartbs = B->rmap->rstart / bs; 2726 b->rendbs = B->rmap->rend / bs; 2727 b->cstartbs = B->cmap->rstart / bs; 2728 b->cendbs = B->cmap->rend / bs; 2729 2730 #if defined(PETSC_USE_CTABLE) 2731 PetscCall(PetscHMapIDestroy(&b->colmap)); 2732 #else 2733 PetscCall(PetscFree(b->colmap)); 2734 #endif 2735 PetscCall(PetscFree(b->garray)); 2736 PetscCall(VecDestroy(&b->lvec)); 2737 PetscCall(VecScatterDestroy(&b->Mvctx)); 2738 2739 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 2740 2741 MatSeqXAIJGetOptions_Private(b->B); 2742 PetscCall(MatDestroy(&b->B)); 2743 PetscCall(MatCreate(PETSC_COMM_SELF, &b->B)); 2744 PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0)); 2745 PetscCall(MatSetType(b->B, MATSEQBAIJ)); 2746 MatSeqXAIJRestoreOptions_Private(b->B); 2747 2748 MatSeqXAIJGetOptions_Private(b->A); 2749 PetscCall(MatDestroy(&b->A)); 2750 PetscCall(MatCreate(PETSC_COMM_SELF, &b->A)); 2751 PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n)); 2752 PetscCall(MatSetType(b->A, MATSEQBAIJ)); 2753 MatSeqXAIJRestoreOptions_Private(b->A); 2754 2755 PetscCall(MatSeqBAIJSetPreallocation(b->A, bs, d_nz, d_nnz)); 2756 PetscCall(MatSeqBAIJSetPreallocation(b->B, bs, o_nz, o_nnz)); 2757 B->preallocated = PETSC_TRUE; 2758 B->was_assembled = PETSC_FALSE; 2759 B->assembled = PETSC_FALSE; 2760 PetscFunctionReturn(PETSC_SUCCESS); 2761 } 2762 2763 extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat, Vec); 2764 extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat, PetscReal); 2765 2766 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype, MatReuse reuse, Mat *adj) 2767 { 2768 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 2769 Mat_SeqBAIJ *d = (Mat_SeqBAIJ *)b->A->data, *o = (Mat_SeqBAIJ *)b->B->data; 2770 PetscInt M = B->rmap->n / B->rmap->bs, i, *ii, *jj, cnt, j, k, rstart = B->rmap->rstart / B->rmap->bs; 2771 const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray; 2772 2773 PetscFunctionBegin; 2774 PetscCall(PetscMalloc1(M + 1, &ii)); 2775 ii[0] = 0; 2776 for (i = 0; i < M; i++) { 2777 PetscCheck((id[i + 1] - id[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, id[i], id[i + 1]); 2778 PetscCheck((io[i + 1] - io[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, io[i], io[i + 1]); 2779 ii[i + 1] = ii[i] + id[i + 1] - id[i] + io[i + 1] - io[i]; 2780 /* remove one from count of matrix has diagonal */ 2781 for (j = id[i]; j < id[i + 1]; j++) { 2782 if (jd[j] == i) { 2783 ii[i + 1]--; 2784 break; 2785 } 2786 } 2787 } 2788 PetscCall(PetscMalloc1(ii[M], &jj)); 2789 cnt = 0; 2790 for (i = 0; i < M; i++) { 2791 for (j = io[i]; j < io[i + 1]; j++) { 2792 if (garray[jo[j]] > rstart) break; 2793 jj[cnt++] = garray[jo[j]]; 2794 } 2795 for (k = id[i]; k < id[i + 1]; k++) { 2796 if (jd[k] != i) jj[cnt++] = rstart + jd[k]; 2797 } 2798 for (; j < io[i + 1]; j++) jj[cnt++] = garray[jo[j]]; 2799 } 2800 PetscCall(MatCreateMPIAdj(PetscObjectComm((PetscObject)B), M, B->cmap->N / B->rmap->bs, ii, jj, NULL, adj)); 2801 PetscFunctionReturn(PETSC_SUCCESS); 2802 } 2803 2804 #include <../src/mat/impls/aij/mpi/mpiaij.h> 2805 2806 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType, MatReuse, Mat *); 2807 2808 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 2809 { 2810 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2811 Mat_MPIAIJ *b; 2812 Mat B; 2813 2814 PetscFunctionBegin; 2815 PetscCheck(A->assembled, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Matrix must be assembled"); 2816 2817 if (reuse == MAT_REUSE_MATRIX) { 2818 B = *newmat; 2819 } else { 2820 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 2821 PetscCall(MatSetType(B, MATMPIAIJ)); 2822 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 2823 PetscCall(MatSetBlockSizes(B, A->rmap->bs, A->cmap->bs)); 2824 PetscCall(MatSeqAIJSetPreallocation(B, 0, NULL)); 2825 PetscCall(MatMPIAIJSetPreallocation(B, 0, NULL, 0, NULL)); 2826 } 2827 b = (Mat_MPIAIJ *)B->data; 2828 2829 if (reuse == MAT_REUSE_MATRIX) { 2830 PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_REUSE_MATRIX, &b->A)); 2831 PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_REUSE_MATRIX, &b->B)); 2832 } else { 2833 PetscInt *garray = a->garray; 2834 Mat_SeqAIJ *bB; 2835 PetscInt bs, nnz; 2836 PetscCall(MatDestroy(&b->A)); 2837 PetscCall(MatDestroy(&b->B)); 2838 /* just clear out the data structure */ 2839 PetscCall(MatDisAssemble_MPIAIJ(B)); 2840 PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A)); 2841 PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B)); 2842 2843 /* Global numbering for b->B columns */ 2844 bB = (Mat_SeqAIJ *)b->B->data; 2845 bs = A->rmap->bs; 2846 nnz = bB->i[A->rmap->n]; 2847 for (PetscInt k = 0; k < nnz; k++) { 2848 PetscInt bj = bB->j[k] / bs; 2849 PetscInt br = bB->j[k] % bs; 2850 bB->j[k] = garray[bj] * bs + br; 2851 } 2852 } 2853 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 2854 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 2855 2856 if (reuse == MAT_INPLACE_MATRIX) { 2857 PetscCall(MatHeaderReplace(A, &B)); 2858 } else { 2859 *newmat = B; 2860 } 2861 PetscFunctionReturn(PETSC_SUCCESS); 2862 } 2863 2864 /*MC 2865 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 2866 2867 Options Database Keys: 2868 + -mat_type mpibaij - sets the matrix type to `MATMPIBAIJ` during a call to `MatSetFromOptions()` 2869 . -mat_block_size <bs> - set the blocksize used to store the matrix 2870 . -mat_baij_mult_version version - indicate the version of the matrix-vector product to use (0 often indicates using BLAS) 2871 - -mat_use_hash_table <fact> - set hash table factor 2872 2873 Level: beginner 2874 2875 Note: 2876 `MatSetOption(A, MAT_STRUCTURE_ONLY, PETSC_TRUE)` may be called for this matrix type. In this no 2877 space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored 2878 2879 .seealso: `Mat`, `MATBAIJ`, `MATSEQBAIJ`, `MatCreateBAIJ` 2880 M*/ 2881 2882 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat, MatType, MatReuse, Mat *); 2883 2884 PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B) 2885 { 2886 Mat_MPIBAIJ *b; 2887 PetscBool flg = PETSC_FALSE; 2888 2889 PetscFunctionBegin; 2890 PetscCall(PetscNew(&b)); 2891 B->data = (void *)b; 2892 B->ops[0] = MatOps_Values; 2893 B->assembled = PETSC_FALSE; 2894 2895 B->insertmode = NOT_SET_VALUES; 2896 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank)); 2897 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &b->size)); 2898 2899 /* build local table of row and column ownerships */ 2900 PetscCall(PetscMalloc1(b->size + 1, &b->rangebs)); 2901 2902 /* build cache for off array entries formed */ 2903 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash)); 2904 2905 b->donotstash = PETSC_FALSE; 2906 b->colmap = NULL; 2907 b->garray = NULL; 2908 b->roworiented = PETSC_TRUE; 2909 2910 /* stuff used in block assembly */ 2911 b->barray = NULL; 2912 2913 /* stuff used for matrix vector multiply */ 2914 b->lvec = NULL; 2915 b->Mvctx = NULL; 2916 2917 /* stuff for MatGetRow() */ 2918 b->rowindices = NULL; 2919 b->rowvalues = NULL; 2920 b->getrowactive = PETSC_FALSE; 2921 2922 /* hash table stuff */ 2923 b->ht = NULL; 2924 b->hd = NULL; 2925 b->ht_size = 0; 2926 b->ht_flag = PETSC_FALSE; 2927 b->ht_fact = 0; 2928 b->ht_total_ct = 0; 2929 b->ht_insert_ct = 0; 2930 2931 /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */ 2932 b->ijonly = PETSC_FALSE; 2933 2934 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiadj_C", MatConvert_MPIBAIJ_MPIAdj)); 2935 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiaij_C", MatConvert_MPIBAIJ_MPIAIJ)); 2936 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpisbaij_C", MatConvert_MPIBAIJ_MPISBAIJ)); 2937 #if defined(PETSC_HAVE_HYPRE) 2938 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_hypre_C", MatConvert_AIJ_HYPRE)); 2939 #endif 2940 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIBAIJ)); 2941 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIBAIJ)); 2942 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocation_C", MatMPIBAIJSetPreallocation_MPIBAIJ)); 2943 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocationCSR_C", MatMPIBAIJSetPreallocationCSR_MPIBAIJ)); 2944 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIBAIJ)); 2945 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetHashTableFactor_C", MatSetHashTableFactor_MPIBAIJ)); 2946 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_is_C", MatConvert_XAIJ_IS)); 2947 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIBAIJ)); 2948 2949 PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Options for loading MPIBAIJ matrix 1", "Mat"); 2950 PetscCall(PetscOptionsName("-mat_use_hash_table", "Use hash table to save time in constructing matrix", "MatSetOption", &flg)); 2951 if (flg) { 2952 PetscReal fact = 1.39; 2953 PetscCall(MatSetOption(B, MAT_USE_HASH_TABLE, PETSC_TRUE)); 2954 PetscCall(PetscOptionsReal("-mat_use_hash_table", "Use hash table factor", "MatMPIBAIJSetHashTableFactor", fact, &fact, NULL)); 2955 if (fact <= 1.0) fact = 1.39; 2956 PetscCall(MatMPIBAIJSetHashTableFactor(B, fact)); 2957 PetscCall(PetscInfo(B, "Hash table Factor used %5.2g\n", (double)fact)); 2958 } 2959 PetscOptionsEnd(); 2960 PetscFunctionReturn(PETSC_SUCCESS); 2961 } 2962 2963 // PetscClangLinter pragma disable: -fdoc-section-header-unknown 2964 /*MC 2965 MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 2966 2967 This matrix type is identical to `MATSEQBAIJ` when constructed with a single process communicator, 2968 and `MATMPIBAIJ` otherwise. 2969 2970 Options Database Keys: 2971 . -mat_type baij - sets the matrix type to `MATBAIJ` during a call to `MatSetFromOptions()` 2972 2973 Level: beginner 2974 2975 .seealso: `Mat`, `MatCreateBAIJ()`, `MATSEQBAIJ`, `MATMPIBAIJ`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()` 2976 M*/ 2977 2978 /*@ 2979 MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in `MATMPIBAIJ` format 2980 (block compressed row). 2981 2982 Collective 2983 2984 Input Parameters: 2985 + B - the matrix 2986 . bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row 2987 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()` 2988 . d_nz - number of block nonzeros per block row in diagonal portion of local 2989 submatrix (same for all local rows) 2990 . d_nnz - array containing the number of block nonzeros in the various block rows 2991 of the in diagonal portion of the local (possibly different for each block 2992 row) or `NULL`. If you plan to factor the matrix you must leave room for the diagonal entry and 2993 set it even if it is zero. 2994 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2995 submatrix (same for all local rows). 2996 - o_nnz - array containing the number of nonzeros in the various block rows of the 2997 off-diagonal portion of the local submatrix (possibly different for 2998 each block row) or `NULL`. 2999 3000 If the *_nnz parameter is given then the *_nz parameter is ignored 3001 3002 Options Database Keys: 3003 + -mat_block_size - size of the blocks to use 3004 - -mat_use_hash_table <fact> - set hash table factor 3005 3006 Level: intermediate 3007 3008 Notes: 3009 For good matrix assembly performance 3010 the user should preallocate the matrix storage by setting the parameters 3011 `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). By setting these parameters accurately, 3012 performance can be increased by more than a factor of 50. 3013 3014 If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor 3015 than it must be used on all processors that share the object for that argument. 3016 3017 Storage Information: 3018 For a square global matrix we define each processor's diagonal portion 3019 to be its local rows and the corresponding columns (a square submatrix); 3020 each processor's off-diagonal portion encompasses the remainder of the 3021 local matrix (a rectangular submatrix). 3022 3023 The user can specify preallocated storage for the diagonal part of 3024 the local submatrix with either `d_nz` or `d_nnz` (not both). Set 3025 `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic 3026 memory allocation. Likewise, specify preallocated storage for the 3027 off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both). 3028 3029 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3030 the figure below we depict these three local rows and all columns (0-11). 3031 3032 .vb 3033 0 1 2 3 4 5 6 7 8 9 10 11 3034 -------------------------- 3035 row 3 |o o o d d d o o o o o o 3036 row 4 |o o o d d d o o o o o o 3037 row 5 |o o o d d d o o o o o o 3038 -------------------------- 3039 .ve 3040 3041 Thus, any entries in the d locations are stored in the d (diagonal) 3042 submatrix, and any entries in the o locations are stored in the 3043 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3044 stored simply in the `MATSEQBAIJ` format for compressed row storage. 3045 3046 Now `d_nz` should indicate the number of block nonzeros per row in the d matrix, 3047 and `o_nz` should indicate the number of block nonzeros per row in the o matrix. 3048 In general, for PDE problems in which most nonzeros are near the diagonal, 3049 one expects `d_nz` >> `o_nz`. 3050 3051 You can call `MatGetInfo()` to get information on how effective the preallocation was; 3052 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3053 You can also run with the option `-info` and look for messages with the string 3054 malloc in them to see if additional memory allocation was needed. 3055 3056 .seealso: `Mat`, `MATMPIBAIJ`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatMPIBAIJSetPreallocationCSR()`, `PetscSplitOwnership()` 3057 @*/ 3058 PetscErrorCode MatMPIBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[]) 3059 { 3060 PetscFunctionBegin; 3061 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 3062 PetscValidType(B, 1); 3063 PetscValidLogicalCollectiveInt(B, bs, 2); 3064 PetscTryMethod(B, "MatMPIBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, bs, d_nz, d_nnz, o_nz, o_nnz)); 3065 PetscFunctionReturn(PETSC_SUCCESS); 3066 } 3067 3068 // PetscClangLinter pragma disable: -fdoc-section-header-unknown 3069 /*@ 3070 MatCreateBAIJ - Creates a sparse parallel matrix in `MATBAIJ` format 3071 (block compressed row). 3072 3073 Collective 3074 3075 Input Parameters: 3076 + comm - MPI communicator 3077 . bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row 3078 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()` 3079 . m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given) 3080 This value should be the same as the local size used in creating the 3081 y vector for the matrix-vector product y = Ax. 3082 . n - number of local columns (or `PETSC_DECIDE` to have calculated if N is given) 3083 This value should be the same as the local size used in creating the 3084 x vector for the matrix-vector product y = Ax. 3085 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 3086 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 3087 . d_nz - number of nonzero blocks per block row in diagonal portion of local 3088 submatrix (same for all local rows) 3089 . d_nnz - array containing the number of nonzero blocks in the various block rows 3090 of the in diagonal portion of the local (possibly different for each block 3091 row) or NULL. If you plan to factor the matrix you must leave room for the diagonal entry 3092 and set it even if it is zero. 3093 . o_nz - number of nonzero blocks per block row in the off-diagonal portion of local 3094 submatrix (same for all local rows). 3095 - o_nnz - array containing the number of nonzero blocks in the various block rows of the 3096 off-diagonal portion of the local submatrix (possibly different for 3097 each block row) or NULL. 3098 3099 Output Parameter: 3100 . A - the matrix 3101 3102 Options Database Keys: 3103 + -mat_block_size - size of the blocks to use 3104 - -mat_use_hash_table <fact> - set hash table factor 3105 3106 Level: intermediate 3107 3108 Notes: 3109 It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, 3110 MatXXXXSetPreallocation() paradigm instead of this routine directly. 3111 [MatXXXXSetPreallocation() is, for example, `MatSeqBAIJSetPreallocation()`] 3112 3113 For good matrix assembly performance 3114 the user should preallocate the matrix storage by setting the parameters 3115 `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). By setting these parameters accurately, 3116 performance can be increased by more than a factor of 50. 3117 3118 If the *_nnz parameter is given then the *_nz parameter is ignored 3119 3120 A nonzero block is any block that as 1 or more nonzeros in it 3121 3122 The user MUST specify either the local or global matrix dimensions 3123 (possibly both). 3124 3125 If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor 3126 than it must be used on all processors that share the object for that argument. 3127 3128 If `m` and `n` are not `PETSC_DECIDE`, then the values determine the `PetscLayout` of the matrix and the ranges returned by 3129 `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, and `MatGetOwnershipRangesColumn()`. 3130 3131 Storage Information: 3132 For a square global matrix we define each processor's diagonal portion 3133 to be its local rows and the corresponding columns (a square submatrix); 3134 each processor's off-diagonal portion encompasses the remainder of the 3135 local matrix (a rectangular submatrix). 3136 3137 The user can specify preallocated storage for the diagonal part of 3138 the local submatrix with either d_nz or d_nnz (not both). Set 3139 `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic 3140 memory allocation. Likewise, specify preallocated storage for the 3141 off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both). 3142 3143 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3144 the figure below we depict these three local rows and all columns (0-11). 3145 3146 .vb 3147 0 1 2 3 4 5 6 7 8 9 10 11 3148 -------------------------- 3149 row 3 |o o o d d d o o o o o o 3150 row 4 |o o o d d d o o o o o o 3151 row 5 |o o o d d d o o o o o o 3152 -------------------------- 3153 .ve 3154 3155 Thus, any entries in the d locations are stored in the d (diagonal) 3156 submatrix, and any entries in the o locations are stored in the 3157 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3158 stored simply in the `MATSEQBAIJ` format for compressed row storage. 3159 3160 Now `d_nz` should indicate the number of block nonzeros per row in the d matrix, 3161 and `o_nz` should indicate the number of block nonzeros per row in the o matrix. 3162 In general, for PDE problems in which most nonzeros are near the diagonal, 3163 one expects `d_nz` >> `o_nz`. 3164 3165 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()`, 3166 `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, `MatGetOwnershipRangesColumn()`, `PetscLayout` 3167 @*/ 3168 PetscErrorCode MatCreateBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A) 3169 { 3170 PetscMPIInt size; 3171 3172 PetscFunctionBegin; 3173 PetscCall(MatCreate(comm, A)); 3174 PetscCall(MatSetSizes(*A, m, n, M, N)); 3175 PetscCallMPI(MPI_Comm_size(comm, &size)); 3176 if (size > 1) { 3177 PetscCall(MatSetType(*A, MATMPIBAIJ)); 3178 PetscCall(MatMPIBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz)); 3179 } else { 3180 PetscCall(MatSetType(*A, MATSEQBAIJ)); 3181 PetscCall(MatSeqBAIJSetPreallocation(*A, bs, d_nz, d_nnz)); 3182 } 3183 PetscFunctionReturn(PETSC_SUCCESS); 3184 } 3185 3186 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat) 3187 { 3188 Mat mat; 3189 Mat_MPIBAIJ *a, *oldmat = (Mat_MPIBAIJ *)matin->data; 3190 PetscInt len = 0; 3191 3192 PetscFunctionBegin; 3193 *newmat = NULL; 3194 PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat)); 3195 PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N)); 3196 PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name)); 3197 3198 PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap)); 3199 PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap)); 3200 if (matin->hash_active) { 3201 PetscCall(MatSetUp(mat)); 3202 } else { 3203 mat->factortype = matin->factortype; 3204 mat->preallocated = PETSC_TRUE; 3205 mat->assembled = PETSC_TRUE; 3206 mat->insertmode = NOT_SET_VALUES; 3207 3208 a = (Mat_MPIBAIJ *)mat->data; 3209 mat->rmap->bs = matin->rmap->bs; 3210 a->bs2 = oldmat->bs2; 3211 a->mbs = oldmat->mbs; 3212 a->nbs = oldmat->nbs; 3213 a->Mbs = oldmat->Mbs; 3214 a->Nbs = oldmat->Nbs; 3215 3216 a->size = oldmat->size; 3217 a->rank = oldmat->rank; 3218 a->donotstash = oldmat->donotstash; 3219 a->roworiented = oldmat->roworiented; 3220 a->rowindices = NULL; 3221 a->rowvalues = NULL; 3222 a->getrowactive = PETSC_FALSE; 3223 a->barray = NULL; 3224 a->rstartbs = oldmat->rstartbs; 3225 a->rendbs = oldmat->rendbs; 3226 a->cstartbs = oldmat->cstartbs; 3227 a->cendbs = oldmat->cendbs; 3228 3229 /* hash table stuff */ 3230 a->ht = NULL; 3231 a->hd = NULL; 3232 a->ht_size = 0; 3233 a->ht_flag = oldmat->ht_flag; 3234 a->ht_fact = oldmat->ht_fact; 3235 a->ht_total_ct = 0; 3236 a->ht_insert_ct = 0; 3237 3238 PetscCall(PetscArraycpy(a->rangebs, oldmat->rangebs, a->size + 1)); 3239 if (oldmat->colmap) { 3240 #if defined(PETSC_USE_CTABLE) 3241 PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap)); 3242 #else 3243 PetscCall(PetscMalloc1(a->Nbs, &a->colmap)); 3244 PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, a->Nbs)); 3245 #endif 3246 } else a->colmap = NULL; 3247 3248 if (oldmat->garray && (len = ((Mat_SeqBAIJ *)oldmat->B->data)->nbs)) { 3249 PetscCall(PetscMalloc1(len, &a->garray)); 3250 PetscCall(PetscArraycpy(a->garray, oldmat->garray, len)); 3251 } else a->garray = NULL; 3252 3253 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin), matin->rmap->bs, &mat->bstash)); 3254 PetscCall(VecDuplicate(oldmat->lvec, &a->lvec)); 3255 PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx)); 3256 3257 PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A)); 3258 PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B)); 3259 } 3260 PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist)); 3261 *newmat = mat; 3262 PetscFunctionReturn(PETSC_SUCCESS); 3263 } 3264 3265 /* Used for both MPIBAIJ and MPISBAIJ matrices */ 3266 PetscErrorCode MatLoad_MPIBAIJ_Binary(Mat mat, PetscViewer viewer) 3267 { 3268 PetscInt header[4], M, N, nz, bs, m, n, mbs, nbs, rows, cols, sum, i, j, k; 3269 PetscInt *rowidxs, *colidxs, rs, cs, ce; 3270 PetscScalar *matvals; 3271 3272 PetscFunctionBegin; 3273 PetscCall(PetscViewerSetUp(viewer)); 3274 3275 /* read in matrix header */ 3276 PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT)); 3277 PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file"); 3278 M = header[1]; 3279 N = header[2]; 3280 nz = header[3]; 3281 PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M); 3282 PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N); 3283 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIBAIJ"); 3284 3285 /* set block sizes from the viewer's .info file */ 3286 PetscCall(MatLoad_Binary_BlockSizes(mat, viewer)); 3287 /* set local sizes if not set already */ 3288 if (mat->rmap->n < 0 && M == N) mat->rmap->n = mat->cmap->n; 3289 if (mat->cmap->n < 0 && M == N) mat->cmap->n = mat->rmap->n; 3290 /* set global sizes if not set already */ 3291 if (mat->rmap->N < 0) mat->rmap->N = M; 3292 if (mat->cmap->N < 0) mat->cmap->N = N; 3293 PetscCall(PetscLayoutSetUp(mat->rmap)); 3294 PetscCall(PetscLayoutSetUp(mat->cmap)); 3295 3296 /* check if the matrix sizes are correct */ 3297 PetscCall(MatGetSize(mat, &rows, &cols)); 3298 PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols); 3299 PetscCall(MatGetBlockSize(mat, &bs)); 3300 PetscCall(MatGetLocalSize(mat, &m, &n)); 3301 PetscCall(PetscLayoutGetRange(mat->rmap, &rs, NULL)); 3302 PetscCall(PetscLayoutGetRange(mat->cmap, &cs, &ce)); 3303 mbs = m / bs; 3304 nbs = n / bs; 3305 3306 /* read in row lengths and build row indices */ 3307 PetscCall(PetscMalloc1(m + 1, &rowidxs)); 3308 PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT)); 3309 rowidxs[0] = 0; 3310 for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i]; 3311 PetscCall(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer))); 3312 PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum); 3313 3314 /* read in column indices and matrix values */ 3315 PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals)); 3316 PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 3317 PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 3318 3319 { /* preallocate matrix storage */ 3320 PetscBT bt; /* helper bit set to count diagonal nonzeros */ 3321 PetscHSetI ht; /* helper hash set to count off-diagonal nonzeros */ 3322 PetscBool sbaij, done; 3323 PetscInt *d_nnz, *o_nnz; 3324 3325 PetscCall(PetscBTCreate(nbs, &bt)); 3326 PetscCall(PetscHSetICreate(&ht)); 3327 PetscCall(PetscCalloc2(mbs, &d_nnz, mbs, &o_nnz)); 3328 PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPISBAIJ, &sbaij)); 3329 for (i = 0; i < mbs; i++) { 3330 PetscCall(PetscBTMemzero(nbs, bt)); 3331 PetscCall(PetscHSetIClear(ht)); 3332 for (k = 0; k < bs; k++) { 3333 PetscInt row = bs * i + k; 3334 for (j = rowidxs[row]; j < rowidxs[row + 1]; j++) { 3335 PetscInt col = colidxs[j]; 3336 if (!sbaij || col >= row) { 3337 if (col >= cs && col < ce) { 3338 if (!PetscBTLookupSet(bt, (col - cs) / bs)) d_nnz[i]++; 3339 } else { 3340 PetscCall(PetscHSetIQueryAdd(ht, col / bs, &done)); 3341 if (done) o_nnz[i]++; 3342 } 3343 } 3344 } 3345 } 3346 } 3347 PetscCall(PetscBTDestroy(&bt)); 3348 PetscCall(PetscHSetIDestroy(&ht)); 3349 PetscCall(MatMPIBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz)); 3350 PetscCall(MatMPISBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz)); 3351 PetscCall(PetscFree2(d_nnz, o_nnz)); 3352 } 3353 3354 /* store matrix values */ 3355 for (i = 0; i < m; i++) { 3356 PetscInt row = rs + i, s = rowidxs[i], e = rowidxs[i + 1]; 3357 PetscUseTypeMethod(mat, setvalues, 1, &row, e - s, colidxs + s, matvals + s, INSERT_VALUES); 3358 } 3359 3360 PetscCall(PetscFree(rowidxs)); 3361 PetscCall(PetscFree2(colidxs, matvals)); 3362 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 3363 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 3364 PetscFunctionReturn(PETSC_SUCCESS); 3365 } 3366 3367 PetscErrorCode MatLoad_MPIBAIJ(Mat mat, PetscViewer viewer) 3368 { 3369 PetscBool isbinary; 3370 3371 PetscFunctionBegin; 3372 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 3373 PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name); 3374 PetscCall(MatLoad_MPIBAIJ_Binary(mat, viewer)); 3375 PetscFunctionReturn(PETSC_SUCCESS); 3376 } 3377 3378 /*@ 3379 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the matrices hash table 3380 3381 Input Parameters: 3382 + mat - the matrix 3383 - fact - factor 3384 3385 Options Database Key: 3386 . -mat_use_hash_table <fact> - provide the factor 3387 3388 Level: advanced 3389 3390 .seealso: `Mat`, `MATMPIBAIJ`, `MatSetOption()` 3391 @*/ 3392 PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat, PetscReal fact) 3393 { 3394 PetscFunctionBegin; 3395 PetscTryMethod(mat, "MatSetHashTableFactor_C", (Mat, PetscReal), (mat, fact)); 3396 PetscFunctionReturn(PETSC_SUCCESS); 3397 } 3398 3399 PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat, PetscReal fact) 3400 { 3401 Mat_MPIBAIJ *baij; 3402 3403 PetscFunctionBegin; 3404 baij = (Mat_MPIBAIJ *)mat->data; 3405 baij->ht_fact = fact; 3406 PetscFunctionReturn(PETSC_SUCCESS); 3407 } 3408 3409 PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[]) 3410 { 3411 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 3412 PetscBool flg; 3413 3414 PetscFunctionBegin; 3415 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIBAIJ, &flg)); 3416 PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIBAIJ matrix as input"); 3417 if (Ad) *Ad = a->A; 3418 if (Ao) *Ao = a->B; 3419 if (colmap) *colmap = a->garray; 3420 PetscFunctionReturn(PETSC_SUCCESS); 3421 } 3422 3423 /* 3424 Special version for direct calls from Fortran (to eliminate two function call overheads 3425 */ 3426 #if defined(PETSC_HAVE_FORTRAN_CAPS) 3427 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED 3428 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 3429 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked 3430 #endif 3431 3432 // PetscClangLinter pragma disable: -fdoc-synopsis-matching-symbol-name 3433 /*@C 3434 MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to `MatSetValuesBlocked()` 3435 3436 Collective 3437 3438 Input Parameters: 3439 + matin - the matrix 3440 . min - number of input rows 3441 . im - input rows 3442 . nin - number of input columns 3443 . in - input columns 3444 . v - numerical values input 3445 - addvin - `INSERT_VALUES` or `ADD_VALUES` 3446 3447 Level: advanced 3448 3449 Developer Notes: 3450 This has a complete copy of `MatSetValuesBlocked_MPIBAIJ()` which is terrible code un-reuse. 3451 3452 .seealso: `Mat`, `MatSetValuesBlocked()` 3453 @*/ 3454 PETSC_EXTERN PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin, PetscInt *min, const PetscInt im[], PetscInt *nin, const PetscInt in[], const MatScalar v[], InsertMode *addvin) 3455 { 3456 /* convert input arguments to C version */ 3457 Mat mat = *matin; 3458 PetscInt m = *min, n = *nin; 3459 InsertMode addv = *addvin; 3460 3461 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 3462 const MatScalar *value; 3463 MatScalar *barray = baij->barray; 3464 PetscBool roworiented = baij->roworiented; 3465 PetscInt i, j, ii, jj, row, col, rstart = baij->rstartbs; 3466 PetscInt rend = baij->rendbs, cstart = baij->cstartbs, stepval; 3467 PetscInt cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2; 3468 3469 PetscFunctionBegin; 3470 /* tasks normally handled by MatSetValuesBlocked() */ 3471 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 3472 else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values"); 3473 PetscCheck(!mat->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix"); 3474 if (mat->assembled) { 3475 mat->was_assembled = PETSC_TRUE; 3476 mat->assembled = PETSC_FALSE; 3477 } 3478 PetscCall(PetscLogEventBegin(MAT_SetValues, mat, 0, 0, 0)); 3479 3480 if (!barray) { 3481 PetscCall(PetscMalloc1(bs2, &barray)); 3482 baij->barray = barray; 3483 } 3484 3485 if (roworiented) stepval = (n - 1) * bs; 3486 else stepval = (m - 1) * bs; 3487 3488 for (i = 0; i < m; i++) { 3489 if (im[i] < 0) continue; 3490 PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large, row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1); 3491 if (im[i] >= rstart && im[i] < rend) { 3492 row = im[i] - rstart; 3493 for (j = 0; j < n; j++) { 3494 /* If NumCol = 1 then a copy is not required */ 3495 if ((roworiented) && (n == 1)) { 3496 barray = (MatScalar *)v + i * bs2; 3497 } else if ((!roworiented) && (m == 1)) { 3498 barray = (MatScalar *)v + j * bs2; 3499 } else { /* Here a copy is required */ 3500 if (roworiented) { 3501 value = v + i * (stepval + bs) * bs + j * bs; 3502 } else { 3503 value = v + j * (stepval + bs) * bs + i * bs; 3504 } 3505 for (ii = 0; ii < bs; ii++, value += stepval) { 3506 for (jj = 0; jj < bs; jj++) *barray++ = *value++; 3507 } 3508 barray -= bs2; 3509 } 3510 3511 if (in[j] >= cstart && in[j] < cend) { 3512 col = in[j] - cstart; 3513 PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j])); 3514 } else if (in[j] < 0) { 3515 continue; 3516 } else { 3517 PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large, col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1); 3518 if (mat->was_assembled) { 3519 if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 3520 3521 #if defined(PETSC_USE_DEBUG) 3522 #if defined(PETSC_USE_CTABLE) 3523 { 3524 PetscInt data; 3525 PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &data)); 3526 PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap"); 3527 } 3528 #else 3529 PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap"); 3530 #endif 3531 #endif 3532 #if defined(PETSC_USE_CTABLE) 3533 PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col)); 3534 col = (col - 1) / bs; 3535 #else 3536 col = (baij->colmap[in[j]] - 1) / bs; 3537 #endif 3538 if (col < 0 && !((Mat_SeqBAIJ *)baij->A->data)->nonew) { 3539 PetscCall(MatDisAssemble_MPIBAIJ(mat)); 3540 col = in[j]; 3541 } 3542 } else col = in[j]; 3543 PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j])); 3544 } 3545 } 3546 } else { 3547 if (!baij->donotstash) { 3548 if (roworiented) { 3549 PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 3550 } else { 3551 PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 3552 } 3553 } 3554 } 3555 } 3556 3557 /* task normally handled by MatSetValuesBlocked() */ 3558 PetscCall(PetscLogEventEnd(MAT_SetValues, mat, 0, 0, 0)); 3559 PetscFunctionReturn(PETSC_SUCCESS); 3560 } 3561 3562 /*@ 3563 MatCreateMPIBAIJWithArrays - creates a `MATMPIBAIJ` matrix using arrays that contain in standard block CSR format for the local rows. 3564 3565 Collective 3566 3567 Input Parameters: 3568 + comm - MPI communicator 3569 . bs - the block size, only a block size of 1 is supported 3570 . m - number of local rows (Cannot be `PETSC_DECIDE`) 3571 . n - This value should be the same as the local size used in creating the 3572 x vector for the matrix-vector product $ y = Ax $. (or `PETSC_DECIDE` to have 3573 calculated if `N` is given) For square matrices `n` is almost always `m`. 3574 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given) 3575 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given) 3576 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that rowth block row of the matrix 3577 . j - column indices 3578 - a - matrix values 3579 3580 Output Parameter: 3581 . mat - the matrix 3582 3583 Level: intermediate 3584 3585 Notes: 3586 The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc; 3587 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3588 called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays. 3589 3590 The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 3591 the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 3592 block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory 3593 with column-major ordering within blocks. 3594 3595 The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array. 3596 3597 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 3598 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()` 3599 @*/ 3600 PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat) 3601 { 3602 PetscFunctionBegin; 3603 PetscCheck(!i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 3604 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3605 PetscCall(MatCreate(comm, mat)); 3606 PetscCall(MatSetSizes(*mat, m, n, M, N)); 3607 PetscCall(MatSetType(*mat, MATMPIBAIJ)); 3608 PetscCall(MatSetBlockSize(*mat, bs)); 3609 PetscCall(MatSetUp(*mat)); 3610 PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_FALSE)); 3611 PetscCall(MatMPIBAIJSetPreallocationCSR(*mat, bs, i, j, a)); 3612 PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_TRUE)); 3613 PetscFunctionReturn(PETSC_SUCCESS); 3614 } 3615 3616 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 3617 { 3618 PetscInt m, N, i, rstart, nnz, Ii, bs, cbs; 3619 PetscInt *indx; 3620 PetscScalar *values; 3621 3622 PetscFunctionBegin; 3623 PetscCall(MatGetSize(inmat, &m, &N)); 3624 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 3625 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)inmat->data; 3626 PetscInt *dnz, *onz, mbs, Nbs, nbs; 3627 PetscInt *bindx, rmax = a->rmax, j; 3628 PetscMPIInt rank, size; 3629 3630 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 3631 mbs = m / bs; 3632 Nbs = N / cbs; 3633 if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnershipBlock(comm, cbs, &n, &N)); 3634 nbs = n / cbs; 3635 3636 PetscCall(PetscMalloc1(rmax, &bindx)); 3637 MatPreallocateBegin(comm, mbs, nbs, dnz, onz); /* inline function, output __end and __rstart are used below */ 3638 3639 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 3640 PetscCallMPI(MPI_Comm_rank(comm, &size)); 3641 if (rank == size - 1) { 3642 /* Check sum(nbs) = Nbs */ 3643 PetscCheck(__end == Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT, __end, Nbs); 3644 } 3645 3646 rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */ 3647 for (i = 0; i < mbs; i++) { 3648 PetscCall(MatGetRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL)); /* non-blocked nnz and indx */ 3649 nnz = nnz / bs; 3650 for (j = 0; j < nnz; j++) bindx[j] = indx[j * bs] / bs; 3651 PetscCall(MatPreallocateSet(i + rstart, nnz, bindx, dnz, onz)); 3652 PetscCall(MatRestoreRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL)); 3653 } 3654 PetscCall(PetscFree(bindx)); 3655 3656 PetscCall(MatCreate(comm, outmat)); 3657 PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 3658 PetscCall(MatSetBlockSizes(*outmat, bs, cbs)); 3659 PetscCall(MatSetType(*outmat, MATBAIJ)); 3660 PetscCall(MatSeqBAIJSetPreallocation(*outmat, bs, 0, dnz)); 3661 PetscCall(MatMPIBAIJSetPreallocation(*outmat, bs, 0, dnz, 0, onz)); 3662 MatPreallocateEnd(dnz, onz); 3663 PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 3664 } 3665 3666 /* numeric phase */ 3667 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 3668 PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL)); 3669 3670 for (i = 0; i < m; i++) { 3671 PetscCall(MatGetRow_SeqBAIJ(inmat, i, &nnz, &indx, &values)); 3672 Ii = i + rstart; 3673 PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES)); 3674 PetscCall(MatRestoreRow_SeqBAIJ(inmat, i, &nnz, &indx, &values)); 3675 } 3676 PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY)); 3677 PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY)); 3678 PetscFunctionReturn(PETSC_SUCCESS); 3679 } 3680