1 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/ 2 3 #include <petsc/private/hashseti.h> 4 #include <petscblaslapack.h> 5 #include <petscsf.h> 6 7 static PetscErrorCode MatDestroy_MPIBAIJ(Mat mat) 8 { 9 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 10 11 PetscFunctionBegin; 12 PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N)); 13 PetscCall(MatStashDestroy_Private(&mat->stash)); 14 PetscCall(MatStashDestroy_Private(&mat->bstash)); 15 PetscCall(MatDestroy(&baij->A)); 16 PetscCall(MatDestroy(&baij->B)); 17 #if defined(PETSC_USE_CTABLE) 18 PetscCall(PetscHMapIDestroy(&baij->colmap)); 19 #else 20 PetscCall(PetscFree(baij->colmap)); 21 #endif 22 PetscCall(PetscFree(baij->garray)); 23 PetscCall(VecDestroy(&baij->lvec)); 24 PetscCall(VecScatterDestroy(&baij->Mvctx)); 25 PetscCall(PetscFree2(baij->rowvalues, baij->rowindices)); 26 PetscCall(PetscFree(baij->barray)); 27 PetscCall(PetscFree2(baij->hd, baij->ht)); 28 PetscCall(PetscFree(baij->rangebs)); 29 PetscCall(PetscFree(mat->data)); 30 31 PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL)); 32 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL)); 33 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL)); 34 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocation_C", NULL)); 35 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocationCSR_C", NULL)); 36 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL)); 37 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetHashTableFactor_C", NULL)); 38 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpisbaij_C", NULL)); 39 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiadj_C", NULL)); 40 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiaij_C", NULL)); 41 #if defined(PETSC_HAVE_HYPRE) 42 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_hypre_C", NULL)); 43 #endif 44 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_is_C", NULL)); 45 PetscFunctionReturn(PETSC_SUCCESS); 46 } 47 48 /* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and MatAssemblyEnd_MPI_Hash() */ 49 #define TYPE BAIJ 50 #include "../src/mat/impls/aij/mpi/mpihashmat.h" 51 #undef TYPE 52 53 #if defined(PETSC_HAVE_HYPRE) 54 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *); 55 #endif 56 57 static PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A, Vec v, PetscInt idx[]) 58 { 59 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 60 PetscInt i, *idxb = NULL, m = A->rmap->n, bs = A->cmap->bs; 61 PetscScalar *va, *vv; 62 Vec vB, vA; 63 const PetscScalar *vb; 64 65 PetscFunctionBegin; 66 PetscCall(MatCreateVecs(a->A, NULL, &vA)); 67 PetscCall(MatGetRowMaxAbs(a->A, vA, idx)); 68 69 PetscCall(VecGetArrayWrite(vA, &va)); 70 if (idx) { 71 for (i = 0; i < m; i++) { 72 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 73 } 74 } 75 76 PetscCall(MatCreateVecs(a->B, NULL, &vB)); 77 PetscCall(PetscMalloc1(m, &idxb)); 78 PetscCall(MatGetRowMaxAbs(a->B, vB, idxb)); 79 80 PetscCall(VecGetArrayWrite(v, &vv)); 81 PetscCall(VecGetArrayRead(vB, &vb)); 82 for (i = 0; i < m; i++) { 83 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 84 vv[i] = vb[i]; 85 if (idx) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs); 86 } else { 87 vv[i] = va[i]; 88 if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > bs * a->garray[idxb[i] / bs] + (idxb[i] % bs)) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs); 89 } 90 } 91 PetscCall(VecRestoreArrayWrite(vA, &vv)); 92 PetscCall(VecRestoreArrayWrite(vA, &va)); 93 PetscCall(VecRestoreArrayRead(vB, &vb)); 94 PetscCall(PetscFree(idxb)); 95 PetscCall(VecDestroy(&vA)); 96 PetscCall(VecDestroy(&vB)); 97 PetscFunctionReturn(PETSC_SUCCESS); 98 } 99 100 static PetscErrorCode MatGetRowSumAbs_MPIBAIJ(Mat A, Vec v) 101 { 102 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 103 Vec vB, vA; 104 105 PetscFunctionBegin; 106 PetscCall(MatCreateVecs(a->A, NULL, &vA)); 107 PetscCall(MatGetRowSumAbs(a->A, vA)); 108 PetscCall(MatCreateVecs(a->B, NULL, &vB)); 109 PetscCall(MatGetRowSumAbs(a->B, vB)); 110 PetscCall(VecAXPY(vA, 1.0, vB)); 111 PetscCall(VecDestroy(&vB)); 112 PetscCall(VecCopy(vA, v)); 113 PetscCall(VecDestroy(&vA)); 114 PetscFunctionReturn(PETSC_SUCCESS); 115 } 116 117 static PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat) 118 { 119 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 120 121 PetscFunctionBegin; 122 PetscCall(MatStoreValues(aij->A)); 123 PetscCall(MatStoreValues(aij->B)); 124 PetscFunctionReturn(PETSC_SUCCESS); 125 } 126 127 static PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat) 128 { 129 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 130 131 PetscFunctionBegin; 132 PetscCall(MatRetrieveValues(aij->A)); 133 PetscCall(MatRetrieveValues(aij->B)); 134 PetscFunctionReturn(PETSC_SUCCESS); 135 } 136 137 /* 138 Local utility routine that creates a mapping from the global column 139 number to the local number in the off-diagonal part of the local 140 storage of the matrix. This is done in a non scalable way since the 141 length of colmap equals the global matrix length. 142 */ 143 PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat) 144 { 145 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 146 Mat_SeqBAIJ *B = (Mat_SeqBAIJ *)baij->B->data; 147 PetscInt nbs = B->nbs, i, bs = mat->rmap->bs; 148 149 PetscFunctionBegin; 150 #if defined(PETSC_USE_CTABLE) 151 PetscCall(PetscHMapICreateWithSize(baij->nbs, &baij->colmap)); 152 for (i = 0; i < nbs; i++) PetscCall(PetscHMapISet(baij->colmap, baij->garray[i] + 1, i * bs + 1)); 153 #else 154 PetscCall(PetscCalloc1(baij->Nbs + 1, &baij->colmap)); 155 for (i = 0; i < nbs; i++) baij->colmap[baij->garray[i]] = i * bs + 1; 156 #endif 157 PetscFunctionReturn(PETSC_SUCCESS); 158 } 159 160 #define MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, orow, ocol) \ 161 do { \ 162 brow = row / bs; \ 163 rp = PetscSafePointerPlusOffset(aj, ai[brow]); \ 164 ap = PetscSafePointerPlusOffset(aa, bs2 * ai[brow]); \ 165 rmax = aimax[brow]; \ 166 nrow = ailen[brow]; \ 167 bcol = col / bs; \ 168 ridx = row % bs; \ 169 cidx = col % bs; \ 170 low = 0; \ 171 high = nrow; \ 172 while (high - low > 3) { \ 173 t = (low + high) / 2; \ 174 if (rp[t] > bcol) high = t; \ 175 else low = t; \ 176 } \ 177 for (_i = low; _i < high; _i++) { \ 178 if (rp[_i] > bcol) break; \ 179 if (rp[_i] == bcol) { \ 180 bap = ap + bs2 * _i + bs * cidx + ridx; \ 181 if (addv == ADD_VALUES) *bap += value; \ 182 else *bap = value; \ 183 goto a_noinsert; \ 184 } \ 185 } \ 186 if (a->nonew == 1) goto a_noinsert; \ 187 PetscCheck(a->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 188 MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, aimax, a->nonew, MatScalar); \ 189 N = nrow++ - 1; \ 190 /* shift up all the later entries in this row */ \ 191 PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \ 192 PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \ 193 PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \ 194 rp[_i] = bcol; \ 195 ap[bs2 * _i + bs * cidx + ridx] = value; \ 196 a_noinsert:; \ 197 ailen[brow] = nrow; \ 198 } while (0) 199 200 #define MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, orow, ocol) \ 201 do { \ 202 brow = row / bs; \ 203 rp = PetscSafePointerPlusOffset(bj, bi[brow]); \ 204 ap = PetscSafePointerPlusOffset(ba, bs2 * bi[brow]); \ 205 rmax = bimax[brow]; \ 206 nrow = bilen[brow]; \ 207 bcol = col / bs; \ 208 ridx = row % bs; \ 209 cidx = col % bs; \ 210 low = 0; \ 211 high = nrow; \ 212 while (high - low > 3) { \ 213 t = (low + high) / 2; \ 214 if (rp[t] > bcol) high = t; \ 215 else low = t; \ 216 } \ 217 for (_i = low; _i < high; _i++) { \ 218 if (rp[_i] > bcol) break; \ 219 if (rp[_i] == bcol) { \ 220 bap = ap + bs2 * _i + bs * cidx + ridx; \ 221 if (addv == ADD_VALUES) *bap += value; \ 222 else *bap = value; \ 223 goto b_noinsert; \ 224 } \ 225 } \ 226 if (b->nonew == 1) goto b_noinsert; \ 227 PetscCheck(b->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 228 MatSeqXAIJReallocateAIJ(B, b->mbs, bs2, nrow, brow, bcol, rmax, ba, bi, bj, rp, ap, bimax, b->nonew, MatScalar); \ 229 N = nrow++ - 1; \ 230 /* shift up all the later entries in this row */ \ 231 PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \ 232 PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \ 233 PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \ 234 rp[_i] = bcol; \ 235 ap[bs2 * _i + bs * cidx + ridx] = value; \ 236 b_noinsert:; \ 237 bilen[brow] = nrow; \ 238 } while (0) 239 240 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) 241 { 242 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 243 MatScalar value; 244 PetscBool roworiented = baij->roworiented; 245 PetscInt i, j, row, col; 246 PetscInt rstart_orig = mat->rmap->rstart; 247 PetscInt rend_orig = mat->rmap->rend, cstart_orig = mat->cmap->rstart; 248 PetscInt cend_orig = mat->cmap->rend, bs = mat->rmap->bs; 249 250 /* Some Variables required in the macro */ 251 Mat A = baij->A; 252 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)(A)->data; 253 PetscInt *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j; 254 MatScalar *aa = a->a; 255 256 Mat B = baij->B; 257 Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)(B)->data; 258 PetscInt *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j; 259 MatScalar *ba = b->a; 260 261 PetscInt *rp, ii, nrow, _i, rmax, N, brow, bcol; 262 PetscInt low, high, t, ridx, cidx, bs2 = a->bs2; 263 MatScalar *ap, *bap; 264 265 PetscFunctionBegin; 266 for (i = 0; i < m; i++) { 267 if (im[i] < 0) continue; 268 PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1); 269 if (im[i] >= rstart_orig && im[i] < rend_orig) { 270 row = im[i] - rstart_orig; 271 for (j = 0; j < n; j++) { 272 if (in[j] >= cstart_orig && in[j] < cend_orig) { 273 col = in[j] - cstart_orig; 274 if (roworiented) value = v[i * n + j]; 275 else value = v[i + j * m]; 276 MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, im[i], in[j]); 277 } else if (in[j] < 0) { 278 continue; 279 } else { 280 PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1); 281 if (mat->was_assembled) { 282 if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 283 #if defined(PETSC_USE_CTABLE) 284 PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] / bs + 1, 0, &col)); 285 col = col - 1; 286 #else 287 col = baij->colmap[in[j] / bs] - 1; 288 #endif 289 if (col < 0 && !((Mat_SeqBAIJ *)baij->B->data)->nonew) { 290 PetscCall(MatDisAssemble_MPIBAIJ(mat)); 291 col = in[j]; 292 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 293 B = baij->B; 294 b = (Mat_SeqBAIJ *)(B)->data; 295 bimax = b->imax; 296 bi = b->i; 297 bilen = b->ilen; 298 bj = b->j; 299 ba = b->a; 300 } else { 301 PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]); 302 col += in[j] % bs; 303 } 304 } else col = in[j]; 305 if (roworiented) value = v[i * n + j]; 306 else value = v[i + j * m]; 307 MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, im[i], in[j]); 308 /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */ 309 } 310 } 311 } else { 312 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]); 313 if (!baij->donotstash) { 314 mat->assembled = PETSC_FALSE; 315 if (roworiented) { 316 PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE)); 317 } else { 318 PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE)); 319 } 320 } 321 } 322 } 323 PetscFunctionReturn(PETSC_SUCCESS); 324 } 325 326 static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol) 327 { 328 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 329 PetscInt *rp, low, high, t, ii, jj, nrow, i, rmax, N; 330 PetscInt *imax = a->imax, *ai = a->i, *ailen = a->ilen; 331 PetscInt *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs; 332 PetscBool roworiented = a->roworiented; 333 const PetscScalar *value = v; 334 MatScalar *ap, *aa = a->a, *bap; 335 336 PetscFunctionBegin; 337 rp = aj + ai[row]; 338 ap = aa + bs2 * ai[row]; 339 rmax = imax[row]; 340 nrow = ailen[row]; 341 value = v; 342 low = 0; 343 high = nrow; 344 while (high - low > 7) { 345 t = (low + high) / 2; 346 if (rp[t] > col) high = t; 347 else low = t; 348 } 349 for (i = low; i < high; i++) { 350 if (rp[i] > col) break; 351 if (rp[i] == col) { 352 bap = ap + bs2 * i; 353 if (roworiented) { 354 if (is == ADD_VALUES) { 355 for (ii = 0; ii < bs; ii++) { 356 for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++; 357 } 358 } else { 359 for (ii = 0; ii < bs; ii++) { 360 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 361 } 362 } 363 } else { 364 if (is == ADD_VALUES) { 365 for (ii = 0; ii < bs; ii++, value += bs) { 366 for (jj = 0; jj < bs; jj++) bap[jj] += value[jj]; 367 bap += bs; 368 } 369 } else { 370 for (ii = 0; ii < bs; ii++, value += bs) { 371 for (jj = 0; jj < bs; jj++) bap[jj] = value[jj]; 372 bap += bs; 373 } 374 } 375 } 376 goto noinsert2; 377 } 378 } 379 if (nonew == 1) goto noinsert2; 380 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol); 381 MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar); 382 N = nrow++ - 1; 383 high++; 384 /* shift up all the later entries in this row */ 385 PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1)); 386 PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1))); 387 rp[i] = col; 388 bap = ap + bs2 * i; 389 if (roworiented) { 390 for (ii = 0; ii < bs; ii++) { 391 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 392 } 393 } else { 394 for (ii = 0; ii < bs; ii++) { 395 for (jj = 0; jj < bs; jj++) *bap++ = *value++; 396 } 397 } 398 noinsert2:; 399 ailen[row] = nrow; 400 PetscFunctionReturn(PETSC_SUCCESS); 401 } 402 403 /* 404 This routine should be optimized so that the block copy at ** Here a copy is required ** below is not needed 405 by passing additional stride information into the MatSetValuesBlocked_SeqBAIJ_Inlined() routine 406 */ 407 static PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) 408 { 409 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 410 const PetscScalar *value; 411 MatScalar *barray = baij->barray; 412 PetscBool roworiented = baij->roworiented; 413 PetscInt i, j, ii, jj, row, col, rstart = baij->rstartbs; 414 PetscInt rend = baij->rendbs, cstart = baij->cstartbs, stepval; 415 PetscInt cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2; 416 417 PetscFunctionBegin; 418 if (!barray) { 419 PetscCall(PetscMalloc1(bs2, &barray)); 420 baij->barray = barray; 421 } 422 423 if (roworiented) stepval = (n - 1) * bs; 424 else stepval = (m - 1) * bs; 425 426 for (i = 0; i < m; i++) { 427 if (im[i] < 0) continue; 428 PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1); 429 if (im[i] >= rstart && im[i] < rend) { 430 row = im[i] - rstart; 431 for (j = 0; j < n; j++) { 432 /* If NumCol = 1 then a copy is not required */ 433 if ((roworiented) && (n == 1)) { 434 barray = (MatScalar *)v + i * bs2; 435 } else if ((!roworiented) && (m == 1)) { 436 barray = (MatScalar *)v + j * bs2; 437 } else { /* Here a copy is required */ 438 if (roworiented) { 439 value = v + (i * (stepval + bs) + j) * bs; 440 } else { 441 value = v + (j * (stepval + bs) + i) * bs; 442 } 443 for (ii = 0; ii < bs; ii++, value += bs + stepval) { 444 for (jj = 0; jj < bs; jj++) barray[jj] = value[jj]; 445 barray += bs; 446 } 447 barray -= bs2; 448 } 449 450 if (in[j] >= cstart && in[j] < cend) { 451 col = in[j] - cstart; 452 PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j])); 453 } else if (in[j] < 0) { 454 continue; 455 } else { 456 PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1); 457 if (mat->was_assembled) { 458 if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 459 460 #if defined(PETSC_USE_CTABLE) 461 PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col)); 462 col = col < 1 ? -1 : (col - 1) / bs; 463 #else 464 col = baij->colmap[in[j]] < 1 ? -1 : (baij->colmap[in[j]] - 1) / bs; 465 #endif 466 if (col < 0 && !((Mat_SeqBAIJ *)baij->B->data)->nonew) { 467 PetscCall(MatDisAssemble_MPIBAIJ(mat)); 468 col = in[j]; 469 } else PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new blocked indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]); 470 } else col = in[j]; 471 PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j])); 472 } 473 } 474 } else { 475 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]); 476 if (!baij->donotstash) { 477 if (roworiented) { 478 PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 479 } else { 480 PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 481 } 482 } 483 } 484 } 485 PetscFunctionReturn(PETSC_SUCCESS); 486 } 487 488 #define HASH_KEY 0.6180339887 489 #define HASH(size, key, tmp) (tmp = (key) * HASH_KEY, (PetscInt)((size) * (tmp - (PetscInt)tmp))) 490 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 491 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 492 static PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) 493 { 494 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 495 PetscBool roworiented = baij->roworiented; 496 PetscInt i, j, row, col; 497 PetscInt rstart_orig = mat->rmap->rstart; 498 PetscInt rend_orig = mat->rmap->rend, Nbs = baij->Nbs; 499 PetscInt h1, key, size = baij->ht_size, bs = mat->rmap->bs, *HT = baij->ht, idx; 500 PetscReal tmp; 501 MatScalar **HD = baij->hd, value; 502 PetscInt total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct; 503 504 PetscFunctionBegin; 505 for (i = 0; i < m; i++) { 506 if (PetscDefined(USE_DEBUG)) { 507 PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row"); 508 PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1); 509 } 510 row = im[i]; 511 if (row >= rstart_orig && row < rend_orig) { 512 for (j = 0; j < n; j++) { 513 col = in[j]; 514 if (roworiented) value = v[i * n + j]; 515 else value = v[i + j * m]; 516 /* Look up PetscInto the Hash Table */ 517 key = (row / bs) * Nbs + (col / bs) + 1; 518 h1 = HASH(size, key, tmp); 519 520 idx = h1; 521 if (PetscDefined(USE_DEBUG)) { 522 insert_ct++; 523 total_ct++; 524 if (HT[idx] != key) { 525 for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++); 526 if (idx == size) { 527 for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++); 528 PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col); 529 } 530 } 531 } else if (HT[idx] != key) { 532 for (idx = h1; (idx < size) && (HT[idx] != key); idx++); 533 if (idx == size) { 534 for (idx = 0; (idx < h1) && (HT[idx] != key); idx++); 535 PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col); 536 } 537 } 538 /* A HASH table entry is found, so insert the values at the correct address */ 539 if (addv == ADD_VALUES) *(HD[idx] + (col % bs) * bs + (row % bs)) += value; 540 else *(HD[idx] + (col % bs) * bs + (row % bs)) = value; 541 } 542 } else if (!baij->donotstash) { 543 if (roworiented) { 544 PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE)); 545 } else { 546 PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE)); 547 } 548 } 549 } 550 if (PetscDefined(USE_DEBUG)) { 551 baij->ht_total_ct += total_ct; 552 baij->ht_insert_ct += insert_ct; 553 } 554 PetscFunctionReturn(PETSC_SUCCESS); 555 } 556 557 static PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) 558 { 559 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 560 PetscBool roworiented = baij->roworiented; 561 PetscInt i, j, ii, jj, row, col; 562 PetscInt rstart = baij->rstartbs; 563 PetscInt rend = mat->rmap->rend, stepval, bs = mat->rmap->bs, bs2 = baij->bs2, nbs2 = n * bs2; 564 PetscInt h1, key, size = baij->ht_size, idx, *HT = baij->ht, Nbs = baij->Nbs; 565 PetscReal tmp; 566 MatScalar **HD = baij->hd, *baij_a; 567 const PetscScalar *v_t, *value; 568 PetscInt total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct; 569 570 PetscFunctionBegin; 571 if (roworiented) stepval = (n - 1) * bs; 572 else stepval = (m - 1) * bs; 573 574 for (i = 0; i < m; i++) { 575 if (PetscDefined(USE_DEBUG)) { 576 PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row: %" PetscInt_FMT, im[i]); 577 PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1); 578 } 579 row = im[i]; 580 v_t = v + i * nbs2; 581 if (row >= rstart && row < rend) { 582 for (j = 0; j < n; j++) { 583 col = in[j]; 584 585 /* Look up into the Hash Table */ 586 key = row * Nbs + col + 1; 587 h1 = HASH(size, key, tmp); 588 589 idx = h1; 590 if (PetscDefined(USE_DEBUG)) { 591 total_ct++; 592 insert_ct++; 593 if (HT[idx] != key) { 594 for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++); 595 if (idx == size) { 596 for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++); 597 PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col); 598 } 599 } 600 } else if (HT[idx] != key) { 601 for (idx = h1; (idx < size) && (HT[idx] != key); idx++); 602 if (idx == size) { 603 for (idx = 0; (idx < h1) && (HT[idx] != key); idx++); 604 PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col); 605 } 606 } 607 baij_a = HD[idx]; 608 if (roworiented) { 609 /*value = v + i*(stepval+bs)*bs + j*bs;*/ 610 /* value = v + (i*(stepval+bs)+j)*bs; */ 611 value = v_t; 612 v_t += bs; 613 if (addv == ADD_VALUES) { 614 for (ii = 0; ii < bs; ii++, value += stepval) { 615 for (jj = ii; jj < bs2; jj += bs) baij_a[jj] += *value++; 616 } 617 } else { 618 for (ii = 0; ii < bs; ii++, value += stepval) { 619 for (jj = ii; jj < bs2; jj += bs) baij_a[jj] = *value++; 620 } 621 } 622 } else { 623 value = v + j * (stepval + bs) * bs + i * bs; 624 if (addv == ADD_VALUES) { 625 for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) { 626 for (jj = 0; jj < bs; jj++) baij_a[jj] += *value++; 627 } 628 } else { 629 for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) { 630 for (jj = 0; jj < bs; jj++) baij_a[jj] = *value++; 631 } 632 } 633 } 634 } 635 } else { 636 if (!baij->donotstash) { 637 if (roworiented) { 638 PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 639 } else { 640 PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 641 } 642 } 643 } 644 } 645 if (PetscDefined(USE_DEBUG)) { 646 baij->ht_total_ct += total_ct; 647 baij->ht_insert_ct += insert_ct; 648 } 649 PetscFunctionReturn(PETSC_SUCCESS); 650 } 651 652 static PetscErrorCode MatGetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[]) 653 { 654 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 655 PetscInt bs = mat->rmap->bs, i, j, bsrstart = mat->rmap->rstart, bsrend = mat->rmap->rend; 656 PetscInt bscstart = mat->cmap->rstart, bscend = mat->cmap->rend, row, col, data; 657 658 PetscFunctionBegin; 659 for (i = 0; i < m; i++) { 660 if (idxm[i] < 0) continue; /* negative row */ 661 PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1); 662 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 663 row = idxm[i] - bsrstart; 664 for (j = 0; j < n; j++) { 665 if (idxn[j] < 0) continue; /* negative column */ 666 PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1); 667 if (idxn[j] >= bscstart && idxn[j] < bscend) { 668 col = idxn[j] - bscstart; 669 PetscCall(MatGetValues_SeqBAIJ(baij->A, 1, &row, 1, &col, v + i * n + j)); 670 } else { 671 if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 672 #if defined(PETSC_USE_CTABLE) 673 PetscCall(PetscHMapIGetWithDefault(baij->colmap, idxn[j] / bs + 1, 0, &data)); 674 data--; 675 #else 676 data = baij->colmap[idxn[j] / bs] - 1; 677 #endif 678 if ((data < 0) || (baij->garray[data / bs] != idxn[j] / bs)) *(v + i * n + j) = 0.0; 679 else { 680 col = data + idxn[j] % bs; 681 PetscCall(MatGetValues_SeqBAIJ(baij->B, 1, &row, 1, &col, v + i * n + j)); 682 } 683 } 684 } 685 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported"); 686 } 687 PetscFunctionReturn(PETSC_SUCCESS); 688 } 689 690 static PetscErrorCode MatNorm_MPIBAIJ(Mat mat, NormType type, PetscReal *nrm) 691 { 692 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 693 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ *)baij->A->data, *bmat = (Mat_SeqBAIJ *)baij->B->data; 694 PetscInt i, j, bs2 = baij->bs2, bs = baij->A->rmap->bs, nz, row, col; 695 PetscReal sum = 0.0; 696 MatScalar *v; 697 698 PetscFunctionBegin; 699 if (baij->size == 1) { 700 PetscCall(MatNorm(baij->A, type, nrm)); 701 } else { 702 if (type == NORM_FROBENIUS) { 703 v = amat->a; 704 nz = amat->nz * bs2; 705 for (i = 0; i < nz; i++) { 706 sum += PetscRealPart(PetscConj(*v) * (*v)); 707 v++; 708 } 709 v = bmat->a; 710 nz = bmat->nz * bs2; 711 for (i = 0; i < nz; i++) { 712 sum += PetscRealPart(PetscConj(*v) * (*v)); 713 v++; 714 } 715 PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 716 *nrm = PetscSqrtReal(*nrm); 717 } else if (type == NORM_1) { /* max column sum */ 718 PetscReal *tmp, *tmp2; 719 PetscInt *jj, *garray = baij->garray, cstart = baij->rstartbs; 720 PetscCall(PetscCalloc1(mat->cmap->N, &tmp)); 721 PetscCall(PetscMalloc1(mat->cmap->N, &tmp2)); 722 v = amat->a; 723 jj = amat->j; 724 for (i = 0; i < amat->nz; i++) { 725 for (j = 0; j < bs; j++) { 726 col = bs * (cstart + *jj) + j; /* column index */ 727 for (row = 0; row < bs; row++) { 728 tmp[col] += PetscAbsScalar(*v); 729 v++; 730 } 731 } 732 jj++; 733 } 734 v = bmat->a; 735 jj = bmat->j; 736 for (i = 0; i < bmat->nz; i++) { 737 for (j = 0; j < bs; j++) { 738 col = bs * garray[*jj] + j; 739 for (row = 0; row < bs; row++) { 740 tmp[col] += PetscAbsScalar(*v); 741 v++; 742 } 743 } 744 jj++; 745 } 746 PetscCall(MPIU_Allreduce(tmp, tmp2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 747 *nrm = 0.0; 748 for (j = 0; j < mat->cmap->N; j++) { 749 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 750 } 751 PetscCall(PetscFree(tmp)); 752 PetscCall(PetscFree(tmp2)); 753 } else if (type == NORM_INFINITY) { /* max row sum */ 754 PetscReal *sums; 755 PetscCall(PetscMalloc1(bs, &sums)); 756 sum = 0.0; 757 for (j = 0; j < amat->mbs; j++) { 758 for (row = 0; row < bs; row++) sums[row] = 0.0; 759 v = amat->a + bs2 * amat->i[j]; 760 nz = amat->i[j + 1] - amat->i[j]; 761 for (i = 0; i < nz; i++) { 762 for (col = 0; col < bs; col++) { 763 for (row = 0; row < bs; row++) { 764 sums[row] += PetscAbsScalar(*v); 765 v++; 766 } 767 } 768 } 769 v = bmat->a + bs2 * bmat->i[j]; 770 nz = bmat->i[j + 1] - bmat->i[j]; 771 for (i = 0; i < nz; i++) { 772 for (col = 0; col < bs; col++) { 773 for (row = 0; row < bs; row++) { 774 sums[row] += PetscAbsScalar(*v); 775 v++; 776 } 777 } 778 } 779 for (row = 0; row < bs; row++) { 780 if (sums[row] > sum) sum = sums[row]; 781 } 782 } 783 PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat))); 784 PetscCall(PetscFree(sums)); 785 } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for this norm yet"); 786 } 787 PetscFunctionReturn(PETSC_SUCCESS); 788 } 789 790 /* 791 Creates the hash table, and sets the table 792 This table is created only once. 793 If new entried need to be added to the matrix 794 then the hash table has to be destroyed and 795 recreated. 796 */ 797 static PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat, PetscReal factor) 798 { 799 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 800 Mat A = baij->A, B = baij->B; 801 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data; 802 PetscInt i, j, k, nz = a->nz + b->nz, h1, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j; 803 PetscInt ht_size, bs2 = baij->bs2, rstart = baij->rstartbs; 804 PetscInt cstart = baij->cstartbs, *garray = baij->garray, row, col, Nbs = baij->Nbs; 805 PetscInt *HT, key; 806 MatScalar **HD; 807 PetscReal tmp; 808 #if defined(PETSC_USE_INFO) 809 PetscInt ct = 0, max = 0; 810 #endif 811 812 PetscFunctionBegin; 813 if (baij->ht) PetscFunctionReturn(PETSC_SUCCESS); 814 815 baij->ht_size = (PetscInt)(factor * nz); 816 ht_size = baij->ht_size; 817 818 /* Allocate Memory for Hash Table */ 819 PetscCall(PetscCalloc2(ht_size, &baij->hd, ht_size, &baij->ht)); 820 HD = baij->hd; 821 HT = baij->ht; 822 823 /* Loop Over A */ 824 for (i = 0; i < a->mbs; i++) { 825 for (j = ai[i]; j < ai[i + 1]; j++) { 826 row = i + rstart; 827 col = aj[j] + cstart; 828 829 key = row * Nbs + col + 1; 830 h1 = HASH(ht_size, key, tmp); 831 for (k = 0; k < ht_size; k++) { 832 if (!HT[(h1 + k) % ht_size]) { 833 HT[(h1 + k) % ht_size] = key; 834 HD[(h1 + k) % ht_size] = a->a + j * bs2; 835 break; 836 #if defined(PETSC_USE_INFO) 837 } else { 838 ct++; 839 #endif 840 } 841 } 842 #if defined(PETSC_USE_INFO) 843 if (k > max) max = k; 844 #endif 845 } 846 } 847 /* Loop Over B */ 848 for (i = 0; i < b->mbs; i++) { 849 for (j = bi[i]; j < bi[i + 1]; j++) { 850 row = i + rstart; 851 col = garray[bj[j]]; 852 key = row * Nbs + col + 1; 853 h1 = HASH(ht_size, key, tmp); 854 for (k = 0; k < ht_size; k++) { 855 if (!HT[(h1 + k) % ht_size]) { 856 HT[(h1 + k) % ht_size] = key; 857 HD[(h1 + k) % ht_size] = b->a + j * bs2; 858 break; 859 #if defined(PETSC_USE_INFO) 860 } else { 861 ct++; 862 #endif 863 } 864 } 865 #if defined(PETSC_USE_INFO) 866 if (k > max) max = k; 867 #endif 868 } 869 } 870 871 /* Print Summary */ 872 #if defined(PETSC_USE_INFO) 873 for (i = 0, j = 0; i < ht_size; i++) { 874 if (HT[i]) j++; 875 } 876 PetscCall(PetscInfo(mat, "Average Search = %5.2g,max search = %" PetscInt_FMT "\n", (!j) ? (double)0.0 : (double)(((PetscReal)(ct + j)) / (double)j), max)); 877 #endif 878 PetscFunctionReturn(PETSC_SUCCESS); 879 } 880 881 static PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat, MatAssemblyType mode) 882 { 883 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 884 PetscInt nstash, reallocs; 885 886 PetscFunctionBegin; 887 if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS); 888 889 PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range)); 890 PetscCall(MatStashScatterBegin_Private(mat, &mat->bstash, baij->rangebs)); 891 PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs)); 892 PetscCall(PetscInfo(mat, "Stash has %" PetscInt_FMT " entries,uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs)); 893 PetscCall(MatStashGetInfo_Private(&mat->bstash, &nstash, &reallocs)); 894 PetscCall(PetscInfo(mat, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs)); 895 PetscFunctionReturn(PETSC_SUCCESS); 896 } 897 898 static PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat, MatAssemblyType mode) 899 { 900 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 901 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)baij->A->data; 902 PetscInt i, j, rstart, ncols, flg, bs2 = baij->bs2; 903 PetscInt *row, *col; 904 PetscBool r1, r2, r3, other_disassembled; 905 MatScalar *val; 906 PetscMPIInt n; 907 908 PetscFunctionBegin; 909 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 910 if (!baij->donotstash && !mat->nooffprocentries) { 911 while (1) { 912 PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg)); 913 if (!flg) break; 914 915 for (i = 0; i < n;) { 916 /* Now identify the consecutive vals belonging to the same row */ 917 for (j = i, rstart = row[j]; j < n; j++) { 918 if (row[j] != rstart) break; 919 } 920 if (j < n) ncols = j - i; 921 else ncols = n - i; 922 /* Now assemble all these values with a single function call */ 923 PetscCall(MatSetValues_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode)); 924 i = j; 925 } 926 } 927 PetscCall(MatStashScatterEnd_Private(&mat->stash)); 928 /* Now process the block-stash. Since the values are stashed column-oriented, 929 set the row-oriented flag to column-oriented, and after MatSetValues() 930 restore the original flags */ 931 r1 = baij->roworiented; 932 r2 = a->roworiented; 933 r3 = ((Mat_SeqBAIJ *)baij->B->data)->roworiented; 934 935 baij->roworiented = PETSC_FALSE; 936 a->roworiented = PETSC_FALSE; 937 ((Mat_SeqBAIJ *)baij->B->data)->roworiented = PETSC_FALSE; 938 while (1) { 939 PetscCall(MatStashScatterGetMesg_Private(&mat->bstash, &n, &row, &col, &val, &flg)); 940 if (!flg) break; 941 942 for (i = 0; i < n;) { 943 /* Now identify the consecutive vals belonging to the same row */ 944 for (j = i, rstart = row[j]; j < n; j++) { 945 if (row[j] != rstart) break; 946 } 947 if (j < n) ncols = j - i; 948 else ncols = n - i; 949 PetscCall(MatSetValuesBlocked_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i * bs2, mat->insertmode)); 950 i = j; 951 } 952 } 953 PetscCall(MatStashScatterEnd_Private(&mat->bstash)); 954 955 baij->roworiented = r1; 956 a->roworiented = r2; 957 ((Mat_SeqBAIJ *)baij->B->data)->roworiented = r3; 958 } 959 960 PetscCall(MatAssemblyBegin(baij->A, mode)); 961 PetscCall(MatAssemblyEnd(baij->A, mode)); 962 963 /* determine if any processor has disassembled, if so we must 964 also disassemble ourselves, in order that we may reassemble. */ 965 /* 966 if nonzero structure of submatrix B cannot change then we know that 967 no processor disassembled thus we can skip this stuff 968 */ 969 if (!((Mat_SeqBAIJ *)baij->B->data)->nonew) { 970 PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 971 if (mat->was_assembled && !other_disassembled) PetscCall(MatDisAssemble_MPIBAIJ(mat)); 972 } 973 974 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIBAIJ(mat)); 975 PetscCall(MatAssemblyBegin(baij->B, mode)); 976 PetscCall(MatAssemblyEnd(baij->B, mode)); 977 978 #if defined(PETSC_USE_INFO) 979 if (baij->ht && mode == MAT_FINAL_ASSEMBLY) { 980 PetscCall(PetscInfo(mat, "Average Hash Table Search in MatSetValues = %5.2f\n", (double)((PetscReal)baij->ht_total_ct) / baij->ht_insert_ct)); 981 982 baij->ht_total_ct = 0; 983 baij->ht_insert_ct = 0; 984 } 985 #endif 986 if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 987 PetscCall(MatCreateHashTable_MPIBAIJ_Private(mat, baij->ht_fact)); 988 989 mat->ops->setvalues = MatSetValues_MPIBAIJ_HT; 990 mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 991 } 992 993 PetscCall(PetscFree2(baij->rowvalues, baij->rowindices)); 994 995 baij->rowvalues = NULL; 996 997 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 998 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ *)baij->A->data)->nonew) { 999 PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate; 1000 PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat))); 1001 } 1002 PetscFunctionReturn(PETSC_SUCCESS); 1003 } 1004 1005 extern PetscErrorCode MatView_SeqBAIJ(Mat, PetscViewer); 1006 #include <petscdraw.h> 1007 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer) 1008 { 1009 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 1010 PetscMPIInt rank = baij->rank; 1011 PetscInt bs = mat->rmap->bs; 1012 PetscBool iascii, isdraw; 1013 PetscViewer sviewer; 1014 PetscViewerFormat format; 1015 1016 PetscFunctionBegin; 1017 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 1018 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1019 if (iascii) { 1020 PetscCall(PetscViewerGetFormat(viewer, &format)); 1021 if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) { 1022 MatInfo info; 1023 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 1024 PetscCall(MatGetInfo(mat, MAT_LOCAL, &info)); 1025 PetscCall(PetscViewerASCIIPushSynchronized(viewer)); 1026 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, 1027 mat->rmap->bs, (double)info.memory)); 1028 PetscCall(MatGetInfo(baij->A, MAT_LOCAL, &info)); 1029 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used)); 1030 PetscCall(MatGetInfo(baij->B, MAT_LOCAL, &info)); 1031 PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used)); 1032 PetscCall(PetscViewerFlush(viewer)); 1033 PetscCall(PetscViewerASCIIPopSynchronized(viewer)); 1034 PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n")); 1035 PetscCall(VecScatterView(baij->Mvctx, viewer)); 1036 PetscFunctionReturn(PETSC_SUCCESS); 1037 } else if (format == PETSC_VIEWER_ASCII_INFO) { 1038 PetscCall(PetscViewerASCIIPrintf(viewer, " block size is %" PetscInt_FMT "\n", bs)); 1039 PetscFunctionReturn(PETSC_SUCCESS); 1040 } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) { 1041 PetscFunctionReturn(PETSC_SUCCESS); 1042 } 1043 } 1044 1045 if (isdraw) { 1046 PetscDraw draw; 1047 PetscBool isnull; 1048 PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw)); 1049 PetscCall(PetscDrawIsNull(draw, &isnull)); 1050 if (isnull) PetscFunctionReturn(PETSC_SUCCESS); 1051 } 1052 1053 { 1054 /* assemble the entire matrix onto first processor. */ 1055 Mat A; 1056 Mat_SeqBAIJ *Aloc; 1057 PetscInt M = mat->rmap->N, N = mat->cmap->N, *ai, *aj, col, i, j, k, *rvals, mbs = baij->mbs; 1058 MatScalar *a; 1059 const char *matname; 1060 1061 /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */ 1062 /* Perhaps this should be the type of mat? */ 1063 PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A)); 1064 if (rank == 0) { 1065 PetscCall(MatSetSizes(A, M, N, M, N)); 1066 } else { 1067 PetscCall(MatSetSizes(A, 0, 0, M, N)); 1068 } 1069 PetscCall(MatSetType(A, MATMPIBAIJ)); 1070 PetscCall(MatMPIBAIJSetPreallocation(A, mat->rmap->bs, 0, NULL, 0, NULL)); 1071 PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE)); 1072 1073 /* copy over the A part */ 1074 Aloc = (Mat_SeqBAIJ *)baij->A->data; 1075 ai = Aloc->i; 1076 aj = Aloc->j; 1077 a = Aloc->a; 1078 PetscCall(PetscMalloc1(bs, &rvals)); 1079 1080 for (i = 0; i < mbs; i++) { 1081 rvals[0] = bs * (baij->rstartbs + i); 1082 for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1; 1083 for (j = ai[i]; j < ai[i + 1]; j++) { 1084 col = (baij->cstartbs + aj[j]) * bs; 1085 for (k = 0; k < bs; k++) { 1086 PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES)); 1087 col++; 1088 a += bs; 1089 } 1090 } 1091 } 1092 /* copy over the B part */ 1093 Aloc = (Mat_SeqBAIJ *)baij->B->data; 1094 ai = Aloc->i; 1095 aj = Aloc->j; 1096 a = Aloc->a; 1097 for (i = 0; i < mbs; i++) { 1098 rvals[0] = bs * (baij->rstartbs + i); 1099 for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1; 1100 for (j = ai[i]; j < ai[i + 1]; j++) { 1101 col = baij->garray[aj[j]] * bs; 1102 for (k = 0; k < bs; k++) { 1103 PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES)); 1104 col++; 1105 a += bs; 1106 } 1107 } 1108 } 1109 PetscCall(PetscFree(rvals)); 1110 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 1111 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 1112 /* 1113 Everyone has to call to draw the matrix since the graphics waits are 1114 synchronized across all processors that share the PetscDraw object 1115 */ 1116 PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 1117 if (((PetscObject)mat)->name) PetscCall(PetscObjectGetName((PetscObject)mat, &matname)); 1118 if (rank == 0) { 1119 if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIBAIJ *)A->data)->A, matname)); 1120 PetscCall(MatView_SeqBAIJ(((Mat_MPIBAIJ *)A->data)->A, sviewer)); 1121 } 1122 PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer)); 1123 PetscCall(MatDestroy(&A)); 1124 } 1125 PetscFunctionReturn(PETSC_SUCCESS); 1126 } 1127 1128 /* Used for both MPIBAIJ and MPISBAIJ matrices */ 1129 PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat, PetscViewer viewer) 1130 { 1131 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 1132 Mat_SeqBAIJ *A = (Mat_SeqBAIJ *)aij->A->data; 1133 Mat_SeqBAIJ *B = (Mat_SeqBAIJ *)aij->B->data; 1134 const PetscInt *garray = aij->garray; 1135 PetscInt header[4], M, N, m, rs, cs, bs, cnt, i, j, ja, jb, k, l; 1136 PetscInt64 nz, hnz; 1137 PetscInt *rowlens, *colidxs; 1138 PetscScalar *matvals; 1139 PetscMPIInt rank; 1140 1141 PetscFunctionBegin; 1142 PetscCall(PetscViewerSetUp(viewer)); 1143 1144 M = mat->rmap->N; 1145 N = mat->cmap->N; 1146 m = mat->rmap->n; 1147 rs = mat->rmap->rstart; 1148 cs = mat->cmap->rstart; 1149 bs = mat->rmap->bs; 1150 nz = bs * bs * (A->nz + B->nz); 1151 1152 /* write matrix header */ 1153 header[0] = MAT_FILE_CLASSID; 1154 header[1] = M; 1155 header[2] = N; 1156 PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_INT64, MPI_SUM, 0, PetscObjectComm((PetscObject)mat))); 1157 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank)); 1158 if (rank == 0) PetscCall(PetscIntCast(hnz, &header[3])); 1159 PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT)); 1160 1161 /* fill in and store row lengths */ 1162 PetscCall(PetscMalloc1(m, &rowlens)); 1163 for (cnt = 0, i = 0; i < A->mbs; i++) 1164 for (j = 0; j < bs; j++) rowlens[cnt++] = bs * (A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i]); 1165 PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT)); 1166 PetscCall(PetscFree(rowlens)); 1167 1168 /* fill in and store column indices */ 1169 PetscCall(PetscMalloc1(nz, &colidxs)); 1170 for (cnt = 0, i = 0; i < A->mbs; i++) { 1171 for (k = 0; k < bs; k++) { 1172 for (jb = B->i[i]; jb < B->i[i + 1]; jb++) { 1173 if (garray[B->j[jb]] > cs / bs) break; 1174 for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l; 1175 } 1176 for (ja = A->i[i]; ja < A->i[i + 1]; ja++) 1177 for (l = 0; l < bs; l++) colidxs[cnt++] = bs * A->j[ja] + l + cs; 1178 for (; jb < B->i[i + 1]; jb++) 1179 for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l; 1180 } 1181 } 1182 PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz); 1183 PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_INT)); 1184 PetscCall(PetscFree(colidxs)); 1185 1186 /* fill in and store nonzero values */ 1187 PetscCall(PetscMalloc1(nz, &matvals)); 1188 for (cnt = 0, i = 0; i < A->mbs; i++) { 1189 for (k = 0; k < bs; k++) { 1190 for (jb = B->i[i]; jb < B->i[i + 1]; jb++) { 1191 if (garray[B->j[jb]] > cs / bs) break; 1192 for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k]; 1193 } 1194 for (ja = A->i[i]; ja < A->i[i + 1]; ja++) 1195 for (l = 0; l < bs; l++) matvals[cnt++] = A->a[bs * (bs * ja + l) + k]; 1196 for (; jb < B->i[i + 1]; jb++) 1197 for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k]; 1198 } 1199 } 1200 PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_SCALAR)); 1201 PetscCall(PetscFree(matvals)); 1202 1203 /* write block size option to the viewer's .info file */ 1204 PetscCall(MatView_Binary_BlockSizes(mat, viewer)); 1205 PetscFunctionReturn(PETSC_SUCCESS); 1206 } 1207 1208 PetscErrorCode MatView_MPIBAIJ(Mat mat, PetscViewer viewer) 1209 { 1210 PetscBool iascii, isdraw, issocket, isbinary; 1211 1212 PetscFunctionBegin; 1213 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 1214 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1215 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket)); 1216 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1217 if (iascii || isdraw || issocket) { 1218 PetscCall(MatView_MPIBAIJ_ASCIIorDraworSocket(mat, viewer)); 1219 } else if (isbinary) PetscCall(MatView_MPIBAIJ_Binary(mat, viewer)); 1220 PetscFunctionReturn(PETSC_SUCCESS); 1221 } 1222 1223 static PetscErrorCode MatMult_MPIBAIJ(Mat A, Vec xx, Vec yy) 1224 { 1225 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1226 PetscInt nt; 1227 1228 PetscFunctionBegin; 1229 PetscCall(VecGetLocalSize(xx, &nt)); 1230 PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A and xx"); 1231 PetscCall(VecGetLocalSize(yy, &nt)); 1232 PetscCheck(nt == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A and yy"); 1233 PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1234 PetscCall((*a->A->ops->mult)(a->A, xx, yy)); 1235 PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1236 PetscCall((*a->B->ops->multadd)(a->B, a->lvec, yy, yy)); 1237 PetscFunctionReturn(PETSC_SUCCESS); 1238 } 1239 1240 static PetscErrorCode MatMultAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz) 1241 { 1242 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1243 1244 PetscFunctionBegin; 1245 PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1246 PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz)); 1247 PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1248 PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz)); 1249 PetscFunctionReturn(PETSC_SUCCESS); 1250 } 1251 1252 static PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A, Vec xx, Vec yy) 1253 { 1254 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1255 1256 PetscFunctionBegin; 1257 /* do nondiagonal part */ 1258 PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec)); 1259 /* do local part */ 1260 PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy)); 1261 /* add partial results together */ 1262 PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE)); 1263 PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE)); 1264 PetscFunctionReturn(PETSC_SUCCESS); 1265 } 1266 1267 static PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz) 1268 { 1269 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1270 1271 PetscFunctionBegin; 1272 /* do nondiagonal part */ 1273 PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec)); 1274 /* do local part */ 1275 PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz)); 1276 /* add partial results together */ 1277 PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE)); 1278 PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE)); 1279 PetscFunctionReturn(PETSC_SUCCESS); 1280 } 1281 1282 /* 1283 This only works correctly for square matrices where the subblock A->A is the 1284 diagonal block 1285 */ 1286 static PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A, Vec v) 1287 { 1288 PetscFunctionBegin; 1289 PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block"); 1290 PetscCall(MatGetDiagonal(((Mat_MPIBAIJ *)A->data)->A, v)); 1291 PetscFunctionReturn(PETSC_SUCCESS); 1292 } 1293 1294 static PetscErrorCode MatScale_MPIBAIJ(Mat A, PetscScalar aa) 1295 { 1296 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1297 1298 PetscFunctionBegin; 1299 PetscCall(MatScale(a->A, aa)); 1300 PetscCall(MatScale(a->B, aa)); 1301 PetscFunctionReturn(PETSC_SUCCESS); 1302 } 1303 1304 PetscErrorCode MatGetRow_MPIBAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1305 { 1306 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data; 1307 PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p; 1308 PetscInt bs = matin->rmap->bs, bs2 = mat->bs2, i, *cworkA, *cworkB, **pcA, **pcB; 1309 PetscInt nztot, nzA, nzB, lrow, brstart = matin->rmap->rstart, brend = matin->rmap->rend; 1310 PetscInt *cmap, *idx_p, cstart = mat->cstartbs; 1311 1312 PetscFunctionBegin; 1313 PetscCheck(row >= brstart && row < brend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local rows"); 1314 PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active"); 1315 mat->getrowactive = PETSC_TRUE; 1316 1317 if (!mat->rowvalues && (idx || v)) { 1318 /* 1319 allocate enough space to hold information from the longest row. 1320 */ 1321 Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *)mat->A->data, *Ba = (Mat_SeqBAIJ *)mat->B->data; 1322 PetscInt max = 1, mbs = mat->mbs, tmp; 1323 for (i = 0; i < mbs; i++) { 1324 tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i]; 1325 if (max < tmp) max = tmp; 1326 } 1327 PetscCall(PetscMalloc2(max * bs2, &mat->rowvalues, max * bs2, &mat->rowindices)); 1328 } 1329 lrow = row - brstart; 1330 1331 pvA = &vworkA; 1332 pcA = &cworkA; 1333 pvB = &vworkB; 1334 pcB = &cworkB; 1335 if (!v) { 1336 pvA = NULL; 1337 pvB = NULL; 1338 } 1339 if (!idx) { 1340 pcA = NULL; 1341 if (!v) pcB = NULL; 1342 } 1343 PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA)); 1344 PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB)); 1345 nztot = nzA + nzB; 1346 1347 cmap = mat->garray; 1348 if (v || idx) { 1349 if (nztot) { 1350 /* Sort by increasing column numbers, assuming A and B already sorted */ 1351 PetscInt imark = -1; 1352 if (v) { 1353 *v = v_p = mat->rowvalues; 1354 for (i = 0; i < nzB; i++) { 1355 if (cmap[cworkB[i] / bs] < cstart) v_p[i] = vworkB[i]; 1356 else break; 1357 } 1358 imark = i; 1359 for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i]; 1360 for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i]; 1361 } 1362 if (idx) { 1363 *idx = idx_p = mat->rowindices; 1364 if (imark > -1) { 1365 for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs; 1366 } else { 1367 for (i = 0; i < nzB; i++) { 1368 if (cmap[cworkB[i] / bs] < cstart) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs; 1369 else break; 1370 } 1371 imark = i; 1372 } 1373 for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart * bs + cworkA[i]; 1374 for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs; 1375 } 1376 } else { 1377 if (idx) *idx = NULL; 1378 if (v) *v = NULL; 1379 } 1380 } 1381 *nz = nztot; 1382 PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA)); 1383 PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB)); 1384 PetscFunctionReturn(PETSC_SUCCESS); 1385 } 1386 1387 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1388 { 1389 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 1390 1391 PetscFunctionBegin; 1392 PetscCheck(baij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow not called"); 1393 baij->getrowactive = PETSC_FALSE; 1394 PetscFunctionReturn(PETSC_SUCCESS); 1395 } 1396 1397 static PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A) 1398 { 1399 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data; 1400 1401 PetscFunctionBegin; 1402 PetscCall(MatZeroEntries(l->A)); 1403 PetscCall(MatZeroEntries(l->B)); 1404 PetscFunctionReturn(PETSC_SUCCESS); 1405 } 1406 1407 static PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin, MatInfoType flag, MatInfo *info) 1408 { 1409 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)matin->data; 1410 Mat A = a->A, B = a->B; 1411 PetscLogDouble isend[5], irecv[5]; 1412 1413 PetscFunctionBegin; 1414 info->block_size = (PetscReal)matin->rmap->bs; 1415 1416 PetscCall(MatGetInfo(A, MAT_LOCAL, info)); 1417 1418 isend[0] = info->nz_used; 1419 isend[1] = info->nz_allocated; 1420 isend[2] = info->nz_unneeded; 1421 isend[3] = info->memory; 1422 isend[4] = info->mallocs; 1423 1424 PetscCall(MatGetInfo(B, MAT_LOCAL, info)); 1425 1426 isend[0] += info->nz_used; 1427 isend[1] += info->nz_allocated; 1428 isend[2] += info->nz_unneeded; 1429 isend[3] += info->memory; 1430 isend[4] += info->mallocs; 1431 1432 if (flag == MAT_LOCAL) { 1433 info->nz_used = isend[0]; 1434 info->nz_allocated = isend[1]; 1435 info->nz_unneeded = isend[2]; 1436 info->memory = isend[3]; 1437 info->mallocs = isend[4]; 1438 } else if (flag == MAT_GLOBAL_MAX) { 1439 PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin))); 1440 1441 info->nz_used = irecv[0]; 1442 info->nz_allocated = irecv[1]; 1443 info->nz_unneeded = irecv[2]; 1444 info->memory = irecv[3]; 1445 info->mallocs = irecv[4]; 1446 } else if (flag == MAT_GLOBAL_SUM) { 1447 PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin))); 1448 1449 info->nz_used = irecv[0]; 1450 info->nz_allocated = irecv[1]; 1451 info->nz_unneeded = irecv[2]; 1452 info->memory = irecv[3]; 1453 info->mallocs = irecv[4]; 1454 } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_ARG_WRONG, "Unknown MatInfoType argument %d", (int)flag); 1455 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1456 info->fill_ratio_needed = 0; 1457 info->factor_mallocs = 0; 1458 PetscFunctionReturn(PETSC_SUCCESS); 1459 } 1460 1461 static PetscErrorCode MatSetOption_MPIBAIJ(Mat A, MatOption op, PetscBool flg) 1462 { 1463 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1464 1465 PetscFunctionBegin; 1466 switch (op) { 1467 case MAT_NEW_NONZERO_LOCATIONS: 1468 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1469 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1470 case MAT_KEEP_NONZERO_PATTERN: 1471 case MAT_NEW_NONZERO_LOCATION_ERR: 1472 MatCheckPreallocated(A, 1); 1473 PetscCall(MatSetOption(a->A, op, flg)); 1474 PetscCall(MatSetOption(a->B, op, flg)); 1475 break; 1476 case MAT_ROW_ORIENTED: 1477 MatCheckPreallocated(A, 1); 1478 a->roworiented = flg; 1479 1480 PetscCall(MatSetOption(a->A, op, flg)); 1481 PetscCall(MatSetOption(a->B, op, flg)); 1482 break; 1483 case MAT_FORCE_DIAGONAL_ENTRIES: 1484 case MAT_SORTED_FULL: 1485 PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); 1486 break; 1487 case MAT_IGNORE_OFF_PROC_ENTRIES: 1488 a->donotstash = flg; 1489 break; 1490 case MAT_USE_HASH_TABLE: 1491 a->ht_flag = flg; 1492 a->ht_fact = 1.39; 1493 break; 1494 case MAT_SYMMETRIC: 1495 case MAT_STRUCTURALLY_SYMMETRIC: 1496 case MAT_HERMITIAN: 1497 case MAT_SUBMAT_SINGLEIS: 1498 case MAT_SYMMETRY_ETERNAL: 1499 case MAT_STRUCTURAL_SYMMETRY_ETERNAL: 1500 case MAT_SPD_ETERNAL: 1501 /* if the diagonal matrix is square it inherits some of the properties above */ 1502 break; 1503 default: 1504 SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "unknown option %d", op); 1505 } 1506 PetscFunctionReturn(PETSC_SUCCESS); 1507 } 1508 1509 static PetscErrorCode MatTranspose_MPIBAIJ(Mat A, MatReuse reuse, Mat *matout) 1510 { 1511 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)A->data; 1512 Mat_SeqBAIJ *Aloc; 1513 Mat B; 1514 PetscInt M = A->rmap->N, N = A->cmap->N, *ai, *aj, i, *rvals, j, k, col; 1515 PetscInt bs = A->rmap->bs, mbs = baij->mbs; 1516 MatScalar *a; 1517 1518 PetscFunctionBegin; 1519 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout)); 1520 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 1521 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 1522 PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M)); 1523 PetscCall(MatSetType(B, ((PetscObject)A)->type_name)); 1524 /* Do not know preallocation information, but must set block size */ 1525 PetscCall(MatMPIBAIJSetPreallocation(B, A->rmap->bs, PETSC_DECIDE, NULL, PETSC_DECIDE, NULL)); 1526 } else { 1527 B = *matout; 1528 } 1529 1530 /* copy over the A part */ 1531 Aloc = (Mat_SeqBAIJ *)baij->A->data; 1532 ai = Aloc->i; 1533 aj = Aloc->j; 1534 a = Aloc->a; 1535 PetscCall(PetscMalloc1(bs, &rvals)); 1536 1537 for (i = 0; i < mbs; i++) { 1538 rvals[0] = bs * (baij->rstartbs + i); 1539 for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1; 1540 for (j = ai[i]; j < ai[i + 1]; j++) { 1541 col = (baij->cstartbs + aj[j]) * bs; 1542 for (k = 0; k < bs; k++) { 1543 PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES)); 1544 1545 col++; 1546 a += bs; 1547 } 1548 } 1549 } 1550 /* copy over the B part */ 1551 Aloc = (Mat_SeqBAIJ *)baij->B->data; 1552 ai = Aloc->i; 1553 aj = Aloc->j; 1554 a = Aloc->a; 1555 for (i = 0; i < mbs; i++) { 1556 rvals[0] = bs * (baij->rstartbs + i); 1557 for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1; 1558 for (j = ai[i]; j < ai[i + 1]; j++) { 1559 col = baij->garray[aj[j]] * bs; 1560 for (k = 0; k < bs; k++) { 1561 PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES)); 1562 col++; 1563 a += bs; 1564 } 1565 } 1566 } 1567 PetscCall(PetscFree(rvals)); 1568 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 1569 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 1570 1571 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) *matout = B; 1572 else PetscCall(MatHeaderMerge(A, &B)); 1573 PetscFunctionReturn(PETSC_SUCCESS); 1574 } 1575 1576 static PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat, Vec ll, Vec rr) 1577 { 1578 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 1579 Mat a = baij->A, b = baij->B; 1580 PetscInt s1, s2, s3; 1581 1582 PetscFunctionBegin; 1583 PetscCall(MatGetLocalSize(mat, &s2, &s3)); 1584 if (rr) { 1585 PetscCall(VecGetLocalSize(rr, &s1)); 1586 PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size"); 1587 /* Overlap communication with computation. */ 1588 PetscCall(VecScatterBegin(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1589 } 1590 if (ll) { 1591 PetscCall(VecGetLocalSize(ll, &s1)); 1592 PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size"); 1593 PetscUseTypeMethod(b, diagonalscale, ll, NULL); 1594 } 1595 /* scale the diagonal block */ 1596 PetscUseTypeMethod(a, diagonalscale, ll, rr); 1597 1598 if (rr) { 1599 /* Do a scatter end and then right scale the off-diagonal block */ 1600 PetscCall(VecScatterEnd(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1601 PetscUseTypeMethod(b, diagonalscale, NULL, baij->lvec); 1602 } 1603 PetscFunctionReturn(PETSC_SUCCESS); 1604 } 1605 1606 static PetscErrorCode MatZeroRows_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 1607 { 1608 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data; 1609 PetscInt *lrows; 1610 PetscInt r, len; 1611 PetscBool cong; 1612 1613 PetscFunctionBegin; 1614 /* get locally owned rows */ 1615 PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows)); 1616 /* fix right-hand side if needed */ 1617 if (x && b) { 1618 const PetscScalar *xx; 1619 PetscScalar *bb; 1620 1621 PetscCall(VecGetArrayRead(x, &xx)); 1622 PetscCall(VecGetArray(b, &bb)); 1623 for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]]; 1624 PetscCall(VecRestoreArrayRead(x, &xx)); 1625 PetscCall(VecRestoreArray(b, &bb)); 1626 } 1627 1628 /* actually zap the local rows */ 1629 /* 1630 Zero the required rows. If the "diagonal block" of the matrix 1631 is square and the user wishes to set the diagonal we use separate 1632 code so that MatSetValues() is not called for each diagonal allocating 1633 new memory, thus calling lots of mallocs and slowing things down. 1634 1635 */ 1636 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1637 PetscCall(MatZeroRows_SeqBAIJ(l->B, len, lrows, 0.0, NULL, NULL)); 1638 PetscCall(MatHasCongruentLayouts(A, &cong)); 1639 if ((diag != 0.0) && cong) { 1640 PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, diag, NULL, NULL)); 1641 } else if (diag != 0.0) { 1642 PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL)); 1643 PetscCheck(!((Mat_SeqBAIJ *)l->A->data)->nonew, PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options MAT_NEW_NONZERO_LOCATIONS, MAT_NEW_NONZERO_LOCATION_ERR, and MAT_NEW_NONZERO_ALLOCATION_ERR"); 1644 for (r = 0; r < len; ++r) { 1645 const PetscInt row = lrows[r] + A->rmap->rstart; 1646 PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES)); 1647 } 1648 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 1649 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 1650 } else { 1651 PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL)); 1652 } 1653 PetscCall(PetscFree(lrows)); 1654 1655 /* only change matrix nonzero state if pattern was allowed to be changed */ 1656 if (!((Mat_SeqBAIJ *)l->A->data)->keepnonzeropattern || !((Mat_SeqBAIJ *)l->A->data)->nonew) { 1657 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1658 PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A))); 1659 } 1660 PetscFunctionReturn(PETSC_SUCCESS); 1661 } 1662 1663 static PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 1664 { 1665 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data; 1666 PetscMPIInt n = A->rmap->n, p = 0; 1667 PetscInt i, j, k, r, len = 0, row, col, count; 1668 PetscInt *lrows, *owners = A->rmap->range; 1669 PetscSFNode *rrows; 1670 PetscSF sf; 1671 const PetscScalar *xx; 1672 PetscScalar *bb, *mask; 1673 Vec xmask, lmask; 1674 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)l->B->data; 1675 PetscInt bs = A->rmap->bs, bs2 = baij->bs2; 1676 PetscScalar *aa; 1677 1678 PetscFunctionBegin; 1679 /* Create SF where leaves are input rows and roots are owned rows */ 1680 PetscCall(PetscMalloc1(n, &lrows)); 1681 for (r = 0; r < n; ++r) lrows[r] = -1; 1682 PetscCall(PetscMalloc1(N, &rrows)); 1683 for (r = 0; r < N; ++r) { 1684 const PetscInt idx = rows[r]; 1685 PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N); 1686 if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */ 1687 PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p)); 1688 } 1689 rrows[r].rank = p; 1690 rrows[r].index = rows[r] - owners[p]; 1691 } 1692 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1693 PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER)); 1694 /* Collect flags for rows to be zeroed */ 1695 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR)); 1696 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR)); 1697 PetscCall(PetscSFDestroy(&sf)); 1698 /* Compress and put in row numbers */ 1699 for (r = 0; r < n; ++r) 1700 if (lrows[r] >= 0) lrows[len++] = r; 1701 /* zero diagonal part of matrix */ 1702 PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b)); 1703 /* handle off-diagonal part of matrix */ 1704 PetscCall(MatCreateVecs(A, &xmask, NULL)); 1705 PetscCall(VecDuplicate(l->lvec, &lmask)); 1706 PetscCall(VecGetArray(xmask, &bb)); 1707 for (i = 0; i < len; i++) bb[lrows[i]] = 1; 1708 PetscCall(VecRestoreArray(xmask, &bb)); 1709 PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD)); 1710 PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD)); 1711 PetscCall(VecDestroy(&xmask)); 1712 if (x) { 1713 PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1714 PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1715 PetscCall(VecGetArrayRead(l->lvec, &xx)); 1716 PetscCall(VecGetArray(b, &bb)); 1717 } 1718 PetscCall(VecGetArray(lmask, &mask)); 1719 /* remove zeroed rows of off-diagonal matrix */ 1720 for (i = 0; i < len; ++i) { 1721 row = lrows[i]; 1722 count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs; 1723 aa = ((MatScalar *)baij->a) + baij->i[row / bs] * bs2 + (row % bs); 1724 for (k = 0; k < count; ++k) { 1725 aa[0] = 0.0; 1726 aa += bs; 1727 } 1728 } 1729 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1730 for (i = 0; i < l->B->rmap->N; ++i) { 1731 row = i / bs; 1732 for (j = baij->i[row]; j < baij->i[row + 1]; ++j) { 1733 for (k = 0; k < bs; ++k) { 1734 col = bs * baij->j[j] + k; 1735 if (PetscAbsScalar(mask[col])) { 1736 aa = ((MatScalar *)baij->a) + j * bs2 + (i % bs) + bs * k; 1737 if (x) bb[i] -= aa[0] * xx[col]; 1738 aa[0] = 0.0; 1739 } 1740 } 1741 } 1742 } 1743 if (x) { 1744 PetscCall(VecRestoreArray(b, &bb)); 1745 PetscCall(VecRestoreArrayRead(l->lvec, &xx)); 1746 } 1747 PetscCall(VecRestoreArray(lmask, &mask)); 1748 PetscCall(VecDestroy(&lmask)); 1749 PetscCall(PetscFree(lrows)); 1750 1751 /* only change matrix nonzero state if pattern was allowed to be changed */ 1752 if (!((Mat_SeqBAIJ *)l->A->data)->nonew) { 1753 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1754 PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A))); 1755 } 1756 PetscFunctionReturn(PETSC_SUCCESS); 1757 } 1758 1759 static PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1760 { 1761 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1762 1763 PetscFunctionBegin; 1764 PetscCall(MatSetUnfactored(a->A)); 1765 PetscFunctionReturn(PETSC_SUCCESS); 1766 } 1767 1768 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat, MatDuplicateOption, Mat *); 1769 1770 static PetscErrorCode MatEqual_MPIBAIJ(Mat A, Mat B, PetscBool *flag) 1771 { 1772 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ *)B->data, *matA = (Mat_MPIBAIJ *)A->data; 1773 Mat a, b, c, d; 1774 PetscBool flg; 1775 1776 PetscFunctionBegin; 1777 a = matA->A; 1778 b = matA->B; 1779 c = matB->A; 1780 d = matB->B; 1781 1782 PetscCall(MatEqual(a, c, &flg)); 1783 if (flg) PetscCall(MatEqual(b, d, &flg)); 1784 PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A))); 1785 PetscFunctionReturn(PETSC_SUCCESS); 1786 } 1787 1788 static PetscErrorCode MatCopy_MPIBAIJ(Mat A, Mat B, MatStructure str) 1789 { 1790 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1791 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 1792 1793 PetscFunctionBegin; 1794 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1795 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1796 PetscCall(MatCopy_Basic(A, B, str)); 1797 } else { 1798 PetscCall(MatCopy(a->A, b->A, str)); 1799 PetscCall(MatCopy(a->B, b->B, str)); 1800 } 1801 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 1802 PetscFunctionReturn(PETSC_SUCCESS); 1803 } 1804 1805 PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz) 1806 { 1807 PetscInt bs = Y->rmap->bs, m = Y->rmap->N / bs; 1808 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data; 1809 Mat_SeqBAIJ *y = (Mat_SeqBAIJ *)Y->data; 1810 1811 PetscFunctionBegin; 1812 PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz)); 1813 PetscFunctionReturn(PETSC_SUCCESS); 1814 } 1815 1816 static PetscErrorCode MatAXPY_MPIBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str) 1817 { 1818 Mat_MPIBAIJ *xx = (Mat_MPIBAIJ *)X->data, *yy = (Mat_MPIBAIJ *)Y->data; 1819 PetscBLASInt bnz, one = 1; 1820 Mat_SeqBAIJ *x, *y; 1821 PetscInt bs2 = Y->rmap->bs * Y->rmap->bs; 1822 1823 PetscFunctionBegin; 1824 if (str == SAME_NONZERO_PATTERN) { 1825 PetscScalar alpha = a; 1826 x = (Mat_SeqBAIJ *)xx->A->data; 1827 y = (Mat_SeqBAIJ *)yy->A->data; 1828 PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz)); 1829 PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one)); 1830 x = (Mat_SeqBAIJ *)xx->B->data; 1831 y = (Mat_SeqBAIJ *)yy->B->data; 1832 PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz)); 1833 PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one)); 1834 PetscCall(PetscObjectStateIncrease((PetscObject)Y)); 1835 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1836 PetscCall(MatAXPY_Basic(Y, a, X, str)); 1837 } else { 1838 Mat B; 1839 PetscInt *nnz_d, *nnz_o, bs = Y->rmap->bs; 1840 PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d)); 1841 PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o)); 1842 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B)); 1843 PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name)); 1844 PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N)); 1845 PetscCall(MatSetBlockSizesFromMats(B, Y, Y)); 1846 PetscCall(MatSetType(B, MATMPIBAIJ)); 1847 PetscCall(MatAXPYGetPreallocation_SeqBAIJ(yy->A, xx->A, nnz_d)); 1848 PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o)); 1849 PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, nnz_d, 0, nnz_o)); 1850 /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! */ 1851 PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str)); 1852 PetscCall(MatHeaderMerge(Y, &B)); 1853 PetscCall(PetscFree(nnz_d)); 1854 PetscCall(PetscFree(nnz_o)); 1855 } 1856 PetscFunctionReturn(PETSC_SUCCESS); 1857 } 1858 1859 static PetscErrorCode MatConjugate_MPIBAIJ(Mat mat) 1860 { 1861 PetscFunctionBegin; 1862 if (PetscDefined(USE_COMPLEX)) { 1863 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)mat->data; 1864 1865 PetscCall(MatConjugate_SeqBAIJ(a->A)); 1866 PetscCall(MatConjugate_SeqBAIJ(a->B)); 1867 } 1868 PetscFunctionReturn(PETSC_SUCCESS); 1869 } 1870 1871 static PetscErrorCode MatRealPart_MPIBAIJ(Mat A) 1872 { 1873 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1874 1875 PetscFunctionBegin; 1876 PetscCall(MatRealPart(a->A)); 1877 PetscCall(MatRealPart(a->B)); 1878 PetscFunctionReturn(PETSC_SUCCESS); 1879 } 1880 1881 static PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A) 1882 { 1883 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1884 1885 PetscFunctionBegin; 1886 PetscCall(MatImaginaryPart(a->A)); 1887 PetscCall(MatImaginaryPart(a->B)); 1888 PetscFunctionReturn(PETSC_SUCCESS); 1889 } 1890 1891 static PetscErrorCode MatCreateSubMatrix_MPIBAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat) 1892 { 1893 IS iscol_local; 1894 PetscInt csize; 1895 1896 PetscFunctionBegin; 1897 PetscCall(ISGetLocalSize(iscol, &csize)); 1898 if (call == MAT_REUSE_MATRIX) { 1899 PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local)); 1900 PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 1901 } else { 1902 PetscCall(ISAllGather(iscol, &iscol_local)); 1903 } 1904 PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, isrow, iscol_local, csize, call, newmat, PETSC_FALSE)); 1905 if (call == MAT_INITIAL_MATRIX) { 1906 PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local)); 1907 PetscCall(ISDestroy(&iscol_local)); 1908 } 1909 PetscFunctionReturn(PETSC_SUCCESS); 1910 } 1911 1912 /* 1913 Not great since it makes two copies of the submatrix, first an SeqBAIJ 1914 in local and then by concatenating the local matrices the end result. 1915 Writing it directly would be much like MatCreateSubMatrices_MPIBAIJ(). 1916 This routine is used for BAIJ and SBAIJ matrices (unfortunate dependency). 1917 */ 1918 PetscErrorCode MatCreateSubMatrix_MPIBAIJ_Private(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat, PetscBool sym) 1919 { 1920 PetscMPIInt rank, size; 1921 PetscInt i, m, n, rstart, row, rend, nz, *cwork, j, bs; 1922 PetscInt *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal; 1923 Mat M, Mreuse; 1924 MatScalar *vwork, *aa; 1925 MPI_Comm comm; 1926 IS isrow_new, iscol_new; 1927 Mat_SeqBAIJ *aij; 1928 1929 PetscFunctionBegin; 1930 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 1931 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 1932 PetscCallMPI(MPI_Comm_size(comm, &size)); 1933 /* The compression and expansion should be avoided. Doesn't point 1934 out errors, might change the indices, hence buggey */ 1935 PetscCall(ISCompressIndicesGeneral(mat->rmap->N, mat->rmap->n, mat->rmap->bs, 1, &isrow, &isrow_new)); 1936 if (isrow == iscol) { 1937 iscol_new = isrow_new; 1938 PetscCall(PetscObjectReference((PetscObject)iscol_new)); 1939 } else PetscCall(ISCompressIndicesGeneral(mat->cmap->N, mat->cmap->n, mat->cmap->bs, 1, &iscol, &iscol_new)); 1940 1941 if (call == MAT_REUSE_MATRIX) { 1942 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse)); 1943 PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 1944 PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_REUSE_MATRIX, &Mreuse, sym)); 1945 } else { 1946 PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_INITIAL_MATRIX, &Mreuse, sym)); 1947 } 1948 PetscCall(ISDestroy(&isrow_new)); 1949 PetscCall(ISDestroy(&iscol_new)); 1950 /* 1951 m - number of local rows 1952 n - number of columns (same on all processors) 1953 rstart - first row in new global matrix generated 1954 */ 1955 PetscCall(MatGetBlockSize(mat, &bs)); 1956 PetscCall(MatGetSize(Mreuse, &m, &n)); 1957 m = m / bs; 1958 n = n / bs; 1959 1960 if (call == MAT_INITIAL_MATRIX) { 1961 aij = (Mat_SeqBAIJ *)(Mreuse)->data; 1962 ii = aij->i; 1963 jj = aij->j; 1964 1965 /* 1966 Determine the number of non-zeros in the diagonal and off-diagonal 1967 portions of the matrix in order to do correct preallocation 1968 */ 1969 1970 /* first get start and end of "diagonal" columns */ 1971 if (csize == PETSC_DECIDE) { 1972 PetscCall(ISGetSize(isrow, &mglobal)); 1973 if (mglobal == n * bs) { /* square matrix */ 1974 nlocal = m; 1975 } else { 1976 nlocal = n / size + ((n % size) > rank); 1977 } 1978 } else { 1979 nlocal = csize / bs; 1980 } 1981 PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm)); 1982 rstart = rend - nlocal; 1983 PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n); 1984 1985 /* next, compute all the lengths */ 1986 PetscCall(PetscMalloc2(m + 1, &dlens, m + 1, &olens)); 1987 for (i = 0; i < m; i++) { 1988 jend = ii[i + 1] - ii[i]; 1989 olen = 0; 1990 dlen = 0; 1991 for (j = 0; j < jend; j++) { 1992 if (*jj < rstart || *jj >= rend) olen++; 1993 else dlen++; 1994 jj++; 1995 } 1996 olens[i] = olen; 1997 dlens[i] = dlen; 1998 } 1999 PetscCall(MatCreate(comm, &M)); 2000 PetscCall(MatSetSizes(M, bs * m, bs * nlocal, PETSC_DECIDE, bs * n)); 2001 PetscCall(MatSetType(M, sym ? ((PetscObject)mat)->type_name : MATMPIBAIJ)); 2002 PetscCall(MatMPIBAIJSetPreallocation(M, bs, 0, dlens, 0, olens)); 2003 PetscCall(MatMPISBAIJSetPreallocation(M, bs, 0, dlens, 0, olens)); 2004 PetscCall(PetscFree2(dlens, olens)); 2005 } else { 2006 PetscInt ml, nl; 2007 2008 M = *newmat; 2009 PetscCall(MatGetLocalSize(M, &ml, &nl)); 2010 PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request"); 2011 PetscCall(MatZeroEntries(M)); 2012 /* 2013 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2014 rather than the slower MatSetValues(). 2015 */ 2016 M->was_assembled = PETSC_TRUE; 2017 M->assembled = PETSC_FALSE; 2018 } 2019 PetscCall(MatSetOption(M, MAT_ROW_ORIENTED, PETSC_FALSE)); 2020 PetscCall(MatGetOwnershipRange(M, &rstart, &rend)); 2021 aij = (Mat_SeqBAIJ *)(Mreuse)->data; 2022 ii = aij->i; 2023 jj = aij->j; 2024 aa = aij->a; 2025 for (i = 0; i < m; i++) { 2026 row = rstart / bs + i; 2027 nz = ii[i + 1] - ii[i]; 2028 cwork = jj; 2029 jj = PetscSafePointerPlusOffset(jj, nz); 2030 vwork = aa; 2031 aa = PetscSafePointerPlusOffset(aa, nz * bs * bs); 2032 PetscCall(MatSetValuesBlocked_MPIBAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES)); 2033 } 2034 2035 PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY)); 2036 PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY)); 2037 *newmat = M; 2038 2039 /* save submatrix used in processor for next request */ 2040 if (call == MAT_INITIAL_MATRIX) { 2041 PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse)); 2042 PetscCall(PetscObjectDereference((PetscObject)Mreuse)); 2043 } 2044 PetscFunctionReturn(PETSC_SUCCESS); 2045 } 2046 2047 static PetscErrorCode MatPermute_MPIBAIJ(Mat A, IS rowp, IS colp, Mat *B) 2048 { 2049 MPI_Comm comm, pcomm; 2050 PetscInt clocal_size, nrows; 2051 const PetscInt *rows; 2052 PetscMPIInt size; 2053 IS crowp, lcolp; 2054 2055 PetscFunctionBegin; 2056 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 2057 /* make a collective version of 'rowp' */ 2058 PetscCall(PetscObjectGetComm((PetscObject)rowp, &pcomm)); 2059 if (pcomm == comm) { 2060 crowp = rowp; 2061 } else { 2062 PetscCall(ISGetSize(rowp, &nrows)); 2063 PetscCall(ISGetIndices(rowp, &rows)); 2064 PetscCall(ISCreateGeneral(comm, nrows, rows, PETSC_COPY_VALUES, &crowp)); 2065 PetscCall(ISRestoreIndices(rowp, &rows)); 2066 } 2067 PetscCall(ISSetPermutation(crowp)); 2068 /* make a local version of 'colp' */ 2069 PetscCall(PetscObjectGetComm((PetscObject)colp, &pcomm)); 2070 PetscCallMPI(MPI_Comm_size(pcomm, &size)); 2071 if (size == 1) { 2072 lcolp = colp; 2073 } else { 2074 PetscCall(ISAllGather(colp, &lcolp)); 2075 } 2076 PetscCall(ISSetPermutation(lcolp)); 2077 /* now we just get the submatrix */ 2078 PetscCall(MatGetLocalSize(A, NULL, &clocal_size)); 2079 PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(A, crowp, lcolp, clocal_size, MAT_INITIAL_MATRIX, B, PETSC_FALSE)); 2080 /* clean up */ 2081 if (pcomm != comm) PetscCall(ISDestroy(&crowp)); 2082 if (size > 1) PetscCall(ISDestroy(&lcolp)); 2083 PetscFunctionReturn(PETSC_SUCCESS); 2084 } 2085 2086 static PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[]) 2087 { 2088 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 2089 Mat_SeqBAIJ *B = (Mat_SeqBAIJ *)baij->B->data; 2090 2091 PetscFunctionBegin; 2092 if (nghosts) *nghosts = B->nbs; 2093 if (ghosts) *ghosts = baij->garray; 2094 PetscFunctionReturn(PETSC_SUCCESS); 2095 } 2096 2097 static PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A, Mat *newmat) 2098 { 2099 Mat B; 2100 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2101 Mat_SeqBAIJ *ad = (Mat_SeqBAIJ *)a->A->data, *bd = (Mat_SeqBAIJ *)a->B->data; 2102 Mat_SeqAIJ *b; 2103 PetscMPIInt size, rank, *recvcounts = NULL, *displs = NULL; 2104 PetscInt sendcount, i, *rstarts = A->rmap->range, n, cnt, j, bs = A->rmap->bs; 2105 PetscInt m, *garray = a->garray, *lens, *jsendbuf, *a_jsendbuf, *b_jsendbuf; 2106 2107 PetscFunctionBegin; 2108 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 2109 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank)); 2110 2111 /* Tell every processor the number of nonzeros per row */ 2112 PetscCall(PetscMalloc1(A->rmap->N / bs, &lens)); 2113 for (i = A->rmap->rstart / bs; i < A->rmap->rend / bs; i++) lens[i] = ad->i[i - A->rmap->rstart / bs + 1] - ad->i[i - A->rmap->rstart / bs] + bd->i[i - A->rmap->rstart / bs + 1] - bd->i[i - A->rmap->rstart / bs]; 2114 PetscCall(PetscMalloc1(2 * size, &recvcounts)); 2115 displs = recvcounts + size; 2116 for (i = 0; i < size; i++) { 2117 recvcounts[i] = A->rmap->range[i + 1] / bs - A->rmap->range[i] / bs; 2118 displs[i] = A->rmap->range[i] / bs; 2119 } 2120 PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, lens, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A))); 2121 /* Create the sequential matrix of the same type as the local block diagonal */ 2122 PetscCall(MatCreate(PETSC_COMM_SELF, &B)); 2123 PetscCall(MatSetSizes(B, A->rmap->N / bs, A->cmap->N / bs, PETSC_DETERMINE, PETSC_DETERMINE)); 2124 PetscCall(MatSetType(B, MATSEQAIJ)); 2125 PetscCall(MatSeqAIJSetPreallocation(B, 0, lens)); 2126 b = (Mat_SeqAIJ *)B->data; 2127 2128 /* Copy my part of matrix column indices over */ 2129 sendcount = ad->nz + bd->nz; 2130 jsendbuf = b->j + b->i[rstarts[rank] / bs]; 2131 a_jsendbuf = ad->j; 2132 b_jsendbuf = bd->j; 2133 n = A->rmap->rend / bs - A->rmap->rstart / bs; 2134 cnt = 0; 2135 for (i = 0; i < n; i++) { 2136 /* put in lower diagonal portion */ 2137 m = bd->i[i + 1] - bd->i[i]; 2138 while (m > 0) { 2139 /* is it above diagonal (in bd (compressed) numbering) */ 2140 if (garray[*b_jsendbuf] > A->rmap->rstart / bs + i) break; 2141 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2142 m--; 2143 } 2144 2145 /* put in diagonal portion */ 2146 for (j = ad->i[i]; j < ad->i[i + 1]; j++) jsendbuf[cnt++] = A->rmap->rstart / bs + *a_jsendbuf++; 2147 2148 /* put in upper diagonal portion */ 2149 while (m-- > 0) jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2150 } 2151 PetscCheck(cnt == sendcount, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Corrupted PETSc matrix: nz given %" PetscInt_FMT " actual nz %" PetscInt_FMT, sendcount, cnt); 2152 2153 /* Gather all column indices to all processors */ 2154 for (i = 0; i < size; i++) { 2155 recvcounts[i] = 0; 2156 for (j = A->rmap->range[i] / bs; j < A->rmap->range[i + 1] / bs; j++) recvcounts[i] += lens[j]; 2157 } 2158 displs[0] = 0; 2159 for (i = 1; i < size; i++) displs[i] = displs[i - 1] + recvcounts[i - 1]; 2160 PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, b->j, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A))); 2161 /* Assemble the matrix into usable form (note numerical values not yet set) */ 2162 /* set the b->ilen (length of each row) values */ 2163 PetscCall(PetscArraycpy(b->ilen, lens, A->rmap->N / bs)); 2164 /* set the b->i indices */ 2165 b->i[0] = 0; 2166 for (i = 1; i <= A->rmap->N / bs; i++) b->i[i] = b->i[i - 1] + lens[i - 1]; 2167 PetscCall(PetscFree(lens)); 2168 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 2169 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 2170 PetscCall(PetscFree(recvcounts)); 2171 2172 PetscCall(MatPropagateSymmetryOptions(A, B)); 2173 *newmat = B; 2174 PetscFunctionReturn(PETSC_SUCCESS); 2175 } 2176 2177 static PetscErrorCode MatSOR_MPIBAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) 2178 { 2179 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data; 2180 Vec bb1 = NULL; 2181 2182 PetscFunctionBegin; 2183 if (flag == SOR_APPLY_UPPER) { 2184 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2185 PetscFunctionReturn(PETSC_SUCCESS); 2186 } 2187 2188 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) PetscCall(VecDuplicate(bb, &bb1)); 2189 2190 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2191 if (flag & SOR_ZERO_INITIAL_GUESS) { 2192 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2193 its--; 2194 } 2195 2196 while (its--) { 2197 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2198 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2199 2200 /* update rhs: bb1 = bb - B*x */ 2201 PetscCall(VecScale(mat->lvec, -1.0)); 2202 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 2203 2204 /* local sweep */ 2205 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx)); 2206 } 2207 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 2208 if (flag & SOR_ZERO_INITIAL_GUESS) { 2209 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2210 its--; 2211 } 2212 while (its--) { 2213 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2214 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2215 2216 /* update rhs: bb1 = bb - B*x */ 2217 PetscCall(VecScale(mat->lvec, -1.0)); 2218 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 2219 2220 /* local sweep */ 2221 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx)); 2222 } 2223 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 2224 if (flag & SOR_ZERO_INITIAL_GUESS) { 2225 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2226 its--; 2227 } 2228 while (its--) { 2229 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2230 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2231 2232 /* update rhs: bb1 = bb - B*x */ 2233 PetscCall(VecScale(mat->lvec, -1.0)); 2234 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 2235 2236 /* local sweep */ 2237 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx)); 2238 } 2239 } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel version of SOR requested not supported"); 2240 2241 PetscCall(VecDestroy(&bb1)); 2242 PetscFunctionReturn(PETSC_SUCCESS); 2243 } 2244 2245 static PetscErrorCode MatGetColumnReductions_MPIBAIJ(Mat A, PetscInt type, PetscReal *reductions) 2246 { 2247 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)A->data; 2248 PetscInt m, N, i, *garray = aij->garray; 2249 PetscInt ib, jb, bs = A->rmap->bs; 2250 Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ *)aij->A->data; 2251 MatScalar *a_val = a_aij->a; 2252 Mat_SeqBAIJ *b_aij = (Mat_SeqBAIJ *)aij->B->data; 2253 MatScalar *b_val = b_aij->a; 2254 PetscReal *work; 2255 2256 PetscFunctionBegin; 2257 PetscCall(MatGetSize(A, &m, &N)); 2258 PetscCall(PetscCalloc1(N, &work)); 2259 if (type == NORM_2) { 2260 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2261 for (jb = 0; jb < bs; jb++) { 2262 for (ib = 0; ib < bs; ib++) { 2263 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val); 2264 a_val++; 2265 } 2266 } 2267 } 2268 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2269 for (jb = 0; jb < bs; jb++) { 2270 for (ib = 0; ib < bs; ib++) { 2271 work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val); 2272 b_val++; 2273 } 2274 } 2275 } 2276 } else if (type == NORM_1) { 2277 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2278 for (jb = 0; jb < bs; jb++) { 2279 for (ib = 0; ib < bs; ib++) { 2280 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val); 2281 a_val++; 2282 } 2283 } 2284 } 2285 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2286 for (jb = 0; jb < bs; jb++) { 2287 for (ib = 0; ib < bs; ib++) { 2288 work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val); 2289 b_val++; 2290 } 2291 } 2292 } 2293 } else if (type == NORM_INFINITY) { 2294 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2295 for (jb = 0; jb < bs; jb++) { 2296 for (ib = 0; ib < bs; ib++) { 2297 int col = A->cmap->rstart + a_aij->j[i] * bs + jb; 2298 work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]); 2299 a_val++; 2300 } 2301 } 2302 } 2303 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2304 for (jb = 0; jb < bs; jb++) { 2305 for (ib = 0; ib < bs; ib++) { 2306 int col = garray[b_aij->j[i]] * bs + jb; 2307 work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]); 2308 b_val++; 2309 } 2310 } 2311 } 2312 } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) { 2313 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2314 for (jb = 0; jb < bs; jb++) { 2315 for (ib = 0; ib < bs; ib++) { 2316 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val); 2317 a_val++; 2318 } 2319 } 2320 } 2321 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2322 for (jb = 0; jb < bs; jb++) { 2323 for (ib = 0; ib < bs; ib++) { 2324 work[garray[b_aij->j[i]] * bs + jb] += PetscRealPart(*b_val); 2325 b_val++; 2326 } 2327 } 2328 } 2329 } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) { 2330 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2331 for (jb = 0; jb < bs; jb++) { 2332 for (ib = 0; ib < bs; ib++) { 2333 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val); 2334 a_val++; 2335 } 2336 } 2337 } 2338 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2339 for (jb = 0; jb < bs; jb++) { 2340 for (ib = 0; ib < bs; ib++) { 2341 work[garray[b_aij->j[i]] * bs + jb] += PetscImaginaryPart(*b_val); 2342 b_val++; 2343 } 2344 } 2345 } 2346 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type"); 2347 if (type == NORM_INFINITY) { 2348 PetscCall(MPIU_Allreduce(work, reductions, N, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A))); 2349 } else { 2350 PetscCall(MPIU_Allreduce(work, reductions, N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A))); 2351 } 2352 PetscCall(PetscFree(work)); 2353 if (type == NORM_2) { 2354 for (i = 0; i < N; i++) reductions[i] = PetscSqrtReal(reductions[i]); 2355 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 2356 for (i = 0; i < N; i++) reductions[i] /= m; 2357 } 2358 PetscFunctionReturn(PETSC_SUCCESS); 2359 } 2360 2361 static PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A, const PetscScalar **values) 2362 { 2363 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2364 2365 PetscFunctionBegin; 2366 PetscCall(MatInvertBlockDiagonal(a->A, values)); 2367 A->factorerrortype = a->A->factorerrortype; 2368 A->factorerror_zeropivot_value = a->A->factorerror_zeropivot_value; 2369 A->factorerror_zeropivot_row = a->A->factorerror_zeropivot_row; 2370 PetscFunctionReturn(PETSC_SUCCESS); 2371 } 2372 2373 static PetscErrorCode MatShift_MPIBAIJ(Mat Y, PetscScalar a) 2374 { 2375 Mat_MPIBAIJ *maij = (Mat_MPIBAIJ *)Y->data; 2376 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)maij->A->data; 2377 2378 PetscFunctionBegin; 2379 if (!Y->preallocated) { 2380 PetscCall(MatMPIBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL, 0, NULL)); 2381 } else if (!aij->nz) { 2382 PetscInt nonew = aij->nonew; 2383 PetscCall(MatSeqBAIJSetPreallocation(maij->A, Y->rmap->bs, 1, NULL)); 2384 aij->nonew = nonew; 2385 } 2386 PetscCall(MatShift_Basic(Y, a)); 2387 PetscFunctionReturn(PETSC_SUCCESS); 2388 } 2389 2390 static PetscErrorCode MatMissingDiagonal_MPIBAIJ(Mat A, PetscBool *missing, PetscInt *d) 2391 { 2392 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2393 2394 PetscFunctionBegin; 2395 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices"); 2396 PetscCall(MatMissingDiagonal(a->A, missing, d)); 2397 if (d) { 2398 PetscInt rstart; 2399 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 2400 *d += rstart / A->rmap->bs; 2401 } 2402 PetscFunctionReturn(PETSC_SUCCESS); 2403 } 2404 2405 static PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A, Mat *a) 2406 { 2407 PetscFunctionBegin; 2408 *a = ((Mat_MPIBAIJ *)A->data)->A; 2409 PetscFunctionReturn(PETSC_SUCCESS); 2410 } 2411 2412 static PetscErrorCode MatEliminateZeros_MPIBAIJ(Mat A, PetscBool keep) 2413 { 2414 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2415 2416 PetscFunctionBegin; 2417 PetscCall(MatEliminateZeros_SeqBAIJ(a->A, keep)); // possibly keep zero diagonal coefficients 2418 PetscCall(MatEliminateZeros_SeqBAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients 2419 PetscFunctionReturn(PETSC_SUCCESS); 2420 } 2421 2422 static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ, 2423 MatGetRow_MPIBAIJ, 2424 MatRestoreRow_MPIBAIJ, 2425 MatMult_MPIBAIJ, 2426 /* 4*/ MatMultAdd_MPIBAIJ, 2427 MatMultTranspose_MPIBAIJ, 2428 MatMultTransposeAdd_MPIBAIJ, 2429 NULL, 2430 NULL, 2431 NULL, 2432 /*10*/ NULL, 2433 NULL, 2434 NULL, 2435 MatSOR_MPIBAIJ, 2436 MatTranspose_MPIBAIJ, 2437 /*15*/ MatGetInfo_MPIBAIJ, 2438 MatEqual_MPIBAIJ, 2439 MatGetDiagonal_MPIBAIJ, 2440 MatDiagonalScale_MPIBAIJ, 2441 MatNorm_MPIBAIJ, 2442 /*20*/ MatAssemblyBegin_MPIBAIJ, 2443 MatAssemblyEnd_MPIBAIJ, 2444 MatSetOption_MPIBAIJ, 2445 MatZeroEntries_MPIBAIJ, 2446 /*24*/ MatZeroRows_MPIBAIJ, 2447 NULL, 2448 NULL, 2449 NULL, 2450 NULL, 2451 /*29*/ MatSetUp_MPI_Hash, 2452 NULL, 2453 NULL, 2454 MatGetDiagonalBlock_MPIBAIJ, 2455 NULL, 2456 /*34*/ MatDuplicate_MPIBAIJ, 2457 NULL, 2458 NULL, 2459 NULL, 2460 NULL, 2461 /*39*/ MatAXPY_MPIBAIJ, 2462 MatCreateSubMatrices_MPIBAIJ, 2463 MatIncreaseOverlap_MPIBAIJ, 2464 MatGetValues_MPIBAIJ, 2465 MatCopy_MPIBAIJ, 2466 /*44*/ NULL, 2467 MatScale_MPIBAIJ, 2468 MatShift_MPIBAIJ, 2469 NULL, 2470 MatZeroRowsColumns_MPIBAIJ, 2471 /*49*/ NULL, 2472 NULL, 2473 NULL, 2474 NULL, 2475 NULL, 2476 /*54*/ MatFDColoringCreate_MPIXAIJ, 2477 NULL, 2478 MatSetUnfactored_MPIBAIJ, 2479 MatPermute_MPIBAIJ, 2480 MatSetValuesBlocked_MPIBAIJ, 2481 /*59*/ MatCreateSubMatrix_MPIBAIJ, 2482 MatDestroy_MPIBAIJ, 2483 MatView_MPIBAIJ, 2484 NULL, 2485 NULL, 2486 /*64*/ NULL, 2487 NULL, 2488 NULL, 2489 NULL, 2490 NULL, 2491 /*69*/ MatGetRowMaxAbs_MPIBAIJ, 2492 NULL, 2493 NULL, 2494 NULL, 2495 NULL, 2496 /*74*/ NULL, 2497 MatFDColoringApply_BAIJ, 2498 NULL, 2499 NULL, 2500 NULL, 2501 /*79*/ NULL, 2502 NULL, 2503 NULL, 2504 NULL, 2505 MatLoad_MPIBAIJ, 2506 /*84*/ NULL, 2507 NULL, 2508 NULL, 2509 NULL, 2510 NULL, 2511 /*89*/ NULL, 2512 NULL, 2513 NULL, 2514 NULL, 2515 NULL, 2516 /*94*/ NULL, 2517 NULL, 2518 NULL, 2519 NULL, 2520 NULL, 2521 /*99*/ NULL, 2522 NULL, 2523 NULL, 2524 MatConjugate_MPIBAIJ, 2525 NULL, 2526 /*104*/ NULL, 2527 MatRealPart_MPIBAIJ, 2528 MatImaginaryPart_MPIBAIJ, 2529 NULL, 2530 NULL, 2531 /*109*/ NULL, 2532 NULL, 2533 NULL, 2534 NULL, 2535 MatMissingDiagonal_MPIBAIJ, 2536 /*114*/ MatGetSeqNonzeroStructure_MPIBAIJ, 2537 NULL, 2538 MatGetGhosts_MPIBAIJ, 2539 NULL, 2540 NULL, 2541 /*119*/ NULL, 2542 NULL, 2543 NULL, 2544 NULL, 2545 MatGetMultiProcBlock_MPIBAIJ, 2546 /*124*/ NULL, 2547 MatGetColumnReductions_MPIBAIJ, 2548 MatInvertBlockDiagonal_MPIBAIJ, 2549 NULL, 2550 NULL, 2551 /*129*/ NULL, 2552 NULL, 2553 NULL, 2554 NULL, 2555 NULL, 2556 /*134*/ NULL, 2557 NULL, 2558 NULL, 2559 NULL, 2560 NULL, 2561 /*139*/ MatSetBlockSizes_Default, 2562 NULL, 2563 NULL, 2564 MatFDColoringSetUp_MPIXAIJ, 2565 NULL, 2566 /*144*/ MatCreateMPIMatConcatenateSeqMat_MPIBAIJ, 2567 NULL, 2568 NULL, 2569 NULL, 2570 NULL, 2571 NULL, 2572 /*150*/ NULL, 2573 MatEliminateZeros_MPIBAIJ, 2574 MatGetRowSumAbs_MPIBAIJ, 2575 NULL}; 2576 2577 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *); 2578 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *); 2579 2580 static PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[]) 2581 { 2582 PetscInt m, rstart, cstart, cend; 2583 PetscInt i, j, dlen, olen, nz, nz_max = 0, *d_nnz = NULL, *o_nnz = NULL; 2584 const PetscInt *JJ = NULL; 2585 PetscScalar *values = NULL; 2586 PetscBool roworiented = ((Mat_MPIBAIJ *)B->data)->roworiented; 2587 PetscBool nooffprocentries; 2588 2589 PetscFunctionBegin; 2590 PetscCall(PetscLayoutSetBlockSize(B->rmap, bs)); 2591 PetscCall(PetscLayoutSetBlockSize(B->cmap, bs)); 2592 PetscCall(PetscLayoutSetUp(B->rmap)); 2593 PetscCall(PetscLayoutSetUp(B->cmap)); 2594 PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 2595 m = B->rmap->n / bs; 2596 rstart = B->rmap->rstart / bs; 2597 cstart = B->cmap->rstart / bs; 2598 cend = B->cmap->rend / bs; 2599 2600 PetscCheck(!ii[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]); 2601 PetscCall(PetscMalloc2(m, &d_nnz, m, &o_nnz)); 2602 for (i = 0; i < m; i++) { 2603 nz = ii[i + 1] - ii[i]; 2604 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz); 2605 nz_max = PetscMax(nz_max, nz); 2606 dlen = 0; 2607 olen = 0; 2608 JJ = jj + ii[i]; 2609 for (j = 0; j < nz; j++) { 2610 if (*JJ < cstart || *JJ >= cend) olen++; 2611 else dlen++; 2612 JJ++; 2613 } 2614 d_nnz[i] = dlen; 2615 o_nnz[i] = olen; 2616 } 2617 PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, d_nnz, 0, o_nnz)); 2618 PetscCall(PetscFree2(d_nnz, o_nnz)); 2619 2620 values = (PetscScalar *)V; 2621 if (!values) PetscCall(PetscCalloc1(bs * bs * nz_max, &values)); 2622 for (i = 0; i < m; i++) { 2623 PetscInt row = i + rstart; 2624 PetscInt ncols = ii[i + 1] - ii[i]; 2625 const PetscInt *icols = jj + ii[i]; 2626 if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */ 2627 const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0); 2628 PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, ncols, icols, svals, INSERT_VALUES)); 2629 } else { /* block ordering does not match so we can only insert one block at a time. */ 2630 PetscInt j; 2631 for (j = 0; j < ncols; j++) { 2632 const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0); 2633 PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, 1, &icols[j], svals, INSERT_VALUES)); 2634 } 2635 } 2636 } 2637 2638 if (!V) PetscCall(PetscFree(values)); 2639 nooffprocentries = B->nooffprocentries; 2640 B->nooffprocentries = PETSC_TRUE; 2641 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 2642 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 2643 B->nooffprocentries = nooffprocentries; 2644 2645 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 2646 PetscFunctionReturn(PETSC_SUCCESS); 2647 } 2648 2649 /*@C 2650 MatMPIBAIJSetPreallocationCSR - Creates a sparse parallel matrix in `MATBAIJ` format using the given nonzero structure and (optional) numerical values 2651 2652 Collective 2653 2654 Input Parameters: 2655 + B - the matrix 2656 . bs - the block size 2657 . i - the indices into `j` for the start of each local row (starts with zero) 2658 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2659 - v - optional values in the matrix, use `NULL` if not provided 2660 2661 Level: advanced 2662 2663 Notes: 2664 The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc; 2665 thus you CANNOT change the matrix entries by changing the values of `v` after you have 2666 called this routine. 2667 2668 The order of the entries in values is specified by the `MatOption` `MAT_ROW_ORIENTED`. For example, C programs 2669 may want to use the default `MAT_ROW_ORIENTED` with value `PETSC_TRUE` and use an array v[nnz][bs][bs] where the second index is 2670 over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 2671 `MAT_ROW_ORIENTED` with value `PETSC_FALSE` and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 2672 block column and the second index is over columns within a block. 2673 2674 Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well 2675 2676 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MATMPIAIJ`, `MatCreateMPIBAIJWithArrays()`, `MATMPIBAIJ` 2677 @*/ 2678 PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[]) 2679 { 2680 PetscFunctionBegin; 2681 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 2682 PetscValidType(B, 1); 2683 PetscValidLogicalCollectiveInt(B, bs, 2); 2684 PetscTryMethod(B, "MatMPIBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v)); 2685 PetscFunctionReturn(PETSC_SUCCESS); 2686 } 2687 2688 PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz) 2689 { 2690 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 2691 PetscInt i; 2692 PetscMPIInt size; 2693 2694 PetscFunctionBegin; 2695 if (B->hash_active) { 2696 B->ops[0] = b->cops; 2697 B->hash_active = PETSC_FALSE; 2698 } 2699 if (!B->preallocated) PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), bs, &B->bstash)); 2700 PetscCall(MatSetBlockSize(B, PetscAbs(bs))); 2701 PetscCall(PetscLayoutSetUp(B->rmap)); 2702 PetscCall(PetscLayoutSetUp(B->cmap)); 2703 PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 2704 2705 if (d_nnz) { 2706 for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(d_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "d_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, d_nnz[i]); 2707 } 2708 if (o_nnz) { 2709 for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(o_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "o_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, o_nnz[i]); 2710 } 2711 2712 b->bs2 = bs * bs; 2713 b->mbs = B->rmap->n / bs; 2714 b->nbs = B->cmap->n / bs; 2715 b->Mbs = B->rmap->N / bs; 2716 b->Nbs = B->cmap->N / bs; 2717 2718 for (i = 0; i <= b->size; i++) b->rangebs[i] = B->rmap->range[i] / bs; 2719 b->rstartbs = B->rmap->rstart / bs; 2720 b->rendbs = B->rmap->rend / bs; 2721 b->cstartbs = B->cmap->rstart / bs; 2722 b->cendbs = B->cmap->rend / bs; 2723 2724 #if defined(PETSC_USE_CTABLE) 2725 PetscCall(PetscHMapIDestroy(&b->colmap)); 2726 #else 2727 PetscCall(PetscFree(b->colmap)); 2728 #endif 2729 PetscCall(PetscFree(b->garray)); 2730 PetscCall(VecDestroy(&b->lvec)); 2731 PetscCall(VecScatterDestroy(&b->Mvctx)); 2732 2733 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 2734 2735 MatSeqXAIJGetOptions_Private(b->B); 2736 PetscCall(MatDestroy(&b->B)); 2737 PetscCall(MatCreate(PETSC_COMM_SELF, &b->B)); 2738 PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0)); 2739 PetscCall(MatSetType(b->B, MATSEQBAIJ)); 2740 MatSeqXAIJRestoreOptions_Private(b->B); 2741 2742 MatSeqXAIJGetOptions_Private(b->A); 2743 PetscCall(MatDestroy(&b->A)); 2744 PetscCall(MatCreate(PETSC_COMM_SELF, &b->A)); 2745 PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n)); 2746 PetscCall(MatSetType(b->A, MATSEQBAIJ)); 2747 MatSeqXAIJRestoreOptions_Private(b->A); 2748 2749 PetscCall(MatSeqBAIJSetPreallocation(b->A, bs, d_nz, d_nnz)); 2750 PetscCall(MatSeqBAIJSetPreallocation(b->B, bs, o_nz, o_nnz)); 2751 B->preallocated = PETSC_TRUE; 2752 B->was_assembled = PETSC_FALSE; 2753 B->assembled = PETSC_FALSE; 2754 PetscFunctionReturn(PETSC_SUCCESS); 2755 } 2756 2757 extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat, Vec); 2758 extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat, PetscReal); 2759 2760 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype, MatReuse reuse, Mat *adj) 2761 { 2762 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 2763 Mat_SeqBAIJ *d = (Mat_SeqBAIJ *)b->A->data, *o = (Mat_SeqBAIJ *)b->B->data; 2764 PetscInt M = B->rmap->n / B->rmap->bs, i, *ii, *jj, cnt, j, k, rstart = B->rmap->rstart / B->rmap->bs; 2765 const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray; 2766 2767 PetscFunctionBegin; 2768 PetscCall(PetscMalloc1(M + 1, &ii)); 2769 ii[0] = 0; 2770 for (i = 0; i < M; i++) { 2771 PetscCheck((id[i + 1] - id[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, id[i], id[i + 1]); 2772 PetscCheck((io[i + 1] - io[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, io[i], io[i + 1]); 2773 ii[i + 1] = ii[i] + id[i + 1] - id[i] + io[i + 1] - io[i]; 2774 /* remove one from count of matrix has diagonal */ 2775 for (j = id[i]; j < id[i + 1]; j++) { 2776 if (jd[j] == i) { 2777 ii[i + 1]--; 2778 break; 2779 } 2780 } 2781 } 2782 PetscCall(PetscMalloc1(ii[M], &jj)); 2783 cnt = 0; 2784 for (i = 0; i < M; i++) { 2785 for (j = io[i]; j < io[i + 1]; j++) { 2786 if (garray[jo[j]] > rstart) break; 2787 jj[cnt++] = garray[jo[j]]; 2788 } 2789 for (k = id[i]; k < id[i + 1]; k++) { 2790 if (jd[k] != i) jj[cnt++] = rstart + jd[k]; 2791 } 2792 for (; j < io[i + 1]; j++) jj[cnt++] = garray[jo[j]]; 2793 } 2794 PetscCall(MatCreateMPIAdj(PetscObjectComm((PetscObject)B), M, B->cmap->N / B->rmap->bs, ii, jj, NULL, adj)); 2795 PetscFunctionReturn(PETSC_SUCCESS); 2796 } 2797 2798 #include <../src/mat/impls/aij/mpi/mpiaij.h> 2799 2800 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType, MatReuse, Mat *); 2801 2802 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 2803 { 2804 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2805 Mat_MPIAIJ *b; 2806 Mat B; 2807 2808 PetscFunctionBegin; 2809 PetscCheck(A->assembled, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Matrix must be assembled"); 2810 2811 if (reuse == MAT_REUSE_MATRIX) { 2812 B = *newmat; 2813 } else { 2814 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 2815 PetscCall(MatSetType(B, MATMPIAIJ)); 2816 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 2817 PetscCall(MatSetBlockSizes(B, A->rmap->bs, A->cmap->bs)); 2818 PetscCall(MatSeqAIJSetPreallocation(B, 0, NULL)); 2819 PetscCall(MatMPIAIJSetPreallocation(B, 0, NULL, 0, NULL)); 2820 } 2821 b = (Mat_MPIAIJ *)B->data; 2822 2823 if (reuse == MAT_REUSE_MATRIX) { 2824 PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_REUSE_MATRIX, &b->A)); 2825 PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_REUSE_MATRIX, &b->B)); 2826 } else { 2827 PetscInt *garray = a->garray; 2828 Mat_SeqAIJ *bB; 2829 PetscInt bs, nnz; 2830 PetscCall(MatDestroy(&b->A)); 2831 PetscCall(MatDestroy(&b->B)); 2832 /* just clear out the data structure */ 2833 PetscCall(MatDisAssemble_MPIAIJ(B)); 2834 PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A)); 2835 PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B)); 2836 2837 /* Global numbering for b->B columns */ 2838 bB = (Mat_SeqAIJ *)b->B->data; 2839 bs = A->rmap->bs; 2840 nnz = bB->i[A->rmap->n]; 2841 for (PetscInt k = 0; k < nnz; k++) { 2842 PetscInt bj = bB->j[k] / bs; 2843 PetscInt br = bB->j[k] % bs; 2844 bB->j[k] = garray[bj] * bs + br; 2845 } 2846 } 2847 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 2848 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 2849 2850 if (reuse == MAT_INPLACE_MATRIX) { 2851 PetscCall(MatHeaderReplace(A, &B)); 2852 } else { 2853 *newmat = B; 2854 } 2855 PetscFunctionReturn(PETSC_SUCCESS); 2856 } 2857 2858 /*MC 2859 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 2860 2861 Options Database Keys: 2862 + -mat_type mpibaij - sets the matrix type to `MATMPIBAIJ` during a call to `MatSetFromOptions()` 2863 . -mat_block_size <bs> - set the blocksize used to store the matrix 2864 . -mat_baij_mult_version version - indicate the version of the matrix-vector product to use (0 often indicates using BLAS) 2865 - -mat_use_hash_table <fact> - set hash table factor 2866 2867 Level: beginner 2868 2869 Note: 2870 `MatSetOption(A, MAT_STRUCTURE_ONLY, PETSC_TRUE)` may be called for this matrix type. In this no 2871 space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored 2872 2873 .seealso: `Mat`, `MATBAIJ`, `MATSEQBAIJ`, `MatCreateBAIJ` 2874 M*/ 2875 2876 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat, MatType, MatReuse, Mat *); 2877 2878 PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B) 2879 { 2880 Mat_MPIBAIJ *b; 2881 PetscBool flg = PETSC_FALSE; 2882 2883 PetscFunctionBegin; 2884 PetscCall(PetscNew(&b)); 2885 B->data = (void *)b; 2886 B->ops[0] = MatOps_Values; 2887 B->assembled = PETSC_FALSE; 2888 2889 B->insertmode = NOT_SET_VALUES; 2890 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank)); 2891 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &b->size)); 2892 2893 /* build local table of row and column ownerships */ 2894 PetscCall(PetscMalloc1(b->size + 1, &b->rangebs)); 2895 2896 /* build cache for off array entries formed */ 2897 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash)); 2898 2899 b->donotstash = PETSC_FALSE; 2900 b->colmap = NULL; 2901 b->garray = NULL; 2902 b->roworiented = PETSC_TRUE; 2903 2904 /* stuff used in block assembly */ 2905 b->barray = NULL; 2906 2907 /* stuff used for matrix vector multiply */ 2908 b->lvec = NULL; 2909 b->Mvctx = NULL; 2910 2911 /* stuff for MatGetRow() */ 2912 b->rowindices = NULL; 2913 b->rowvalues = NULL; 2914 b->getrowactive = PETSC_FALSE; 2915 2916 /* hash table stuff */ 2917 b->ht = NULL; 2918 b->hd = NULL; 2919 b->ht_size = 0; 2920 b->ht_flag = PETSC_FALSE; 2921 b->ht_fact = 0; 2922 b->ht_total_ct = 0; 2923 b->ht_insert_ct = 0; 2924 2925 /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */ 2926 b->ijonly = PETSC_FALSE; 2927 2928 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiadj_C", MatConvert_MPIBAIJ_MPIAdj)); 2929 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiaij_C", MatConvert_MPIBAIJ_MPIAIJ)); 2930 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpisbaij_C", MatConvert_MPIBAIJ_MPISBAIJ)); 2931 #if defined(PETSC_HAVE_HYPRE) 2932 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_hypre_C", MatConvert_AIJ_HYPRE)); 2933 #endif 2934 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIBAIJ)); 2935 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIBAIJ)); 2936 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocation_C", MatMPIBAIJSetPreallocation_MPIBAIJ)); 2937 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocationCSR_C", MatMPIBAIJSetPreallocationCSR_MPIBAIJ)); 2938 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIBAIJ)); 2939 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetHashTableFactor_C", MatSetHashTableFactor_MPIBAIJ)); 2940 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_is_C", MatConvert_XAIJ_IS)); 2941 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIBAIJ)); 2942 2943 PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Options for loading MPIBAIJ matrix 1", "Mat"); 2944 PetscCall(PetscOptionsName("-mat_use_hash_table", "Use hash table to save time in constructing matrix", "MatSetOption", &flg)); 2945 if (flg) { 2946 PetscReal fact = 1.39; 2947 PetscCall(MatSetOption(B, MAT_USE_HASH_TABLE, PETSC_TRUE)); 2948 PetscCall(PetscOptionsReal("-mat_use_hash_table", "Use hash table factor", "MatMPIBAIJSetHashTableFactor", fact, &fact, NULL)); 2949 if (fact <= 1.0) fact = 1.39; 2950 PetscCall(MatMPIBAIJSetHashTableFactor(B, fact)); 2951 PetscCall(PetscInfo(B, "Hash table Factor used %5.2g\n", (double)fact)); 2952 } 2953 PetscOptionsEnd(); 2954 PetscFunctionReturn(PETSC_SUCCESS); 2955 } 2956 2957 // PetscClangLinter pragma disable: -fdoc-section-header-unknown 2958 /*MC 2959 MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 2960 2961 This matrix type is identical to `MATSEQBAIJ` when constructed with a single process communicator, 2962 and `MATMPIBAIJ` otherwise. 2963 2964 Options Database Keys: 2965 . -mat_type baij - sets the matrix type to `MATBAIJ` during a call to `MatSetFromOptions()` 2966 2967 Level: beginner 2968 2969 .seealso: `Mat`, `MatCreateBAIJ()`, `MATSEQBAIJ`, `MATMPIBAIJ`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()` 2970 M*/ 2971 2972 /*@ 2973 MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in `MATMPIBAIJ` format 2974 (block compressed row). 2975 2976 Collective 2977 2978 Input Parameters: 2979 + B - the matrix 2980 . bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row 2981 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()` 2982 . d_nz - number of block nonzeros per block row in diagonal portion of local 2983 submatrix (same for all local rows) 2984 . d_nnz - array containing the number of block nonzeros in the various block rows 2985 of the in diagonal portion of the local (possibly different for each block 2986 row) or `NULL`. If you plan to factor the matrix you must leave room for the diagonal entry and 2987 set it even if it is zero. 2988 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2989 submatrix (same for all local rows). 2990 - o_nnz - array containing the number of nonzeros in the various block rows of the 2991 off-diagonal portion of the local submatrix (possibly different for 2992 each block row) or `NULL`. 2993 2994 If the *_nnz parameter is given then the *_nz parameter is ignored 2995 2996 Options Database Keys: 2997 + -mat_block_size - size of the blocks to use 2998 - -mat_use_hash_table <fact> - set hash table factor 2999 3000 Level: intermediate 3001 3002 Notes: 3003 For good matrix assembly performance 3004 the user should preallocate the matrix storage by setting the parameters 3005 `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). By setting these parameters accurately, 3006 performance can be increased by more than a factor of 50. 3007 3008 If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor 3009 than it must be used on all processors that share the object for that argument. 3010 3011 Storage Information: 3012 For a square global matrix we define each processor's diagonal portion 3013 to be its local rows and the corresponding columns (a square submatrix); 3014 each processor's off-diagonal portion encompasses the remainder of the 3015 local matrix (a rectangular submatrix). 3016 3017 The user can specify preallocated storage for the diagonal part of 3018 the local submatrix with either `d_nz` or `d_nnz` (not both). Set 3019 `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic 3020 memory allocation. Likewise, specify preallocated storage for the 3021 off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both). 3022 3023 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3024 the figure below we depict these three local rows and all columns (0-11). 3025 3026 .vb 3027 0 1 2 3 4 5 6 7 8 9 10 11 3028 -------------------------- 3029 row 3 |o o o d d d o o o o o o 3030 row 4 |o o o d d d o o o o o o 3031 row 5 |o o o d d d o o o o o o 3032 -------------------------- 3033 .ve 3034 3035 Thus, any entries in the d locations are stored in the d (diagonal) 3036 submatrix, and any entries in the o locations are stored in the 3037 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3038 stored simply in the `MATSEQBAIJ` format for compressed row storage. 3039 3040 Now `d_nz` should indicate the number of block nonzeros per row in the d matrix, 3041 and `o_nz` should indicate the number of block nonzeros per row in the o matrix. 3042 In general, for PDE problems in which most nonzeros are near the diagonal, 3043 one expects `d_nz` >> `o_nz`. 3044 3045 You can call `MatGetInfo()` to get information on how effective the preallocation was; 3046 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3047 You can also run with the option `-info` and look for messages with the string 3048 malloc in them to see if additional memory allocation was needed. 3049 3050 .seealso: `Mat`, `MATMPIBAIJ`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatMPIBAIJSetPreallocationCSR()`, `PetscSplitOwnership()` 3051 @*/ 3052 PetscErrorCode MatMPIBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[]) 3053 { 3054 PetscFunctionBegin; 3055 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 3056 PetscValidType(B, 1); 3057 PetscValidLogicalCollectiveInt(B, bs, 2); 3058 PetscTryMethod(B, "MatMPIBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, bs, d_nz, d_nnz, o_nz, o_nnz)); 3059 PetscFunctionReturn(PETSC_SUCCESS); 3060 } 3061 3062 // PetscClangLinter pragma disable: -fdoc-section-header-unknown 3063 /*@ 3064 MatCreateBAIJ - Creates a sparse parallel matrix in `MATBAIJ` format 3065 (block compressed row). 3066 3067 Collective 3068 3069 Input Parameters: 3070 + comm - MPI communicator 3071 . bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row 3072 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()` 3073 . m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given) 3074 This value should be the same as the local size used in creating the 3075 y vector for the matrix-vector product y = Ax. 3076 . n - number of local columns (or `PETSC_DECIDE` to have calculated if N is given) 3077 This value should be the same as the local size used in creating the 3078 x vector for the matrix-vector product y = Ax. 3079 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 3080 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 3081 . d_nz - number of nonzero blocks per block row in diagonal portion of local 3082 submatrix (same for all local rows) 3083 . d_nnz - array containing the number of nonzero blocks in the various block rows 3084 of the in diagonal portion of the local (possibly different for each block 3085 row) or NULL. If you plan to factor the matrix you must leave room for the diagonal entry 3086 and set it even if it is zero. 3087 . o_nz - number of nonzero blocks per block row in the off-diagonal portion of local 3088 submatrix (same for all local rows). 3089 - o_nnz - array containing the number of nonzero blocks in the various block rows of the 3090 off-diagonal portion of the local submatrix (possibly different for 3091 each block row) or NULL. 3092 3093 Output Parameter: 3094 . A - the matrix 3095 3096 Options Database Keys: 3097 + -mat_block_size - size of the blocks to use 3098 - -mat_use_hash_table <fact> - set hash table factor 3099 3100 Level: intermediate 3101 3102 Notes: 3103 It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, 3104 MatXXXXSetPreallocation() paradigm instead of this routine directly. 3105 [MatXXXXSetPreallocation() is, for example, `MatSeqBAIJSetPreallocation()`] 3106 3107 For good matrix assembly performance 3108 the user should preallocate the matrix storage by setting the parameters 3109 `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). By setting these parameters accurately, 3110 performance can be increased by more than a factor of 50. 3111 3112 If the *_nnz parameter is given then the *_nz parameter is ignored 3113 3114 A nonzero block is any block that as 1 or more nonzeros in it 3115 3116 The user MUST specify either the local or global matrix dimensions 3117 (possibly both). 3118 3119 If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor 3120 than it must be used on all processors that share the object for that argument. 3121 3122 If `m` and `n` are not `PETSC_DECIDE`, then the values determine the `PetscLayout` of the matrix and the ranges returned by 3123 `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, and `MatGetOwnershipRangesColumn()`. 3124 3125 Storage Information: 3126 For a square global matrix we define each processor's diagonal portion 3127 to be its local rows and the corresponding columns (a square submatrix); 3128 each processor's off-diagonal portion encompasses the remainder of the 3129 local matrix (a rectangular submatrix). 3130 3131 The user can specify preallocated storage for the diagonal part of 3132 the local submatrix with either d_nz or d_nnz (not both). Set 3133 `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic 3134 memory allocation. Likewise, specify preallocated storage for the 3135 off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both). 3136 3137 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3138 the figure below we depict these three local rows and all columns (0-11). 3139 3140 .vb 3141 0 1 2 3 4 5 6 7 8 9 10 11 3142 -------------------------- 3143 row 3 |o o o d d d o o o o o o 3144 row 4 |o o o d d d o o o o o o 3145 row 5 |o o o d d d o o o o o o 3146 -------------------------- 3147 .ve 3148 3149 Thus, any entries in the d locations are stored in the d (diagonal) 3150 submatrix, and any entries in the o locations are stored in the 3151 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3152 stored simply in the `MATSEQBAIJ` format for compressed row storage. 3153 3154 Now `d_nz` should indicate the number of block nonzeros per row in the d matrix, 3155 and `o_nz` should indicate the number of block nonzeros per row in the o matrix. 3156 In general, for PDE problems in which most nonzeros are near the diagonal, 3157 one expects `d_nz` >> `o_nz`. 3158 3159 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()`, 3160 `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, `MatGetOwnershipRangesColumn()`, `PetscLayout` 3161 @*/ 3162 PetscErrorCode MatCreateBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A) 3163 { 3164 PetscMPIInt size; 3165 3166 PetscFunctionBegin; 3167 PetscCall(MatCreate(comm, A)); 3168 PetscCall(MatSetSizes(*A, m, n, M, N)); 3169 PetscCallMPI(MPI_Comm_size(comm, &size)); 3170 if (size > 1) { 3171 PetscCall(MatSetType(*A, MATMPIBAIJ)); 3172 PetscCall(MatMPIBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz)); 3173 } else { 3174 PetscCall(MatSetType(*A, MATSEQBAIJ)); 3175 PetscCall(MatSeqBAIJSetPreallocation(*A, bs, d_nz, d_nnz)); 3176 } 3177 PetscFunctionReturn(PETSC_SUCCESS); 3178 } 3179 3180 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat) 3181 { 3182 Mat mat; 3183 Mat_MPIBAIJ *a, *oldmat = (Mat_MPIBAIJ *)matin->data; 3184 PetscInt len = 0; 3185 3186 PetscFunctionBegin; 3187 *newmat = NULL; 3188 PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat)); 3189 PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N)); 3190 PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name)); 3191 3192 PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap)); 3193 PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap)); 3194 if (matin->hash_active) { 3195 PetscCall(MatSetUp(mat)); 3196 } else { 3197 mat->factortype = matin->factortype; 3198 mat->preallocated = PETSC_TRUE; 3199 mat->assembled = PETSC_TRUE; 3200 mat->insertmode = NOT_SET_VALUES; 3201 3202 a = (Mat_MPIBAIJ *)mat->data; 3203 mat->rmap->bs = matin->rmap->bs; 3204 a->bs2 = oldmat->bs2; 3205 a->mbs = oldmat->mbs; 3206 a->nbs = oldmat->nbs; 3207 a->Mbs = oldmat->Mbs; 3208 a->Nbs = oldmat->Nbs; 3209 3210 a->size = oldmat->size; 3211 a->rank = oldmat->rank; 3212 a->donotstash = oldmat->donotstash; 3213 a->roworiented = oldmat->roworiented; 3214 a->rowindices = NULL; 3215 a->rowvalues = NULL; 3216 a->getrowactive = PETSC_FALSE; 3217 a->barray = NULL; 3218 a->rstartbs = oldmat->rstartbs; 3219 a->rendbs = oldmat->rendbs; 3220 a->cstartbs = oldmat->cstartbs; 3221 a->cendbs = oldmat->cendbs; 3222 3223 /* hash table stuff */ 3224 a->ht = NULL; 3225 a->hd = NULL; 3226 a->ht_size = 0; 3227 a->ht_flag = oldmat->ht_flag; 3228 a->ht_fact = oldmat->ht_fact; 3229 a->ht_total_ct = 0; 3230 a->ht_insert_ct = 0; 3231 3232 PetscCall(PetscArraycpy(a->rangebs, oldmat->rangebs, a->size + 1)); 3233 if (oldmat->colmap) { 3234 #if defined(PETSC_USE_CTABLE) 3235 PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap)); 3236 #else 3237 PetscCall(PetscMalloc1(a->Nbs, &a->colmap)); 3238 PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, a->Nbs)); 3239 #endif 3240 } else a->colmap = NULL; 3241 3242 if (oldmat->garray && (len = ((Mat_SeqBAIJ *)oldmat->B->data)->nbs)) { 3243 PetscCall(PetscMalloc1(len, &a->garray)); 3244 PetscCall(PetscArraycpy(a->garray, oldmat->garray, len)); 3245 } else a->garray = NULL; 3246 3247 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin), matin->rmap->bs, &mat->bstash)); 3248 PetscCall(VecDuplicate(oldmat->lvec, &a->lvec)); 3249 PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx)); 3250 3251 PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A)); 3252 PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B)); 3253 } 3254 PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist)); 3255 *newmat = mat; 3256 PetscFunctionReturn(PETSC_SUCCESS); 3257 } 3258 3259 /* Used for both MPIBAIJ and MPISBAIJ matrices */ 3260 PetscErrorCode MatLoad_MPIBAIJ_Binary(Mat mat, PetscViewer viewer) 3261 { 3262 PetscInt header[4], M, N, nz, bs, m, n, mbs, nbs, rows, cols, sum, i, j, k; 3263 PetscInt *rowidxs, *colidxs, rs, cs, ce; 3264 PetscScalar *matvals; 3265 3266 PetscFunctionBegin; 3267 PetscCall(PetscViewerSetUp(viewer)); 3268 3269 /* read in matrix header */ 3270 PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT)); 3271 PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file"); 3272 M = header[1]; 3273 N = header[2]; 3274 nz = header[3]; 3275 PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M); 3276 PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N); 3277 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIBAIJ"); 3278 3279 /* set block sizes from the viewer's .info file */ 3280 PetscCall(MatLoad_Binary_BlockSizes(mat, viewer)); 3281 /* set local sizes if not set already */ 3282 if (mat->rmap->n < 0 && M == N) mat->rmap->n = mat->cmap->n; 3283 if (mat->cmap->n < 0 && M == N) mat->cmap->n = mat->rmap->n; 3284 /* set global sizes if not set already */ 3285 if (mat->rmap->N < 0) mat->rmap->N = M; 3286 if (mat->cmap->N < 0) mat->cmap->N = N; 3287 PetscCall(PetscLayoutSetUp(mat->rmap)); 3288 PetscCall(PetscLayoutSetUp(mat->cmap)); 3289 3290 /* check if the matrix sizes are correct */ 3291 PetscCall(MatGetSize(mat, &rows, &cols)); 3292 PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols); 3293 PetscCall(MatGetBlockSize(mat, &bs)); 3294 PetscCall(MatGetLocalSize(mat, &m, &n)); 3295 PetscCall(PetscLayoutGetRange(mat->rmap, &rs, NULL)); 3296 PetscCall(PetscLayoutGetRange(mat->cmap, &cs, &ce)); 3297 mbs = m / bs; 3298 nbs = n / bs; 3299 3300 /* read in row lengths and build row indices */ 3301 PetscCall(PetscMalloc1(m + 1, &rowidxs)); 3302 PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT)); 3303 rowidxs[0] = 0; 3304 for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i]; 3305 PetscCall(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer))); 3306 PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum); 3307 3308 /* read in column indices and matrix values */ 3309 PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals)); 3310 PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 3311 PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 3312 3313 { /* preallocate matrix storage */ 3314 PetscBT bt; /* helper bit set to count diagonal nonzeros */ 3315 PetscHSetI ht; /* helper hash set to count off-diagonal nonzeros */ 3316 PetscBool sbaij, done; 3317 PetscInt *d_nnz, *o_nnz; 3318 3319 PetscCall(PetscBTCreate(nbs, &bt)); 3320 PetscCall(PetscHSetICreate(&ht)); 3321 PetscCall(PetscCalloc2(mbs, &d_nnz, mbs, &o_nnz)); 3322 PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPISBAIJ, &sbaij)); 3323 for (i = 0; i < mbs; i++) { 3324 PetscCall(PetscBTMemzero(nbs, bt)); 3325 PetscCall(PetscHSetIClear(ht)); 3326 for (k = 0; k < bs; k++) { 3327 PetscInt row = bs * i + k; 3328 for (j = rowidxs[row]; j < rowidxs[row + 1]; j++) { 3329 PetscInt col = colidxs[j]; 3330 if (!sbaij || col >= row) { 3331 if (col >= cs && col < ce) { 3332 if (!PetscBTLookupSet(bt, (col - cs) / bs)) d_nnz[i]++; 3333 } else { 3334 PetscCall(PetscHSetIQueryAdd(ht, col / bs, &done)); 3335 if (done) o_nnz[i]++; 3336 } 3337 } 3338 } 3339 } 3340 } 3341 PetscCall(PetscBTDestroy(&bt)); 3342 PetscCall(PetscHSetIDestroy(&ht)); 3343 PetscCall(MatMPIBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz)); 3344 PetscCall(MatMPISBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz)); 3345 PetscCall(PetscFree2(d_nnz, o_nnz)); 3346 } 3347 3348 /* store matrix values */ 3349 for (i = 0; i < m; i++) { 3350 PetscInt row = rs + i, s = rowidxs[i], e = rowidxs[i + 1]; 3351 PetscUseTypeMethod(mat, setvalues, 1, &row, e - s, colidxs + s, matvals + s, INSERT_VALUES); 3352 } 3353 3354 PetscCall(PetscFree(rowidxs)); 3355 PetscCall(PetscFree2(colidxs, matvals)); 3356 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 3357 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 3358 PetscFunctionReturn(PETSC_SUCCESS); 3359 } 3360 3361 PetscErrorCode MatLoad_MPIBAIJ(Mat mat, PetscViewer viewer) 3362 { 3363 PetscBool isbinary; 3364 3365 PetscFunctionBegin; 3366 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 3367 PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name); 3368 PetscCall(MatLoad_MPIBAIJ_Binary(mat, viewer)); 3369 PetscFunctionReturn(PETSC_SUCCESS); 3370 } 3371 3372 /*@ 3373 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the matrices hash table 3374 3375 Input Parameters: 3376 + mat - the matrix 3377 - fact - factor 3378 3379 Options Database Key: 3380 . -mat_use_hash_table <fact> - provide the factor 3381 3382 Level: advanced 3383 3384 .seealso: `Mat`, `MATMPIBAIJ`, `MatSetOption()` 3385 @*/ 3386 PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat, PetscReal fact) 3387 { 3388 PetscFunctionBegin; 3389 PetscTryMethod(mat, "MatSetHashTableFactor_C", (Mat, PetscReal), (mat, fact)); 3390 PetscFunctionReturn(PETSC_SUCCESS); 3391 } 3392 3393 PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat, PetscReal fact) 3394 { 3395 Mat_MPIBAIJ *baij; 3396 3397 PetscFunctionBegin; 3398 baij = (Mat_MPIBAIJ *)mat->data; 3399 baij->ht_fact = fact; 3400 PetscFunctionReturn(PETSC_SUCCESS); 3401 } 3402 3403 PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[]) 3404 { 3405 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 3406 PetscBool flg; 3407 3408 PetscFunctionBegin; 3409 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIBAIJ, &flg)); 3410 PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIBAIJ matrix as input"); 3411 if (Ad) *Ad = a->A; 3412 if (Ao) *Ao = a->B; 3413 if (colmap) *colmap = a->garray; 3414 PetscFunctionReturn(PETSC_SUCCESS); 3415 } 3416 3417 /* 3418 Special version for direct calls from Fortran (to eliminate two function call overheads 3419 */ 3420 #if defined(PETSC_HAVE_FORTRAN_CAPS) 3421 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED 3422 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 3423 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked 3424 #endif 3425 3426 // PetscClangLinter pragma disable: -fdoc-synopsis-matching-symbol-name 3427 /*@C 3428 MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to `MatSetValuesBlocked()` 3429 3430 Collective 3431 3432 Input Parameters: 3433 + matin - the matrix 3434 . min - number of input rows 3435 . im - input rows 3436 . nin - number of input columns 3437 . in - input columns 3438 . v - numerical values input 3439 - addvin - `INSERT_VALUES` or `ADD_VALUES` 3440 3441 Level: advanced 3442 3443 Developer Notes: 3444 This has a complete copy of `MatSetValuesBlocked_MPIBAIJ()` which is terrible code un-reuse. 3445 3446 .seealso: `Mat`, `MatSetValuesBlocked()` 3447 @*/ 3448 PETSC_EXTERN PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin, PetscInt *min, const PetscInt im[], PetscInt *nin, const PetscInt in[], const MatScalar v[], InsertMode *addvin) 3449 { 3450 /* convert input arguments to C version */ 3451 Mat mat = *matin; 3452 PetscInt m = *min, n = *nin; 3453 InsertMode addv = *addvin; 3454 3455 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 3456 const MatScalar *value; 3457 MatScalar *barray = baij->barray; 3458 PetscBool roworiented = baij->roworiented; 3459 PetscInt i, j, ii, jj, row, col, rstart = baij->rstartbs; 3460 PetscInt rend = baij->rendbs, cstart = baij->cstartbs, stepval; 3461 PetscInt cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2; 3462 3463 PetscFunctionBegin; 3464 /* tasks normally handled by MatSetValuesBlocked() */ 3465 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 3466 else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values"); 3467 PetscCheck(!mat->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix"); 3468 if (mat->assembled) { 3469 mat->was_assembled = PETSC_TRUE; 3470 mat->assembled = PETSC_FALSE; 3471 } 3472 PetscCall(PetscLogEventBegin(MAT_SetValues, mat, 0, 0, 0)); 3473 3474 if (!barray) { 3475 PetscCall(PetscMalloc1(bs2, &barray)); 3476 baij->barray = barray; 3477 } 3478 3479 if (roworiented) stepval = (n - 1) * bs; 3480 else stepval = (m - 1) * bs; 3481 3482 for (i = 0; i < m; i++) { 3483 if (im[i] < 0) continue; 3484 PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large, row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1); 3485 if (im[i] >= rstart && im[i] < rend) { 3486 row = im[i] - rstart; 3487 for (j = 0; j < n; j++) { 3488 /* If NumCol = 1 then a copy is not required */ 3489 if ((roworiented) && (n == 1)) { 3490 barray = (MatScalar *)v + i * bs2; 3491 } else if ((!roworiented) && (m == 1)) { 3492 barray = (MatScalar *)v + j * bs2; 3493 } else { /* Here a copy is required */ 3494 if (roworiented) { 3495 value = v + i * (stepval + bs) * bs + j * bs; 3496 } else { 3497 value = v + j * (stepval + bs) * bs + i * bs; 3498 } 3499 for (ii = 0; ii < bs; ii++, value += stepval) { 3500 for (jj = 0; jj < bs; jj++) *barray++ = *value++; 3501 } 3502 barray -= bs2; 3503 } 3504 3505 if (in[j] >= cstart && in[j] < cend) { 3506 col = in[j] - cstart; 3507 PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j])); 3508 } else if (in[j] < 0) { 3509 continue; 3510 } else { 3511 PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large, col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1); 3512 if (mat->was_assembled) { 3513 if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 3514 3515 #if defined(PETSC_USE_DEBUG) 3516 #if defined(PETSC_USE_CTABLE) 3517 { 3518 PetscInt data; 3519 PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &data)); 3520 PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap"); 3521 } 3522 #else 3523 PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap"); 3524 #endif 3525 #endif 3526 #if defined(PETSC_USE_CTABLE) 3527 PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col)); 3528 col = (col - 1) / bs; 3529 #else 3530 col = (baij->colmap[in[j]] - 1) / bs; 3531 #endif 3532 if (col < 0 && !((Mat_SeqBAIJ *)baij->A->data)->nonew) { 3533 PetscCall(MatDisAssemble_MPIBAIJ(mat)); 3534 col = in[j]; 3535 } 3536 } else col = in[j]; 3537 PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j])); 3538 } 3539 } 3540 } else { 3541 if (!baij->donotstash) { 3542 if (roworiented) { 3543 PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 3544 } else { 3545 PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 3546 } 3547 } 3548 } 3549 } 3550 3551 /* task normally handled by MatSetValuesBlocked() */ 3552 PetscCall(PetscLogEventEnd(MAT_SetValues, mat, 0, 0, 0)); 3553 PetscFunctionReturn(PETSC_SUCCESS); 3554 } 3555 3556 /*@ 3557 MatCreateMPIBAIJWithArrays - creates a `MATMPIBAIJ` matrix using arrays that contain in standard block CSR format for the local rows. 3558 3559 Collective 3560 3561 Input Parameters: 3562 + comm - MPI communicator 3563 . bs - the block size, only a block size of 1 is supported 3564 . m - number of local rows (Cannot be `PETSC_DECIDE`) 3565 . n - This value should be the same as the local size used in creating the 3566 x vector for the matrix-vector product $ y = Ax $. (or `PETSC_DECIDE` to have 3567 calculated if `N` is given) For square matrices `n` is almost always `m`. 3568 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given) 3569 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given) 3570 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that rowth block row of the matrix 3571 . j - column indices 3572 - a - matrix values 3573 3574 Output Parameter: 3575 . mat - the matrix 3576 3577 Level: intermediate 3578 3579 Notes: 3580 The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc; 3581 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3582 called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays. 3583 3584 The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 3585 the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 3586 block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory 3587 with column-major ordering within blocks. 3588 3589 The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array. 3590 3591 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 3592 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()` 3593 @*/ 3594 PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat) 3595 { 3596 PetscFunctionBegin; 3597 PetscCheck(!i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 3598 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3599 PetscCall(MatCreate(comm, mat)); 3600 PetscCall(MatSetSizes(*mat, m, n, M, N)); 3601 PetscCall(MatSetType(*mat, MATMPIBAIJ)); 3602 PetscCall(MatSetBlockSize(*mat, bs)); 3603 PetscCall(MatSetUp(*mat)); 3604 PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_FALSE)); 3605 PetscCall(MatMPIBAIJSetPreallocationCSR(*mat, bs, i, j, a)); 3606 PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_TRUE)); 3607 PetscFunctionReturn(PETSC_SUCCESS); 3608 } 3609 3610 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 3611 { 3612 PetscInt m, N, i, rstart, nnz, Ii, bs, cbs; 3613 PetscInt *indx; 3614 PetscScalar *values; 3615 3616 PetscFunctionBegin; 3617 PetscCall(MatGetSize(inmat, &m, &N)); 3618 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 3619 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)inmat->data; 3620 PetscInt *dnz, *onz, mbs, Nbs, nbs; 3621 PetscInt *bindx, rmax = a->rmax, j; 3622 PetscMPIInt rank, size; 3623 3624 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 3625 mbs = m / bs; 3626 Nbs = N / cbs; 3627 if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnershipBlock(comm, cbs, &n, &N)); 3628 nbs = n / cbs; 3629 3630 PetscCall(PetscMalloc1(rmax, &bindx)); 3631 MatPreallocateBegin(comm, mbs, nbs, dnz, onz); /* inline function, output __end and __rstart are used below */ 3632 3633 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 3634 PetscCallMPI(MPI_Comm_rank(comm, &size)); 3635 if (rank == size - 1) { 3636 /* Check sum(nbs) = Nbs */ 3637 PetscCheck(__end == Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT, __end, Nbs); 3638 } 3639 3640 rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */ 3641 for (i = 0; i < mbs; i++) { 3642 PetscCall(MatGetRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL)); /* non-blocked nnz and indx */ 3643 nnz = nnz / bs; 3644 for (j = 0; j < nnz; j++) bindx[j] = indx[j * bs] / bs; 3645 PetscCall(MatPreallocateSet(i + rstart, nnz, bindx, dnz, onz)); 3646 PetscCall(MatRestoreRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL)); 3647 } 3648 PetscCall(PetscFree(bindx)); 3649 3650 PetscCall(MatCreate(comm, outmat)); 3651 PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 3652 PetscCall(MatSetBlockSizes(*outmat, bs, cbs)); 3653 PetscCall(MatSetType(*outmat, MATBAIJ)); 3654 PetscCall(MatSeqBAIJSetPreallocation(*outmat, bs, 0, dnz)); 3655 PetscCall(MatMPIBAIJSetPreallocation(*outmat, bs, 0, dnz, 0, onz)); 3656 MatPreallocateEnd(dnz, onz); 3657 PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 3658 } 3659 3660 /* numeric phase */ 3661 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 3662 PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL)); 3663 3664 for (i = 0; i < m; i++) { 3665 PetscCall(MatGetRow_SeqBAIJ(inmat, i, &nnz, &indx, &values)); 3666 Ii = i + rstart; 3667 PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES)); 3668 PetscCall(MatRestoreRow_SeqBAIJ(inmat, i, &nnz, &indx, &values)); 3669 } 3670 PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY)); 3671 PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY)); 3672 PetscFunctionReturn(PETSC_SUCCESS); 3673 } 3674