#include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/

#include <petsc/private/hashseti.h>
#include <petscblaslapack.h>
#include <petscsf.h>

PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
#endif
  PetscCall(MatStashDestroy_Private(&mat->stash));
  PetscCall(MatStashDestroy_Private(&mat->bstash));
  PetscCall(MatDestroy(&baij->A));
  PetscCall(MatDestroy(&baij->B));
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&baij->colmap));
#else
  PetscCall(PetscFree(baij->colmap));
#endif
  PetscCall(PetscFree(baij->garray));
  PetscCall(VecDestroy(&baij->lvec));
  PetscCall(VecScatterDestroy(&baij->Mvctx));
  PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));
  PetscCall(PetscFree(baij->barray));
  PetscCall(PetscFree2(baij->hd, baij->ht));
  PetscCall(PetscFree(baij->rangebs));
  PetscCall(PetscFree(mat->data));

  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocationCSR_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetHashTableFactor_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpisbaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiadj_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiaij_C", NULL));
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_hypre_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_is_C", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and MatAssemblyEnd_MPI_Hash() */
#define TYPE BAIJ
#include "../src/mat/impls/aij/mpi/mpihashmat.h"
#undef TYPE

#if defined(PETSC_HAVE_HYPRE)
PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
#endif

PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIBAIJ       *a = (Mat_MPIBAIJ *)A->data;
  PetscInt           i, *idxb = NULL, m = A->rmap->n, bs = A->cmap->bs;
  PetscScalar       *va, *vv;
  Vec                vB, vA;
  const PetscScalar *vb;

  PetscFunctionBegin;
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vA));
  PetscCall(MatGetRowMaxAbs(a->A, vA, idx));

  PetscCall(VecGetArrayWrite(vA, &va));
  if (idx) {
    for (i = 0; i < m; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vB));
  PetscCall(PetscMalloc1(m, &idxb));
  PetscCall(MatGetRowMaxAbs(a->B, vB, idxb));

  PetscCall(VecGetArrayWrite(v, &vv));
  PetscCall(VecGetArrayRead(vB, &vb));
  for (i = 0; i < m; i++) {
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
      vv[i] = vb[i];
      if (idx) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs);
    } else {
      vv[i] = va[i];
      if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > bs * a->garray[idxb[i] / bs] + (idxb[i] % bs)) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs);
    }
  }
  PetscCall(VecRestoreArrayWrite(v, &vv));
  PetscCall(VecRestoreArrayWrite(vA, &va));
  PetscCall(VecRestoreArrayRead(vB, &vb));
  PetscCall(PetscFree(idxb));
  PetscCall(VecDestroy(&vA));
  PetscCall(VecDestroy(&vB));
  PetscFunctionReturn(PETSC_SUCCESS);
}
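/*
  Example (sketch): typical use of the routine above through the public API.
  Assumes an assembled MPIBAIJ matrix `A`; the names `rowmax`, `loc`, and
  `localrows` are illustrative, not part of this file.

     Vec       rowmax;
     PetscInt *loc;

     PetscCall(MatCreateVecs(A, NULL, &rowmax));   // vector with A's row layout
     PetscCall(PetscMalloc1(localrows, &loc));     // one index per local row
     PetscCall(MatGetRowMaxAbs(A, rowmax, loc));   // loc[i] = global column of the row max
     PetscCall(PetscFree(loc));
     PetscCall(VecDestroy(&rowmax));
*/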
PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatStoreValues(aij->A));
  PetscCall(MatStoreValues(aij->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatRetrieveValues(aij->A));
  PetscCall(MatRetrieveValues(aij->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix. This is done in a non-scalable way since the
   length of colmap equals the global matrix length.
*/
PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ *B    = (Mat_SeqBAIJ *)baij->B->data;
  PetscInt     nbs  = B->nbs, i, bs = mat->rmap->bs;

  PetscFunctionBegin;
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapICreateWithSize(baij->nbs, &baij->colmap));
  for (i = 0; i < nbs; i++) PetscCall(PetscHMapISet(baij->colmap, baij->garray[i] + 1, i * bs + 1));
#else
  PetscCall(PetscCalloc1(baij->Nbs + 1, &baij->colmap));
  for (i = 0; i < nbs; i++) baij->colmap[baij->garray[i]] = i * bs + 1;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
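/*
  Worked example (illustrative): what colmap encodes. Suppose bs = 2 and the
  off-diagonal part references global block columns garray[] = {3, 7}. Then

     colmap[3] = 0*2 + 1 = 1   ->  local point column offset 0
     colmap[7] = 1*2 + 1 = 3   ->  local point column offset 2

  Values are stored shifted by one so that 0 can mean "global block column not
  present"; a lookup therefore subtracts 1, as in MatSetValues_MPIBAIJ() below.
*/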
#define MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, orow, ocol) \
  do { \
    brow = row / bs; \
    rp   = aj + ai[brow]; \
    ap   = aa + bs2 * ai[brow]; \
    rmax = aimax[brow]; \
    nrow = ailen[brow]; \
    bcol = col / bs; \
    ridx = row % bs; \
    cidx = col % bs; \
    low  = 0; \
    high = nrow; \
    while (high - low > 3) { \
      t = (low + high) / 2; \
      if (rp[t] > bcol) high = t; \
      else low = t; \
    } \
    for (_i = low; _i < high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2 * _i + bs * cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else *bap = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    PetscCheck(a->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, aimax, a->nonew, MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
    PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
    rp[_i] = bcol; \
    ap[bs2 * _i + bs * cidx + ridx] = value; \
  a_noinsert:; \
    ailen[brow] = nrow; \
  } while (0)

#define MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, orow, ocol) \
  do { \
    brow = row / bs; \
    rp   = bj + bi[brow]; \
    ap   = ba + bs2 * bi[brow]; \
    rmax = bimax[brow]; \
    nrow = bilen[brow]; \
    bcol = col / bs; \
    ridx = row % bs; \
    cidx = col % bs; \
    low  = 0; \
    high = nrow; \
    while (high - low > 3) { \
      t = (low + high) / 2; \
      if (rp[t] > bcol) high = t; \
      else low = t; \
    } \
    for (_i = low; _i < high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2 * _i + bs * cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else *bap = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    PetscCheck(b->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B, b->mbs, bs2, nrow, brow, bcol, rmax, ba, bi, bj, rp, ap, bimax, b->nonew, MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
    PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
    rp[_i] = bcol; \
    ap[bs2 * _i + bs * cidx + ridx] = value; \
  b_noinsert:; \
    bilen[brow] = nrow; \
  } while (0)
PetscErrorCode MatSetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIBAIJ *baij        = (Mat_MPIBAIJ *)mat->data;
  MatScalar    value;
  PetscBool    roworiented = baij->roworiented;
  PetscInt     i, j, row, col;
  PetscInt     rstart_orig = mat->rmap->rstart;
  PetscInt     rend_orig = mat->rmap->rend, cstart_orig = mat->cmap->rstart;
  PetscInt     cend_orig = mat->cmap->rend, bs = mat->rmap->bs;

  /* Some Variables required in the macro */
  Mat          A     = baij->A;
  Mat_SeqBAIJ *a     = (Mat_SeqBAIJ *)(A)->data;
  PetscInt    *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
  MatScalar   *aa    = a->a;

  Mat          B     = baij->B;
  Mat_SeqBAIJ *b     = (Mat_SeqBAIJ *)(B)->data;
  PetscInt    *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j;
  MatScalar   *ba    = b->a;

  PetscInt  *rp, ii, nrow, _i, rmax, N, brow, bcol;
  PetscInt   low, high, t, ridx, cidx, bs2 = a->bs2;
  MatScalar *ap, *bap;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue;
    PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      row = im[i] - rstart_orig;
      for (j = 0; j < n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig) {
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, im[i], in[j]);
        } else if (in[j] < 0) {
          continue;
        } else {
          PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          if (mat->was_assembled) {
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] / bs + 1, 0, &col));
            col = col - 1;
#else
            col = baij->colmap[in[j] / bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ *)(baij->B->data))->nonew) {
              PetscCall(MatDisAssemble_MPIBAIJ(mat));
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B     = baij->B;
              b     = (Mat_SeqBAIJ *)(B)->data;
              bimax = b->imax;
              bi    = b->i;
              bilen = b->ilen;
              bj    = b->j;
              ba    = b->a;
            } else {
              PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
              col += in[j] % bs;
            }
          } else col = in[j];
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, im[i], in[j]);
          /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */
        }
      }
    } else {
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE));
        }
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
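/*
  Example (sketch): inserting point entries into an MPIBAIJ matrix through the
  public API, which dispatches here. `A` is assumed created with MATMPIBAIJ and
  preallocated; the indices, which are global point (not block) indices, are
  illustrative.

     PetscInt    rowidx = 4, colidx = 6;
     PetscScalar val    = 1.0;

     PetscCall(MatSetValues(A, 1, &rowidx, 1, &colidx, &val, ADD_VALUES));
     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
*/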
static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol)
{
  Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
  PetscInt          *rp, low, high, t, ii, jj, nrow, i, rmax, N;
  PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
  PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs;
  PetscBool          roworiented = a->roworiented;
  const PetscScalar *value = v;
  MatScalar         *ap, *aa = a->a, *bap;

  PetscFunctionBegin;
  rp    = aj + ai[row];
  ap    = aa + bs2 * ai[row];
  rmax  = imax[row];
  nrow  = ailen[row];
  value = v;
  low   = 0;
  high  = nrow;
  while (high - low > 7) {
    t = (low + high) / 2;
    if (rp[t] > col) high = t;
    else low = t;
  }
  for (i = low; i < high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) {
      bap = ap + bs2 * i;
      if (roworiented) {
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
          }
        } else {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
          }
        }
      } else {
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++, value += bs) {
            for (jj = 0; jj < bs; jj++) bap[jj] += value[jj];
            bap += bs;
          }
        } else {
          for (ii = 0; ii < bs; ii++, value += bs) {
            for (jj = 0; jj < bs; jj++) bap[jj] = value[jj];
            bap += bs;
          }
        }
      }
      goto noinsert2;
    }
  }
  if (nonew == 1) goto noinsert2;
  PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
  N = nrow++ - 1;
  high++;
  /* shift up all the later entries in this row */
  PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
  PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
  rp[i] = col;
  bap   = ap + bs2 * i;
  if (roworiented) {
    for (ii = 0; ii < bs; ii++) {
      for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
    }
  } else {
    for (ii = 0; ii < bs; ii++) {
      for (jj = 0; jj < bs; jj++) *bap++ = *value++;
    }
  }
noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   This routine should be optimized so that the block copy at ** Here a copy is required ** below is not needed
   by passing additional stride information into the MatSetValuesBlocked_SeqBAIJ_Inlined() routine
*/
PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIBAIJ       *baij        = (Mat_MPIBAIJ *)mat->data;
  const PetscScalar *value;
  MatScalar         *barray      = baij->barray;
  PetscBool          roworiented = baij->roworiented;
  PetscInt           i, j, ii, jj, row, col, rstart = baij->rstartbs;
  PetscInt           rend = baij->rendbs, cstart = baij->cstartbs, stepval;
  PetscInt           cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    PetscCall(PetscMalloc1(bs2, &barray));
    baij->barray = barray;
  }

  if (roworiented) stepval = (n - 1) * bs;
  else stepval = (m - 1) * bs;

  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue;
    PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j = 0; j < n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar *)v + i * bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar *)v + j * bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + (i * (stepval + bs) + j) * bs;
          } else {
            value = v + (j * (stepval + bs) + i) * bs;
          }
          for (ii = 0; ii < bs; ii++, value += bs + stepval) {
            for (jj = 0; jj < bs; jj++) barray[jj] = value[jj];
            barray += bs;
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend) {
          col = in[j] - cstart;
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
        } else if (in[j] < 0) {
          continue;
        } else {
          PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
          if (mat->was_assembled) {
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));

#if defined(PETSC_USE_DEBUG)
  #if defined(PETSC_USE_CTABLE)
            {
              PetscInt data;
              PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &data));
              PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
            }
  #else
            PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
  #endif
#endif
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col));
            col = (col - 1) / bs;
#else
            col = (baij->colmap[in[j]] - 1) / bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ *)(baij->B->data))->nonew) {
              PetscCall(MatDisAssemble_MPIBAIJ(mat));
              col = in[j];
            } else PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new blocked indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
          } else col = in[j];
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
        }
      }
    } else {
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        } else {
          PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        }
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
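/*
  Example (sketch): inserting one bs x bs block with block (not point) indices,
  dispatching to the routine above. Assumes `A` is MATMPIBAIJ with bs = 2; the
  index and value names are illustrative only.

     PetscInt    brow = 1, bcol = 3;             // block row/column
     PetscScalar blk[4] = {1.0, 2.0, 3.0, 4.0};  // row-oriented 2x2 block

     PetscCall(MatSetValuesBlocked(A, 1, &brow, 1, &bcol, blk, INSERT_VALUES));
*/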
#define HASH_KEY 0.6180339887
#define HASH(size, key, tmp) (tmp = (key)*HASH_KEY, (PetscInt)((size) * (tmp - (PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
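/*
  Worked example (illustrative): HASH() is multiplicative hashing with the
  golden ratio. For size = 11 and key = 7, tmp = 7 * 0.6180339887 = 4.3262...,
  the fractional part is 0.3262..., and the slot is (PetscInt)(11 * 0.3262...) = 3.
  Taking the fractional part of key*HASH_KEY spreads consecutive keys nearly
  uniformly over the table, which keeps the linear probes in the routines
  below short.
*/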
PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIBAIJ *baij        = (Mat_MPIBAIJ *)mat->data;
  PetscBool    roworiented = baij->roworiented;
  PetscInt     i, j, row, col;
  PetscInt     rstart_orig = mat->rmap->rstart;
  PetscInt     rend_orig = mat->rmap->rend, Nbs = baij->Nbs;
  PetscInt     h1, key, size = baij->ht_size, bs = mat->rmap->bs, *HT = baij->ht, idx;
  PetscReal    tmp;
  MatScalar  **HD = baij->hd, value;
  PetscInt     total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (PetscDefined(USE_DEBUG)) {
      PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row");
      PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    }
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j = 0; j < n; j++) {
        col = in[j];
        if (roworiented) value = v[i * n + j];
        else value = v[i + j * m];
        /* Look up into the hash table */
        key = (row / bs) * Nbs + (col / bs) + 1;
        h1  = HASH(size, key, tmp);

        idx = h1;
        if (PetscDefined(USE_DEBUG)) {
          insert_ct++;
          total_ct++;
          if (HT[idx] != key) {
            for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++)
              ;
            if (idx == size) {
              for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++)
                ;
              PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
            }
          }
        } else if (HT[idx] != key) {
          for (idx = h1; (idx < size) && (HT[idx] != key); idx++)
            ;
          if (idx == size) {
            for (idx = 0; (idx < h1) && (HT[idx] != key); idx++)
              ;
            PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
          }
        }
        /* A HASH table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx] + (col % bs) * bs + (row % bs)) += value;
        else *(HD[idx] + (col % bs) * bs + (row % bs)) = value;
      }
    } else if (!baij->donotstash) {
      if (roworiented) {
        PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE));
      } else {
        PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE));
      }
    }
  }
  if (PetscDefined(USE_DEBUG)) {
    baij->ht_total_ct += total_ct;
    baij->ht_insert_ct += insert_ct;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIBAIJ       *baij        = (Mat_MPIBAIJ *)mat->data;
  PetscBool          roworiented = baij->roworiented;
  PetscInt           i, j, ii, jj, row, col;
  PetscInt           rstart = baij->rstartbs;
  PetscInt           rend = baij->rendbs, stepval, bs = mat->rmap->bs, bs2 = baij->bs2, nbs2 = n * bs2;
  PetscInt           h1, key, size = baij->ht_size, idx, *HT = baij->ht, Nbs = baij->Nbs;
  PetscReal          tmp;
  MatScalar        **HD = baij->hd, *baij_a;
  const PetscScalar *v_t, *value;
  PetscInt           total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct;

  PetscFunctionBegin;
  if (roworiented) stepval = (n - 1) * bs;
  else stepval = (m - 1) * bs;

  for (i = 0; i < m; i++) {
    if (PetscDefined(USE_DEBUG)) {
      PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row: %" PetscInt_FMT, im[i]);
      PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
    }
    row = im[i];
    v_t = v + i * nbs2;
    if (row >= rstart && row < rend) {
      for (j = 0; j < n; j++) {
        col = in[j];

        /* Look up into the hash table */
        key = row * Nbs + col + 1;
        h1  = HASH(size, key, tmp);

        idx = h1;
        if (PetscDefined(USE_DEBUG)) {
          total_ct++;
          insert_ct++;
          if (HT[idx] != key) {
            for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++)
              ;
            if (idx == size) {
              for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++)
                ;
              PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
            }
          }
        } else if (HT[idx] != key) {
          for (idx = h1; (idx < size) && (HT[idx] != key); idx++)
            ;
          if (idx == size) {
            for (idx = 0; (idx < h1) && (HT[idx] != key); idx++)
              ;
            PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
          }
        }
        baij_a = HD[idx];
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t += bs;
          if (addv == ADD_VALUES) {
            for (ii = 0; ii < bs; ii++, value += stepval) {
              for (jj = ii; jj < bs2; jj += bs) baij_a[jj] += *value++;
            }
          } else {
            for (ii = 0; ii < bs; ii++, value += stepval) {
              for (jj = ii; jj < bs2; jj += bs) baij_a[jj] = *value++;
            }
          }
        } else {
          value = v + j * (stepval + bs) * bs + i * bs;
          if (addv == ADD_VALUES) {
            for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) {
              for (jj = 0; jj < bs; jj++) baij_a[jj] += *value++;
            }
          } else {
            for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) {
              for (jj = 0; jj < bs; jj++) baij_a[jj] = *value++;
            }
          }
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        } else {
          PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        }
      }
    }
  }
  if (PetscDefined(USE_DEBUG)) {
    baij->ht_total_ct += total_ct;
    baij->ht_insert_ct += insert_ct;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
PetscErrorCode MatGetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  PetscInt     bs = mat->rmap->bs, i, j, bsrstart = mat->rmap->rstart, bsrend = mat->rmap->rend;
  PetscInt     bscstart = mat->cmap->rstart, bscend = mat->cmap->rend, row, col, data;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j = 0; j < n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          col = idxn[j] - bscstart;
          PetscCall(MatGetValues_SeqBAIJ(baij->A, 1, &row, 1, &col, v + i * n + j));
        } else {
          if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
          PetscCall(PetscHMapIGetWithDefault(baij->colmap, idxn[j] / bs + 1, 0, &data));
          data--;
#else
          data = baij->colmap[idxn[j] / bs] - 1;
#endif
          if ((data < 0) || (baij->garray[data / bs] != idxn[j] / bs)) *(v + i * n + j) = 0.0;
          else {
            col = data + idxn[j] % bs;
            PetscCall(MatGetValues_SeqBAIJ(baij->B, 1, &row, 1, &col, v + i * n + j));
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatNorm_MPIBAIJ(Mat mat, NormType type, PetscReal *nrm)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ *amat = (Mat_SeqBAIJ *)baij->A->data, *bmat = (Mat_SeqBAIJ *)baij->B->data;
  PetscInt     i, j, bs2 = baij->bs2, bs = baij->A->rmap->bs, nz, row, col;
  PetscReal    sum = 0.0;
  MatScalar   *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    PetscCall(MatNorm(baij->A, type, nrm));
  } else {
    if (type == NORM_FROBENIUS) {
      v  = amat->a;
      nz = amat->nz * bs2;
      for (i = 0; i < nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      v  = bmat->a;
      nz = bmat->nz * bs2;
      for (i = 0; i < nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      *nrm = PetscSqrtReal(*nrm);
    } else if (type == NORM_1) { /* max column sum */
      PetscReal *tmp, *tmp2;
      PetscInt  *jj, *garray = baij->garray, cstart = baij->cstartbs;

      PetscCall(PetscCalloc1(mat->cmap->N, &tmp));
      PetscCall(PetscMalloc1(mat->cmap->N, &tmp2));
      v  = amat->a;
      jj = amat->j;
      for (i = 0; i < amat->nz; i++) {
        for (j = 0; j < bs; j++) {
          col = bs * (cstart + *jj) + j; /* column index */
          for (row = 0; row < bs; row++) {
            tmp[col] += PetscAbsScalar(*v);
            v++;
          }
        }
        jj++;
      }
      v  = bmat->a;
      jj = bmat->j;
      for (i = 0; i < bmat->nz; i++) {
        for (j = 0; j < bs; j++) {
          col = bs * garray[*jj] + j;
          for (row = 0; row < bs; row++) {
            tmp[col] += PetscAbsScalar(*v);
            v++;
          }
        }
        jj++;
      }
      PetscCall(MPIU_Allreduce(tmp, tmp2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      *nrm = 0.0;
      for (j = 0; j < mat->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      PetscCall(PetscFree(tmp));
      PetscCall(PetscFree(tmp2));
    } else if (type == NORM_INFINITY) { /* max row sum */
      PetscReal *sums;

      PetscCall(PetscMalloc1(bs, &sums));
      sum = 0.0;
      for (j = 0; j < amat->mbs; j++) {
        for (row = 0; row < bs; row++) sums[row] = 0.0;
        v  = amat->a + bs2 * amat->i[j];
        nz = amat->i[j + 1] - amat->i[j];
        for (i = 0; i < nz; i++) {
          for (col = 0; col < bs; col++) {
            for (row = 0; row < bs; row++) {
              sums[row] += PetscAbsScalar(*v);
              v++;
            }
          }
        }
        v  = bmat->a + bs2 * bmat->i[j];
        nz = bmat->i[j + 1] - bmat->i[j];
        for (i = 0; i < nz; i++) {
          for (col = 0; col < bs; col++) {
            for (row = 0; row < bs; row++) {
              sums[row] += PetscAbsScalar(*v);
              v++;
            }
          }
        }
        for (row = 0; row < bs; row++) {
          if (sums[row] > sum) sum = sums[row];
        }
      }
      PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat)));
      PetscCall(PetscFree(sums));
    } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for this norm yet");
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
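/*
  Example (sketch): computing the three norms supported above through the
  public API. `A` is an assembled MPIBAIJ matrix; the variable names are
  illustrative.

     PetscReal fro, one, inf;

     PetscCall(MatNorm(A, NORM_FROBENIUS, &fro));
     PetscCall(MatNorm(A, NORM_1, &one));         // max column sum
     PetscCall(MatNorm(A, NORM_INFINITY, &inf));  // max row sum
*/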
/*
  Creates the hash table, and sets the table.
  This table is created only once.
  If new entries need to be added to the matrix
  then the hash table has to be destroyed and
  recreated.
*/
PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat, PetscReal factor)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat          A = baij->A, B = baij->B;
  Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data;
  PetscInt     i, j, k, nz = a->nz + b->nz, h1, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
  PetscInt     ht_size, bs2 = baij->bs2, rstart = baij->rstartbs;
  PetscInt     cstart = baij->cstartbs, *garray = baij->garray, row, col, Nbs = baij->Nbs;
  PetscInt    *HT, key;
  MatScalar  **HD;
  PetscReal    tmp;
#if defined(PETSC_USE_INFO)
  PetscInt ct = 0, max = 0;
#endif

  PetscFunctionBegin;
  if (baij->ht) PetscFunctionReturn(PETSC_SUCCESS);

  baij->ht_size = (PetscInt)(factor * nz);
  ht_size       = baij->ht_size;

  /* Allocate Memory for Hash Table */
  PetscCall(PetscCalloc2(ht_size, &baij->hd, ht_size, &baij->ht));
  HD = baij->hd;
  HT = baij->ht;

  /* Loop Over A */
  for (i = 0; i < a->mbs; i++) {
    for (j = ai[i]; j < ai[i + 1]; j++) {
      row = i + rstart;
      col = aj[j] + cstart;

      key = row * Nbs + col + 1;
      h1  = HASH(ht_size, key, tmp);
      for (k = 0; k < ht_size; k++) {
        if (!HT[(h1 + k) % ht_size]) {
          HT[(h1 + k) % ht_size] = key;
          HD[(h1 + k) % ht_size] = a->a + j * bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }

  /* Loop Over B */
  for (i = 0; i < b->mbs; i++) {
    for (j = bi[i]; j < bi[i + 1]; j++) {
      row = i + rstart;
      col = garray[bj[j]];
      key = row * Nbs + col + 1;
      h1  = HASH(ht_size, key, tmp);
      for (k = 0; k < ht_size; k++) {
        if (!HT[(h1 + k) % ht_size]) {
          HT[(h1 + k) % ht_size] = key;
          HD[(h1 + k) % ht_size] = b->a + j * bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }

  /* Print Summary */
#if defined(PETSC_USE_INFO)
  for (i = 0, j = 0; i < ht_size; i++) {
    if (HT[i]) j++;
  }
  PetscCall(PetscInfo(mat, "Average Search = %5.2g, max search = %" PetscInt_FMT "\n", (!j) ? (double)0.0 : (double)(((PetscReal)(ct + j)) / (double)j), max));
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
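/*
  Example (sketch): enabling the hash-table insertion path that uses the
  routine above. MAT_USE_HASH_TABLE is set before the first assembly; after
  MAT_FINAL_ASSEMBLY the table is built and later MatSetValues() calls go
  through MatSetValues_MPIBAIJ_HT(). The factor (1.39 by default, see
  MatSetOption_MPIBAIJ() below) oversizes the table to keep probe chains
  short. `A` is an illustrative MPIBAIJ matrix.

     PetscCall(MatSetOption(A, MAT_USE_HASH_TABLE, PETSC_TRUE));
     // ... set values and assemble as usual; later insertions use the table
*/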
PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  PetscInt     nstash, reallocs;

  PetscFunctionBegin;
  if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
  PetscCall(MatStashScatterBegin_Private(mat, &mat->bstash, baij->rangebs));
  PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
  PetscCall(PetscInfo(mat, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscCall(MatStashGetInfo_Private(&mat->bstash, &nstash, &reallocs));
  PetscCall(PetscInfo(mat, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscFunctionReturn(PETSC_SUCCESS);
}
PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ *a    = (Mat_SeqBAIJ *)baij->A->data;
  PetscInt     i, j, rstart, ncols, flg, bs2 = baij->bs2;
  PetscInt    *row, *col;
  PetscBool    r1, r2, r3, other_disassembled;
  MatScalar   *val;
  PetscMPIInt  n;

  PetscFunctionBegin;
  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  if (!baij->donotstash && !mat->nooffprocentries) {
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
    /*
       Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column-oriented, and after MatSetValues()
       restore the original flags.
    */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ *)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;

    ((Mat_SeqBAIJ *)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->bstash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        PetscCall(MatSetValuesBlocked_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i * bs2, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->bstash));

    baij->roworiented = r1;
    a->roworiented    = r2;

    ((Mat_SeqBAIJ *)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  PetscCall(MatAssemblyBegin(baij->A, mode));
  PetscCall(MatAssemblyEnd(baij->A, mode));

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ *)baij->B->data)->nonew) {
    PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !other_disassembled) PetscCall(MatDisAssemble_MPIBAIJ(mat));
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIBAIJ(mat));
  PetscCall(MatAssemblyBegin(baij->B, mode));
  PetscCall(MatAssemblyEnd(baij->B, mode));

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    PetscCall(PetscInfo(mat, "Average Hash Table Search in MatSetValues = %5.2f\n", (double)((PetscReal)baij->ht_total_ct) / baij->ht_insert_ct));

    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    PetscCall(MatCreateHashTable_MPIBAIJ_Private(mat, baij->ht_fact));

    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));

  baij->rowvalues = NULL;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ *)(baij->A->data))->nonew) {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
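/*
  Example (sketch): the standard assembly sequence that drives the two routines
  above. Entries destined for other ranks are stashed by MatSetValues() and
  exchanged between MatAssemblyBegin() and MatAssemblyEnd(); the names `A`,
  `rowidx`, `colidx`, and `val` are illustrative.

     PetscCall(MatSetValues(A, 1, &rowidx, 1, &colidx, &val, INSERT_VALUES)); // may be off-process
     PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); // starts stash communication
     PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));   // receives and inserts stashed values
*/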
extern PetscErrorCode MatView_SeqBAIJ(Mat, PetscViewer);
#include <petscdraw.h>
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
{
  Mat_MPIBAIJ      *baij = (Mat_MPIBAIJ *)mat->data;
  PetscMPIInt       rank = baij->rank;
  PetscInt          bs   = mat->rmap->bs;
  PetscBool         iascii, isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  if (iascii) {
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, mat->rmap->bs, (double)info.memory));
      PetscCall(MatGetInfo(baij->A, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(MatGetInfo(baij->B, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(baij->Mvctx, viewer));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscCall(PetscViewerASCIIPrintf(viewer, "  block size is %" PetscInt_FMT "\n", bs));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(PETSC_SUCCESS);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    Mat_SeqBAIJ *Aloc;
    PetscInt     M = mat->rmap->N, N = mat->cmap->N, *ai, *aj, col, i, j, k, *rvals, mbs = baij->mbs;
    MatScalar   *a;
    const char  *matname;

    /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
    if (rank == 0) {
      PetscCall(MatSetSizes(A, M, N, M, N));
    } else {
      PetscCall(MatSetSizes(A, 0, 0, M, N));
    }
    PetscCall(MatSetType(A, MATMPIBAIJ));
    PetscCall(MatMPIBAIJSetPreallocation(A, mat->rmap->bs, 0, NULL, 0, NULL));
    PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ *)baij->A->data;
    ai   = Aloc->i;
    aj   = Aloc->j;
    a    = Aloc->a;
    PetscCall(PetscMalloc1(bs, &rvals));

    for (i = 0; i < mbs; i++) {
      rvals[0] = bs * (baij->rstartbs + i);
      for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
      for (j = ai[i]; j < ai[i + 1]; j++) {
        col = (baij->cstartbs + aj[j]) * bs;
        for (k = 0; k < bs; k++) {
          PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
          col++;
          a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ *)baij->B->data;
    ai   = Aloc->i;
    aj   = Aloc->j;
    a    = Aloc->a;
    for (i = 0; i < mbs; i++) {
      rvals[0] = bs * (baij->rstartbs + i);
      for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
      for (j = ai[i]; j < ai[i + 1]; j++) {
        col = baij->garray[aj[j]] * bs;
        for (k = 0; k < bs; k++) {
          PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
          col++;
          a += bs;
        }
      }
    }
    PetscCall(PetscFree(rvals));
    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    if (((PetscObject)mat)->name) PetscCall(PetscObjectGetName((PetscObject)mat, &matname));
    if (rank == 0) {
      if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIBAIJ *)(A->data))->A, matname));
      PetscCall(MatView_SeqBAIJ(((Mat_MPIBAIJ *)(A->data))->A, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(PetscViewerFlush(viewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
/* Used for both MPIBAIJ and MPISBAIJ matrices */
PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat, PetscViewer viewer)
{
  Mat_MPIBAIJ    *aij    = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ    *A      = (Mat_SeqBAIJ *)aij->A->data;
  Mat_SeqBAIJ    *B      = (Mat_SeqBAIJ *)aij->B->data;
  const PetscInt *garray = aij->garray;
  PetscInt        header[4], M, N, m, rs, cs, bs, cnt, i, j, ja, jb, k, l;
  PetscInt64      nz, hnz;
  PetscInt       *rowlens, *colidxs;
  PetscScalar    *matvals;
  PetscMPIInt     rank;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  M  = mat->rmap->N;
  N  = mat->cmap->N;
  m  = mat->rmap->n;
  rs = mat->rmap->rstart;
  cs = mat->cmap->rstart;
  bs = mat->rmap->bs;
  nz = bs * bs * (A->nz + B->nz);

  /* write matrix header */
  header[0] = MAT_FILE_CLASSID;
  header[1] = M;
  header[2] = N;
  PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_INT64, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
  if (rank == 0) PetscCall(PetscIntCast(hnz, &header[3]));
  PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));

  /* fill in and store row lengths */
  PetscCall(PetscMalloc1(m, &rowlens));
  for (cnt = 0, i = 0; i < A->mbs; i++)
    for (j = 0; j < bs; j++) rowlens[cnt++] = bs * (A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i]);
  PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
  PetscCall(PetscFree(rowlens));

  /* fill in and store column indices */
  PetscCall(PetscMalloc1(nz, &colidxs));
  for (cnt = 0, i = 0; i < A->mbs; i++) {
    for (k = 0; k < bs; k++) {
      for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
        if (garray[B->j[jb]] > cs / bs) break;
        for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l;
      }
      for (ja = A->i[i]; ja < A->i[i + 1]; ja++)
        for (l = 0; l < bs; l++) colidxs[cnt++] = bs * A->j[ja] + l + cs;
      for (; jb < B->i[i + 1]; jb++)
        for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l;
    }
  }
  PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
  PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_INT));
  PetscCall(PetscFree(colidxs));

  /* fill in and store nonzero values */
  PetscCall(PetscMalloc1(nz, &matvals));
  for (cnt = 0, i = 0; i < A->mbs; i++) {
    for (k = 0; k < bs; k++) {
      for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
        if (garray[B->j[jb]] > cs / bs) break;
        for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k];
      }
      for (ja = A->i[i]; ja < A->i[i + 1]; ja++)
        for (l = 0; l < bs; l++) matvals[cnt++] = A->a[bs * (bs * ja + l) + k];
      for (; jb < B->i[i + 1]; jb++)
        for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k];
    }
  }
  PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_SCALAR));
  PetscCall(PetscFree(matvals));

  /* write block size option to the viewer's .info file */
  PetscCall(MatView_Binary_BlockSizes(mat, viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}
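/*
  Example (sketch): saving a matrix with the binary path above. The file name
  and matrix `A` are illustrative; MatLoad() can read the result back.

     PetscViewer viewer;

     PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, "A.dat", FILE_MODE_WRITE, &viewer));
     PetscCall(MatView(A, viewer));
     PetscCall(PetscViewerDestroy(&viewer));
*/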
PetscErrorCode MatView_MPIBAIJ(Mat mat, PetscViewer viewer)
{
  PetscBool iascii, isdraw, issocket, isbinary;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  if (iascii || isdraw || issocket) {
    PetscCall(MatView_MPIBAIJ_ASCIIorDraworSocket(mat, viewer));
  } else if (isbinary) PetscCall(MatView_MPIBAIJ_Binary(mat, viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatMult_MPIBAIJ(Mat A, Vec xx, Vec yy)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
  PetscInt     nt;

  PetscFunctionBegin;
  PetscCall(VecGetLocalSize(xx, &nt));
  PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A and xx");
  PetscCall(VecGetLocalSize(yy, &nt));
  PetscCheck(nt == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A and yy");
  PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*a->A->ops->mult)(a->A, xx, yy));
  PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*a->B->ops->multadd)(a->B, a->lvec, yy, yy));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatMultAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
  PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
  PetscFunctionReturn(PETSC_SUCCESS);
}
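/*
  Example (sketch): MatMult() above overlaps communication with computation:
  the scatter of off-process entries of xx into a->lvec is started, the
  diagonal block is applied while messages are in flight, and the off-diagonal
  contribution is added after VecScatterEnd(). A typical call, with names
  illustrative:

     Vec x, y;

     PetscCall(MatCreateVecs(A, &x, &y)); // x: column layout, y: row layout
     PetscCall(MatMult(A, x, y));         // y = A*x
*/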
PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A, Vec xx, Vec yy)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  /* do nondiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
  /* do local part */
  PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
  /* add partial results together */
  PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
  PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  /* do nondiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
  /* do local part */
  PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
  /* add partial results together */
  PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
  PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A, Vec v)
{
  PetscFunctionBegin;
  PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
  PetscCall(MatGetDiagonal(((Mat_MPIBAIJ *)A->data)->A, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatScale_MPIBAIJ(Mat A, PetscScalar aa)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatScale(a->A, aa));
  PetscCall(MatScale(a->B, aa));
  PetscFunctionReturn(PETSC_SUCCESS);
}
PetscErrorCode MatGetRow_MPIBAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data;
  PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
  PetscInt     bs = matin->rmap->bs, bs2 = mat->bs2, i, *cworkA, *cworkB, **pcA, **pcB;
  PetscInt     nztot, nzA, nzB, lrow, brstart = matin->rmap->rstart, brend = matin->rmap->rend;
  PetscInt    *cmap, *idx_p, cstart = mat->cstartbs;

  PetscFunctionBegin;
  PetscCheck(row >= brstart && row < brend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local rows");
  PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /* allocate enough space to hold information from the longest row. */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *)mat->A->data, *Ba = (Mat_SeqBAIJ *)mat->B->data;
    PetscInt     max = 1, mbs = mat->mbs, tmp;
    for (i = 0; i < mbs; i++) {
      tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    PetscCall(PetscMalloc2(max * bs2, &mat->rowvalues, max * bs2, &mat->rowindices));
  }
  lrow = row - brstart;

  pvA = &vworkA;
  pcA = &cworkA;
  pvB = &vworkB;
  pcB = &cworkB;
  if (!v) {
    pvA = NULL;
    pvB = NULL;
  }
  if (!idx) {
    pcA = NULL;
    if (!v) pcB = NULL;
  }
  PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i = 0; i < nzB; i++) {
          if (cmap[cworkB[i] / bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
        for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
        } else {
          for (i = 0; i < nzB; i++) {
            if (cmap[cworkB[i] / bs] < cstart) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
            else break;
          }
          imark = i;
        }
        for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart * bs + cworkA[i];
        for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
      }
    } else {
      if (idx) *idx = NULL;
      if (v) *v = NULL;
    }
  }
  *nz = nztot;
  PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCheck(baij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow not called");
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatZeroEntries(l->A));
  PetscCall(MatZeroEntries(l->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}
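/*
  Example (sketch): iterating over the locally owned rows with the routines
  above via the public API. Each MatGetRow() must be paired with a
  MatRestoreRow(); `A` is illustrative.

     const PetscInt    *cols;
     const PetscScalar *vals;
     PetscInt           ncols, r, rstart, rend;

     PetscCall(MatGetOwnershipRange(A, &rstart, &rend));
     for (r = rstart; r < rend; r++) {
       PetscCall(MatGetRow(A, r, &ncols, &cols, &vals));
       // ... inspect ncols, cols[], vals[] ...
       PetscCall(MatRestoreRow(A, r, &ncols, &cols, &vals));
     }
*/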
PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin, MatInfoType flag, MatInfo *info)
{
  Mat_MPIBAIJ   *a = (Mat_MPIBAIJ *)matin->data;
  Mat            A = a->A, B = a->B;
  PetscLogDouble isend[5], irecv[5];

  PetscFunctionBegin;
  info->block_size = (PetscReal)matin->rmap->bs;

  PetscCall(MatGetInfo(A, MAT_LOCAL, info));

  isend[0] = info->nz_used;
  isend[1] = info->nz_allocated;
  isend[2] = info->nz_unneeded;
  isend[3] = info->memory;
  isend[4] = info->mallocs;

  PetscCall(MatGetInfo(B, MAT_LOCAL, info));

  isend[0] += info->nz_used;
  isend[1] += info->nz_allocated;
  isend[2] += info->nz_unneeded;
  isend[3] += info->memory;
  isend[4] += info->mallocs;

  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_ARG_WRONG, "Unknown MatInfoType argument %d", (int)flag);
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(PETSC_SUCCESS);
}
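/*
  Example (sketch): querying assembled-matrix statistics through the public
  API; MAT_GLOBAL_SUM reduces the per-rank numbers as in the routine above.
  `A` is illustrative.

     MatInfo info;

     PetscCall(MatGetInfo(A, MAT_GLOBAL_SUM, &info));
     PetscCall(PetscPrintf(PETSC_COMM_WORLD, "nonzeros used %g, mallocs %g\n", info.nz_used, info.mallocs));
*/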
(Mat_SeqBAIJ *)baij->A->data; 1539 ai = Aloc->i; 1540 aj = Aloc->j; 1541 a = Aloc->a; 1542 PetscCall(PetscMalloc1(bs, &rvals)); 1543 1544 for (i = 0; i < mbs; i++) { 1545 rvals[0] = bs * (baij->rstartbs + i); 1546 for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1; 1547 for (j = ai[i]; j < ai[i + 1]; j++) { 1548 col = (baij->cstartbs + aj[j]) * bs; 1549 for (k = 0; k < bs; k++) { 1550 PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES)); 1551 1552 col++; 1553 a += bs; 1554 } 1555 } 1556 } 1557 /* copy over the B part */ 1558 Aloc = (Mat_SeqBAIJ *)baij->B->data; 1559 ai = Aloc->i; 1560 aj = Aloc->j; 1561 a = Aloc->a; 1562 for (i = 0; i < mbs; i++) { 1563 rvals[0] = bs * (baij->rstartbs + i); 1564 for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1; 1565 for (j = ai[i]; j < ai[i + 1]; j++) { 1566 col = baij->garray[aj[j]] * bs; 1567 for (k = 0; k < bs; k++) { 1568 PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES)); 1569 col++; 1570 a += bs; 1571 } 1572 } 1573 } 1574 PetscCall(PetscFree(rvals)); 1575 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 1576 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 1577 1578 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) *matout = B; 1579 else PetscCall(MatHeaderMerge(A, &B)); 1580 PetscFunctionReturn(PETSC_SUCCESS); 1581 } 1582 1583 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat, Vec ll, Vec rr) 1584 { 1585 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 1586 Mat a = baij->A, b = baij->B; 1587 PetscInt s1, s2, s3; 1588 1589 PetscFunctionBegin; 1590 PetscCall(MatGetLocalSize(mat, &s2, &s3)); 1591 if (rr) { 1592 PetscCall(VecGetLocalSize(rr, &s1)); 1593 PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size"); 1594 /* Overlap communication with computation. */ 1595 PetscCall(VecScatterBegin(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1596 } 1597 if (ll) { 1598 PetscCall(VecGetLocalSize(ll, &s1)); 1599 PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size"); 1600 PetscUseTypeMethod(b, diagonalscale, ll, NULL); 1601 } 1602 /* scale the diagonal block */ 1603 PetscUseTypeMethod(a, diagonalscale, ll, rr); 1604 1605 if (rr) { 1606 /* Do a scatter end and then right scale the off-diagonal block */ 1607 PetscCall(VecScatterEnd(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1608 PetscUseTypeMethod(b, diagonalscale, NULL, baij->lvec); 1609 } 1610 PetscFunctionReturn(PETSC_SUCCESS); 1611 } 1612 1613 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 1614 { 1615 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data; 1616 PetscInt *lrows; 1617 PetscInt r, len; 1618 PetscBool cong; 1619 1620 PetscFunctionBegin; 1621 /* get locally owned rows */ 1622 PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows)); 1623 /* fix right hand side if needed */ 1624 if (x && b) { 1625 const PetscScalar *xx; 1626 PetscScalar *bb; 1627 1628 PetscCall(VecGetArrayRead(x, &xx)); 1629 PetscCall(VecGetArray(b, &bb)); 1630 for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]]; 1631 PetscCall(VecRestoreArrayRead(x, &xx)); 1632 PetscCall(VecRestoreArray(b, &bb)); 1633 } 1634 1635 /* actually zap the local rows */ 1636 /* 1637 Zero the required rows. 
If the "diagonal block" of the matrix 1638 is square and the user wishes to set the diagonal we use separate 1639 code so that MatSetValues() is not called for each diagonal allocating 1640 new memory, thus calling lots of mallocs and slowing things down. 1641 1642 */ 1643 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1644 PetscCall(MatZeroRows_SeqBAIJ(l->B, len, lrows, 0.0, NULL, NULL)); 1645 PetscCall(MatHasCongruentLayouts(A, &cong)); 1646 if ((diag != 0.0) && cong) { 1647 PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, diag, NULL, NULL)); 1648 } else if (diag != 0.0) { 1649 PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL)); 1650 PetscCheck(!((Mat_SeqBAIJ *)l->A->data)->nonew, PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1651 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1652 for (r = 0; r < len; ++r) { 1653 const PetscInt row = lrows[r] + A->rmap->rstart; 1654 PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES)); 1655 } 1656 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 1657 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 1658 } else { 1659 PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL)); 1660 } 1661 PetscCall(PetscFree(lrows)); 1662 1663 /* only change matrix nonzero state if pattern was allowed to be changed */ 1664 if (!((Mat_SeqBAIJ *)(l->A->data))->keepnonzeropattern) { 1665 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1666 PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A))); 1667 } 1668 PetscFunctionReturn(PETSC_SUCCESS); 1669 } 1670 1671 PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 1672 { 1673 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data; 1674 PetscMPIInt n = A->rmap->n, p = 0; 1675 PetscInt i, j, k, r, len = 0, row, col, count; 1676 PetscInt *lrows, *owners = A->rmap->range; 1677 PetscSFNode *rrows; 1678 PetscSF sf; 1679 const PetscScalar *xx; 1680 PetscScalar *bb, *mask; 1681 Vec xmask, lmask; 1682 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)l->B->data; 1683 PetscInt bs = A->rmap->bs, bs2 = baij->bs2; 1684 PetscScalar *aa; 1685 1686 PetscFunctionBegin; 1687 /* Create SF where leaves are input rows and roots are owned rows */ 1688 PetscCall(PetscMalloc1(n, &lrows)); 1689 for (r = 0; r < n; ++r) lrows[r] = -1; 1690 PetscCall(PetscMalloc1(N, &rrows)); 1691 for (r = 0; r < N; ++r) { 1692 const PetscInt idx = rows[r]; 1693 PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N); 1694 if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */ 1695 PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p)); 1696 } 1697 rrows[r].rank = p; 1698 rrows[r].index = rows[r] - owners[p]; 1699 } 1700 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1701 PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER)); 1702 /* Collect flags for rows to be zeroed */ 1703 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR)); 1704 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR)); 1705 PetscCall(PetscSFDestroy(&sf)); 1706 /* Compress and put in row numbers */ 1707 for (r = 0; r < n; ++r) 1708 if (lrows[r] >= 0) lrows[len++] = r; 
1709 /* zero diagonal part of matrix */ 1710 PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b)); 1711 /* handle off diagonal part of matrix */ 1712 PetscCall(MatCreateVecs(A, &xmask, NULL)); 1713 PetscCall(VecDuplicate(l->lvec, &lmask)); 1714 PetscCall(VecGetArray(xmask, &bb)); 1715 for (i = 0; i < len; i++) bb[lrows[i]] = 1; 1716 PetscCall(VecRestoreArray(xmask, &bb)); 1717 PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD)); 1718 PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD)); 1719 PetscCall(VecDestroy(&xmask)); 1720 if (x) { 1721 PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1722 PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1723 PetscCall(VecGetArrayRead(l->lvec, &xx)); 1724 PetscCall(VecGetArray(b, &bb)); 1725 } 1726 PetscCall(VecGetArray(lmask, &mask)); 1727 /* remove zeroed rows of off diagonal matrix */ 1728 for (i = 0; i < len; ++i) { 1729 row = lrows[i]; 1730 count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs; 1731 aa = ((MatScalar *)(baij->a)) + baij->i[row / bs] * bs2 + (row % bs); 1732 for (k = 0; k < count; ++k) { 1733 aa[0] = 0.0; 1734 aa += bs; 1735 } 1736 } 1737 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1738 for (i = 0; i < l->B->rmap->N; ++i) { 1739 row = i / bs; 1740 for (j = baij->i[row]; j < baij->i[row + 1]; ++j) { 1741 for (k = 0; k < bs; ++k) { 1742 col = bs * baij->j[j] + k; 1743 if (PetscAbsScalar(mask[col])) { 1744 aa = ((MatScalar *)(baij->a)) + j * bs2 + (i % bs) + bs * k; 1745 if (x) bb[i] -= aa[0] * xx[col]; 1746 aa[0] = 0.0; 1747 } 1748 } 1749 } 1750 } 1751 if (x) { 1752 PetscCall(VecRestoreArray(b, &bb)); 1753 PetscCall(VecRestoreArrayRead(l->lvec, &xx)); 1754 } 1755 PetscCall(VecRestoreArray(lmask, &mask)); 1756 PetscCall(VecDestroy(&lmask)); 1757 PetscCall(PetscFree(lrows)); 1758 1759 /* only change matrix nonzero state if pattern was allowed to be changed */ 1760 if (!((Mat_SeqBAIJ *)(l->A->data))->keepnonzeropattern) { 1761 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1762 PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A))); 1763 } 1764 PetscFunctionReturn(PETSC_SUCCESS); 1765 } 1766 1767 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1768 { 1769 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1770 1771 PetscFunctionBegin; 1772 PetscCall(MatSetUnfactored(a->A)); 1773 PetscFunctionReturn(PETSC_SUCCESS); 1774 } 1775 1776 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat, MatDuplicateOption, Mat *); 1777 1778 PetscErrorCode MatEqual_MPIBAIJ(Mat A, Mat B, PetscBool *flag) 1779 { 1780 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ *)B->data, *matA = (Mat_MPIBAIJ *)A->data; 1781 Mat a, b, c, d; 1782 PetscBool flg; 1783 1784 PetscFunctionBegin; 1785 a = matA->A; 1786 b = matA->B; 1787 c = matB->A; 1788 d = matB->B; 1789 1790 PetscCall(MatEqual(a, c, &flg)); 1791 if (flg) PetscCall(MatEqual(b, d, &flg)); 1792 PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A))); 1793 PetscFunctionReturn(PETSC_SUCCESS); 1794 } 1795 1796 PetscErrorCode MatCopy_MPIBAIJ(Mat A, Mat B, MatStructure str) 1797 { 1798 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1799 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 1800 1801 PetscFunctionBegin; 1802 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
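In that case we fall back to MatCopy_Basic(), which copies the values one row at a time with MatGetRow() and MatSetValues().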
*/ 1803 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1804 PetscCall(MatCopy_Basic(A, B, str)); 1805 } else { 1806 PetscCall(MatCopy(a->A, b->A, str)); 1807 PetscCall(MatCopy(a->B, b->B, str)); 1808 } 1809 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 1810 PetscFunctionReturn(PETSC_SUCCESS); 1811 } 1812 1813 PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz) 1814 { 1815 PetscInt bs = Y->rmap->bs, m = Y->rmap->N / bs; 1816 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data; 1817 Mat_SeqBAIJ *y = (Mat_SeqBAIJ *)Y->data; 1818 1819 PetscFunctionBegin; 1820 PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz)); 1821 PetscFunctionReturn(PETSC_SUCCESS); 1822 } 1823 1824 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str) 1825 { 1826 Mat_MPIBAIJ *xx = (Mat_MPIBAIJ *)X->data, *yy = (Mat_MPIBAIJ *)Y->data; 1827 PetscBLASInt bnz, one = 1; 1828 Mat_SeqBAIJ *x, *y; 1829 PetscInt bs2 = Y->rmap->bs * Y->rmap->bs; 1830 1831 PetscFunctionBegin; 1832 if (str == SAME_NONZERO_PATTERN) { 1833 PetscScalar alpha = a; 1834 x = (Mat_SeqBAIJ *)xx->A->data; 1835 y = (Mat_SeqBAIJ *)yy->A->data; 1836 PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz)); 1837 PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one)); 1838 x = (Mat_SeqBAIJ *)xx->B->data; 1839 y = (Mat_SeqBAIJ *)yy->B->data; 1840 PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz)); 1841 PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one)); 1842 PetscCall(PetscObjectStateIncrease((PetscObject)Y)); 1843 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1844 PetscCall(MatAXPY_Basic(Y, a, X, str)); 1845 } else { 1846 Mat B; 1847 PetscInt *nnz_d, *nnz_o, bs = Y->rmap->bs; 1848 PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d)); 1849 PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o)); 1850 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B)); 1851 PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name)); 1852 PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N)); 1853 PetscCall(MatSetBlockSizesFromMats(B, Y, Y)); 1854 PetscCall(MatSetType(B, MATMPIBAIJ)); 1855 PetscCall(MatAXPYGetPreallocation_SeqBAIJ(yy->A, xx->A, nnz_d)); 1856 PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o)); 1857 PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, nnz_d, 0, nnz_o)); 1858 /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! 
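(presumably because the basic implementation inserts values point-wise through MatGetRow()/MatSetValues() and cannot exploit the block structure)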
*/ 1859 PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str)); 1860 PetscCall(MatHeaderMerge(Y, &B)); 1861 PetscCall(PetscFree(nnz_d)); 1862 PetscCall(PetscFree(nnz_o)); 1863 } 1864 PetscFunctionReturn(PETSC_SUCCESS); 1865 } 1866 1867 PetscErrorCode MatConjugate_MPIBAIJ(Mat mat) 1868 { 1869 PetscFunctionBegin; 1870 if (PetscDefined(USE_COMPLEX)) { 1871 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)mat->data; 1872 1873 PetscCall(MatConjugate_SeqBAIJ(a->A)); 1874 PetscCall(MatConjugate_SeqBAIJ(a->B)); 1875 } 1876 PetscFunctionReturn(PETSC_SUCCESS); 1877 } 1878 1879 PetscErrorCode MatRealPart_MPIBAIJ(Mat A) 1880 { 1881 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1882 1883 PetscFunctionBegin; 1884 PetscCall(MatRealPart(a->A)); 1885 PetscCall(MatRealPart(a->B)); 1886 PetscFunctionReturn(PETSC_SUCCESS); 1887 } 1888 1889 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A) 1890 { 1891 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1892 1893 PetscFunctionBegin; 1894 PetscCall(MatImaginaryPart(a->A)); 1895 PetscCall(MatImaginaryPart(a->B)); 1896 PetscFunctionReturn(PETSC_SUCCESS); 1897 } 1898 1899 PetscErrorCode MatCreateSubMatrix_MPIBAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat) 1900 { 1901 IS iscol_local; 1902 PetscInt csize; 1903 1904 PetscFunctionBegin; 1905 PetscCall(ISGetLocalSize(iscol, &csize)); 1906 if (call == MAT_REUSE_MATRIX) { 1907 PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local)); 1908 PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 1909 } else { 1910 PetscCall(ISAllGather(iscol, &iscol_local)); 1911 } 1912 PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, isrow, iscol_local, csize, call, newmat)); 1913 if (call == MAT_INITIAL_MATRIX) { 1914 PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local)); 1915 PetscCall(ISDestroy(&iscol_local)); 1916 } 1917 PetscFunctionReturn(PETSC_SUCCESS); 1918 } 1919 1920 /* 1921 Not great since it makes two copies of the submatrix: first a local SeqBAIJ 1922 on each process, and then the end result is obtained by concatenating the local matrices. 1923 Writing it directly would be much like MatCreateSubMatrices_MPIBAIJ(). 1924 This routine is used for BAIJ and SBAIJ matrices (unfortunate dependency). 1925 */ 1926 PetscErrorCode MatCreateSubMatrix_MPIBAIJ_Private(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat) 1927 { 1928 PetscMPIInt rank, size; 1929 PetscInt i, m, n, rstart, row, rend, nz, *cwork, j, bs; 1930 PetscInt *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal; 1931 Mat M, Mreuse; 1932 MatScalar *vwork, *aa; 1933 MPI_Comm comm; 1934 IS isrow_new, iscol_new; 1935 Mat_SeqBAIJ *aij; 1936 1937 PetscFunctionBegin; 1938 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 1939 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 1940 PetscCallMPI(MPI_Comm_size(comm, &size)); 1941 /* The compression and expansion should be avoided.
They do not flag 1942 errors and they may change the indices, hence this approach is buggy. */ 1943 PetscCall(ISCompressIndicesGeneral(mat->rmap->N, mat->rmap->n, mat->rmap->bs, 1, &isrow, &isrow_new)); 1944 if (isrow == iscol) { 1945 iscol_new = isrow_new; 1946 PetscCall(PetscObjectReference((PetscObject)iscol_new)); 1947 } else PetscCall(ISCompressIndicesGeneral(mat->cmap->N, mat->cmap->n, mat->cmap->bs, 1, &iscol, &iscol_new)); 1948 1949 if (call == MAT_REUSE_MATRIX) { 1950 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse)); 1951 PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 1952 PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_REUSE_MATRIX, &Mreuse)); 1953 } else { 1954 PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_INITIAL_MATRIX, &Mreuse)); 1955 } 1956 PetscCall(ISDestroy(&isrow_new)); 1957 PetscCall(ISDestroy(&iscol_new)); 1958 /* 1959 m - number of local rows 1960 n - number of columns (same on all processors) 1961 rstart - first row in new global matrix generated 1962 */ 1963 PetscCall(MatGetBlockSize(mat, &bs)); 1964 PetscCall(MatGetSize(Mreuse, &m, &n)); 1965 m = m / bs; 1966 n = n / bs; 1967 1968 if (call == MAT_INITIAL_MATRIX) { 1969 aij = (Mat_SeqBAIJ *)(Mreuse)->data; 1970 ii = aij->i; 1971 jj = aij->j; 1972 1973 /* 1974 Determine the number of non-zeros in the diagonal and off-diagonal 1975 portions of the matrix in order to do correct preallocation 1976 */ 1977 1978 /* first get start and end of "diagonal" columns */ 1979 if (csize == PETSC_DECIDE) { 1980 PetscCall(ISGetSize(isrow, &mglobal)); 1981 if (mglobal == n * bs) { /* square matrix */ 1982 nlocal = m; 1983 } else { 1984 nlocal = n / size + ((n % size) > rank); 1985 } 1986 } else { 1987 nlocal = csize / bs; 1988 } 1989 PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm)); 1990 rstart = rend - nlocal; 1991 PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n); 1992 1993 /* next, compute all the lengths */ 1994 PetscCall(PetscMalloc2(m + 1, &dlens, m + 1, &olens)); 1995 for (i = 0; i < m; i++) { 1996 jend = ii[i + 1] - ii[i]; 1997 olen = 0; 1998 dlen = 0; 1999 for (j = 0; j < jend; j++) { 2000 if (*jj < rstart || *jj >= rend) olen++; 2001 else dlen++; 2002 jj++; 2003 } 2004 olens[i] = olen; 2005 dlens[i] = dlen; 2006 } 2007 PetscCall(MatCreate(comm, &M)); 2008 PetscCall(MatSetSizes(M, bs * m, bs * nlocal, PETSC_DECIDE, bs * n)); 2009 PetscCall(MatSetType(M, ((PetscObject)mat)->type_name)); 2010 PetscCall(MatMPIBAIJSetPreallocation(M, bs, 0, dlens, 0, olens)); 2011 PetscCall(MatMPISBAIJSetPreallocation(M, bs, 0, dlens, 0, olens)); 2012 PetscCall(PetscFree2(dlens, olens)); 2013 } else { 2014 PetscInt ml, nl; 2015 2016 M = *newmat; 2017 PetscCall(MatGetLocalSize(M, &ml, &nl)); 2018 PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request"); 2019 PetscCall(MatZeroEntries(M)); 2020 /* 2021 The next two lines are needed so we may call MatSetValuesBlocked_MPIBAIJ() below directly, 2022 rather than the slower MatSetValues().
2023 */ 2024 M->was_assembled = PETSC_TRUE; 2025 M->assembled = PETSC_FALSE; 2026 } 2027 PetscCall(MatSetOption(M, MAT_ROW_ORIENTED, PETSC_FALSE)); 2028 PetscCall(MatGetOwnershipRange(M, &rstart, &rend)); 2029 aij = (Mat_SeqBAIJ *)(Mreuse)->data; 2030 ii = aij->i; 2031 jj = aij->j; 2032 aa = aij->a; 2033 for (i = 0; i < m; i++) { 2034 row = rstart / bs + i; 2035 nz = ii[i + 1] - ii[i]; 2036 cwork = jj; 2037 jj += nz; 2038 vwork = aa; 2039 aa += nz * bs * bs; 2040 PetscCall(MatSetValuesBlocked_MPIBAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES)); 2041 } 2042 2043 PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY)); 2044 PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY)); 2045 *newmat = M; 2046 2047 /* save submatrix used in processor for next request */ 2048 if (call == MAT_INITIAL_MATRIX) { 2049 PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse)); 2050 PetscCall(PetscObjectDereference((PetscObject)Mreuse)); 2051 } 2052 PetscFunctionReturn(PETSC_SUCCESS); 2053 } 2054 2055 PetscErrorCode MatPermute_MPIBAIJ(Mat A, IS rowp, IS colp, Mat *B) 2056 { 2057 MPI_Comm comm, pcomm; 2058 PetscInt clocal_size, nrows; 2059 const PetscInt *rows; 2060 PetscMPIInt size; 2061 IS crowp, lcolp; 2062 2063 PetscFunctionBegin; 2064 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 2065 /* make a collective version of 'rowp' */ 2066 PetscCall(PetscObjectGetComm((PetscObject)rowp, &pcomm)); 2067 if (pcomm == comm) { 2068 crowp = rowp; 2069 } else { 2070 PetscCall(ISGetSize(rowp, &nrows)); 2071 PetscCall(ISGetIndices(rowp, &rows)); 2072 PetscCall(ISCreateGeneral(comm, nrows, rows, PETSC_COPY_VALUES, &crowp)); 2073 PetscCall(ISRestoreIndices(rowp, &rows)); 2074 } 2075 PetscCall(ISSetPermutation(crowp)); 2076 /* make a local version of 'colp' */ 2077 PetscCall(PetscObjectGetComm((PetscObject)colp, &pcomm)); 2078 PetscCallMPI(MPI_Comm_size(pcomm, &size)); 2079 if (size == 1) { 2080 lcolp = colp; 2081 } else { 2082 PetscCall(ISAllGather(colp, &lcolp)); 2083 } 2084 PetscCall(ISSetPermutation(lcolp)); 2085 /* now we just get the submatrix */ 2086 PetscCall(MatGetLocalSize(A, NULL, &clocal_size)); 2087 PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(A, crowp, lcolp, clocal_size, MAT_INITIAL_MATRIX, B)); 2088 /* clean up */ 2089 if (pcomm != comm) PetscCall(ISDestroy(&crowp)); 2090 if (size > 1) PetscCall(ISDestroy(&lcolp)); 2091 PetscFunctionReturn(PETSC_SUCCESS); 2092 } 2093 2094 PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[]) 2095 { 2096 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 2097 Mat_SeqBAIJ *B = (Mat_SeqBAIJ *)baij->B->data; 2098 2099 PetscFunctionBegin; 2100 if (nghosts) *nghosts = B->nbs; 2101 if (ghosts) *ghosts = baij->garray; 2102 PetscFunctionReturn(PETSC_SUCCESS); 2103 } 2104 2105 PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A, Mat *newmat) 2106 { 2107 Mat B; 2108 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2109 Mat_SeqBAIJ *ad = (Mat_SeqBAIJ *)a->A->data, *bd = (Mat_SeqBAIJ *)a->B->data; 2110 Mat_SeqAIJ *b; 2111 PetscMPIInt size, rank, *recvcounts = NULL, *displs = NULL; 2112 PetscInt sendcount, i, *rstarts = A->rmap->range, n, cnt, j, bs = A->rmap->bs; 2113 PetscInt m, *garray = a->garray, *lens, *jsendbuf, *a_jsendbuf, *b_jsendbuf; 2114 2115 PetscFunctionBegin; 2116 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 2117 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank)); 2118 2119 /* Tell every processor the number of nonzeros per row */ 2120 PetscCall(PetscMalloc1(A->rmap->N / bs, &lens)); 
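/* each process fills the counts for its own block rows; the MPI_Allgatherv() below completes the global array */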
2121 for (i = A->rmap->rstart / bs; i < A->rmap->rend / bs; i++) lens[i] = ad->i[i - A->rmap->rstart / bs + 1] - ad->i[i - A->rmap->rstart / bs] + bd->i[i - A->rmap->rstart / bs + 1] - bd->i[i - A->rmap->rstart / bs]; 2122 PetscCall(PetscMalloc1(2 * size, &recvcounts)); 2123 displs = recvcounts + size; 2124 for (i = 0; i < size; i++) { 2125 recvcounts[i] = A->rmap->range[i + 1] / bs - A->rmap->range[i] / bs; 2126 displs[i] = A->rmap->range[i] / bs; 2127 } 2128 PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, lens, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A))); 2129 /* Create the sequential matrix of the same type as the local block diagonal */ 2130 PetscCall(MatCreate(PETSC_COMM_SELF, &B)); 2131 PetscCall(MatSetSizes(B, A->rmap->N / bs, A->cmap->N / bs, PETSC_DETERMINE, PETSC_DETERMINE)); 2132 PetscCall(MatSetType(B, MATSEQAIJ)); 2133 PetscCall(MatSeqAIJSetPreallocation(B, 0, lens)); 2134 b = (Mat_SeqAIJ *)B->data; 2135 2136 /* Copy my part of matrix column indices over */ 2137 sendcount = ad->nz + bd->nz; 2138 jsendbuf = b->j + b->i[rstarts[rank] / bs]; 2139 a_jsendbuf = ad->j; 2140 b_jsendbuf = bd->j; 2141 n = A->rmap->rend / bs - A->rmap->rstart / bs; 2142 cnt = 0; 2143 for (i = 0; i < n; i++) { 2144 /* put in lower diagonal portion */ 2145 m = bd->i[i + 1] - bd->i[i]; 2146 while (m > 0) { 2147 /* is it above diagonal (in bd (compressed) numbering) */ 2148 if (garray[*b_jsendbuf] > A->rmap->rstart / bs + i) break; 2149 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2150 m--; 2151 } 2152 2153 /* put in diagonal portion */ 2154 for (j = ad->i[i]; j < ad->i[i + 1]; j++) jsendbuf[cnt++] = A->rmap->rstart / bs + *a_jsendbuf++; 2155 2156 /* put in upper diagonal portion */ 2157 while (m-- > 0) jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2158 } 2159 PetscCheck(cnt == sendcount, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Corrupted PETSc matrix: nz given %" PetscInt_FMT " actual nz %" PetscInt_FMT, sendcount, cnt); 2160 2161 /* Gather all column indices to all processors */ 2162 for (i = 0; i < size; i++) { 2163 recvcounts[i] = 0; 2164 for (j = A->rmap->range[i] / bs; j < A->rmap->range[i + 1] / bs; j++) recvcounts[i] += lens[j]; 2165 } 2166 displs[0] = 0; 2167 for (i = 1; i < size; i++) displs[i] = displs[i - 1] + recvcounts[i - 1]; 2168 PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, b->j, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A))); 2169 /* Assemble the matrix into useable form (note numerical values not yet set) */ 2170 /* set the b->ilen (length of each row) values */ 2171 PetscCall(PetscArraycpy(b->ilen, lens, A->rmap->N / bs)); 2172 /* set the b->i indices */ 2173 b->i[0] = 0; 2174 for (i = 1; i <= A->rmap->N / bs; i++) b->i[i] = b->i[i - 1] + lens[i - 1]; 2175 PetscCall(PetscFree(lens)); 2176 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 2177 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 2178 PetscCall(PetscFree(recvcounts)); 2179 2180 PetscCall(MatPropagateSymmetryOptions(A, B)); 2181 *newmat = B; 2182 PetscFunctionReturn(PETSC_SUCCESS); 2183 } 2184 2185 PetscErrorCode MatSOR_MPIBAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) 2186 { 2187 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data; 2188 Vec bb1 = NULL; 2189 2190 PetscFunctionBegin; 2191 if (flag == SOR_APPLY_UPPER) { 2192 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2193 PetscFunctionReturn(PETSC_SUCCESS); 2194 } 2195 2196 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) 
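/* bb1 will hold the locally updated right-hand side bb - B*x used by the local sweeps */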
PetscCall(VecDuplicate(bb, &bb1)); 2197 2198 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2199 if (flag & SOR_ZERO_INITIAL_GUESS) { 2200 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2201 its--; 2202 } 2203 2204 while (its--) { 2205 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2206 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2207 2208 /* update rhs: bb1 = bb - B*x */ 2209 PetscCall(VecScale(mat->lvec, -1.0)); 2210 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 2211 2212 /* local sweep */ 2213 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx)); 2214 } 2215 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 2216 if (flag & SOR_ZERO_INITIAL_GUESS) { 2217 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2218 its--; 2219 } 2220 while (its--) { 2221 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2222 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2223 2224 /* update rhs: bb1 = bb - B*x */ 2225 PetscCall(VecScale(mat->lvec, -1.0)); 2226 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 2227 2228 /* local sweep */ 2229 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx)); 2230 } 2231 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 2232 if (flag & SOR_ZERO_INITIAL_GUESS) { 2233 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2234 its--; 2235 } 2236 while (its--) { 2237 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2238 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2239 2240 /* update rhs: bb1 = bb - B*x */ 2241 PetscCall(VecScale(mat->lvec, -1.0)); 2242 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 2243 2244 /* local sweep */ 2245 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx)); 2246 } 2247 } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel version of SOR requested not supported"); 2248 2249 PetscCall(VecDestroy(&bb1)); 2250 PetscFunctionReturn(PETSC_SUCCESS); 2251 } 2252 2253 PetscErrorCode MatGetColumnReductions_MPIBAIJ(Mat A, PetscInt type, PetscReal *reductions) 2254 { 2255 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)A->data; 2256 PetscInt m, N, i, *garray = aij->garray; 2257 PetscInt ib, jb, bs = A->rmap->bs; 2258 Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ *)aij->A->data; 2259 MatScalar *a_val = a_aij->a; 2260 Mat_SeqBAIJ *b_aij = (Mat_SeqBAIJ *)aij->B->data; 2261 MatScalar *b_val = b_aij->a; 2262 PetscReal *work; 2263 2264 PetscFunctionBegin; 2265 PetscCall(MatGetSize(A, &m, &N)); 2266 PetscCall(PetscCalloc1(N, &work)); 2267 if (type == NORM_2) { 2268 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2269 for (jb = 0; jb < bs; jb++) { 2270 for (ib = 0; ib < bs; ib++) { 2271 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val); 2272 a_val++; 2273 } 2274 } 2275 } 2276 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2277 for (jb = 0; jb < bs; jb++) { 2278 for (ib = 0; ib < bs; ib++) { 2279 work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val); 2280 b_val++; 2281 } 2282 } 2283 } 2284 } else if (type == NORM_1) { 2285 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2286 for (jb = 0; jb 
< bs; jb++) { 2287 for (ib = 0; ib < bs; ib++) { 2288 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val); 2289 a_val++; 2290 } 2291 } 2292 } 2293 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2294 for (jb = 0; jb < bs; jb++) { 2295 for (ib = 0; ib < bs; ib++) { 2296 work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val); 2297 b_val++; 2298 } 2299 } 2300 } 2301 } else if (type == NORM_INFINITY) { 2302 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2303 for (jb = 0; jb < bs; jb++) { 2304 for (ib = 0; ib < bs; ib++) { 2305 int col = A->cmap->rstart + a_aij->j[i] * bs + jb; 2306 work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]); 2307 a_val++; 2308 } 2309 } 2310 } 2311 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2312 for (jb = 0; jb < bs; jb++) { 2313 for (ib = 0; ib < bs; ib++) { 2314 int col = garray[b_aij->j[i]] * bs + jb; 2315 work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]); 2316 b_val++; 2317 } 2318 } 2319 } 2320 } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) { 2321 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2322 for (jb = 0; jb < bs; jb++) { 2323 for (ib = 0; ib < bs; ib++) { 2324 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val); 2325 a_val++; 2326 } 2327 } 2328 } 2329 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2330 for (jb = 0; jb < bs; jb++) { 2331 for (ib = 0; ib < bs; ib++) { 2332 work[garray[b_aij->j[i]] * bs + jb] += PetscRealPart(*b_val); 2333 b_val++; 2334 } 2335 } 2336 } 2337 } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) { 2338 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2339 for (jb = 0; jb < bs; jb++) { 2340 for (ib = 0; ib < bs; ib++) { 2341 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val); 2342 a_val++; 2343 } 2344 } 2345 } 2346 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2347 for (jb = 0; jb < bs; jb++) { 2348 for (ib = 0; ib < bs; ib++) { 2349 work[garray[b_aij->j[i]] * bs + jb] += PetscImaginaryPart(*b_val); 2350 b_val++; 2351 } 2352 } 2353 } 2354 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type"); 2355 if (type == NORM_INFINITY) { 2356 PetscCall(MPIU_Allreduce(work, reductions, N, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A))); 2357 } else { 2358 PetscCall(MPIU_Allreduce(work, reductions, N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A))); 2359 } 2360 PetscCall(PetscFree(work)); 2361 if (type == NORM_2) { 2362 for (i = 0; i < N; i++) reductions[i] = PetscSqrtReal(reductions[i]); 2363 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 2364 for (i = 0; i < N; i++) reductions[i] /= m; 2365 } 2366 PetscFunctionReturn(PETSC_SUCCESS); 2367 } 2368 2369 PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A, const PetscScalar **values) 2370 { 2371 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2372 2373 PetscFunctionBegin; 2374 PetscCall(MatInvertBlockDiagonal(a->A, values)); 2375 A->factorerrortype = a->A->factorerrortype; 2376 A->factorerror_zeropivot_value = a->A->factorerror_zeropivot_value; 2377 A->factorerror_zeropivot_row = a->A->factorerror_zeropivot_row; 2378 PetscFunctionReturn(PETSC_SUCCESS); 2379 } 2380 2381 PetscErrorCode MatShift_MPIBAIJ(Mat Y, PetscScalar a) 2382 { 2383 Mat_MPIBAIJ *maij = (Mat_MPIBAIJ *)Y->data; 2384 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)maij->A->data; 2385 2386 
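/* the shift only touches the diagonal blocks, so make sure they are preallocated before MatShift_Basic() runs */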
PetscFunctionBegin; 2387 if (!Y->preallocated) { 2388 PetscCall(MatMPIBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL, 0, NULL)); 2389 } else if (!aij->nz) { 2390 PetscInt nonew = aij->nonew; 2391 PetscCall(MatSeqBAIJSetPreallocation(maij->A, Y->rmap->bs, 1, NULL)); 2392 aij->nonew = nonew; 2393 } 2394 PetscCall(MatShift_Basic(Y, a)); 2395 PetscFunctionReturn(PETSC_SUCCESS); 2396 } 2397 2398 PetscErrorCode MatMissingDiagonal_MPIBAIJ(Mat A, PetscBool *missing, PetscInt *d) 2399 { 2400 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2401 2402 PetscFunctionBegin; 2403 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices"); 2404 PetscCall(MatMissingDiagonal(a->A, missing, d)); 2405 if (d) { 2406 PetscInt rstart; 2407 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 2408 *d += rstart / A->rmap->bs; 2409 } 2410 PetscFunctionReturn(PETSC_SUCCESS); 2411 } 2412 2413 PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A, Mat *a) 2414 { 2415 PetscFunctionBegin; 2416 *a = ((Mat_MPIBAIJ *)A->data)->A; 2417 PetscFunctionReturn(PETSC_SUCCESS); 2418 } 2419 2420 static PetscErrorCode MatEliminateZeros_MPIBAIJ(Mat A, PetscBool keep) 2421 { 2422 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2423 2424 PetscFunctionBegin; 2425 PetscCall(MatEliminateZeros_SeqBAIJ(a->A, keep)); // possibly keep zero diagonal coefficients 2426 PetscCall(MatEliminateZeros_SeqBAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients 2427 PetscFunctionReturn(PETSC_SUCCESS); 2428 } 2429 2430 static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ, 2431 MatGetRow_MPIBAIJ, 2432 MatRestoreRow_MPIBAIJ, 2433 MatMult_MPIBAIJ, 2434 /* 4*/ MatMultAdd_MPIBAIJ, 2435 MatMultTranspose_MPIBAIJ, 2436 MatMultTransposeAdd_MPIBAIJ, 2437 NULL, 2438 NULL, 2439 NULL, 2440 /*10*/ NULL, 2441 NULL, 2442 NULL, 2443 MatSOR_MPIBAIJ, 2444 MatTranspose_MPIBAIJ, 2445 /*15*/ MatGetInfo_MPIBAIJ, 2446 MatEqual_MPIBAIJ, 2447 MatGetDiagonal_MPIBAIJ, 2448 MatDiagonalScale_MPIBAIJ, 2449 MatNorm_MPIBAIJ, 2450 /*20*/ MatAssemblyBegin_MPIBAIJ, 2451 MatAssemblyEnd_MPIBAIJ, 2452 MatSetOption_MPIBAIJ, 2453 MatZeroEntries_MPIBAIJ, 2454 /*24*/ MatZeroRows_MPIBAIJ, 2455 NULL, 2456 NULL, 2457 NULL, 2458 NULL, 2459 /*29*/ MatSetUp_MPI_Hash, 2460 NULL, 2461 NULL, 2462 MatGetDiagonalBlock_MPIBAIJ, 2463 NULL, 2464 /*34*/ MatDuplicate_MPIBAIJ, 2465 NULL, 2466 NULL, 2467 NULL, 2468 NULL, 2469 /*39*/ MatAXPY_MPIBAIJ, 2470 MatCreateSubMatrices_MPIBAIJ, 2471 MatIncreaseOverlap_MPIBAIJ, 2472 MatGetValues_MPIBAIJ, 2473 MatCopy_MPIBAIJ, 2474 /*44*/ NULL, 2475 MatScale_MPIBAIJ, 2476 MatShift_MPIBAIJ, 2477 NULL, 2478 MatZeroRowsColumns_MPIBAIJ, 2479 /*49*/ NULL, 2480 NULL, 2481 NULL, 2482 NULL, 2483 NULL, 2484 /*54*/ MatFDColoringCreate_MPIXAIJ, 2485 NULL, 2486 MatSetUnfactored_MPIBAIJ, 2487 MatPermute_MPIBAIJ, 2488 MatSetValuesBlocked_MPIBAIJ, 2489 /*59*/ MatCreateSubMatrix_MPIBAIJ, 2490 MatDestroy_MPIBAIJ, 2491 MatView_MPIBAIJ, 2492 NULL, 2493 NULL, 2494 /*64*/ NULL, 2495 NULL, 2496 NULL, 2497 NULL, 2498 NULL, 2499 /*69*/ MatGetRowMaxAbs_MPIBAIJ, 2500 NULL, 2501 NULL, 2502 NULL, 2503 NULL, 2504 /*74*/ NULL, 2505 MatFDColoringApply_BAIJ, 2506 NULL, 2507 NULL, 2508 NULL, 2509 /*79*/ NULL, 2510 NULL, 2511 NULL, 2512 NULL, 2513 MatLoad_MPIBAIJ, 2514 /*84*/ NULL, 2515 NULL, 2516 NULL, 2517 NULL, 2518 NULL, 2519 /*89*/ NULL, 2520 NULL, 2521 NULL, 2522 NULL, 2523 NULL, 2524 /*94*/ NULL, 2525 NULL, 2526 NULL, 2527 NULL, 2528 NULL, 2529 /*99*/ NULL, 2530 NULL, 2531 NULL, 2532 MatConjugate_MPIBAIJ, 2533 NULL, 2534 /*104*/ NULL, 2535 MatRealPart_MPIBAIJ, 2536 
MatImaginaryPart_MPIBAIJ, 2537 NULL, 2538 NULL, 2539 /*109*/ NULL, 2540 NULL, 2541 NULL, 2542 NULL, 2543 MatMissingDiagonal_MPIBAIJ, 2544 /*114*/ MatGetSeqNonzeroStructure_MPIBAIJ, 2545 NULL, 2546 MatGetGhosts_MPIBAIJ, 2547 NULL, 2548 NULL, 2549 /*119*/ NULL, 2550 NULL, 2551 NULL, 2552 NULL, 2553 MatGetMultiProcBlock_MPIBAIJ, 2554 /*124*/ NULL, 2555 MatGetColumnReductions_MPIBAIJ, 2556 MatInvertBlockDiagonal_MPIBAIJ, 2557 NULL, 2558 NULL, 2559 /*129*/ NULL, 2560 NULL, 2561 NULL, 2562 NULL, 2563 NULL, 2564 /*134*/ NULL, 2565 NULL, 2566 NULL, 2567 NULL, 2568 NULL, 2569 /*139*/ MatSetBlockSizes_Default, 2570 NULL, 2571 NULL, 2572 MatFDColoringSetUp_MPIXAIJ, 2573 NULL, 2574 /*144*/ MatCreateMPIMatConcatenateSeqMat_MPIBAIJ, 2575 NULL, 2576 NULL, 2577 NULL, 2578 NULL, 2579 NULL, 2580 /*150*/ NULL, 2581 MatEliminateZeros_MPIBAIJ}; 2582 2583 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *); 2584 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *); 2585 2586 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[]) 2587 { 2588 PetscInt m, rstart, cstart, cend; 2589 PetscInt i, j, dlen, olen, nz, nz_max = 0, *d_nnz = NULL, *o_nnz = NULL; 2590 const PetscInt *JJ = NULL; 2591 PetscScalar *values = NULL; 2592 PetscBool roworiented = ((Mat_MPIBAIJ *)B->data)->roworiented; 2593 PetscBool nooffprocentries; 2594 2595 PetscFunctionBegin; 2596 PetscCall(PetscLayoutSetBlockSize(B->rmap, bs)); 2597 PetscCall(PetscLayoutSetBlockSize(B->cmap, bs)); 2598 PetscCall(PetscLayoutSetUp(B->rmap)); 2599 PetscCall(PetscLayoutSetUp(B->cmap)); 2600 PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 2601 m = B->rmap->n / bs; 2602 rstart = B->rmap->rstart / bs; 2603 cstart = B->cmap->rstart / bs; 2604 cend = B->cmap->rend / bs; 2605 2606 PetscCheck(!ii[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]); 2607 PetscCall(PetscMalloc2(m, &d_nnz, m, &o_nnz)); 2608 for (i = 0; i < m; i++) { 2609 nz = ii[i + 1] - ii[i]; 2610 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz); 2611 nz_max = PetscMax(nz_max, nz); 2612 dlen = 0; 2613 olen = 0; 2614 JJ = jj + ii[i]; 2615 for (j = 0; j < nz; j++) { 2616 if (*JJ < cstart || *JJ >= cend) olen++; 2617 else dlen++; 2618 JJ++; 2619 } 2620 d_nnz[i] = dlen; 2621 o_nnz[i] = olen; 2622 } 2623 PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, d_nnz, 0, o_nnz)); 2624 PetscCall(PetscFree2(d_nnz, o_nnz)); 2625 2626 values = (PetscScalar *)V; 2627 if (!values) PetscCall(PetscCalloc1(bs * bs * nz_max, &values)); 2628 for (i = 0; i < m; i++) { 2629 PetscInt row = i + rstart; 2630 PetscInt ncols = ii[i + 1] - ii[i]; 2631 const PetscInt *icols = jj + ii[i]; 2632 if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */ 2633 const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0); 2634 PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, ncols, icols, svals, INSERT_VALUES)); 2635 } else { /* block ordering does not match so we can only insert one block at a time. */ 2636 PetscInt j; 2637 for (j = 0; j < ncols; j++) { 2638 const PetscScalar *svals = values + (V ? 
(bs * bs * (ii[i] + j)) : 0); 2639 PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, 1, &icols[j], svals, INSERT_VALUES)); 2640 } 2641 } 2642 } 2643 2644 if (!V) PetscCall(PetscFree(values)); 2645 nooffprocentries = B->nooffprocentries; 2646 B->nooffprocentries = PETSC_TRUE; 2647 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 2648 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 2649 B->nooffprocentries = nooffprocentries; 2650 2651 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 2652 PetscFunctionReturn(PETSC_SUCCESS); 2653 } 2654 2655 /*@C 2656 MatMPIBAIJSetPreallocationCSR - Creates a sparse parallel matrix in `MATBAIJ` format using the given nonzero structure and (optional) numerical values 2657 2658 Collective 2659 2660 Input Parameters: 2661 + B - the matrix 2662 . bs - the block size 2663 . i - the indices into `j` for the start of each local row (starts with zero) 2664 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2665 - v - optional values in the matrix 2666 2667 Level: advanced 2668 2669 Notes: 2670 The order of the entries in values is specified by the `MatOption` `MAT_ROW_ORIENTED`. For example, C programs 2671 may want to use the default `MAT_ROW_ORIENTED` with value `PETSC_TRUE` and use an array v[nnz][bs][bs] where the second index is 2672 over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 2673 `MAT_ROW_ORIENTED` with value `PETSC_FALSE` and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 2674 block column and the second index is over columns within a block. 2675 2676 Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well 2677 2678 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MPIAIJ`, `MatCreateMPIBAIJWithArrays()`, `MPIBAIJ` 2679 @*/ 2680 PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[]) 2681 { 2682 PetscFunctionBegin; 2683 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 2684 PetscValidType(B, 1); 2685 PetscValidLogicalCollectiveInt(B, bs, 2); 2686 PetscTryMethod(B, "MatMPIBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v)); 2687 PetscFunctionReturn(PETSC_SUCCESS); 2688 } 2689 2690 PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz) 2691 { 2692 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 2693 PetscInt i; 2694 PetscMPIInt size; 2695 2696 PetscFunctionBegin; 2697 if (B->hash_active) { 2698 B->ops[0] = b->cops; 2699 B->hash_active = PETSC_FALSE; 2700 } 2701 if (!B->preallocated) PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), bs, &B->bstash)); 2702 PetscCall(MatSetBlockSize(B, PetscAbs(bs))); 2703 PetscCall(PetscLayoutSetUp(B->rmap)); 2704 PetscCall(PetscLayoutSetUp(B->cmap)); 2705 PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 2706 2707 if (d_nnz) { 2708 for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(d_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "d_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, d_nnz[i]); 2709 } 2710 if (o_nnz) { 2711 for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(o_nnz[i] >= 0, PETSC_COMM_SELF, 
PETSC_ERR_ARG_OUTOFRANGE, "o_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, o_nnz[i]); 2712 } 2713 2714 b->bs2 = bs * bs; 2715 b->mbs = B->rmap->n / bs; 2716 b->nbs = B->cmap->n / bs; 2717 b->Mbs = B->rmap->N / bs; 2718 b->Nbs = B->cmap->N / bs; 2719 2720 for (i = 0; i <= b->size; i++) b->rangebs[i] = B->rmap->range[i] / bs; 2721 b->rstartbs = B->rmap->rstart / bs; 2722 b->rendbs = B->rmap->rend / bs; 2723 b->cstartbs = B->cmap->rstart / bs; 2724 b->cendbs = B->cmap->rend / bs; 2725 2726 #if defined(PETSC_USE_CTABLE) 2727 PetscCall(PetscHMapIDestroy(&b->colmap)); 2728 #else 2729 PetscCall(PetscFree(b->colmap)); 2730 #endif 2731 PetscCall(PetscFree(b->garray)); 2732 PetscCall(VecDestroy(&b->lvec)); 2733 PetscCall(VecScatterDestroy(&b->Mvctx)); 2734 2735 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 2736 PetscCall(MatDestroy(&b->B)); 2737 PetscCall(MatCreate(PETSC_COMM_SELF, &b->B)); 2738 PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0)); 2739 PetscCall(MatSetType(b->B, MATSEQBAIJ)); 2740 2741 PetscCall(MatDestroy(&b->A)); 2742 PetscCall(MatCreate(PETSC_COMM_SELF, &b->A)); 2743 PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n)); 2744 PetscCall(MatSetType(b->A, MATSEQBAIJ)); 2745 2746 PetscCall(MatSeqBAIJSetPreallocation(b->A, bs, d_nz, d_nnz)); 2747 PetscCall(MatSeqBAIJSetPreallocation(b->B, bs, o_nz, o_nnz)); 2748 B->preallocated = PETSC_TRUE; 2749 B->was_assembled = PETSC_FALSE; 2750 B->assembled = PETSC_FALSE; 2751 PetscFunctionReturn(PETSC_SUCCESS); 2752 } 2753 2754 extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat, Vec); 2755 extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat, PetscReal); 2756 2757 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype, MatReuse reuse, Mat *adj) 2758 { 2759 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 2760 Mat_SeqBAIJ *d = (Mat_SeqBAIJ *)b->A->data, *o = (Mat_SeqBAIJ *)b->B->data; 2761 PetscInt M = B->rmap->n / B->rmap->bs, i, *ii, *jj, cnt, j, k, rstart = B->rmap->rstart / B->rmap->bs; 2762 const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray; 2763 2764 PetscFunctionBegin; 2765 PetscCall(PetscMalloc1(M + 1, &ii)); 2766 ii[0] = 0; 2767 for (i = 0; i < M; i++) { 2768 PetscCheck((id[i + 1] - id[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, id[i], id[i + 1]); 2769 PetscCheck((io[i + 1] - io[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, io[i], io[i + 1]); 2770 ii[i + 1] = ii[i] + id[i + 1] - id[i] + io[i + 1] - io[i]; 2771 /* remove one from count of matrix has diagonal */ 2772 for (j = id[i]; j < id[i + 1]; j++) { 2773 if (jd[j] == i) { 2774 ii[i + 1]--; 2775 break; 2776 } 2777 } 2778 } 2779 PetscCall(PetscMalloc1(ii[M], &jj)); 2780 cnt = 0; 2781 for (i = 0; i < M; i++) { 2782 for (j = io[i]; j < io[i + 1]; j++) { 2783 if (garray[jo[j]] > rstart) break; 2784 jj[cnt++] = garray[jo[j]]; 2785 } 2786 for (k = id[i]; k < id[i + 1]; k++) { 2787 if (jd[k] != i) jj[cnt++] = rstart + jd[k]; 2788 } 2789 for (; j < io[i + 1]; j++) jj[cnt++] = garray[jo[j]]; 2790 } 2791 PetscCall(MatCreateMPIAdj(PetscObjectComm((PetscObject)B), M, B->cmap->N / B->rmap->bs, ii, jj, NULL, adj)); 2792 PetscFunctionReturn(PETSC_SUCCESS); 2793 } 2794 2795 #include <../src/mat/impls/aij/mpi/mpiaij.h> 2796 2797 PETSC_INTERN PetscErrorCode 
MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType, MatReuse, Mat *); 2798 2799 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 2800 { 2801 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2802 Mat_MPIAIJ *b; 2803 Mat B; 2804 2805 PetscFunctionBegin; 2806 PetscCheck(A->assembled, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Matrix must be assembled"); 2807 2808 if (reuse == MAT_REUSE_MATRIX) { 2809 B = *newmat; 2810 } else { 2811 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 2812 PetscCall(MatSetType(B, MATMPIAIJ)); 2813 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 2814 PetscCall(MatSetBlockSizes(B, A->rmap->bs, A->cmap->bs)); 2815 PetscCall(MatSeqAIJSetPreallocation(B, 0, NULL)); 2816 PetscCall(MatMPIAIJSetPreallocation(B, 0, NULL, 0, NULL)); 2817 } 2818 b = (Mat_MPIAIJ *)B->data; 2819 2820 if (reuse == MAT_REUSE_MATRIX) { 2821 PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_REUSE_MATRIX, &b->A)); 2822 PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_REUSE_MATRIX, &b->B)); 2823 } else { 2824 PetscInt *garray = a->garray; 2825 Mat_SeqAIJ *bB; 2826 PetscInt bs, nnz; 2827 PetscCall(MatDestroy(&b->A)); 2828 PetscCall(MatDestroy(&b->B)); 2829 /* just clear out the data structure */ 2830 PetscCall(MatDisAssemble_MPIAIJ(B)); 2831 PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A)); 2832 PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B)); 2833 2834 /* Global numbering for b->B columns */ 2835 bB = (Mat_SeqAIJ *)b->B->data; 2836 bs = A->rmap->bs; 2837 nnz = bB->i[A->rmap->n]; 2838 for (PetscInt k = 0; k < nnz; k++) { 2839 PetscInt bj = bB->j[k] / bs; 2840 PetscInt br = bB->j[k] % bs; 2841 bB->j[k] = garray[bj] * bs + br; 2842 } 2843 } 2844 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 2845 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 2846 2847 if (reuse == MAT_INPLACE_MATRIX) { 2848 PetscCall(MatHeaderReplace(A, &B)); 2849 } else { 2850 *newmat = B; 2851 } 2852 PetscFunctionReturn(PETSC_SUCCESS); 2853 } 2854 2855 /*MC 2856 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 2857 2858 Options Database Keys: 2859 + -mat_type mpibaij - sets the matrix type to `MATMPIBAIJ` during a call to `MatSetFromOptions()` 2860 . -mat_block_size <bs> - set the blocksize used to store the matrix 2861 . -mat_baij_mult_version version - indicate the version of the matrix-vector product to use (0 often indicates using BLAS) 2862 - -mat_use_hash_table <fact> - set hash table factor 2863 2864 Level: beginner 2865 2866 Note: 2867 `MatSetOption(A, MAT_STRUCTURE_ONLY, PETSC_TRUE)` may be called for this matrix type. 
In this case no 2868 space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored. 2869 2870 .seealso: `Mat`, `MATBAIJ`, `MATSEQBAIJ`, `MatCreateBAIJ` 2871 M*/ 2872 2873 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat, MatType, MatReuse, Mat *); 2874 2875 PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B) 2876 { 2877 Mat_MPIBAIJ *b; 2878 PetscBool flg = PETSC_FALSE; 2879 2880 PetscFunctionBegin; 2881 PetscCall(PetscNew(&b)); 2882 B->data = (void *)b; 2883 B->ops[0] = MatOps_Values; 2884 B->assembled = PETSC_FALSE; 2885 2886 B->insertmode = NOT_SET_VALUES; 2887 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank)); 2888 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &b->size)); 2889 2890 /* build local table of row and column ownerships */ 2891 PetscCall(PetscMalloc1(b->size + 1, &b->rangebs)); 2892 2893 /* build cache for off-process entries formed during assembly */ 2894 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash)); 2895 2896 b->donotstash = PETSC_FALSE; 2897 b->colmap = NULL; 2898 b->garray = NULL; 2899 b->roworiented = PETSC_TRUE; 2900 2901 /* stuff used in block assembly */ 2902 b->barray = NULL; 2903 2904 /* stuff used for matrix vector multiply */ 2905 b->lvec = NULL; 2906 b->Mvctx = NULL; 2907 2908 /* stuff for MatGetRow() */ 2909 b->rowindices = NULL; 2910 b->rowvalues = NULL; 2911 b->getrowactive = PETSC_FALSE; 2912 2913 /* hash table stuff */ 2914 b->ht = NULL; 2915 b->hd = NULL; 2916 b->ht_size = 0; 2917 b->ht_flag = PETSC_FALSE; 2918 b->ht_fact = 0; 2919 b->ht_total_ct = 0; 2920 b->ht_insert_ct = 0; 2921 2922 /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */ 2923 b->ijonly = PETSC_FALSE; 2924 2925 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiadj_C", MatConvert_MPIBAIJ_MPIAdj)); 2926 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiaij_C", MatConvert_MPIBAIJ_MPIAIJ)); 2927 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpisbaij_C", MatConvert_MPIBAIJ_MPISBAIJ)); 2928 #if defined(PETSC_HAVE_HYPRE) 2929 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_hypre_C", MatConvert_AIJ_HYPRE)); 2930 #endif 2931 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIBAIJ)); 2932 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIBAIJ)); 2933 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocation_C", MatMPIBAIJSetPreallocation_MPIBAIJ)); 2934 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocationCSR_C", MatMPIBAIJSetPreallocationCSR_MPIBAIJ)); 2935 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIBAIJ)); 2936 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetHashTableFactor_C", MatSetHashTableFactor_MPIBAIJ)); 2937 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_is_C", MatConvert_XAIJ_IS)); 2938 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIBAIJ)); 2939 2940 PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Options for loading MPIBAIJ matrix 1", "Mat"); 2941 PetscCall(PetscOptionsName("-mat_use_hash_table", "Use hash table to save time in constructing matrix", "MatSetOption", &flg)); 2942 if (flg) { 2943 PetscReal fact = 1.39; 2944 PetscCall(MatSetOption(B, MAT_USE_HASH_TABLE, PETSC_TRUE)); 2945 PetscCall(PetscOptionsReal("-mat_use_hash_table", "Use
hash table factor", "MatMPIBAIJSetHashTableFactor", fact, &fact, NULL)); 2946 if (fact <= 1.0) fact = 1.39; 2947 PetscCall(MatMPIBAIJSetHashTableFactor(B, fact)); 2948 PetscCall(PetscInfo(B, "Hash table Factor used %5.2g\n", (double)fact)); 2949 } 2950 PetscOptionsEnd(); 2951 PetscFunctionReturn(PETSC_SUCCESS); 2952 } 2953 2954 // PetscClangLinter pragma disable: -fdoc-section-header-unknown 2955 /*MC 2956 MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 2957 2958 This matrix type is identical to `MATSEQBAIJ` when constructed with a single process communicator, 2959 and `MATMPIBAIJ` otherwise. 2960 2961 Options Database Keys: 2962 . -mat_type baij - sets the matrix type to `MATBAIJ` during a call to `MatSetFromOptions()` 2963 2964 Level: beginner 2965 2966 .seealso: `Mat`, `MatCreateBAIJ()`, `MATSEQBAIJ`, `MATMPIBAIJ`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()` 2967 M*/ 2968 2969 /*@C 2970 MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in `MATMPIBAIJ` format 2971 (block compressed row). 2972 2973 Collective 2974 2975 Input Parameters: 2976 + B - the matrix 2977 . bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row 2978 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()` 2979 . d_nz - number of block nonzeros per block row in diagonal portion of local 2980 submatrix (same for all local rows) 2981 . d_nnz - array containing the number of block nonzeros in the various block rows 2982 of the in diagonal portion of the local (possibly different for each block 2983 row) or `NULL`. If you plan to factor the matrix you must leave room for the diagonal entry and 2984 set it even if it is zero. 2985 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2986 submatrix (same for all local rows). 2987 - o_nnz - array containing the number of nonzeros in the various block rows of the 2988 off-diagonal portion of the local submatrix (possibly different for 2989 each block row) or `NULL`. 2990 2991 If the *_nnz parameter is given then the *_nz parameter is ignored 2992 2993 Options Database Keys: 2994 + -mat_block_size - size of the blocks to use 2995 - -mat_use_hash_table <fact> - set hash table factor 2996 2997 Level: intermediate 2998 2999 Notes: 3000 For good matrix assembly performance 3001 the user should preallocate the matrix storage by setting the parameters 3002 `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). By setting these parameters accurately, 3003 performance can be increased by more than a factor of 50. 3004 3005 If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor 3006 than it must be used on all processors that share the object for that argument. 3007 3008 Storage Information: 3009 For a square global matrix we define each processor's diagonal portion 3010 to be its local rows and the corresponding columns (a square submatrix); 3011 each processor's off-diagonal portion encompasses the remainder of the 3012 local matrix (a rectangular submatrix). 3013 3014 The user can specify preallocated storage for the diagonal part of 3015 the local submatrix with either `d_nz` or `d_nnz` (not both). Set 3016 `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic 3017 memory allocation. 
.seealso: `Mat`, `MATMPIBAIJ`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatMPIBAIJSetPreallocationCSR()`, `PetscSplitOwnership()`
@*/
PetscErrorCode MatMPIBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidType(B, 1);
  PetscValidLogicalCollectiveInt(B, bs, 2);
  PetscTryMethod(B, "MatMPIBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, bs, d_nz, d_nnz, o_nz, o_nnz));
  PetscFunctionReturn(PETSC_SUCCESS);
}
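/*
  Illustrative follow-up (not part of the library source): checking how effective a
  preallocation was, using the MatGetInfo() fields mentioned in the documentation above.

    MatInfo info;
    PetscCall(MatGetInfo(A, MAT_GLOBAL_SUM, &info));
    PetscCall(PetscPrintf(PETSC_COMM_WORLD, "mallocs %g nz_unneeded %g\n", info.mallocs, info.nz_unneeded));
*/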
// PetscClangLinter pragma disable: -fdoc-section-header-unknown
/*@C
   MatCreateBAIJ - Creates a sparse parallel matrix in `MATBAIJ` format
   (block compressed row).

   Collective

   Input Parameters:
+ comm  - MPI communicator
. bs    - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
          blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
. m     - number of local rows (or `PETSC_DECIDE` to have it calculated if `M` is given)
          This value should be the same as the local size used in creating the
          y vector for the matrix-vector product y = Ax.
. n     - number of local columns (or `PETSC_DECIDE` to have it calculated if `N` is given)
          This value should be the same as the local size used in creating the
          x vector for the matrix-vector product y = Ax.
. M     - number of global rows (or `PETSC_DETERMINE` to have it calculated if `m` is given)
. N     - number of global columns (or `PETSC_DETERMINE` to have it calculated if `n` is given)
. d_nz  - number of nonzero blocks per block row in the diagonal portion of the local
          submatrix (same for all local rows)
. d_nnz - array containing the number of nonzero blocks in the various block rows
          of the diagonal portion of the local submatrix (possibly different for each block
          row) or `NULL`. If you plan to factor the matrix you must leave room for the diagonal entry
          and set it even if it is zero.
. o_nz  - number of nonzero blocks per block row in the off-diagonal portion of the local
          submatrix (same for all local rows).
- o_nnz - array containing the number of nonzero blocks in the various block rows of the
          off-diagonal portion of the local submatrix (possibly different for
          each block row) or `NULL`.

   Output Parameter:
. A - the matrix

   Options Database Keys:
+ -mat_block_size            - size of the blocks to use
- -mat_use_hash_table <fact> - set hash table factor

   Level: intermediate

   Notes:
   For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, `MatSeqBAIJSetPreallocation()`]

   If the *_nnz parameter is given then the *_nz parameter is ignored

   A nonzero block is any block that has 1 or more nonzeros in it

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either `d_nz` or `d_nnz` (not both). Set
   `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic
   memory allocation. Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          --------------------------
   row 3  |o o o d d d o o o o  o  o
   row 4  |o o o d d d o o o o  o  o
   row 5  |o o o d d d o o o o  o  o
          --------------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix. Note that the d and the o submatrices are
   stored simply in the `MATSEQBAIJ` format for compressed row storage.

   Now `d_nz` should indicate the number of block nonzeros per row in the d matrix,
   and `o_nz` should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects `d_nz` >> `o_nz`.
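   Example Usage:
   A minimal sketch (block size and counts are illustrative only): create a distributed
   matrix of 3x3 blocks, letting PETSc choose the row and column distribution, with five
   diagonal-portion and two off-diagonal-portion nonzero blocks preallocated per block row.

.vb
   Mat A;
   MatCreateBAIJ(comm, 3, PETSC_DECIDE, PETSC_DECIDE, M, N, 5, NULL, 2, NULL, &A);
   // ... set values, then assemble ...
   MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);
   MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);
.ve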
.seealso: `Mat`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()`
@*/
PetscErrorCode MatCreateBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
{
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCall(MatCreate(comm, A));
  PetscCall(MatSetSizes(*A, m, n, M, N));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  if (size > 1) {
    PetscCall(MatSetType(*A, MATMPIBAIJ));
    PetscCall(MatMPIBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz));
  } else {
    PetscCall(MatSetType(*A, MATSEQBAIJ));
    PetscCall(MatSeqBAIJSetPreallocation(*A, bs, d_nz, d_nnz));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
{
  Mat          mat;
  Mat_MPIBAIJ *a, *oldmat = (Mat_MPIBAIJ *)matin->data;
  PetscInt     len = 0;

  PetscFunctionBegin;
  *newmat = NULL;
  PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
  PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
  PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));

  mat->factortype   = matin->factortype;
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;

  a             = (Mat_MPIBAIJ *)mat->data;
  mat->rmap->bs = matin->rmap->bs;
  a->bs2        = oldmat->bs2;
  a->mbs        = oldmat->mbs;
  a->nbs        = oldmat->nbs;
  a->Mbs        = oldmat->Mbs;
  a->Nbs        = oldmat->Nbs;

  PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
  PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = NULL;
  a->rowvalues    = NULL;
  a->getrowactive = PETSC_FALSE;
  a->barray       = NULL;
  a->rstartbs     = oldmat->rstartbs;
  a->rendbs       = oldmat->rendbs;
  a->cstartbs     = oldmat->cstartbs;
  a->cendbs       = oldmat->cendbs;

  /* hash table stuff */
  a->ht           = NULL;
  a->hd           = NULL;
  a->ht_size      = 0;
  a->ht_flag      = oldmat->ht_flag;
  a->ht_fact      = oldmat->ht_fact;
  a->ht_total_ct  = 0;
  a->ht_insert_ct = 0;

  PetscCall(PetscArraycpy(a->rangebs, oldmat->rangebs, a->size + 1));
  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
#else
    PetscCall(PetscMalloc1(a->Nbs, &a->colmap));
    PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, a->Nbs));
#endif
  } else a->colmap = NULL;

  if (oldmat->garray && (len = ((Mat_SeqBAIJ *)(oldmat->B->data))->nbs)) {
    PetscCall(PetscMalloc1(len, &a->garray));
    PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
  } else a->garray = NULL;

  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin), matin->rmap->bs, &mat->bstash));
  PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
  PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx));

  PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
  PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
  PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
  *newmat = mat;
  PetscFunctionReturn(PETSC_SUCCESS);
}
/* Used for both MPIBAIJ and MPISBAIJ matrices */
PetscErrorCode MatLoad_MPIBAIJ_Binary(Mat mat, PetscViewer viewer)
{
  PetscInt     header[4], M, N, nz, bs, m, n, mbs, nbs, rows, cols, sum, i, j, k;
  PetscInt    *rowidxs, *colidxs, rs, cs, ce;
  PetscScalar *matvals;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  /* read in matrix header */
  PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
  PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
  M  = header[1];
  N  = header[2];
  nz = header[3];
  PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
  PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
  PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIBAIJ");

  /* set block sizes from the viewer's .info file */
  PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
  /* set local sizes if not set already */
  if (mat->rmap->n < 0 && M == N) mat->rmap->n = mat->cmap->n;
  if (mat->cmap->n < 0 && M == N) mat->cmap->n = mat->rmap->n;
  /* set global sizes if not set already */
  if (mat->rmap->N < 0) mat->rmap->N = M;
  if (mat->cmap->N < 0) mat->cmap->N = N;
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));

  /* check if the matrix sizes are correct */
  PetscCall(MatGetSize(mat, &rows, &cols));
  PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);
  PetscCall(MatGetBlockSize(mat, &bs));
  PetscCall(MatGetLocalSize(mat, &m, &n));
  PetscCall(PetscLayoutGetRange(mat->rmap, &rs, NULL));
  PetscCall(PetscLayoutGetRange(mat->cmap, &cs, &ce));
  mbs = m / bs;
  nbs = n / bs;

  /* read in row lengths and build row indices */
  PetscCall(PetscMalloc1(m + 1, &rowidxs));
  PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT));
  rowidxs[0] = 0;
  for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
  PetscCall(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer)));
  PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);

  /* read in column indices and matrix values */
  PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals));
  PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
  PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));

  { /* preallocate matrix storage */
    PetscBT    bt; /* helper bit set to count diagonal nonzeros */
    PetscHSetI ht; /* helper hash set to count off-diagonal nonzeros */
    PetscBool  sbaij, done;
    PetscInt  *d_nnz, *o_nnz;

    PetscCall(PetscBTCreate(nbs, &bt));
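    /* Block-column occupancy is tracked per block row: the bit set covers the nbs
       locally owned (diagonal-portion) block columns, while the hash set handles the
       arbitrarily scattered off-diagonal block columns, so each distinct block is
       counted exactly once toward d_nnz[] or o_nnz[]. */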
    PetscCall(PetscHSetICreate(&ht));
    PetscCall(PetscCalloc2(mbs, &d_nnz, mbs, &o_nnz));
    PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPISBAIJ, &sbaij));
    for (i = 0; i < mbs; i++) {
      PetscCall(PetscBTMemzero(nbs, bt));
      PetscCall(PetscHSetIClear(ht));
      for (k = 0; k < bs; k++) {
        PetscInt row = bs * i + k;
        for (j = rowidxs[row]; j < rowidxs[row + 1]; j++) {
          PetscInt col = colidxs[j];
          if (!sbaij || col >= row) {
            if (col >= cs && col < ce) {
              if (!PetscBTLookupSet(bt, (col - cs) / bs)) d_nnz[i]++;
            } else {
              PetscCall(PetscHSetIQueryAdd(ht, col / bs, &done));
              if (done) o_nnz[i]++;
            }
          }
        }
      }
    }
    PetscCall(PetscBTDestroy(&bt));
    PetscCall(PetscHSetIDestroy(&ht));
    PetscCall(MatMPIBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz));
    PetscCall(MatMPISBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz));
    PetscCall(PetscFree2(d_nnz, o_nnz));
  }

  /* store matrix values */
  for (i = 0; i < m; i++) {
    PetscInt row = rs + i, s = rowidxs[i], e = rowidxs[i + 1];
    PetscCall((*mat->ops->setvalues)(mat, 1, &row, e - s, colidxs + s, matvals + s, INSERT_VALUES));
  }

  PetscCall(PetscFree(rowidxs));
  PetscCall(PetscFree2(colidxs, matvals));
  PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatLoad_MPIBAIJ(Mat mat, PetscViewer viewer)
{
  PetscBool isbinary;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name);
  PetscCall(MatLoad_MPIBAIJ_Binary(mat, viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}
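/*
  Illustrative usage sketch (not part of the library source): this loader is reached through
  the generic MatLoad() interface; "matrix.dat" is a placeholder file name.

    Mat         A;
    PetscViewer viewer;
    PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, "matrix.dat", FILE_MODE_READ, &viewer));
    PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
    PetscCall(MatSetType(A, MATMPIBAIJ));
    PetscCall(MatLoad(A, viewer));
    PetscCall(PetscViewerDestroy(&viewer));
*/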
/*@
   MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the matrix's hash table

   Input Parameters:
+ mat  - the matrix
- fact - factor

   Options Database Key:
. -mat_use_hash_table <fact> - provide the factor

   Level: advanced

.seealso: `Mat`, `MATMPIBAIJ`, `MatSetOption()`
@*/
PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat, PetscReal fact)
{
  PetscFunctionBegin;
  PetscTryMethod(mat, "MatSetHashTableFactor_C", (Mat, PetscReal), (mat, fact));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat, PetscReal fact)
{
  Mat_MPIBAIJ *baij;

  PetscFunctionBegin;
  baij          = (Mat_MPIBAIJ *)mat->data;
  baij->ht_fact = fact;
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[])
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
  PetscBool    flg;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIBAIJ, &flg));
  PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIBAIJ matrix as input");
  if (Ad) *Ad = a->A;
  if (Ao) *Ao = a->B;
  if (colmap) *colmap = a->garray;
  PetscFunctionReturn(PETSC_SUCCESS);
}
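/*
  Illustrative usage sketch (not part of the library source): retrieving the sequential
  diagonal and off-diagonal parts of a MATMPIBAIJ matrix; both are borrowed references
  and must not be destroyed by the caller.

    Mat             Ad, Ao;
    const PetscInt *garray;
    PetscCall(MatMPIBAIJGetSeqBAIJ(A, &Ad, &Ao, &garray));
    // garray[k] gives the global block column of local block column k of Ao
*/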
/*
    Special version for direct calls from Fortran (to eliminate two function call overheads)
*/
#if defined(PETSC_HAVE_FORTRAN_CAPS)
  #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
  #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
#endif

// PetscClangLinter pragma disable: -fdoc-synopsis-matching-symbol-name
/*@C
   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace a call to `MatSetValuesBlocked()`

   Collective

   Input Parameters:
+ matin  - the matrix
. min    - number of input rows
. im     - input rows
. nin    - number of input columns
. in     - input columns
. v      - numerical values input
- addvin - `INSERT_VALUES` or `ADD_VALUES`

   Level: advanced

   Developer Notes:
   This has a complete copy of `MatSetValuesBlocked_MPIBAIJ()`, which is terrible code duplication.

.seealso: `Mat`, `MatSetValuesBlocked()`
@*/
PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin, PetscInt *min, const PetscInt im[], PetscInt *nin, const PetscInt in[], const MatScalar v[], InsertMode *addvin)
{
  /* convert input arguments to C version */
  Mat        mat = *matin;
  PetscInt   m = *min, n = *nin;
  InsertMode addv = *addvin;

  Mat_MPIBAIJ     *baij = (Mat_MPIBAIJ *)mat->data;
  const MatScalar *value;
  MatScalar       *barray      = baij->barray;
  PetscBool        roworiented = baij->roworiented;
  PetscInt         i, j, ii, jj, row, col, rstart = baij->rstartbs;
  PetscInt         rend = baij->rendbs, cstart = baij->cstartbs, stepval;
  PetscInt         cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;

  PetscFunctionBegin;
  /* tasks normally handled by MatSetValuesBlocked() */
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
  else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values");
  PetscCheck(!mat->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix");
  if (mat->assembled) {
    mat->was_assembled = PETSC_TRUE;
    mat->assembled     = PETSC_FALSE;
  }
  PetscCall(PetscLogEventBegin(MAT_SetValues, mat, 0, 0, 0));

  if (!barray) {
    PetscCall(PetscMalloc1(bs2, &barray));
    baij->barray = barray;
  }

  if (roworiented) stepval = (n - 1) * bs;
  else stepval = (m - 1) * bs;

  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue;
    PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large, row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j = 0; j < n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar *)v + i * bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar *)v + j * bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i * (stepval + bs) * bs + j * bs;
          } else {
            value = v + j * (stepval + bs) * bs + i * bs;
          }
          for (ii = 0; ii < bs; ii++, value += stepval) {
            for (jj = 0; jj < bs; jj++) *barray++ = *value++;
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend) {
          col = in[j] - cstart;
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
        } else if (in[j] < 0) {
          continue;
        } else {
          PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large, col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
          if (mat->was_assembled) {
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));

#if defined(PETSC_USE_DEBUG)
  #if defined(PETSC_USE_CTABLE)
            {
              PetscInt data;
              PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &data));
              PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
            }
  #else
            PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
  #endif
#endif
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col));
            col = (col - 1) / bs;
#else
            col = (baij->colmap[in[j]] - 1) / bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ *)(baij->A->data))->nonew) {
              PetscCall(MatDisAssemble_MPIBAIJ(mat));
              col = in[j];
            }
          } else col = in[j];
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        } else {
          PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        }
      }
    }
  }

  /* task normally handled by MatSetValuesBlocked() */
  PetscCall(PetscLogEventEnd(MAT_SetValues, mat, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
   MatCreateMPIBAIJWithArrays - creates a `MATMPIBAIJ` matrix using arrays that contain the local rows
   in standard block CSR format.

   Collective

   Input Parameters:
+ comm - MPI communicator
. bs   - the block size, only a block size of 1 is supported
. m    - number of local rows (cannot be `PETSC_DECIDE`)
. n    - number of local columns (or `PETSC_DECIDE` to have it calculated if `N` is given).
         This value should be the same as the local size used in creating the
         x vector for the matrix-vector product y = Ax; for square matrices n is almost always m.
. M    - number of global rows (or `PETSC_DETERMINE` to have it calculated if `m` is given)
. N    - number of global columns (or `PETSC_DETERMINE` to have it calculated if `n` is given)
. i    - row indices; that is i[0] = 0 and i[row] = i[row-1] + number of block elements in block row `row-1` of the matrix
. j    - column indices
- a    - matrix values

   Output Parameter:
. mat - the matrix

   Level: intermediate

   Notes:
   The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc;
   thus you CANNOT change the matrix entries by changing the values of a[] after you have
   called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.

   The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
   the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
   block, followed by the second column of the first block, and so on. That is, the blocks are contiguous in memory
   with column-major ordering within blocks.

   The `i` and `j` indices are 0 based, and the `i` indices are indices corresponding to the local `j` array.
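   Example Usage:
   A minimal sketch (arrays and sizes are illustrative only) with bs = 1, per the restriction
   above, and two local rows holding the dense block [1 2; 3 4].

.vb
   PetscInt    i[] = {0, 2, 4};
   PetscInt    j[] = {0, 1, 0, 1};
   PetscScalar a[] = {1.0, 2.0, 3.0, 4.0};
   Mat         A;
   MatCreateMPIBAIJWithArrays(PETSC_COMM_WORLD, 1, 2, 2, PETSC_DETERMINE, PETSC_DETERMINE, i, j, a, &A);
.ve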
.seealso: `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
          `MPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`
@*/
PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat)
{
  PetscFunctionBegin;
  PetscCheck(!i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatSetSizes(*mat, m, n, M, N));
  PetscCall(MatSetType(*mat, MATMPIBAIJ));
  PetscCall(MatSetBlockSize(*mat, bs));
  PetscCall(MatSetUp(*mat));
  PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_FALSE));
  PetscCall(MatMPIBAIJSetPreallocationCSR(*mat, bs, i, j, a));
  PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat)
{
  PetscInt     m, N, i, rstart, nnz, Ii, bs, cbs;
  PetscInt    *indx;
  PetscScalar *values;

  PetscFunctionBegin;
  PetscCall(MatGetSize(inmat, &m, &N));
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
    Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)inmat->data;
    PetscInt    *dnz, *onz, mbs, Nbs, nbs;
    PetscInt    *bindx, rmax = a->rmax, j;
    PetscMPIInt  rank, size;

    PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
    mbs = m / bs;
    Nbs = N / cbs;
    if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnershipBlock(comm, cbs, &n, &N));
    nbs = n / cbs;

    PetscCall(PetscMalloc1(rmax, &bindx));
    MatPreallocateBegin(comm, mbs, nbs, dnz, onz); /* inline function, output __end and __rstart are used below */

    PetscCallMPI(MPI_Comm_rank(comm, &rank));
    PetscCallMPI(MPI_Comm_size(comm, &size));
    if (rank == size - 1) {
      /* Check sum(nbs) = Nbs */
      PetscCheck(__end == Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT, __end, Nbs);
    }

    rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */
    for (i = 0; i < mbs; i++) {
      PetscCall(MatGetRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL)); /* non-blocked nnz and indx */
      nnz = nnz / bs;
      for (j = 0; j < nnz; j++) bindx[j] = indx[j * bs] / bs;
      PetscCall(MatPreallocateSet(i + rstart, nnz, bindx, dnz, onz));
      PetscCall(MatRestoreRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL));
    }
    PetscCall(PetscFree(bindx));

    PetscCall(MatCreate(comm, outmat));
    PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE));
    PetscCall(MatSetBlockSizes(*outmat, bs, cbs));
    PetscCall(MatSetType(*outmat, MATBAIJ));
    PetscCall(MatSeqBAIJSetPreallocation(*outmat, bs, 0, dnz));
    PetscCall(MatMPIBAIJSetPreallocation(*outmat, bs, 0, dnz, 0, onz));
    MatPreallocateEnd(dnz, onz);
    PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE));
  }

  /* numeric phase */
  PetscCall(MatGetBlockSizes(inmat, &bs, &cbs));
  PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL));

  for (i = 0; i < m; i++) {
    PetscCall(MatGetRow_SeqBAIJ(inmat, i, &nnz, &indx, &values));
    Ii = i + rstart;
    PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES));
    PetscCall(MatRestoreRow_SeqBAIJ(inmat, i, &nnz, &indx, &values));
  }
  PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}