#include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/

#include <petsc/private/hashseti.h>
#include <petscblaslapack.h>
#include <petscsf.h>

PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
  PetscCall(MatStashDestroy_Private(&mat->stash));
  PetscCall(MatStashDestroy_Private(&mat->bstash));
  PetscCall(MatDestroy(&baij->A));
  PetscCall(MatDestroy(&baij->B));
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&baij->colmap));
#else
  PetscCall(PetscFree(baij->colmap));
#endif
  PetscCall(PetscFree(baij->garray));
  PetscCall(VecDestroy(&baij->lvec));
  PetscCall(VecScatterDestroy(&baij->Mvctx));
  PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));
  PetscCall(PetscFree(baij->barray));
  PetscCall(PetscFree2(baij->hd, baij->ht));
  PetscCall(PetscFree(baij->rangebs));
  PetscCall(PetscFree(mat->data));

  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocationCSR_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetHashTableFactor_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpisbaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiadj_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiaij_C", NULL));
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_hypre_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_is_C", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and MatAssemblyEnd_MPI_Hash() */
#define TYPE BAIJ
#include "../src/mat/impls/aij/mpi/mpihashmat.h"
#undef TYPE

#if defined(PETSC_HAVE_HYPRE)
PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
#endif
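/*
   MatGetRowMaxAbs_MPIBAIJ() below computes, for each local row, the entry of maximum absolute
   value. The result is merged from the diagonal block A and the off-diagonal block B; column
   indices coming from B are translated back to global point indices through garray, which maps
   local block columns of B to global block columns. On ties the smaller global column index wins.
*/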
PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIBAIJ       *a = (Mat_MPIBAIJ *)A->data;
  PetscInt           i, *idxb = NULL, m = A->rmap->n, bs = A->cmap->bs;
  PetscScalar       *va, *vv;
  Vec                vB, vA;
  const PetscScalar *vb;

  PetscFunctionBegin;
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vA));
  PetscCall(MatGetRowMaxAbs(a->A, vA, idx));

  PetscCall(VecGetArrayWrite(vA, &va));
  if (idx) {
    for (i = 0; i < m; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vB));
  PetscCall(PetscMalloc1(m, &idxb));
  PetscCall(MatGetRowMaxAbs(a->B, vB, idxb));

  PetscCall(VecGetArrayWrite(v, &vv));
  PetscCall(VecGetArrayRead(vB, &vb));
  for (i = 0; i < m; i++) {
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
      vv[i] = vb[i];
      if (idx) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs);
    } else {
      vv[i] = va[i];
      if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > bs * a->garray[idxb[i] / bs] + (idxb[i] % bs)) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs);
    }
  }
  PetscCall(VecRestoreArrayWrite(v, &vv));
  PetscCall(VecRestoreArrayWrite(vA, &va));
  PetscCall(VecRestoreArrayRead(vB, &vb));
  PetscCall(PetscFree(idxb));
  PetscCall(VecDestroy(&vA));
  PetscCall(VecDestroy(&vB));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatStoreValues(aij->A));
  PetscCall(MatStoreValues(aij->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatRetrieveValues(aij->A));
  PetscCall(MatRetrieveValues(aij->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix. This is done in a non-scalable way since the
   length of colmap equals the global matrix length.
*/
PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ *B    = (Mat_SeqBAIJ *)baij->B->data;
  PetscInt     nbs  = B->nbs, i, bs = mat->rmap->bs;

  PetscFunctionBegin;
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapICreateWithSize(baij->nbs, &baij->colmap));
  for (i = 0; i < nbs; i++) PetscCall(PetscHMapISet(baij->colmap, baij->garray[i] + 1, i * bs + 1));
#else
  PetscCall(PetscCalloc1(baij->Nbs + 1, &baij->colmap));
  for (i = 0; i < nbs; i++) baij->colmap[baij->garray[i]] = i * bs + 1;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

#define MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, orow, ocol) \
  do { \
    brow = row / bs; \
    rp   = aj + ai[brow]; \
    ap   = aa + bs2 * ai[brow]; \
    rmax = aimax[brow]; \
    nrow = ailen[brow]; \
    bcol = col / bs; \
    ridx = row % bs; \
    cidx = col % bs; \
    low  = 0; \
    high = nrow; \
    while (high - low > 3) { \
      t = (low + high) / 2; \
      if (rp[t] > bcol) high = t; \
      else low = t; \
    } \
    for (_i = low; _i < high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2 * _i + bs * cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else *bap = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    PetscCheck(a->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, aimax, a->nonew, MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
    PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
    rp[_i]                          = bcol; \
    ap[bs2 * _i + bs * cidx + ridx] = value; \
  a_noinsert:; \
    ailen[brow] = nrow; \
  } while (0)
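/*
   MatSetValues_SeqBAIJ_B_Private() is the analogue of the macro above for the off-diagonal
   block B; only the array names differ (bi, bj, ba, bimax, bilen instead of ai, aj, aa,
   aimax, ailen).
*/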
#define MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, orow, ocol) \
  do { \
    brow = row / bs; \
    rp   = bj + bi[brow]; \
    ap   = ba + bs2 * bi[brow]; \
    rmax = bimax[brow]; \
    nrow = bilen[brow]; \
    bcol = col / bs; \
    ridx = row % bs; \
    cidx = col % bs; \
    low  = 0; \
    high = nrow; \
    while (high - low > 3) { \
      t = (low + high) / 2; \
      if (rp[t] > bcol) high = t; \
      else low = t; \
    } \
    for (_i = low; _i < high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2 * _i + bs * cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else *bap = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    PetscCheck(b->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B, b->mbs, bs2, nrow, brow, bcol, rmax, ba, bi, bj, rp, ap, bimax, b->nonew, MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
    PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
    rp[_i]                          = bcol; \
    ap[bs2 * _i + bs * cidx + ridx] = value; \
  b_noinsert:; \
    bilen[brow] = nrow; \
  } while (0)
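/*
   MatSetValues_MPIBAIJ() dispatches point entries: locally owned rows go directly into the
   diagonal block A or, with columns remapped through colmap, into the off-diagonal block B;
   off-process rows are stashed and communicated during assembly. An illustrative caller-side
   sketch using only the standard public API (bs, m, M, row, col, val and the preallocation
   counts are placeholders supplied by the caller):

     Mat mat;
     PetscCall(MatCreateBAIJ(PETSC_COMM_WORLD, bs, m, m, M, M, 5, NULL, 2, NULL, &mat));
     PetscCall(MatSetValues(mat, 1, &row, 1, &col, &val, ADD_VALUES));
     PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
     PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
*/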
PetscErrorCode MatSetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIBAIJ *baij        = (Mat_MPIBAIJ *)mat->data;
  MatScalar    value;
  PetscBool    roworiented = baij->roworiented;
  PetscInt     i, j, row, col;
  PetscInt     rstart_orig = mat->rmap->rstart;
  PetscInt     rend_orig = mat->rmap->rend, cstart_orig = mat->cmap->rstart;
  PetscInt     cend_orig = mat->cmap->rend, bs = mat->rmap->bs;

  /* Some Variables required in the macro */
  Mat          A     = baij->A;
  Mat_SeqBAIJ *a     = (Mat_SeqBAIJ *)(A)->data;
  PetscInt    *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
  MatScalar   *aa    = a->a;

  Mat          B     = baij->B;
  Mat_SeqBAIJ *b     = (Mat_SeqBAIJ *)(B)->data;
  PetscInt    *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j;
  MatScalar   *ba    = b->a;

  PetscInt  *rp, ii, nrow, _i, rmax, N, brow, bcol;
  PetscInt   low, high, t, ridx, cidx, bs2 = a->bs2;
  MatScalar *ap, *bap;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue;
    PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      row = im[i] - rstart_orig;
      for (j = 0; j < n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig) {
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, im[i], in[j]);
        } else if (in[j] < 0) {
          continue;
        } else {
          PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          if (mat->was_assembled) {
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] / bs + 1, 0, &col));
            col = col - 1;
#else
            col = baij->colmap[in[j] / bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ *)(baij->B->data))->nonew) {
              PetscCall(MatDisAssemble_MPIBAIJ(mat));
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B     = baij->B;
              b     = (Mat_SeqBAIJ *)(B)->data;
              bimax = b->imax;
              bi    = b->i;
              bilen = b->ilen;
              bj    = b->j;
              ba    = b->a;
            } else {
              PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
              col += in[j] % bs;
            }
          } else col = in[j];
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, im[i], in[j]);
          /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */
        }
      }
    } else {
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE));
        }
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol)
{
  Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
  PetscInt          *rp, low, high, t, ii, jj, nrow, i, rmax, N;
  PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
  PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs;
  PetscBool          roworiented = a->roworiented;
  const PetscScalar *value       = v;
  MatScalar         *ap, *aa = a->a, *bap;

  PetscFunctionBegin;
  rp    = aj + ai[row];
  ap    = aa + bs2 * ai[row];
  rmax  = imax[row];
  nrow  = ailen[row];
  value = v;
  low   = 0;
  high  = nrow;
  while (high - low > 7) {
    t = (low + high) / 2;
    if (rp[t] > col) high = t;
    else low = t;
  }
  for (i = low; i < high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) {
      bap = ap + bs2 * i;
      if (roworiented) {
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
          }
        } else {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
          }
        }
      } else {
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++, value += bs) {
            for (jj = 0; jj < bs; jj++) bap[jj] += value[jj];
            bap += bs;
          }
        } else {
          for (ii = 0; ii < bs; ii++, value += bs) {
            for (jj = 0; jj < bs; jj++) bap[jj] = value[jj];
            bap += bs;
          }
        }
      }
      goto noinsert2;
    }
  }
  if (nonew == 1) goto noinsert2;
  PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
  N = nrow++ - 1;
  high++;
  /* shift up all the later entries in this row */
  PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
  PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
  rp[i] = col;
  bap   = ap + bs2 * i;
  if (roworiented) {
    for (ii = 0; ii < bs; ii++) {
      for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
    }
  } else {
    for (ii = 0; ii < bs; ii++) {
      for (jj = 0; jj < bs; jj++) *bap++ = *value++;
    }
  }
noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(PETSC_SUCCESS);
}
= *value++; 375 } 376 } else { 377 for (ii = 0; ii < bs; ii++) { 378 for (jj = 0; jj < bs; jj++) *bap++ = *value++; 379 } 380 } 381 noinsert2:; 382 ailen[row] = nrow; 383 PetscFunctionReturn(PETSC_SUCCESS); 384 } 385 386 /* 387 This routine should be optimized so that the block copy at ** Here a copy is required ** below is not needed 388 by passing additional stride information into the MatSetValuesBlocked_SeqBAIJ_Inlined() routine 389 */ 390 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) 391 { 392 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 393 const PetscScalar *value; 394 MatScalar *barray = baij->barray; 395 PetscBool roworiented = baij->roworiented; 396 PetscInt i, j, ii, jj, row, col, rstart = baij->rstartbs; 397 PetscInt rend = baij->rendbs, cstart = baij->cstartbs, stepval; 398 PetscInt cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2; 399 400 PetscFunctionBegin; 401 if (!barray) { 402 PetscCall(PetscMalloc1(bs2, &barray)); 403 baij->barray = barray; 404 } 405 406 if (roworiented) stepval = (n - 1) * bs; 407 else stepval = (m - 1) * bs; 408 409 for (i = 0; i < m; i++) { 410 if (im[i] < 0) continue; 411 PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1); 412 if (im[i] >= rstart && im[i] < rend) { 413 row = im[i] - rstart; 414 for (j = 0; j < n; j++) { 415 /* If NumCol = 1 then a copy is not required */ 416 if ((roworiented) && (n == 1)) { 417 barray = (MatScalar *)v + i * bs2; 418 } else if ((!roworiented) && (m == 1)) { 419 barray = (MatScalar *)v + j * bs2; 420 } else { /* Here a copy is required */ 421 if (roworiented) { 422 value = v + (i * (stepval + bs) + j) * bs; 423 } else { 424 value = v + (j * (stepval + bs) + i) * bs; 425 } 426 for (ii = 0; ii < bs; ii++, value += bs + stepval) { 427 for (jj = 0; jj < bs; jj++) barray[jj] = value[jj]; 428 barray += bs; 429 } 430 barray -= bs2; 431 } 432 433 if (in[j] >= cstart && in[j] < cend) { 434 col = in[j] - cstart; 435 PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j])); 436 } else if (in[j] < 0) { 437 continue; 438 } else { 439 PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1); 440 if (mat->was_assembled) { 441 if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 442 443 #if defined(PETSC_USE_DEBUG) 444 #if defined(PETSC_USE_CTABLE) 445 { 446 PetscInt data; 447 PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &data)); 448 PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap"); 449 } 450 #else 451 PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap"); 452 #endif 453 #endif 454 #if defined(PETSC_USE_CTABLE) 455 PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col)); 456 col = (col - 1) / bs; 457 #else 458 col = (baij->colmap[in[j]] - 1) / bs; 459 #endif 460 if (col < 0 && !((Mat_SeqBAIJ *)(baij->B->data))->nonew) { 461 PetscCall(MatDisAssemble_MPIBAIJ(mat)); 462 col = in[j]; 463 } else PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new blocked indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]); 464 } else col = in[j]; 465 
#define HASH_KEY 0.6180339887
#define HASH(size, key, tmp) (tmp = (key)*HASH_KEY, (PetscInt)((size) * (tmp - (PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIBAIJ *baij        = (Mat_MPIBAIJ *)mat->data;
  PetscBool    roworiented = baij->roworiented;
  PetscInt     i, j, row, col;
  PetscInt     rstart_orig = mat->rmap->rstart;
  PetscInt     rend_orig = mat->rmap->rend, Nbs = baij->Nbs;
  PetscInt     h1, key, size = baij->ht_size, bs = mat->rmap->bs, *HT = baij->ht, idx;
  PetscReal    tmp;
  MatScalar  **HD = baij->hd, value;
  PetscInt     total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (PetscDefined(USE_DEBUG)) {
      PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row");
      PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    }
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j = 0; j < n; j++) {
        col = in[j];
        if (roworiented) value = v[i * n + j];
        else value = v[i + j * m];
        /* Look up in the hash table */
        key = (row / bs) * Nbs + (col / bs) + 1;
        h1  = HASH(size, key, tmp);

        idx = h1;
        if (PetscDefined(USE_DEBUG)) {
          insert_ct++;
          total_ct++;
          if (HT[idx] != key) {
            for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++)
              ;
            if (idx == size) {
              for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++)
                ;
              PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
            }
          }
        } else if (HT[idx] != key) {
          for (idx = h1; (idx < size) && (HT[idx] != key); idx++)
            ;
          if (idx == size) {
            for (idx = 0; (idx < h1) && (HT[idx] != key); idx++)
              ;
            PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
          }
        }
        /* A HASH table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx] + (col % bs) * bs + (row % bs)) += value;
        else *(HD[idx] + (col % bs) * bs + (row % bs)) = value;
      }
    } else if (!baij->donotstash) {
      if (roworiented) {
        PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE));
      } else {
        PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE));
      }
    }
  }
  if (PetscDefined(USE_DEBUG)) {
    baij->ht_total_ct += total_ct;
    baij->ht_insert_ct += insert_ct;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
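/*
   The hash-table path is enabled with MatSetOption(mat, MAT_USE_HASH_TABLE, PETSC_TRUE)
   (handled in MatSetOption_MPIBAIJ()); MatAssemblyEnd_MPIBAIJ() then builds the table at the
   first final assembly and switches the setvalues/setvaluesblocked function pointers to the
   _HT variants. Illustrative sketch, assuming mat is a MATMPIBAIJ whose nonzero pattern will
   not change after the first assembly:

     PetscCall(MatSetOption(mat, MAT_USE_HASH_TABLE, PETSC_TRUE));
     ... fill the matrix with MatSetValues() or MatSetValuesBlocked() ...
     PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
     PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
*/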
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIBAIJ       *baij        = (Mat_MPIBAIJ *)mat->data;
  PetscBool          roworiented = baij->roworiented;
  PetscInt           i, j, ii, jj, row, col;
  PetscInt           rstart = baij->rstartbs;
  PetscInt           rend = baij->rendbs, stepval, bs = mat->rmap->bs, bs2 = baij->bs2, nbs2 = n * bs2;
  PetscInt           h1, key, size = baij->ht_size, idx, *HT = baij->ht, Nbs = baij->Nbs;
  PetscReal          tmp;
  MatScalar        **HD = baij->hd, *baij_a;
  const PetscScalar *v_t, *value;
  PetscInt           total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct;

  PetscFunctionBegin;
  if (roworiented) stepval = (n - 1) * bs;
  else stepval = (m - 1) * bs;

  for (i = 0; i < m; i++) {
    if (PetscDefined(USE_DEBUG)) {
      PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row: %" PetscInt_FMT, im[i]);
      PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
    }
    row = im[i];
    v_t = v + i * nbs2;
    if (row >= rstart && row < rend) {
      for (j = 0; j < n; j++) {
        col = in[j];

        /* Look up in the hash table */
        key = row * Nbs + col + 1;
        h1  = HASH(size, key, tmp);

        idx = h1;
        if (PetscDefined(USE_DEBUG)) {
          total_ct++;
          insert_ct++;
          if (HT[idx] != key) {
            for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++)
              ;
            if (idx == size) {
              for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++)
                ;
              PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
            }
          }
        } else if (HT[idx] != key) {
          for (idx = h1; (idx < size) && (HT[idx] != key); idx++)
            ;
          if (idx == size) {
            for (idx = 0; (idx < h1) && (HT[idx] != key); idx++)
              ;
            PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
          }
        }
        baij_a = HD[idx];
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t += bs;
          if (addv == ADD_VALUES) {
            for (ii = 0; ii < bs; ii++, value += stepval) {
              for (jj = ii; jj < bs2; jj += bs) baij_a[jj] += *value++;
            }
          } else {
            for (ii = 0; ii < bs; ii++, value += stepval) {
              for (jj = ii; jj < bs2; jj += bs) baij_a[jj] = *value++;
            }
          }
        } else {
          value = v + j * (stepval + bs) * bs + i * bs;
          if (addv == ADD_VALUES) {
            for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) {
              for (jj = 0; jj < bs; jj++) baij_a[jj] += *value++;
            }
          } else {
            for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) {
              for (jj = 0; jj < bs; jj++) baij_a[jj] = *value++;
            }
          }
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        } else {
          PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        }
      }
    }
  }
  if (PetscDefined(USE_DEBUG)) {
    baij->ht_total_ct += total_ct;
    baij->ht_insert_ct += insert_ct;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
PetscErrorCode MatGetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  PetscInt     bs = mat->rmap->bs, i, j, bsrstart = mat->rmap->rstart, bsrend = mat->rmap->rend;
  PetscInt     bscstart = mat->cmap->rstart, bscend = mat->cmap->rend, row, col, data;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j = 0; j < n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          col = idxn[j] - bscstart;
          PetscCall(MatGetValues_SeqBAIJ(baij->A, 1, &row, 1, &col, v + i * n + j));
        } else {
          if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
          PetscCall(PetscHMapIGetWithDefault(baij->colmap, idxn[j] / bs + 1, 0, &data));
          data--;
#else
          data = baij->colmap[idxn[j] / bs] - 1;
#endif
          if ((data < 0) || (baij->garray[data / bs] != idxn[j] / bs)) *(v + i * n + j) = 0.0;
          else {
            col = data + idxn[j] % bs;
            PetscCall(MatGetValues_SeqBAIJ(baij->B, 1, &row, 1, &col, v + i * n + j));
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatNorm_MPIBAIJ(Mat mat, NormType type, PetscReal *nrm)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ *amat = (Mat_SeqBAIJ *)baij->A->data, *bmat = (Mat_SeqBAIJ *)baij->B->data;
  PetscInt     i, j, bs2 = baij->bs2, bs = baij->A->rmap->bs, nz, row, col;
  PetscReal    sum = 0.0;
  MatScalar   *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    PetscCall(MatNorm(baij->A, type, nrm));
  } else {
    if (type == NORM_FROBENIUS) {
      v  = amat->a;
      nz = amat->nz * bs2;
      for (i = 0; i < nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      v  = bmat->a;
      nz = bmat->nz * bs2;
      for (i = 0; i < nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      *nrm = PetscSqrtReal(*nrm);
    } else if (type == NORM_1) { /* max column sum */
      PetscReal *tmp, *tmp2;
      PetscInt  *jj, *garray = baij->garray, cstart = baij->cstartbs;
      PetscCall(PetscCalloc1(mat->cmap->N, &tmp));
      PetscCall(PetscMalloc1(mat->cmap->N, &tmp2));
      v  = amat->a;
      jj = amat->j;
      for (i = 0; i < amat->nz; i++) {
        for (j = 0; j < bs; j++) {
          col = bs * (cstart + *jj) + j; /* column index */
          for (row = 0; row < bs; row++) {
            tmp[col] += PetscAbsScalar(*v);
            v++;
          }
        }
        jj++;
      }
      v  = bmat->a;
      jj = bmat->j;
      for (i = 0; i < bmat->nz; i++) {
        for (j = 0; j < bs; j++) {
          col = bs * garray[*jj] + j;
          for (row = 0; row < bs; row++) {
            tmp[col] += PetscAbsScalar(*v);
            v++;
          }
        }
        jj++;
      }
      PetscCall(MPIU_Allreduce(tmp, tmp2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      *nrm = 0.0;
      for (j = 0; j < mat->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      PetscCall(PetscFree(tmp));
      PetscCall(PetscFree(tmp2));
    } else if (type == NORM_INFINITY) { /* max row sum */
      PetscReal *sums;
      PetscCall(PetscMalloc1(bs, &sums));
      sum = 0.0;
      for (j = 0; j < amat->mbs; j++) {
        for (row = 0; row < bs; row++) sums[row] = 0.0;
        v  = amat->a + bs2 * amat->i[j];
        nz = amat->i[j + 1] - amat->i[j];
        for (i = 0; i < nz; i++) {
          for (col = 0; col < bs; col++) {
            for (row = 0; row < bs; row++) {
              sums[row] += PetscAbsScalar(*v);
              v++;
            }
          }
        }
        v  = bmat->a + bs2 * bmat->i[j];
        nz = bmat->i[j + 1] - bmat->i[j];
        for (i = 0; i < nz; i++) {
          for (col = 0; col < bs; col++) {
            for (row = 0; row < bs; row++) {
              sums[row] += PetscAbsScalar(*v);
              v++;
            }
          }
        }
        for (row = 0; row < bs; row++) {
          if (sums[row] > sum) sum = sums[row];
        }
      }
      PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat)));
      PetscCall(PetscFree(sums));
    } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for this norm yet");
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
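/*
   The norms above are computed as follows: NORM_FROBENIUS is sqrt(sum_ij |a_ij|^2) from a
   local sum followed by a global reduction; NORM_1 (max column sum) accumulates full column
   sums of length cmap->N on every process, which is not scalable in memory; NORM_INFINITY
   (max row sum) works block row by block row and needs only a scalar reduction.
*/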
/*
   Creates and fills the hash table.
   This table is created only once.
   If new entries need to be added to the matrix,
   then the hash table has to be destroyed and
   recreated.
*/
PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat, PetscReal factor)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat          A = baij->A, B = baij->B;
  Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data;
  PetscInt     i, j, k, nz = a->nz + b->nz, h1, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
  PetscInt     ht_size, bs2 = baij->bs2, rstart = baij->rstartbs;
  PetscInt     cstart = baij->cstartbs, *garray = baij->garray, row, col, Nbs = baij->Nbs;
  PetscInt    *HT, key;
  MatScalar  **HD;
  PetscReal    tmp;
#if defined(PETSC_USE_INFO)
  PetscInt ct = 0, max = 0;
#endif

  PetscFunctionBegin;
  if (baij->ht) PetscFunctionReturn(PETSC_SUCCESS);

  baij->ht_size = (PetscInt)(factor * nz);
  ht_size       = baij->ht_size;

  /* Allocate Memory for Hash Table */
  PetscCall(PetscCalloc2(ht_size, &baij->hd, ht_size, &baij->ht));
  HD = baij->hd;
  HT = baij->ht;

  /* Loop Over A */
  for (i = 0; i < a->mbs; i++) {
    for (j = ai[i]; j < ai[i + 1]; j++) {
      row = i + rstart;
      col = aj[j] + cstart;

      key = row * Nbs + col + 1;
      h1  = HASH(ht_size, key, tmp);
      for (k = 0; k < ht_size; k++) {
        if (!HT[(h1 + k) % ht_size]) {
          HT[(h1 + k) % ht_size] = key;
          HD[(h1 + k) % ht_size] = a->a + j * bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }
  /* Loop Over B */
  for (i = 0; i < b->mbs; i++) {
    for (j = bi[i]; j < bi[i + 1]; j++) {
      row = i + rstart;
      col = garray[bj[j]];
      key = row * Nbs + col + 1;
      h1  = HASH(ht_size, key, tmp);
      for (k = 0; k < ht_size; k++) {
        if (!HT[(h1 + k) % ht_size]) {
          HT[(h1 + k) % ht_size] = key;
          HD[(h1 + k) % ht_size] = b->a + j * bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }

  /* Print Summary */
#if defined(PETSC_USE_INFO)
  for (i = 0, j = 0; i < ht_size; i++) {
    if (HT[i]) j++;
  }
  PetscCall(PetscInfo(mat, "Average Search = %5.2g, max search = %" PetscInt_FMT "\n", (!j) ? (double)0.0 : (double)(((PetscReal)(ct + j)) / (double)j), max));
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}
PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  PetscInt     nstash, reallocs;

  PetscFunctionBegin;
  if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
  PetscCall(MatStashScatterBegin_Private(mat, &mat->bstash, baij->rangebs));
  PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
  PetscCall(PetscInfo(mat, "Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscCall(MatStashGetInfo_Private(&mat->bstash, &nstash, &reallocs));
  PetscCall(PetscInfo(mat, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscFunctionReturn(PETSC_SUCCESS);
}
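/*
   MatAssemblyEnd_MPIBAIJ() drains both stashes (the point stash and the block stash),
   forwarding each run of consecutive values belonging to the same row to
   MatSetValues_MPIBAIJ()/MatSetValuesBlocked_MPIBAIJ(), then assembles the A and B blocks,
   sets up the matrix-vector scatter on the first final assembly, and, if requested, creates
   the hash table and switches to the _HT setters.
*/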
PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ *a    = (Mat_SeqBAIJ *)baij->A->data;
  PetscInt     i, j, rstart, ncols, flg, bs2 = baij->bs2;
  PetscInt    *row, *col;
  PetscBool    r1, r2, r3, other_disassembled;
  MatScalar   *val;
  PetscMPIInt  n;

  PetscFunctionBegin;
  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  if (!baij->donotstash && !mat->nooffprocentries) {
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flags to column-oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ *)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;

    (((Mat_SeqBAIJ *)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->bstash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        PetscCall(MatSetValuesBlocked_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i * bs2, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->bstash));

    baij->roworiented = r1;
    a->roworiented    = r2;

    ((Mat_SeqBAIJ *)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  PetscCall(MatAssemblyBegin(baij->A, mode));
  PetscCall(MatAssemblyEnd(baij->A, mode));

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ *)baij->B->data)->nonew) {
    PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !other_disassembled) PetscCall(MatDisAssemble_MPIBAIJ(mat));
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIBAIJ(mat));
  PetscCall(MatAssemblyBegin(baij->B, mode));
  PetscCall(MatAssemblyEnd(baij->B, mode));

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    PetscCall(PetscInfo(mat, "Average Hash Table Search in MatSetValues = %5.2f\n", (double)((PetscReal)baij->ht_total_ct) / baij->ht_insert_ct));

    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    PetscCall(MatCreateHashTable_MPIBAIJ_Private(mat, baij->ht_fact));

    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));

  baij->rowvalues = NULL;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ *)(baij->A->data))->nonew) {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
extern PetscErrorCode MatView_SeqBAIJ(Mat, PetscViewer);
#include <petscdraw.h>
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
{
  Mat_MPIBAIJ      *baij = (Mat_MPIBAIJ *)mat->data;
  PetscMPIInt       rank = baij->rank;
  PetscInt          bs   = mat->rmap->bs;
  PetscBool         iascii, isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  if (iascii) {
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated, mat->rmap->bs, (double)info.memory));
      PetscCall(MatGetInfo(baij->A, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(MatGetInfo(baij->B, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(baij->Mvctx, viewer));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscCall(PetscViewerASCIIPrintf(viewer, "  block size is %" PetscInt_FMT "\n", bs));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(PETSC_SUCCESS);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    Mat_SeqBAIJ *Aloc;
    PetscInt     M = mat->rmap->N, N = mat->cmap->N, *ai, *aj, col, i, j, k, *rvals, mbs = baij->mbs;
    MatScalar   *a;
    const char  *matname;

    /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
    if (rank == 0) {
      PetscCall(MatSetSizes(A, M, N, M, N));
    } else {
      PetscCall(MatSetSizes(A, 0, 0, M, N));
    }
    PetscCall(MatSetType(A, MATMPIBAIJ));
    PetscCall(MatMPIBAIJSetPreallocation(A, mat->rmap->bs, 0, NULL, 0, NULL));
    PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ *)baij->A->data;
    ai   = Aloc->i;
    aj   = Aloc->j;
    a    = Aloc->a;
    PetscCall(PetscMalloc1(bs, &rvals));

    for (i = 0; i < mbs; i++) {
      rvals[0] = bs * (baij->rstartbs + i);
      for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
      for (j = ai[i]; j < ai[i + 1]; j++) {
        col = (baij->cstartbs + aj[j]) * bs;
        for (k = 0; k < bs; k++) {
          PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
          col++;
          a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ *)baij->B->data;
    ai   = Aloc->i;
    aj   = Aloc->j;
    a    = Aloc->a;
    for (i = 0; i < mbs; i++) {
      rvals[0] = bs * (baij->rstartbs + i);
      for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
      for (j = ai[i]; j < ai[i + 1]; j++) {
        col = baij->garray[aj[j]] * bs;
        for (k = 0; k < bs; k++) {
          PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
          col++;
          a += bs;
        }
      }
    }
    PetscCall(PetscFree(rvals));
    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    if (((PetscObject)mat)->name) PetscCall(PetscObjectGetName((PetscObject)mat, &matname));
    if (rank == 0) {
      if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIBAIJ *)(A->data))->A, matname));
      PetscCall(MatView_SeqBAIJ(((Mat_MPIBAIJ *)(A->data))->A, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(PetscViewerFlush(viewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
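/*
   Illustrative sketch of saving a matrix with the binary viewer implemented below (standard
   public API; the file name is a placeholder):

     PetscViewer viewer;
     PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, "mat.dat", FILE_MODE_WRITE, &viewer));
     PetscCall(MatView(mat, viewer));
     PetscCall(PetscViewerDestroy(&viewer));
*/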
/* Used for both MPIBAIJ and MPISBAIJ matrices */
PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat, PetscViewer viewer)
{
  Mat_MPIBAIJ    *aij    = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ    *A      = (Mat_SeqBAIJ *)aij->A->data;
  Mat_SeqBAIJ    *B      = (Mat_SeqBAIJ *)aij->B->data;
  const PetscInt *garray = aij->garray;
  PetscInt        header[4], M, N, m, rs, cs, bs, cnt, i, j, ja, jb, k, l;
  PetscInt64      nz, hnz;
  PetscInt       *rowlens, *colidxs;
  PetscScalar    *matvals;
  PetscMPIInt     rank;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  M  = mat->rmap->N;
  N  = mat->cmap->N;
  m  = mat->rmap->n;
  rs = mat->rmap->rstart;
  cs = mat->cmap->rstart;
  bs = mat->rmap->bs;
  nz = bs * bs * (A->nz + B->nz);

  /* write matrix header */
  header[0] = MAT_FILE_CLASSID;
  header[1] = M;
  header[2] = N;
  PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_INT64, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
  if (rank == 0) PetscCall(PetscIntCast(hnz, &header[3]));
  PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));

  /* fill in and store row lengths */
  PetscCall(PetscMalloc1(m, &rowlens));
  for (cnt = 0, i = 0; i < A->mbs; i++)
    for (j = 0; j < bs; j++) rowlens[cnt++] = bs * (A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i]);
  PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
  PetscCall(PetscFree(rowlens));

  /* fill in and store column indices */
  PetscCall(PetscMalloc1(nz, &colidxs));
  for (cnt = 0, i = 0; i < A->mbs; i++) {
    for (k = 0; k < bs; k++) {
      for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
        if (garray[B->j[jb]] > cs / bs) break;
        for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l;
      }
      for (ja = A->i[i]; ja < A->i[i + 1]; ja++)
        for (l = 0; l < bs; l++) colidxs[cnt++] = bs * A->j[ja] + l + cs;
      for (; jb < B->i[i + 1]; jb++)
        for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l;
    }
  }
  PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
  PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_INT));
  PetscCall(PetscFree(colidxs));

  /* fill in and store nonzero values */
  PetscCall(PetscMalloc1(nz, &matvals));
  for (cnt = 0, i = 0; i < A->mbs; i++) {
    for (k = 0; k < bs; k++) {
      for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
        if (garray[B->j[jb]] > cs / bs) break;
        for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k];
      }
      for (ja = A->i[i]; ja < A->i[i + 1]; ja++)
        for (l = 0; l < bs; l++) matvals[cnt++] = A->a[bs * (bs * ja + l) + k];
      for (; jb < B->i[i + 1]; jb++)
        for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k];
    }
  }
  PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_SCALAR));
  PetscCall(PetscFree(matvals));

  /* write block size option to the viewer's .info file */
  PetscCall(MatView_Binary_BlockSizes(mat, viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}
PetscErrorCode MatView_MPIBAIJ(Mat mat, PetscViewer viewer)
{
  PetscBool iascii, isdraw, issocket, isbinary;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  if (iascii || isdraw || issocket) {
    PetscCall(MatView_MPIBAIJ_ASCIIorDraworSocket(mat, viewer));
  } else if (isbinary) PetscCall(MatView_MPIBAIJ_Binary(mat, viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatMult_MPIBAIJ(Mat A, Vec xx, Vec yy)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
  PetscInt     nt;

  PetscFunctionBegin;
  PetscCall(VecGetLocalSize(xx, &nt));
  PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A and xx");
  PetscCall(VecGetLocalSize(yy, &nt));
  PetscCheck(nt == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A and yy");
  PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*a->A->ops->mult)(a->A, xx, yy));
  PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*a->B->ops->multadd)(a->B, a->lvec, yy, yy));
  PetscFunctionReturn(PETSC_SUCCESS);
}
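/*
   The multiply kernels overlap communication with computation: the scatter of the needed
   off-process entries of xx into lvec is started, the local product with the diagonal block A
   is computed, the scatter is completed, and the off-diagonal contribution is added, i.e.
   yy = A_diag * xx_local + B_offdiag * xx_ghost.
*/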
PetscErrorCode MatMultAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
  PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A, Vec xx, Vec yy)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  /* do nondiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
  /* do local part */
  PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
  /* add partial results together */
  PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
  PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  /* do nondiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
  /* do local part */
  PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
  /* add partial results together */
  PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
  PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A, Vec v)
{
  PetscFunctionBegin;
  PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
  PetscCall(MatGetDiagonal(((Mat_MPIBAIJ *)A->data)->A, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatScale_MPIBAIJ(Mat A, PetscScalar aa)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatScale(a->A, aa));
  PetscCall(MatScale(a->B, aa));
  PetscFunctionReturn(PETSC_SUCCESS);
}
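/*
   MatGetRow_MPIBAIJ() returns one locally owned row with ascending global column indices by
   merging the corresponding rows of A and B: B entries whose global block column (through
   garray) lies before the diagonal block come first, then the A entries, then the remaining
   B entries.
*/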
PetscErrorCode MatGetRow_MPIBAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data;
  PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
  PetscInt     bs = matin->rmap->bs, bs2 = mat->bs2, i, *cworkA, *cworkB, **pcA, **pcB;
  PetscInt     nztot, nzA, nzB, lrow, brstart = matin->rmap->rstart, brend = matin->rmap->rend;
  PetscInt    *cmap, *idx_p, cstart = mat->cstartbs;

  PetscFunctionBegin;
  PetscCheck(row >= brstart && row < brend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local rows");
  PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *)mat->A->data, *Ba = (Mat_SeqBAIJ *)mat->B->data;
    PetscInt     max = 1, mbs = mat->mbs, tmp;
    for (i = 0; i < mbs; i++) {
      tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    PetscCall(PetscMalloc2(max * bs2, &mat->rowvalues, max * bs2, &mat->rowindices));
  }
  lrow = row - brstart;

  pvA = &vworkA;
  pcA = &cworkA;
  pvB = &vworkB;
  pcB = &cworkB;
  if (!v) {
    pvA = NULL;
    pvB = NULL;
  }
  if (!idx) {
    pcA = NULL;
    if (!v) pcB = NULL;
  }
  PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i = 0; i < nzB; i++) {
          if (cmap[cworkB[i] / bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
        for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
        } else {
          for (i = 0; i < nzB; i++) {
            if (cmap[cworkB[i] / bs] < cstart) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
            else break;
          }
          imark = i;
        }
        for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart * bs + cworkA[i];
        for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
      }
    } else {
      if (idx) *idx = NULL;
      if (v) *v = NULL;
    }
  }
  *nz = nztot;
  PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCheck(baij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow not called");
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatZeroEntries(l->A));
  PetscCall(MatZeroEntries(l->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}
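/*
   Illustrative sketch of querying the statistics assembled below (standard public API):

     MatInfo info;
     PetscCall(MatGetInfo(mat, MAT_GLOBAL_SUM, &info));
     PetscCall(PetscPrintf(PETSC_COMM_WORLD, "nz used %g, allocated %g\n", info.nz_used, info.nz_allocated));
*/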
PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin, MatInfoType flag, MatInfo *info)
{
  Mat_MPIBAIJ   *a = (Mat_MPIBAIJ *)matin->data;
  Mat            A = a->A, B = a->B;
  PetscLogDouble isend[5], irecv[5];

  PetscFunctionBegin;
  info->block_size = (PetscReal)matin->rmap->bs;

  PetscCall(MatGetInfo(A, MAT_LOCAL, info));

  isend[0] = info->nz_used;
  isend[1] = info->nz_allocated;
  isend[2] = info->nz_unneeded;
  isend[3] = info->memory;
  isend[4] = info->mallocs;

  PetscCall(MatGetInfo(B, MAT_LOCAL, info));

  isend[0] += info->nz_used;
  isend[1] += info->nz_allocated;
  isend[2] += info->nz_unneeded;
  isend[3] += info->memory;
  isend[4] += info->mallocs;

  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_ARG_WRONG, "Unknown MatInfoType argument %d", (int)flag);
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatSetOption_MPIBAIJ(Mat A, MatOption op, PetscBool flg)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A, 1);
    a->roworiented = flg;

    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_SORTED_FULL:
    PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_USE_HASH_TABLE:
    a->ht_flag = flg;
    a->ht_fact = 1.39;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SUBMAT_SINGLEIS:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
  case MAT_SPD_ETERNAL:
    /* if the diagonal matrix is square it inherits some of the properties above */
    break;
  default:
    SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "unknown option %d", op);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
(Mat_SeqBAIJ *)baij->A->data; 1537 ai = Aloc->i; 1538 aj = Aloc->j; 1539 a = Aloc->a; 1540 PetscCall(PetscMalloc1(bs, &rvals)); 1541 1542 for (i = 0; i < mbs; i++) { 1543 rvals[0] = bs * (baij->rstartbs + i); 1544 for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1; 1545 for (j = ai[i]; j < ai[i + 1]; j++) { 1546 col = (baij->cstartbs + aj[j]) * bs; 1547 for (k = 0; k < bs; k++) { 1548 PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES)); 1549 1550 col++; 1551 a += bs; 1552 } 1553 } 1554 } 1555 /* copy over the B part */ 1556 Aloc = (Mat_SeqBAIJ *)baij->B->data; 1557 ai = Aloc->i; 1558 aj = Aloc->j; 1559 a = Aloc->a; 1560 for (i = 0; i < mbs; i++) { 1561 rvals[0] = bs * (baij->rstartbs + i); 1562 for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1; 1563 for (j = ai[i]; j < ai[i + 1]; j++) { 1564 col = baij->garray[aj[j]] * bs; 1565 for (k = 0; k < bs; k++) { 1566 PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES)); 1567 col++; 1568 a += bs; 1569 } 1570 } 1571 } 1572 PetscCall(PetscFree(rvals)); 1573 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 1574 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 1575 1576 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) *matout = B; 1577 else PetscCall(MatHeaderMerge(A, &B)); 1578 PetscFunctionReturn(PETSC_SUCCESS); 1579 } 1580 1581 PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat, Vec ll, Vec rr) 1582 { 1583 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 1584 Mat a = baij->A, b = baij->B; 1585 PetscInt s1, s2, s3; 1586 1587 PetscFunctionBegin; 1588 PetscCall(MatGetLocalSize(mat, &s2, &s3)); 1589 if (rr) { 1590 PetscCall(VecGetLocalSize(rr, &s1)); 1591 PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size"); 1592 /* Overlap communication with computation. */ 1593 PetscCall(VecScatterBegin(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1594 } 1595 if (ll) { 1596 PetscCall(VecGetLocalSize(ll, &s1)); 1597 PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size"); 1598 PetscUseTypeMethod(b, diagonalscale, ll, NULL); 1599 } 1600 /* scale the diagonal block */ 1601 PetscUseTypeMethod(a, diagonalscale, ll, rr); 1602 1603 if (rr) { 1604 /* Do a scatter end and then right scale the off-diagonal block */ 1605 PetscCall(VecScatterEnd(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1606 PetscUseTypeMethod(b, diagonalscale, NULL, baij->lvec); 1607 } 1608 PetscFunctionReturn(PETSC_SUCCESS); 1609 } 1610 1611 PetscErrorCode MatZeroRows_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 1612 { 1613 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data; 1614 PetscInt *lrows; 1615 PetscInt r, len; 1616 PetscBool cong; 1617 1618 PetscFunctionBegin; 1619 /* get locally owned rows */ 1620 PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows)); 1621 /* fix right hand side if needed */ 1622 if (x && b) { 1623 const PetscScalar *xx; 1624 PetscScalar *bb; 1625 1626 PetscCall(VecGetArrayRead(x, &xx)); 1627 PetscCall(VecGetArray(b, &bb)); 1628 for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]]; 1629 PetscCall(VecRestoreArrayRead(x, &xx)); 1630 PetscCall(VecRestoreArray(b, &bb)); 1631 } 1632 1633 /* actually zap the local rows */ 1634 /* 1635 Zero the required rows. 
If the "diagonal block" of the matrix 1636 is square and the user wishes to set the diagonal we use separate 1637 code so that MatSetValues() is not called for each diagonal allocating 1638 new memory, thus calling lots of mallocs and slowing things down. 1639 1640 */ 1641 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1642 PetscCall(MatZeroRows_SeqBAIJ(l->B, len, lrows, 0.0, NULL, NULL)); 1643 PetscCall(MatHasCongruentLayouts(A, &cong)); 1644 if ((diag != 0.0) && cong) { 1645 PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, diag, NULL, NULL)); 1646 } else if (diag != 0.0) { 1647 PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL)); 1648 PetscCheck(!((Mat_SeqBAIJ *)l->A->data)->nonew, PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1649 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1650 for (r = 0; r < len; ++r) { 1651 const PetscInt row = lrows[r] + A->rmap->rstart; 1652 PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES)); 1653 } 1654 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 1655 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 1656 } else { 1657 PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL)); 1658 } 1659 PetscCall(PetscFree(lrows)); 1660 1661 /* only change matrix nonzero state if pattern was allowed to be changed */ 1662 if (!((Mat_SeqBAIJ *)(l->A->data))->keepnonzeropattern) { 1663 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1664 PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A))); 1665 } 1666 PetscFunctionReturn(PETSC_SUCCESS); 1667 } 1668 1669 PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 1670 { 1671 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data; 1672 PetscMPIInt n = A->rmap->n, p = 0; 1673 PetscInt i, j, k, r, len = 0, row, col, count; 1674 PetscInt *lrows, *owners = A->rmap->range; 1675 PetscSFNode *rrows; 1676 PetscSF sf; 1677 const PetscScalar *xx; 1678 PetscScalar *bb, *mask; 1679 Vec xmask, lmask; 1680 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)l->B->data; 1681 PetscInt bs = A->rmap->bs, bs2 = baij->bs2; 1682 PetscScalar *aa; 1683 1684 PetscFunctionBegin; 1685 /* Create SF where leaves are input rows and roots are owned rows */ 1686 PetscCall(PetscMalloc1(n, &lrows)); 1687 for (r = 0; r < n; ++r) lrows[r] = -1; 1688 PetscCall(PetscMalloc1(N, &rrows)); 1689 for (r = 0; r < N; ++r) { 1690 const PetscInt idx = rows[r]; 1691 PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N); 1692 if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */ 1693 PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p)); 1694 } 1695 rrows[r].rank = p; 1696 rrows[r].index = rows[r] - owners[p]; 1697 } 1698 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1699 PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER)); 1700 /* Collect flags for rows to be zeroed */ 1701 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR)); 1702 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR)); 1703 PetscCall(PetscSFDestroy(&sf)); 1704 /* Compress and put in row numbers */ 1705 for (r = 0; r < n; ++r) 1706 if (lrows[r] >= 0) lrows[len++] = r; 
1707 /* zero diagonal part of matrix */ 1708 PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b)); 1709 /* handle off diagonal part of matrix */ 1710 PetscCall(MatCreateVecs(A, &xmask, NULL)); 1711 PetscCall(VecDuplicate(l->lvec, &lmask)); 1712 PetscCall(VecGetArray(xmask, &bb)); 1713 for (i = 0; i < len; i++) bb[lrows[i]] = 1; 1714 PetscCall(VecRestoreArray(xmask, &bb)); 1715 PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD)); 1716 PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD)); 1717 PetscCall(VecDestroy(&xmask)); 1718 if (x) { 1719 PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1720 PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1721 PetscCall(VecGetArrayRead(l->lvec, &xx)); 1722 PetscCall(VecGetArray(b, &bb)); 1723 } 1724 PetscCall(VecGetArray(lmask, &mask)); 1725 /* remove zeroed rows of off diagonal matrix */ 1726 for (i = 0; i < len; ++i) { 1727 row = lrows[i]; 1728 count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs; 1729 aa = ((MatScalar *)(baij->a)) + baij->i[row / bs] * bs2 + (row % bs); 1730 for (k = 0; k < count; ++k) { 1731 aa[0] = 0.0; 1732 aa += bs; 1733 } 1734 } 1735 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1736 for (i = 0; i < l->B->rmap->N; ++i) { 1737 row = i / bs; 1738 for (j = baij->i[row]; j < baij->i[row + 1]; ++j) { 1739 for (k = 0; k < bs; ++k) { 1740 col = bs * baij->j[j] + k; 1741 if (PetscAbsScalar(mask[col])) { 1742 aa = ((MatScalar *)(baij->a)) + j * bs2 + (i % bs) + bs * k; 1743 if (x) bb[i] -= aa[0] * xx[col]; 1744 aa[0] = 0.0; 1745 } 1746 } 1747 } 1748 } 1749 if (x) { 1750 PetscCall(VecRestoreArray(b, &bb)); 1751 PetscCall(VecRestoreArrayRead(l->lvec, &xx)); 1752 } 1753 PetscCall(VecRestoreArray(lmask, &mask)); 1754 PetscCall(VecDestroy(&lmask)); 1755 PetscCall(PetscFree(lrows)); 1756 1757 /* only change matrix nonzero state if pattern was allowed to be changed */ 1758 if (!((Mat_SeqBAIJ *)(l->A->data))->keepnonzeropattern) { 1759 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1760 PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A))); 1761 } 1762 PetscFunctionReturn(PETSC_SUCCESS); 1763 } 1764 1765 PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1766 { 1767 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1768 1769 PetscFunctionBegin; 1770 PetscCall(MatSetUnfactored(a->A)); 1771 PetscFunctionReturn(PETSC_SUCCESS); 1772 } 1773 1774 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat, MatDuplicateOption, Mat *); 1775 1776 PetscErrorCode MatEqual_MPIBAIJ(Mat A, Mat B, PetscBool *flag) 1777 { 1778 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ *)B->data, *matA = (Mat_MPIBAIJ *)A->data; 1779 Mat a, b, c, d; 1780 PetscBool flg; 1781 1782 PetscFunctionBegin; 1783 a = matA->A; 1784 b = matA->B; 1785 c = matB->A; 1786 d = matB->B; 1787 1788 PetscCall(MatEqual(a, c, &flg)); 1789 if (flg) PetscCall(MatEqual(b, d, &flg)); 1790 PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A))); 1791 PetscFunctionReturn(PETSC_SUCCESS); 1792 } 1793 1794 PetscErrorCode MatCopy_MPIBAIJ(Mat A, Mat B, MatStructure str) 1795 { 1796 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1797 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 1798 1799 PetscFunctionBegin; 1800 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
*/ 1801 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1802 PetscCall(MatCopy_Basic(A, B, str)); 1803 } else { 1804 PetscCall(MatCopy(a->A, b->A, str)); 1805 PetscCall(MatCopy(a->B, b->B, str)); 1806 } 1807 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 1808 PetscFunctionReturn(PETSC_SUCCESS); 1809 } 1810 1811 PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz) 1812 { 1813 PetscInt bs = Y->rmap->bs, m = Y->rmap->N / bs; 1814 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data; 1815 Mat_SeqBAIJ *y = (Mat_SeqBAIJ *)Y->data; 1816 1817 PetscFunctionBegin; 1818 PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz)); 1819 PetscFunctionReturn(PETSC_SUCCESS); 1820 } 1821 1822 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str) 1823 { 1824 Mat_MPIBAIJ *xx = (Mat_MPIBAIJ *)X->data, *yy = (Mat_MPIBAIJ *)Y->data; 1825 PetscBLASInt bnz, one = 1; 1826 Mat_SeqBAIJ *x, *y; 1827 PetscInt bs2 = Y->rmap->bs * Y->rmap->bs; 1828 1829 PetscFunctionBegin; 1830 if (str == SAME_NONZERO_PATTERN) { 1831 PetscScalar alpha = a; 1832 x = (Mat_SeqBAIJ *)xx->A->data; 1833 y = (Mat_SeqBAIJ *)yy->A->data; 1834 PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz)); 1835 PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one)); 1836 x = (Mat_SeqBAIJ *)xx->B->data; 1837 y = (Mat_SeqBAIJ *)yy->B->data; 1838 PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz)); 1839 PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one)); 1840 PetscCall(PetscObjectStateIncrease((PetscObject)Y)); 1841 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1842 PetscCall(MatAXPY_Basic(Y, a, X, str)); 1843 } else { 1844 Mat B; 1845 PetscInt *nnz_d, *nnz_o, bs = Y->rmap->bs; 1846 PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d)); 1847 PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o)); 1848 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B)); 1849 PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name)); 1850 PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N)); 1851 PetscCall(MatSetBlockSizesFromMats(B, Y, Y)); 1852 PetscCall(MatSetType(B, MATMPIBAIJ)); 1853 PetscCall(MatAXPYGetPreallocation_SeqBAIJ(yy->A, xx->A, nnz_d)); 1854 PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o)); 1855 PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, nnz_d, 0, nnz_o)); 1856 /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! 
*/
1857 PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
1858 PetscCall(MatHeaderMerge(Y, &B));
1859 PetscCall(PetscFree(nnz_d));
1860 PetscCall(PetscFree(nnz_o));
1861 }
1862 PetscFunctionReturn(PETSC_SUCCESS);
1863 }
1864
1865 PetscErrorCode MatConjugate_MPIBAIJ(Mat mat)
1866 {
1867 PetscFunctionBegin;
1868 if (PetscDefined(USE_COMPLEX)) {
1869 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)mat->data;
1870
1871 PetscCall(MatConjugate_SeqBAIJ(a->A));
1872 PetscCall(MatConjugate_SeqBAIJ(a->B));
1873 }
1874 PetscFunctionReturn(PETSC_SUCCESS);
1875 }
1876
1877 PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
1878 {
1879 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1880
1881 PetscFunctionBegin;
1882 PetscCall(MatRealPart(a->A));
1883 PetscCall(MatRealPart(a->B));
1884 PetscFunctionReturn(PETSC_SUCCESS);
1885 }
1886
1887 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
1888 {
1889 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
1890
1891 PetscFunctionBegin;
1892 PetscCall(MatImaginaryPart(a->A));
1893 PetscCall(MatImaginaryPart(a->B));
1894 PetscFunctionReturn(PETSC_SUCCESS);
1895 }
1896
1897 PetscErrorCode MatCreateSubMatrix_MPIBAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
1898 {
1899 IS iscol_local;
1900 PetscInt csize;
1901
1902 PetscFunctionBegin;
1903 PetscCall(ISGetLocalSize(iscol, &csize));
1904 if (call == MAT_REUSE_MATRIX) {
1905 PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
1906 PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
1907 } else {
1908 PetscCall(ISAllGather(iscol, &iscol_local));
1909 }
1910 PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, isrow, iscol_local, csize, call, newmat));
1911 if (call == MAT_INITIAL_MATRIX) {
1912 PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
1913 PetscCall(ISDestroy(&iscol_local));
1914 }
1915 PetscFunctionReturn(PETSC_SUCCESS);
1916 }
1917
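/*
   MatCreateSubMatrix_MPIBAIJ() above caches the gathered column IS on the new matrix so
   that a later MAT_REUSE_MATRIX call can fetch it instead of repeating the ISAllGather().
   The caching protocol is plain object composition; a minimal sketch (variable names
   illustrative):

     PetscCall(PetscObjectCompose((PetscObject)newmat, "ISAllGather", (PetscObject)iscol_local)); // attach; takes a reference
     ...
     PetscCall(PetscObjectQuery((PetscObject)newmat, "ISAllGather", (PetscObject *)&iscol_local)); // retrieve; no new reference

   Because composition takes its own reference, the creator can destroy its handle right
   after composing, as done above.
*/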
1918 /*
1919 Not great since it makes two copies of the submatrix, first a SeqBAIJ
1920 in local numbering, and then the end result obtained by concatenating the local matrices.
1921 Writing it directly would be much like MatCreateSubMatrices_MPIBAIJ().
1922 This routine is used for BAIJ and SBAIJ matrices (unfortunate dependency).
1923 */
1924 PetscErrorCode MatCreateSubMatrix_MPIBAIJ_Private(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat)
1925 {
1926 PetscMPIInt rank, size;
1927 PetscInt i, m, n, rstart, row, rend, nz, *cwork, j, bs;
1928 PetscInt *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
1929 Mat M, Mreuse;
1930 MatScalar *vwork, *aa;
1931 MPI_Comm comm;
1932 IS isrow_new, iscol_new;
1933 Mat_SeqBAIJ *aij;
1934
1935 PetscFunctionBegin;
1936 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
1937 PetscCallMPI(MPI_Comm_rank(comm, &rank));
1938 PetscCallMPI(MPI_Comm_size(comm, &size));
1939 /* The compression and expansion should be avoided. It doesn't report
1940 errors, it might change the indices, hence it is buggy. */
1941 PetscCall(ISCompressIndicesGeneral(mat->rmap->N, mat->rmap->n, mat->rmap->bs, 1, &isrow, &isrow_new));
1942 if (isrow == iscol) {
1943 iscol_new = isrow_new;
1944 PetscCall(PetscObjectReference((PetscObject)iscol_new));
1945 } else PetscCall(ISCompressIndicesGeneral(mat->cmap->N, mat->cmap->n, mat->cmap->bs, 1, &iscol, &iscol_new));
1946
1947 if (call == MAT_REUSE_MATRIX) {
1948 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse));
1949 PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
1950 PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_REUSE_MATRIX, &Mreuse));
1951 } else {
1952 PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_INITIAL_MATRIX, &Mreuse));
1953 }
1954 PetscCall(ISDestroy(&isrow_new));
1955 PetscCall(ISDestroy(&iscol_new));
1956 /*
1957 m - number of local rows
1958 n - number of columns (same on all processors)
1959 rstart - first row in new global matrix generated
1960 */
1961 PetscCall(MatGetBlockSize(mat, &bs));
1962 PetscCall(MatGetSize(Mreuse, &m, &n));
1963 m = m / bs;
1964 n = n / bs;
1965
1966 if (call == MAT_INITIAL_MATRIX) {
1967 aij = (Mat_SeqBAIJ *)(Mreuse)->data;
1968 ii = aij->i;
1969 jj = aij->j;
1970
1971 /*
1972 Determine the number of non-zeros in the diagonal and off-diagonal
1973 portions of the matrix in order to do correct preallocation
1974 */
1975
1976 /* first get start and end of "diagonal" columns */
1977 if (csize == PETSC_DECIDE) {
1978 PetscCall(ISGetSize(isrow, &mglobal));
1979 if (mglobal == n * bs) { /* square matrix */
1980 nlocal = m;
1981 } else {
1982 nlocal = n / size + ((n % size) > rank);
1983 }
1984 } else {
1985 nlocal = csize / bs;
1986 }
1987 PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
1988 rstart = rend - nlocal;
1989 PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n);
1990
1991 /* next, compute all the lengths */
1992 PetscCall(PetscMalloc2(m + 1, &dlens, m + 1, &olens));
1993 for (i = 0; i < m; i++) {
1994 jend = ii[i + 1] - ii[i];
1995 olen = 0;
1996 dlen = 0;
1997 for (j = 0; j < jend; j++) {
1998 if (*jj < rstart || *jj >= rend) olen++;
1999 else dlen++;
2000 jj++;
2001 }
2002 olens[i] = olen;
2003 dlens[i] = dlen;
2004 }
2005 PetscCall(MatCreate(comm, &M));
2006 PetscCall(MatSetSizes(M, bs * m, bs * nlocal, PETSC_DECIDE, bs * n));
2007 PetscCall(MatSetType(M, ((PetscObject)mat)->type_name));
2008 PetscCall(MatMPIBAIJSetPreallocation(M, bs, 0, dlens, 0, olens));
2009 PetscCall(MatMPISBAIJSetPreallocation(M, bs, 0, dlens, 0, olens));
2010 PetscCall(PetscFree2(dlens, olens));
2011 } else {
2012 PetscInt ml, nl;
2013
2014 M = *newmat;
2015 PetscCall(MatGetLocalSize(M, &ml, &nl));
2016 PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
2017 PetscCall(MatZeroEntries(M));
2018 /*
2019 The next two lines are needed so we may call MatSetValuesBlocked_MPIBAIJ() below directly,
2020 rather than the slower MatSetValues().
2021 */ 2022 M->was_assembled = PETSC_TRUE; 2023 M->assembled = PETSC_FALSE; 2024 } 2025 PetscCall(MatSetOption(M, MAT_ROW_ORIENTED, PETSC_FALSE)); 2026 PetscCall(MatGetOwnershipRange(M, &rstart, &rend)); 2027 aij = (Mat_SeqBAIJ *)(Mreuse)->data; 2028 ii = aij->i; 2029 jj = aij->j; 2030 aa = aij->a; 2031 for (i = 0; i < m; i++) { 2032 row = rstart / bs + i; 2033 nz = ii[i + 1] - ii[i]; 2034 cwork = jj; 2035 jj += nz; 2036 vwork = aa; 2037 aa += nz * bs * bs; 2038 PetscCall(MatSetValuesBlocked_MPIBAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES)); 2039 } 2040 2041 PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY)); 2042 PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY)); 2043 *newmat = M; 2044 2045 /* save submatrix used in processor for next request */ 2046 if (call == MAT_INITIAL_MATRIX) { 2047 PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse)); 2048 PetscCall(PetscObjectDereference((PetscObject)Mreuse)); 2049 } 2050 PetscFunctionReturn(PETSC_SUCCESS); 2051 } 2052 2053 PetscErrorCode MatPermute_MPIBAIJ(Mat A, IS rowp, IS colp, Mat *B) 2054 { 2055 MPI_Comm comm, pcomm; 2056 PetscInt clocal_size, nrows; 2057 const PetscInt *rows; 2058 PetscMPIInt size; 2059 IS crowp, lcolp; 2060 2061 PetscFunctionBegin; 2062 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 2063 /* make a collective version of 'rowp' */ 2064 PetscCall(PetscObjectGetComm((PetscObject)rowp, &pcomm)); 2065 if (pcomm == comm) { 2066 crowp = rowp; 2067 } else { 2068 PetscCall(ISGetSize(rowp, &nrows)); 2069 PetscCall(ISGetIndices(rowp, &rows)); 2070 PetscCall(ISCreateGeneral(comm, nrows, rows, PETSC_COPY_VALUES, &crowp)); 2071 PetscCall(ISRestoreIndices(rowp, &rows)); 2072 } 2073 PetscCall(ISSetPermutation(crowp)); 2074 /* make a local version of 'colp' */ 2075 PetscCall(PetscObjectGetComm((PetscObject)colp, &pcomm)); 2076 PetscCallMPI(MPI_Comm_size(pcomm, &size)); 2077 if (size == 1) { 2078 lcolp = colp; 2079 } else { 2080 PetscCall(ISAllGather(colp, &lcolp)); 2081 } 2082 PetscCall(ISSetPermutation(lcolp)); 2083 /* now we just get the submatrix */ 2084 PetscCall(MatGetLocalSize(A, NULL, &clocal_size)); 2085 PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(A, crowp, lcolp, clocal_size, MAT_INITIAL_MATRIX, B)); 2086 /* clean up */ 2087 if (pcomm != comm) PetscCall(ISDestroy(&crowp)); 2088 if (size > 1) PetscCall(ISDestroy(&lcolp)); 2089 PetscFunctionReturn(PETSC_SUCCESS); 2090 } 2091 2092 PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[]) 2093 { 2094 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 2095 Mat_SeqBAIJ *B = (Mat_SeqBAIJ *)baij->B->data; 2096 2097 PetscFunctionBegin; 2098 if (nghosts) *nghosts = B->nbs; 2099 if (ghosts) *ghosts = baij->garray; 2100 PetscFunctionReturn(PETSC_SUCCESS); 2101 } 2102 2103 PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A, Mat *newmat) 2104 { 2105 Mat B; 2106 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2107 Mat_SeqBAIJ *ad = (Mat_SeqBAIJ *)a->A->data, *bd = (Mat_SeqBAIJ *)a->B->data; 2108 Mat_SeqAIJ *b; 2109 PetscMPIInt size, rank, *recvcounts = NULL, *displs = NULL; 2110 PetscInt sendcount, i, *rstarts = A->rmap->range, n, cnt, j, bs = A->rmap->bs; 2111 PetscInt m, *garray = a->garray, *lens, *jsendbuf, *a_jsendbuf, *b_jsendbuf; 2112 2113 PetscFunctionBegin; 2114 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 2115 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank)); 2116 2117 /* Tell every processor the number of nonzeros per row */ 2118 PetscCall(PetscMalloc1(A->rmap->N / bs, &lens)); 
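/*
   lens[] has one entry per global block row. Each rank fills only the slice it owns with
   its combined diagonal + off-diagonal block-row lengths; the in-place MPI_Allgatherv()
   below then completes the array on every rank. With MPI_IN_PLACE the send buffer is
   ignored and each rank contributes the portion of lens described by its entry in
   (recvcounts, displs), which is why each slice must already sit at its final offset.
*/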
2119 for (i = A->rmap->rstart / bs; i < A->rmap->rend / bs; i++) lens[i] = ad->i[i - A->rmap->rstart / bs + 1] - ad->i[i - A->rmap->rstart / bs] + bd->i[i - A->rmap->rstart / bs + 1] - bd->i[i - A->rmap->rstart / bs]; 2120 PetscCall(PetscMalloc1(2 * size, &recvcounts)); 2121 displs = recvcounts + size; 2122 for (i = 0; i < size; i++) { 2123 recvcounts[i] = A->rmap->range[i + 1] / bs - A->rmap->range[i] / bs; 2124 displs[i] = A->rmap->range[i] / bs; 2125 } 2126 PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, lens, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A))); 2127 /* Create the sequential matrix of the same type as the local block diagonal */ 2128 PetscCall(MatCreate(PETSC_COMM_SELF, &B)); 2129 PetscCall(MatSetSizes(B, A->rmap->N / bs, A->cmap->N / bs, PETSC_DETERMINE, PETSC_DETERMINE)); 2130 PetscCall(MatSetType(B, MATSEQAIJ)); 2131 PetscCall(MatSeqAIJSetPreallocation(B, 0, lens)); 2132 b = (Mat_SeqAIJ *)B->data; 2133 2134 /* Copy my part of matrix column indices over */ 2135 sendcount = ad->nz + bd->nz; 2136 jsendbuf = b->j + b->i[rstarts[rank] / bs]; 2137 a_jsendbuf = ad->j; 2138 b_jsendbuf = bd->j; 2139 n = A->rmap->rend / bs - A->rmap->rstart / bs; 2140 cnt = 0; 2141 for (i = 0; i < n; i++) { 2142 /* put in lower diagonal portion */ 2143 m = bd->i[i + 1] - bd->i[i]; 2144 while (m > 0) { 2145 /* is it above diagonal (in bd (compressed) numbering) */ 2146 if (garray[*b_jsendbuf] > A->rmap->rstart / bs + i) break; 2147 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2148 m--; 2149 } 2150 2151 /* put in diagonal portion */ 2152 for (j = ad->i[i]; j < ad->i[i + 1]; j++) jsendbuf[cnt++] = A->rmap->rstart / bs + *a_jsendbuf++; 2153 2154 /* put in upper diagonal portion */ 2155 while (m-- > 0) jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2156 } 2157 PetscCheck(cnt == sendcount, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Corrupted PETSc matrix: nz given %" PetscInt_FMT " actual nz %" PetscInt_FMT, sendcount, cnt); 2158 2159 /* Gather all column indices to all processors */ 2160 for (i = 0; i < size; i++) { 2161 recvcounts[i] = 0; 2162 for (j = A->rmap->range[i] / bs; j < A->rmap->range[i + 1] / bs; j++) recvcounts[i] += lens[j]; 2163 } 2164 displs[0] = 0; 2165 for (i = 1; i < size; i++) displs[i] = displs[i - 1] + recvcounts[i - 1]; 2166 PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, b->j, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A))); 2167 /* Assemble the matrix into useable form (note numerical values not yet set) */ 2168 /* set the b->ilen (length of each row) values */ 2169 PetscCall(PetscArraycpy(b->ilen, lens, A->rmap->N / bs)); 2170 /* set the b->i indices */ 2171 b->i[0] = 0; 2172 for (i = 1; i <= A->rmap->N / bs; i++) b->i[i] = b->i[i - 1] + lens[i - 1]; 2173 PetscCall(PetscFree(lens)); 2174 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 2175 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 2176 PetscCall(PetscFree(recvcounts)); 2177 2178 PetscCall(MatPropagateSymmetryOptions(A, B)); 2179 *newmat = B; 2180 PetscFunctionReturn(PETSC_SUCCESS); 2181 } 2182 2183 PetscErrorCode MatSOR_MPIBAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) 2184 { 2185 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data; 2186 Vec bb1 = NULL; 2187 2188 PetscFunctionBegin; 2189 if (flag == SOR_APPLY_UPPER) { 2190 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2191 PetscFunctionReturn(PETSC_SUCCESS); 2192 } 2193 2194 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) 
PetscCall(VecDuplicate(bb, &bb1)); 2195 2196 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2197 if (flag & SOR_ZERO_INITIAL_GUESS) { 2198 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2199 its--; 2200 } 2201 2202 while (its--) { 2203 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2204 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2205 2206 /* update rhs: bb1 = bb - B*x */ 2207 PetscCall(VecScale(mat->lvec, -1.0)); 2208 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 2209 2210 /* local sweep */ 2211 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx)); 2212 } 2213 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 2214 if (flag & SOR_ZERO_INITIAL_GUESS) { 2215 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2216 its--; 2217 } 2218 while (its--) { 2219 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2220 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2221 2222 /* update rhs: bb1 = bb - B*x */ 2223 PetscCall(VecScale(mat->lvec, -1.0)); 2224 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 2225 2226 /* local sweep */ 2227 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx)); 2228 } 2229 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 2230 if (flag & SOR_ZERO_INITIAL_GUESS) { 2231 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2232 its--; 2233 } 2234 while (its--) { 2235 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2236 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2237 2238 /* update rhs: bb1 = bb - B*x */ 2239 PetscCall(VecScale(mat->lvec, -1.0)); 2240 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 2241 2242 /* local sweep */ 2243 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx)); 2244 } 2245 } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel version of SOR requested not supported"); 2246 2247 PetscCall(VecDestroy(&bb1)); 2248 PetscFunctionReturn(PETSC_SUCCESS); 2249 } 2250 2251 PetscErrorCode MatGetColumnReductions_MPIBAIJ(Mat A, PetscInt type, PetscReal *reductions) 2252 { 2253 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)A->data; 2254 PetscInt m, N, i, *garray = aij->garray; 2255 PetscInt ib, jb, bs = A->rmap->bs; 2256 Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ *)aij->A->data; 2257 MatScalar *a_val = a_aij->a; 2258 Mat_SeqBAIJ *b_aij = (Mat_SeqBAIJ *)aij->B->data; 2259 MatScalar *b_val = b_aij->a; 2260 PetscReal *work; 2261 2262 PetscFunctionBegin; 2263 PetscCall(MatGetSize(A, &m, &N)); 2264 PetscCall(PetscCalloc1(N, &work)); 2265 if (type == NORM_2) { 2266 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2267 for (jb = 0; jb < bs; jb++) { 2268 for (ib = 0; ib < bs; ib++) { 2269 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val); 2270 a_val++; 2271 } 2272 } 2273 } 2274 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2275 for (jb = 0; jb < bs; jb++) { 2276 for (ib = 0; ib < bs; ib++) { 2277 work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val); 2278 b_val++; 2279 } 2280 } 2281 } 2282 } else if (type == NORM_1) { 2283 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2284 for (jb = 0; jb 
< bs; jb++) {
2285 for (ib = 0; ib < bs; ib++) {
2286 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val);
2287 a_val++;
2288 }
2289 }
2290 }
2291 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2292 for (jb = 0; jb < bs; jb++) {
2293 for (ib = 0; ib < bs; ib++) {
2294 work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val);
2295 b_val++;
2296 }
2297 }
2298 }
2299 } else if (type == NORM_INFINITY) {
2300 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2301 for (jb = 0; jb < bs; jb++) {
2302 for (ib = 0; ib < bs; ib++) {
2303 PetscInt col = A->cmap->rstart + a_aij->j[i] * bs + jb;
2304 work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]);
2305 a_val++;
2306 }
2307 }
2308 }
2309 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2310 for (jb = 0; jb < bs; jb++) {
2311 for (ib = 0; ib < bs; ib++) {
2312 PetscInt col = garray[b_aij->j[i]] * bs + jb;
2313 work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]);
2314 b_val++;
2315 }
2316 }
2317 }
2318 } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) {
2319 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2320 for (jb = 0; jb < bs; jb++) {
2321 for (ib = 0; ib < bs; ib++) {
2322 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val);
2323 a_val++;
2324 }
2325 }
2326 }
2327 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2328 for (jb = 0; jb < bs; jb++) {
2329 for (ib = 0; ib < bs; ib++) {
2330 work[garray[b_aij->j[i]] * bs + jb] += PetscRealPart(*b_val);
2331 b_val++;
2332 }
2333 }
2334 }
2335 } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) {
2336 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) {
2337 for (jb = 0; jb < bs; jb++) {
2338 for (ib = 0; ib < bs; ib++) {
2339 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val);
2340 a_val++;
2341 }
2342 }
2343 }
2344 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) {
2345 for (jb = 0; jb < bs; jb++) {
2346 for (ib = 0; ib < bs; ib++) {
2347 work[garray[b_aij->j[i]] * bs + jb] += PetscImaginaryPart(*b_val);
2348 b_val++;
2349 }
2350 }
2351 }
2352 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type");
2353 if (type == NORM_INFINITY) {
2354 PetscCall(MPIU_Allreduce(work, reductions, N, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A)));
2355 } else {
2356 PetscCall(MPIU_Allreduce(work, reductions, N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A)));
2357 }
2358 PetscCall(PetscFree(work));
2359 if (type == NORM_2) {
2360 for (i = 0; i < N; i++) reductions[i] = PetscSqrtReal(reductions[i]);
2361 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) {
2362 for (i = 0; i < N; i++) reductions[i] /= m;
2363 }
2364 PetscFunctionReturn(PETSC_SUCCESS);
2365 }
2366
2367 PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A, const PetscScalar **values)
2368 {
2369 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
2370
2371 PetscFunctionBegin;
2372 PetscCall(MatInvertBlockDiagonal(a->A, values));
2373 A->factorerrortype = a->A->factorerrortype;
2374 A->factorerror_zeropivot_value = a->A->factorerror_zeropivot_value;
2375 A->factorerror_zeropivot_row = a->A->factorerror_zeropivot_row;
2376 PetscFunctionReturn(PETSC_SUCCESS);
2377 }
2378
2379 PetscErrorCode MatShift_MPIBAIJ(Mat Y, PetscScalar a)
2380 {
2381 Mat_MPIBAIJ *maij = (Mat_MPIBAIJ *)Y->data;
2382 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)maij->A->data;
2383
2384
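/*
   MatShift adds a*I to Y. If Y has never been preallocated we create a minimal
   preallocation below (one diagonal block per block row); if Y is preallocated but
   currently stores no blocks we re-preallocate the diagonal part, preserving its
   nonew (error-on-new-nonzero) setting before handing off to MatShift_Basic().
*/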
PetscFunctionBegin; 2385 if (!Y->preallocated) { 2386 PetscCall(MatMPIBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL, 0, NULL)); 2387 } else if (!aij->nz) { 2388 PetscInt nonew = aij->nonew; 2389 PetscCall(MatSeqBAIJSetPreallocation(maij->A, Y->rmap->bs, 1, NULL)); 2390 aij->nonew = nonew; 2391 } 2392 PetscCall(MatShift_Basic(Y, a)); 2393 PetscFunctionReturn(PETSC_SUCCESS); 2394 } 2395 2396 PetscErrorCode MatMissingDiagonal_MPIBAIJ(Mat A, PetscBool *missing, PetscInt *d) 2397 { 2398 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2399 2400 PetscFunctionBegin; 2401 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices"); 2402 PetscCall(MatMissingDiagonal(a->A, missing, d)); 2403 if (d) { 2404 PetscInt rstart; 2405 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 2406 *d += rstart / A->rmap->bs; 2407 } 2408 PetscFunctionReturn(PETSC_SUCCESS); 2409 } 2410 2411 PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A, Mat *a) 2412 { 2413 PetscFunctionBegin; 2414 *a = ((Mat_MPIBAIJ *)A->data)->A; 2415 PetscFunctionReturn(PETSC_SUCCESS); 2416 } 2417 2418 static PetscErrorCode MatEliminateZeros_MPIBAIJ(Mat A, PetscBool keep) 2419 { 2420 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2421 2422 PetscFunctionBegin; 2423 PetscCall(MatEliminateZeros_SeqBAIJ(a->A, keep)); // possibly keep zero diagonal coefficients 2424 PetscCall(MatEliminateZeros_SeqBAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients 2425 PetscFunctionReturn(PETSC_SUCCESS); 2426 } 2427 2428 static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ, 2429 MatGetRow_MPIBAIJ, 2430 MatRestoreRow_MPIBAIJ, 2431 MatMult_MPIBAIJ, 2432 /* 4*/ MatMultAdd_MPIBAIJ, 2433 MatMultTranspose_MPIBAIJ, 2434 MatMultTransposeAdd_MPIBAIJ, 2435 NULL, 2436 NULL, 2437 NULL, 2438 /*10*/ NULL, 2439 NULL, 2440 NULL, 2441 MatSOR_MPIBAIJ, 2442 MatTranspose_MPIBAIJ, 2443 /*15*/ MatGetInfo_MPIBAIJ, 2444 MatEqual_MPIBAIJ, 2445 MatGetDiagonal_MPIBAIJ, 2446 MatDiagonalScale_MPIBAIJ, 2447 MatNorm_MPIBAIJ, 2448 /*20*/ MatAssemblyBegin_MPIBAIJ, 2449 MatAssemblyEnd_MPIBAIJ, 2450 MatSetOption_MPIBAIJ, 2451 MatZeroEntries_MPIBAIJ, 2452 /*24*/ MatZeroRows_MPIBAIJ, 2453 NULL, 2454 NULL, 2455 NULL, 2456 NULL, 2457 /*29*/ MatSetUp_MPI_Hash, 2458 NULL, 2459 NULL, 2460 MatGetDiagonalBlock_MPIBAIJ, 2461 NULL, 2462 /*34*/ MatDuplicate_MPIBAIJ, 2463 NULL, 2464 NULL, 2465 NULL, 2466 NULL, 2467 /*39*/ MatAXPY_MPIBAIJ, 2468 MatCreateSubMatrices_MPIBAIJ, 2469 MatIncreaseOverlap_MPIBAIJ, 2470 MatGetValues_MPIBAIJ, 2471 MatCopy_MPIBAIJ, 2472 /*44*/ NULL, 2473 MatScale_MPIBAIJ, 2474 MatShift_MPIBAIJ, 2475 NULL, 2476 MatZeroRowsColumns_MPIBAIJ, 2477 /*49*/ NULL, 2478 NULL, 2479 NULL, 2480 NULL, 2481 NULL, 2482 /*54*/ MatFDColoringCreate_MPIXAIJ, 2483 NULL, 2484 MatSetUnfactored_MPIBAIJ, 2485 MatPermute_MPIBAIJ, 2486 MatSetValuesBlocked_MPIBAIJ, 2487 /*59*/ MatCreateSubMatrix_MPIBAIJ, 2488 MatDestroy_MPIBAIJ, 2489 MatView_MPIBAIJ, 2490 NULL, 2491 NULL, 2492 /*64*/ NULL, 2493 NULL, 2494 NULL, 2495 NULL, 2496 NULL, 2497 /*69*/ MatGetRowMaxAbs_MPIBAIJ, 2498 NULL, 2499 NULL, 2500 NULL, 2501 NULL, 2502 /*74*/ NULL, 2503 MatFDColoringApply_BAIJ, 2504 NULL, 2505 NULL, 2506 NULL, 2507 /*79*/ NULL, 2508 NULL, 2509 NULL, 2510 NULL, 2511 MatLoad_MPIBAIJ, 2512 /*84*/ NULL, 2513 NULL, 2514 NULL, 2515 NULL, 2516 NULL, 2517 /*89*/ NULL, 2518 NULL, 2519 NULL, 2520 NULL, 2521 NULL, 2522 /*94*/ NULL, 2523 NULL, 2524 NULL, 2525 NULL, 2526 NULL, 2527 /*99*/ NULL, 2528 NULL, 2529 NULL, 2530 MatConjugate_MPIBAIJ, 2531 NULL, 2532 /*104*/ NULL, 2533 MatRealPart_MPIBAIJ, 2534 
MatImaginaryPart_MPIBAIJ, 2535 NULL, 2536 NULL, 2537 /*109*/ NULL, 2538 NULL, 2539 NULL, 2540 NULL, 2541 MatMissingDiagonal_MPIBAIJ, 2542 /*114*/ MatGetSeqNonzeroStructure_MPIBAIJ, 2543 NULL, 2544 MatGetGhosts_MPIBAIJ, 2545 NULL, 2546 NULL, 2547 /*119*/ NULL, 2548 NULL, 2549 NULL, 2550 NULL, 2551 MatGetMultiProcBlock_MPIBAIJ, 2552 /*124*/ NULL, 2553 MatGetColumnReductions_MPIBAIJ, 2554 MatInvertBlockDiagonal_MPIBAIJ, 2555 NULL, 2556 NULL, 2557 /*129*/ NULL, 2558 NULL, 2559 NULL, 2560 NULL, 2561 NULL, 2562 /*134*/ NULL, 2563 NULL, 2564 NULL, 2565 NULL, 2566 NULL, 2567 /*139*/ MatSetBlockSizes_Default, 2568 NULL, 2569 NULL, 2570 MatFDColoringSetUp_MPIXAIJ, 2571 NULL, 2572 /*144*/ MatCreateMPIMatConcatenateSeqMat_MPIBAIJ, 2573 NULL, 2574 NULL, 2575 NULL, 2576 NULL, 2577 NULL, 2578 /*150*/ NULL, 2579 MatEliminateZeros_MPIBAIJ}; 2580 2581 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *); 2582 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *); 2583 2584 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[]) 2585 { 2586 PetscInt m, rstart, cstart, cend; 2587 PetscInt i, j, dlen, olen, nz, nz_max = 0, *d_nnz = NULL, *o_nnz = NULL; 2588 const PetscInt *JJ = NULL; 2589 PetscScalar *values = NULL; 2590 PetscBool roworiented = ((Mat_MPIBAIJ *)B->data)->roworiented; 2591 PetscBool nooffprocentries; 2592 2593 PetscFunctionBegin; 2594 PetscCall(PetscLayoutSetBlockSize(B->rmap, bs)); 2595 PetscCall(PetscLayoutSetBlockSize(B->cmap, bs)); 2596 PetscCall(PetscLayoutSetUp(B->rmap)); 2597 PetscCall(PetscLayoutSetUp(B->cmap)); 2598 PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 2599 m = B->rmap->n / bs; 2600 rstart = B->rmap->rstart / bs; 2601 cstart = B->cmap->rstart / bs; 2602 cend = B->cmap->rend / bs; 2603 2604 PetscCheck(!ii[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]); 2605 PetscCall(PetscMalloc2(m, &d_nnz, m, &o_nnz)); 2606 for (i = 0; i < m; i++) { 2607 nz = ii[i + 1] - ii[i]; 2608 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz); 2609 nz_max = PetscMax(nz_max, nz); 2610 dlen = 0; 2611 olen = 0; 2612 JJ = jj + ii[i]; 2613 for (j = 0; j < nz; j++) { 2614 if (*JJ < cstart || *JJ >= cend) olen++; 2615 else dlen++; 2616 JJ++; 2617 } 2618 d_nnz[i] = dlen; 2619 o_nnz[i] = olen; 2620 } 2621 PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, d_nnz, 0, o_nnz)); 2622 PetscCall(PetscFree2(d_nnz, o_nnz)); 2623 2624 values = (PetscScalar *)V; 2625 if (!values) PetscCall(PetscCalloc1(bs * bs * nz_max, &values)); 2626 for (i = 0; i < m; i++) { 2627 PetscInt row = i + rstart; 2628 PetscInt ncols = ii[i + 1] - ii[i]; 2629 const PetscInt *icols = jj + ii[i]; 2630 if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */ 2631 const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0); 2632 PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, ncols, icols, svals, INSERT_VALUES)); 2633 } else { /* block ordering does not match so we can only insert one block at a time. */ 2634 PetscInt j; 2635 for (j = 0; j < ncols; j++) { 2636 const PetscScalar *svals = values + (V ? 
(bs * bs * (ii[i] + j)) : 0);
2637 PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, 1, &icols[j], svals, INSERT_VALUES));
2638 }
2639 }
2640 }
2641
2642 if (!V) PetscCall(PetscFree(values));
2643 nooffprocentries = B->nooffprocentries;
2644 B->nooffprocentries = PETSC_TRUE;
2645 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
2646 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
2647 B->nooffprocentries = nooffprocentries;
2648
2649 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
2650 PetscFunctionReturn(PETSC_SUCCESS);
2651 }
2652
2653 /*@C
2654 MatMPIBAIJSetPreallocationCSR - Creates a sparse parallel matrix in `MATBAIJ` format using the given nonzero structure and (optional) numerical values
2655
2656 Collective
2657
2658 Input Parameters:
2659 + B - the matrix
2660 . bs - the block size
2661 . i - the indices into `j` for the start of each local row (starts with zero)
2662 . j - the column indices for each local row (starts with zero); these must be sorted for each row
2663 - v - optional values in the matrix
2664
2665 Level: advanced
2666
2667 Notes:
2668 The order of the entries in values is specified by the `MatOption` `MAT_ROW_ORIENTED`. For example, C programs
2669 may want to use the default `MAT_ROW_ORIENTED` with value `PETSC_TRUE` and use an array v[nnz][bs][bs] where the second index is
2670 over rows within a block and the last index is over columns within a block row. Fortran programs will likely set
2671 `MAT_ROW_ORIENTED` with value `PETSC_FALSE` and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
2672 block column and the second index is over columns within a block.
2673
2674 Though this routine has Preallocation() in the name, it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well
2675
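   Example Usage:
   A minimal sketch for a rank that owns the first two block rows of a matrix with bs = 2
   (the sizes and indices are illustrative only and error checking is omitted; each rank
   passes the CSR data of its own local block rows):
.vb
   Mat      A;
   PetscInt bs   = 2;
   PetscInt ii[] = {0, 2, 3}; // block row 0 has 2 block nonzeros, block row 1 has 1
   PetscInt jj[] = {0, 1, 1}; // global block column indices, sorted within each row

   MatCreate(PETSC_COMM_WORLD, &A);
   MatSetSizes(A, 2 * bs, 2 * bs, PETSC_DETERMINE, PETSC_DETERMINE);
   MatSetType(A, MATMPIBAIJ);
   MatMPIBAIJSetPreallocationCSR(A, bs, ii, jj, NULL); // pass an array of ii[2]*bs*bs values instead of NULL to also set entries
.ve
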
PETSC_ERR_ARG_OUTOFRANGE, "o_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, o_nnz[i]); 2710 } 2711 2712 b->bs2 = bs * bs; 2713 b->mbs = B->rmap->n / bs; 2714 b->nbs = B->cmap->n / bs; 2715 b->Mbs = B->rmap->N / bs; 2716 b->Nbs = B->cmap->N / bs; 2717 2718 for (i = 0; i <= b->size; i++) b->rangebs[i] = B->rmap->range[i] / bs; 2719 b->rstartbs = B->rmap->rstart / bs; 2720 b->rendbs = B->rmap->rend / bs; 2721 b->cstartbs = B->cmap->rstart / bs; 2722 b->cendbs = B->cmap->rend / bs; 2723 2724 #if defined(PETSC_USE_CTABLE) 2725 PetscCall(PetscHMapIDestroy(&b->colmap)); 2726 #else 2727 PetscCall(PetscFree(b->colmap)); 2728 #endif 2729 PetscCall(PetscFree(b->garray)); 2730 PetscCall(VecDestroy(&b->lvec)); 2731 PetscCall(VecScatterDestroy(&b->Mvctx)); 2732 2733 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 2734 PetscCall(MatDestroy(&b->B)); 2735 PetscCall(MatCreate(PETSC_COMM_SELF, &b->B)); 2736 PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0)); 2737 PetscCall(MatSetType(b->B, MATSEQBAIJ)); 2738 2739 PetscCall(MatDestroy(&b->A)); 2740 PetscCall(MatCreate(PETSC_COMM_SELF, &b->A)); 2741 PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n)); 2742 PetscCall(MatSetType(b->A, MATSEQBAIJ)); 2743 2744 PetscCall(MatSeqBAIJSetPreallocation(b->A, bs, d_nz, d_nnz)); 2745 PetscCall(MatSeqBAIJSetPreallocation(b->B, bs, o_nz, o_nnz)); 2746 B->preallocated = PETSC_TRUE; 2747 B->was_assembled = PETSC_FALSE; 2748 B->assembled = PETSC_FALSE; 2749 PetscFunctionReturn(PETSC_SUCCESS); 2750 } 2751 2752 extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat, Vec); 2753 extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat, PetscReal); 2754 2755 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype, MatReuse reuse, Mat *adj) 2756 { 2757 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 2758 Mat_SeqBAIJ *d = (Mat_SeqBAIJ *)b->A->data, *o = (Mat_SeqBAIJ *)b->B->data; 2759 PetscInt M = B->rmap->n / B->rmap->bs, i, *ii, *jj, cnt, j, k, rstart = B->rmap->rstart / B->rmap->bs; 2760 const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray; 2761 2762 PetscFunctionBegin; 2763 PetscCall(PetscMalloc1(M + 1, &ii)); 2764 ii[0] = 0; 2765 for (i = 0; i < M; i++) { 2766 PetscCheck((id[i + 1] - id[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, id[i], id[i + 1]); 2767 PetscCheck((io[i + 1] - io[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, io[i], io[i + 1]); 2768 ii[i + 1] = ii[i] + id[i + 1] - id[i] + io[i + 1] - io[i]; 2769 /* remove one from count of matrix has diagonal */ 2770 for (j = id[i]; j < id[i + 1]; j++) { 2771 if (jd[j] == i) { 2772 ii[i + 1]--; 2773 break; 2774 } 2775 } 2776 } 2777 PetscCall(PetscMalloc1(ii[M], &jj)); 2778 cnt = 0; 2779 for (i = 0; i < M; i++) { 2780 for (j = io[i]; j < io[i + 1]; j++) { 2781 if (garray[jo[j]] > rstart) break; 2782 jj[cnt++] = garray[jo[j]]; 2783 } 2784 for (k = id[i]; k < id[i + 1]; k++) { 2785 if (jd[k] != i) jj[cnt++] = rstart + jd[k]; 2786 } 2787 for (; j < io[i + 1]; j++) jj[cnt++] = garray[jo[j]]; 2788 } 2789 PetscCall(MatCreateMPIAdj(PetscObjectComm((PetscObject)B), M, B->cmap->N / B->rmap->bs, ii, jj, NULL, adj)); 2790 PetscFunctionReturn(PETSC_SUCCESS); 2791 } 2792 2793 #include <../src/mat/impls/aij/mpi/mpiaij.h> 2794 2795 PETSC_INTERN PetscErrorCode 
MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType, MatReuse, Mat *); 2796 2797 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 2798 { 2799 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2800 Mat_MPIAIJ *b; 2801 Mat B; 2802 2803 PetscFunctionBegin; 2804 PetscCheck(A->assembled, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Matrix must be assembled"); 2805 2806 if (reuse == MAT_REUSE_MATRIX) { 2807 B = *newmat; 2808 } else { 2809 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 2810 PetscCall(MatSetType(B, MATMPIAIJ)); 2811 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 2812 PetscCall(MatSetBlockSizes(B, A->rmap->bs, A->cmap->bs)); 2813 PetscCall(MatSeqAIJSetPreallocation(B, 0, NULL)); 2814 PetscCall(MatMPIAIJSetPreallocation(B, 0, NULL, 0, NULL)); 2815 } 2816 b = (Mat_MPIAIJ *)B->data; 2817 2818 if (reuse == MAT_REUSE_MATRIX) { 2819 PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_REUSE_MATRIX, &b->A)); 2820 PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_REUSE_MATRIX, &b->B)); 2821 } else { 2822 PetscInt *garray = a->garray; 2823 Mat_SeqAIJ *bB; 2824 PetscInt bs, nnz; 2825 PetscCall(MatDestroy(&b->A)); 2826 PetscCall(MatDestroy(&b->B)); 2827 /* just clear out the data structure */ 2828 PetscCall(MatDisAssemble_MPIAIJ(B)); 2829 PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A)); 2830 PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B)); 2831 2832 /* Global numbering for b->B columns */ 2833 bB = (Mat_SeqAIJ *)b->B->data; 2834 bs = A->rmap->bs; 2835 nnz = bB->i[A->rmap->n]; 2836 for (PetscInt k = 0; k < nnz; k++) { 2837 PetscInt bj = bB->j[k] / bs; 2838 PetscInt br = bB->j[k] % bs; 2839 bB->j[k] = garray[bj] * bs + br; 2840 } 2841 } 2842 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 2843 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 2844 2845 if (reuse == MAT_INPLACE_MATRIX) { 2846 PetscCall(MatHeaderReplace(A, &B)); 2847 } else { 2848 *newmat = B; 2849 } 2850 PetscFunctionReturn(PETSC_SUCCESS); 2851 } 2852 2853 /*MC 2854 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 2855 2856 Options Database Keys: 2857 + -mat_type mpibaij - sets the matrix type to `MATMPIBAIJ` during a call to `MatSetFromOptions()` 2858 . -mat_block_size <bs> - set the blocksize used to store the matrix 2859 . -mat_baij_mult_version version - indicate the version of the matrix-vector product to use (0 often indicates using BLAS) 2860 - -mat_use_hash_table <fact> - set hash table factor 2861 2862 Level: beginner 2863 2864 Note: 2865 `MatSetOption(A, MAT_STRUCTURE_ONLY, PETSC_TRUE)` may be called for this matrix type. 
In this case, no
2866 space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored
2867
2868 .seealso: `Mat`, `MATBAIJ`, `MATSEQBAIJ`, `MatCreateBAIJ()`
2869 M*/
2870
2871 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat, MatType, MatReuse, Mat *);
2872
2873 PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B)
2874 {
2875 Mat_MPIBAIJ *b;
2876 PetscBool flg = PETSC_FALSE;
2877
2878 PetscFunctionBegin;
2879 PetscCall(PetscNew(&b));
2880 B->data = (void *)b;
2881 B->ops[0] = MatOps_Values;
2882 B->assembled = PETSC_FALSE;
2883
2884 B->insertmode = NOT_SET_VALUES;
2885 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));
2886 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &b->size));
2887
2888 /* build local table of row and column ownerships */
2889 PetscCall(PetscMalloc1(b->size + 1, &b->rangebs));
2890
2891 /* build cache for off array entries formed */
2892 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));
2893
2894 b->donotstash = PETSC_FALSE;
2895 b->colmap = NULL;
2896 b->garray = NULL;
2897 b->roworiented = PETSC_TRUE;
2898
2899 /* stuff used in block assembly */
2900 b->barray = NULL;
2901
2902 /* stuff used for matrix vector multiply */
2903 b->lvec = NULL;
2904 b->Mvctx = NULL;
2905
2906 /* stuff for MatGetRow() */
2907 b->rowindices = NULL;
2908 b->rowvalues = NULL;
2909 b->getrowactive = PETSC_FALSE;
2910
2911 /* hash table stuff */
2912 b->ht = NULL;
2913 b->hd = NULL;
2914 b->ht_size = 0;
2915 b->ht_flag = PETSC_FALSE;
2916 b->ht_fact = 0;
2917 b->ht_total_ct = 0;
2918 b->ht_insert_ct = 0;
2919
2920 /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
2921 b->ijonly = PETSC_FALSE;
2922
2923 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiadj_C", MatConvert_MPIBAIJ_MPIAdj));
2924 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiaij_C", MatConvert_MPIBAIJ_MPIAIJ));
2925 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpisbaij_C", MatConvert_MPIBAIJ_MPISBAIJ));
2926 #if defined(PETSC_HAVE_HYPRE)
2927 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_hypre_C", MatConvert_AIJ_HYPRE));
2928 #endif
2929 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIBAIJ));
2930 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIBAIJ));
2931 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocation_C", MatMPIBAIJSetPreallocation_MPIBAIJ));
2932 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocationCSR_C", MatMPIBAIJSetPreallocationCSR_MPIBAIJ));
2933 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIBAIJ));
2934 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetHashTableFactor_C", MatSetHashTableFactor_MPIBAIJ));
2935 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_is_C", MatConvert_XAIJ_IS));
2936 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIBAIJ));
2937
2938 PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Options for loading MPIBAIJ matrix 1", "Mat");
2939 PetscCall(PetscOptionsName("-mat_use_hash_table", "Use hash table to save time in constructing matrix", "MatSetOption", &flg));
2940 if (flg) {
2941 PetscReal fact = 1.39;
2942 PetscCall(MatSetOption(B, MAT_USE_HASH_TABLE, PETSC_TRUE));
2943 PetscCall(PetscOptionsReal("-mat_use_hash_table", "Use hash table factor", "MatMPIBAIJSetHashTableFactor", fact, &fact, NULL));
2944 if (fact <= 1.0) fact = 1.39;
2945 PetscCall(MatMPIBAIJSetHashTableFactor(B, fact));
2946 PetscCall(PetscInfo(B, "Hash table factor used %5.2g\n", (double)fact));
2947 }
2948 PetscOptionsEnd();
2949 PetscFunctionReturn(PETSC_SUCCESS);
2950 }
2951
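/*
   The hash-table assembly path parsed above is normally selected from the command line,
   e.g. (options shown are illustrative):

     ./app -mat_type mpibaij -mat_block_size 4 -mat_use_hash_table 1.6

   which is equivalent to calling MatSetOption(A, MAT_USE_HASH_TABLE, PETSC_TRUE) and
   MatMPIBAIJSetHashTableFactor(A, 1.6) from code before assembly begins.
*/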
hash table factor", "MatMPIBAIJSetHashTableFactor", fact, &fact, NULL)); 2944 if (fact <= 1.0) fact = 1.39; 2945 PetscCall(MatMPIBAIJSetHashTableFactor(B, fact)); 2946 PetscCall(PetscInfo(B, "Hash table Factor used %5.2g\n", (double)fact)); 2947 } 2948 PetscOptionsEnd(); 2949 PetscFunctionReturn(PETSC_SUCCESS); 2950 } 2951 2952 // PetscClangLinter pragma disable: -fdoc-section-header-unknown 2953 /*MC 2954 MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 2955 2956 This matrix type is identical to `MATSEQBAIJ` when constructed with a single process communicator, 2957 and `MATMPIBAIJ` otherwise. 2958 2959 Options Database Keys: 2960 . -mat_type baij - sets the matrix type to `MATBAIJ` during a call to `MatSetFromOptions()` 2961 2962 Level: beginner 2963 2964 .seealso: `Mat`, `MatCreateBAIJ()`, `MATSEQBAIJ`, `MATMPIBAIJ`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()` 2965 M*/ 2966 2967 /*@C 2968 MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in `MATMPIBAIJ` format 2969 (block compressed row). 2970 2971 Collective 2972 2973 Input Parameters: 2974 + B - the matrix 2975 . bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row 2976 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()` 2977 . d_nz - number of block nonzeros per block row in diagonal portion of local 2978 submatrix (same for all local rows) 2979 . d_nnz - array containing the number of block nonzeros in the various block rows 2980 of the in diagonal portion of the local (possibly different for each block 2981 row) or `NULL`. If you plan to factor the matrix you must leave room for the diagonal entry and 2982 set it even if it is zero. 2983 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2984 submatrix (same for all local rows). 2985 - o_nnz - array containing the number of nonzeros in the various block rows of the 2986 off-diagonal portion of the local submatrix (possibly different for 2987 each block row) or `NULL`. 2988 2989 If the *_nnz parameter is given then the *_nz parameter is ignored 2990 2991 Options Database Keys: 2992 + -mat_block_size - size of the blocks to use 2993 - -mat_use_hash_table <fact> - set hash table factor 2994 2995 Level: intermediate 2996 2997 Notes: 2998 For good matrix assembly performance 2999 the user should preallocate the matrix storage by setting the parameters 3000 `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). By setting these parameters accurately, 3001 performance can be increased by more than a factor of 50. 3002 3003 If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor 3004 than it must be used on all processors that share the object for that argument. 3005 3006 Storage Information: 3007 For a square global matrix we define each processor's diagonal portion 3008 to be its local rows and the corresponding columns (a square submatrix); 3009 each processor's off-diagonal portion encompasses the remainder of the 3010 local matrix (a rectangular submatrix). 3011 3012 The user can specify preallocated storage for the diagonal part of 3013 the local submatrix with either `d_nz` or `d_nnz` (not both). Set 3014 `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic 3015 memory allocation. 
Likewise, specify preallocated storage for the 3016 off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both). 3017 3018 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3019 the figure below we depict these three local rows and all columns (0-11). 3020 3021 .vb 3022 0 1 2 3 4 5 6 7 8 9 10 11 3023 -------------------------- 3024 row 3 |o o o d d d o o o o o o 3025 row 4 |o o o d d d o o o o o o 3026 row 5 |o o o d d d o o o o o o 3027 -------------------------- 3028 .ve 3029 3030 Thus, any entries in the d locations are stored in the d (diagonal) 3031 submatrix, and any entries in the o locations are stored in the 3032 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3033 stored simply in the `MATSEQBAIJ` format for compressed row storage. 3034 3035 Now `d_nz` should indicate the number of block nonzeros per row in the d matrix, 3036 and `o_nz` should indicate the number of block nonzeros per row in the o matrix. 3037 In general, for PDE problems in which most nonzeros are near the diagonal, 3038 one expects `d_nz` >> `o_nz`. 3039 3040 You can call `MatGetInfo()` to get information on how effective the preallocation was; 3041 for example, the fields `mallocs`, `nz_allocated`, `nz_used`, and `nz_unneeded` report this. 3042 You can also run with the option `-info` and look for messages with the string 3043 malloc in them to see if additional memory allocation was needed. 3044 3045 .seealso: `Mat`, `MATMPIBAIJ`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatMPIBAIJSetPreallocationCSR()`, `PetscSplitOwnership()` 3046 @*/ 3047 PetscErrorCode MatMPIBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[]) 3048 { 3049 PetscFunctionBegin; 3050 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 3051 PetscValidType(B, 1); 3052 PetscValidLogicalCollectiveInt(B, bs, 2); 3053 PetscTryMethod(B, "MatMPIBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, bs, d_nz, d_nnz, o_nz, o_nnz)); 3054 PetscFunctionReturn(PETSC_SUCCESS); 3055 } 3056 3057 // PetscClangLinter pragma disable: -fdoc-section-header-unknown 3058 /*@C 3059 MatCreateBAIJ - Creates a sparse parallel matrix in `MATBAIJ` format 3060 (block compressed row). 3061 3062 Collective 3063 3064 Input Parameters: 3065 + comm - MPI communicator 3066 . bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row 3067 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()` 3068 . m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given) 3069 This value should be the same as the local size used in creating the 3070 y vector for the matrix-vector product y = Ax. 3071 . n - number of local columns (or `PETSC_DECIDE` to have calculated if N is given) 3072 This value should be the same as the local size used in creating the 3073 x vector for the matrix-vector product y = Ax. 3074 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 3075 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 3076 . d_nz - number of nonzero blocks per block row in diagonal portion of local 3077 submatrix (same for all local rows) 3078 . 
d_nnz - array containing the number of nonzero blocks in the various block rows 3079 of the diagonal portion of the local submatrix (possibly different for each block 3080 row) or `NULL`. If you plan to factor the matrix you must leave room for the diagonal entry 3081 and set it even if it is zero. 3082 . o_nz - number of nonzero blocks per block row in the off-diagonal portion of local 3083 submatrix (same for all local rows). 3084 - o_nnz - array containing the number of nonzero blocks in the various block rows of the 3085 off-diagonal portion of the local submatrix (possibly different for 3086 each block row) or `NULL`. 3087 3088 Output Parameter: 3089 . A - the matrix 3090 3091 Options Database Keys: 3092 + -mat_block_size - size of the blocks to use 3093 - -mat_use_hash_table <fact> - set hash table factor 3094 3095 Level: intermediate 3096 3097 Notes: 3098 It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, 3099 MatXXXXSetPreallocation() paradigm instead of this routine directly. 3100 [MatXXXXSetPreallocation() is, for example, `MatSeqBAIJSetPreallocation()`] 3101 3102 For good matrix assembly performance 3103 the user should preallocate the matrix storage by setting the parameters 3104 `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). By setting these parameters accurately, 3105 performance can be increased by more than a factor of 50. 3106 3107 If the *_nnz parameter is given then the *_nz parameter is ignored 3108 3109 A nonzero block is any block that has 1 or more nonzeros in it 3110 3111 The user MUST specify either the local or global matrix dimensions 3112 (possibly both). 3113 3114 If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor 3115 then it must be used on all processors that share the object for that argument. 3116 3117 Storage Information: 3118 For a square global matrix we define each processor's diagonal portion 3119 to be its local rows and the corresponding columns (a square submatrix); 3120 each processor's off-diagonal portion encompasses the remainder of the 3121 local matrix (a rectangular submatrix). 3122 3123 The user can specify preallocated storage for the diagonal part of 3124 the local submatrix with either `d_nz` or `d_nnz` (not both). Set 3125 `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic 3126 memory allocation. Likewise, specify preallocated storage for the 3127 off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both). 3128 3129 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3130 the figure below we depict these three local rows and all columns (0-11). 3131 3132 .vb 3133 0 1 2 3 4 5 6 7 8 9 10 11 3134 -------------------------- 3135 row 3 |o o o d d d o o o o o o 3136 row 4 |o o o d d d o o o o o o 3137 row 5 |o o o d d d o o o o o o 3138 -------------------------- 3139 .ve 3140 3141 Thus, any entries in the d locations are stored in the d (diagonal) 3142 submatrix, and any entries in the o locations are stored in the 3143 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3144 stored simply in the `MATSEQBAIJ` format for compressed row storage. 3145 3146 Now `d_nz` should indicate the number of block nonzeros per row in the d matrix, 3147 and `o_nz` should indicate the number of block nonzeros per row in the o matrix. 3148 In general, for PDE problems in which most nonzeros are near the diagonal, 3149 one expects `d_nz` >> `o_nz`.
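
   Example usage:
   The following is a minimal sketch (the assembly loop and error handling are omitted); the block size
   and the preallocation bounds 3 and 2 are illustrative placeholders, not recommendations.
.vb
   Mat      A;
   PetscInt bs = 2, m = 8, n = 8; /* local sizes in points; must be multiples of bs */

   PetscCall(MatCreateBAIJ(PETSC_COMM_WORLD, bs, m, n, PETSC_DETERMINE, PETSC_DETERMINE, 3, NULL, 2, NULL, &A));
   /* ... insert blocks with MatSetValuesBlocked(), then MatAssemblyBegin()/MatAssemblyEnd() ... */
   PetscCall(MatDestroy(&A));
.ve
   Here 3 and 2 bound the number of nonzero blocks per block row in the diagonal and off-diagonal
   portions; exact per-block-row counts can be supplied through `d_nnz` and `o_nnz` instead.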
3150 3151 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()` 3152 @*/ 3153 PetscErrorCode MatCreateBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A) 3154 { 3155 PetscMPIInt size; 3156 3157 PetscFunctionBegin; 3158 PetscCall(MatCreate(comm, A)); 3159 PetscCall(MatSetSizes(*A, m, n, M, N)); 3160 PetscCallMPI(MPI_Comm_size(comm, &size)); 3161 if (size > 1) { 3162 PetscCall(MatSetType(*A, MATMPIBAIJ)); 3163 PetscCall(MatMPIBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz)); 3164 } else { 3165 PetscCall(MatSetType(*A, MATSEQBAIJ)); 3166 PetscCall(MatSeqBAIJSetPreallocation(*A, bs, d_nz, d_nnz)); 3167 } 3168 PetscFunctionReturn(PETSC_SUCCESS); 3169 } 3170 3171 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat) 3172 { 3173 Mat mat; 3174 Mat_MPIBAIJ *a, *oldmat = (Mat_MPIBAIJ *)matin->data; 3175 PetscInt len = 0; 3176 3177 PetscFunctionBegin; 3178 *newmat = NULL; 3179 PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat)); 3180 PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N)); 3181 PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name)); 3182 3183 mat->factortype = matin->factortype; 3184 mat->preallocated = PETSC_TRUE; 3185 mat->assembled = PETSC_TRUE; 3186 mat->insertmode = NOT_SET_VALUES; 3187 3188 a = (Mat_MPIBAIJ *)mat->data; 3189 mat->rmap->bs = matin->rmap->bs; 3190 a->bs2 = oldmat->bs2; 3191 a->mbs = oldmat->mbs; 3192 a->nbs = oldmat->nbs; 3193 a->Mbs = oldmat->Mbs; 3194 a->Nbs = oldmat->Nbs; 3195 3196 PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap)); 3197 PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap)); 3198 3199 a->size = oldmat->size; 3200 a->rank = oldmat->rank; 3201 a->donotstash = oldmat->donotstash; 3202 a->roworiented = oldmat->roworiented; 3203 a->rowindices = NULL; 3204 a->rowvalues = NULL; 3205 a->getrowactive = PETSC_FALSE; 3206 a->barray = NULL; 3207 a->rstartbs = oldmat->rstartbs; 3208 a->rendbs = oldmat->rendbs; 3209 a->cstartbs = oldmat->cstartbs; 3210 a->cendbs = oldmat->cendbs; 3211 3212 /* hash table stuff */ 3213 a->ht = NULL; 3214 a->hd = NULL; 3215 a->ht_size = 0; 3216 a->ht_flag = oldmat->ht_flag; 3217 a->ht_fact = oldmat->ht_fact; 3218 a->ht_total_ct = 0; 3219 a->ht_insert_ct = 0; 3220 3221 PetscCall(PetscArraycpy(a->rangebs, oldmat->rangebs, a->size + 1)); 3222 if (oldmat->colmap) { 3223 #if defined(PETSC_USE_CTABLE) 3224 PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap)); 3225 #else 3226 PetscCall(PetscMalloc1(a->Nbs, &a->colmap)); 3227 PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, a->Nbs)); 3228 #endif 3229 } else a->colmap = NULL; 3230 3231 if (oldmat->garray && (len = ((Mat_SeqBAIJ *)(oldmat->B->data))->nbs)) { 3232 PetscCall(PetscMalloc1(len, &a->garray)); 3233 PetscCall(PetscArraycpy(a->garray, oldmat->garray, len)); 3234 } else a->garray = NULL; 3235 3236 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin), matin->rmap->bs, &mat->bstash)); 3237 PetscCall(VecDuplicate(oldmat->lvec, &a->lvec)); 3238 PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx)); 3239 3240 PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A)); 3241 PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B)); 3242 PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist)); 3243 *newmat = mat; 3244 
PetscFunctionReturn(PETSC_SUCCESS); 3245 } 3246 3247 /* Used for both MPIBAIJ and MPISBAIJ matrices */ 3248 PetscErrorCode MatLoad_MPIBAIJ_Binary(Mat mat, PetscViewer viewer) 3249 { 3250 PetscInt header[4], M, N, nz, bs, m, n, mbs, nbs, rows, cols, sum, i, j, k; 3251 PetscInt *rowidxs, *colidxs, rs, cs, ce; 3252 PetscScalar *matvals; 3253 3254 PetscFunctionBegin; 3255 PetscCall(PetscViewerSetUp(viewer)); 3256 3257 /* read in matrix header */ 3258 PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT)); 3259 PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file"); 3260 M = header[1]; 3261 N = header[2]; 3262 nz = header[3]; 3263 PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M); 3264 PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N); 3265 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIBAIJ"); 3266 3267 /* set block sizes from the viewer's .info file */ 3268 PetscCall(MatLoad_Binary_BlockSizes(mat, viewer)); 3269 /* set local sizes if not set already */ 3270 if (mat->rmap->n < 0 && M == N) mat->rmap->n = mat->cmap->n; 3271 if (mat->cmap->n < 0 && M == N) mat->cmap->n = mat->rmap->n; 3272 /* set global sizes if not set already */ 3273 if (mat->rmap->N < 0) mat->rmap->N = M; 3274 if (mat->cmap->N < 0) mat->cmap->N = N; 3275 PetscCall(PetscLayoutSetUp(mat->rmap)); 3276 PetscCall(PetscLayoutSetUp(mat->cmap)); 3277 3278 /* check if the matrix sizes are correct */ 3279 PetscCall(MatGetSize(mat, &rows, &cols)); 3280 PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols); 3281 PetscCall(MatGetBlockSize(mat, &bs)); 3282 PetscCall(MatGetLocalSize(mat, &m, &n)); 3283 PetscCall(PetscLayoutGetRange(mat->rmap, &rs, NULL)); 3284 PetscCall(PetscLayoutGetRange(mat->cmap, &cs, &ce)); 3285 mbs = m / bs; 3286 nbs = n / bs; 3287 3288 /* read in row lengths and build row indices */ 3289 PetscCall(PetscMalloc1(m + 1, &rowidxs)); 3290 PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT)); 3291 rowidxs[0] = 0; 3292 for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i]; 3293 PetscCall(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer))); 3294 PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum); 3295 3296 /* read in column indices and matrix values */ 3297 PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals)); 3298 PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 3299 PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 3300 3301 { /* preallocate matrix storage */ 3302 PetscBT bt; /* helper bit set to count diagonal nonzeros */ 3303 PetscHSetI ht; /* helper hash set to count off-diagonal nonzeros */ 3304 PetscBool sbaij, done; 3305 PetscInt *d_nnz, *o_nnz; 3306 3307 PetscCall(PetscBTCreate(nbs, &bt)); 3308 
PetscCall(PetscHSetICreate(&ht)); 3309 PetscCall(PetscCalloc2(mbs, &d_nnz, mbs, &o_nnz)); 3310 PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPISBAIJ, &sbaij)); /* count the distinct diagonal and off-diagonal block columns appearing in each block row */ 3311 for (i = 0; i < mbs; i++) { 3312 PetscCall(PetscBTMemzero(nbs, bt)); 3313 PetscCall(PetscHSetIClear(ht)); 3314 for (k = 0; k < bs; k++) { 3315 PetscInt row = bs * i + k; 3316 for (j = rowidxs[row]; j < rowidxs[row + 1]; j++) { 3317 PetscInt col = colidxs[j]; 3318 if (!sbaij || col >= row) { 3319 if (col >= cs && col < ce) { 3320 if (!PetscBTLookupSet(bt, (col - cs) / bs)) d_nnz[i]++; 3321 } else { 3322 PetscCall(PetscHSetIQueryAdd(ht, col / bs, &done)); 3323 if (done) o_nnz[i]++; 3324 } 3325 } 3326 } 3327 } 3328 } 3329 PetscCall(PetscBTDestroy(&bt)); 3330 PetscCall(PetscHSetIDestroy(&ht)); /* only the preallocation call matching the matrix type takes effect; the other is a no-op */ 3331 PetscCall(MatMPIBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz)); 3332 PetscCall(MatMPISBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz)); 3333 PetscCall(PetscFree2(d_nnz, o_nnz)); 3334 } 3335 3336 /* store matrix values */ 3337 for (i = 0; i < m; i++) { 3338 PetscInt row = rs + i, s = rowidxs[i], e = rowidxs[i + 1]; 3339 PetscCall((*mat->ops->setvalues)(mat, 1, &row, e - s, colidxs + s, matvals + s, INSERT_VALUES)); 3340 } 3341 3342 PetscCall(PetscFree(rowidxs)); 3343 PetscCall(PetscFree2(colidxs, matvals)); 3344 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 3345 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 3346 PetscFunctionReturn(PETSC_SUCCESS); 3347 } 3348 3349 PetscErrorCode MatLoad_MPIBAIJ(Mat mat, PetscViewer viewer) 3350 { 3351 PetscBool isbinary; 3352 3353 PetscFunctionBegin; 3354 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 3355 PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name); 3356 PetscCall(MatLoad_MPIBAIJ_Binary(mat, viewer)); 3357 PetscFunctionReturn(PETSC_SUCCESS); 3358 } 3359 3360 /*@ 3361 MatMPIBAIJSetHashTableFactor - Sets the factor used to compute the size of the matrix's hash table 3362 3363 Input Parameters: 3364 + mat - the matrix 3365 - fact - factor used in sizing the hash table (the -mat_use_hash_table option handling falls back to 1.39 when given a value of 1.0 or less) 3366 3367 Options Database Key: 3368 . 
-mat_use_hash_table <fact> - provide the factor 3369 3370 Level: advanced 3371 3372 .seealso: `Mat`, `MATMPIBAIJ`, `MatSetOption()` 3373 @*/ 3374 PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat, PetscReal fact) 3375 { 3376 PetscFunctionBegin; 3377 PetscTryMethod(mat, "MatSetHashTableFactor_C", (Mat, PetscReal), (mat, fact)); 3378 PetscFunctionReturn(PETSC_SUCCESS); 3379 } 3380 3381 PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat, PetscReal fact) 3382 { 3383 Mat_MPIBAIJ *baij; 3384 3385 PetscFunctionBegin; 3386 baij = (Mat_MPIBAIJ *)mat->data; 3387 baij->ht_fact = fact; 3388 PetscFunctionReturn(PETSC_SUCCESS); 3389 } 3390 3391 PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[]) 3392 { 3393 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 3394 PetscBool flg; 3395 3396 PetscFunctionBegin; 3397 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIBAIJ, &flg)); 3398 PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIBAIJ matrix as input"); 3399 if (Ad) *Ad = a->A; 3400 if (Ao) *Ao = a->B; 3401 if (colmap) *colmap = a->garray; 3402 PetscFunctionReturn(PETSC_SUCCESS); 3403 } 3404 3405 /* 3406 Special version for direct calls from Fortran (to eliminate two function call overheads) 3407 */ 3408 #if defined(PETSC_HAVE_FORTRAN_CAPS) 3409 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED 3410 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 3411 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked 3412 #endif 3413 3414 // PetscClangLinter pragma disable: -fdoc-synopsis-matching-symbol-name 3415 /*@C 3416 MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to `MatSetValuesBlocked()` 3417 3418 Collective 3419 3420 Input Parameters: 3421 + matin - the matrix 3422 . min - number of input rows 3423 . im - input rows 3424 . nin - number of input columns 3425 . in - input columns 3426 . v - numerical values input 3427 - addvin - `INSERT_VALUES` or `ADD_VALUES` 3428 3429 Level: advanced 3430 3431 Developer Notes: 3432 This has a complete copy of `MatSetValuesBlocked_MPIBAIJ()` which is terrible code un-reuse.
3433 3434 .seealso: `Mat`, `MatSetValuesBlocked()` 3435 @*/ 3436 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin, PetscInt *min, const PetscInt im[], PetscInt *nin, const PetscInt in[], const MatScalar v[], InsertMode *addvin) 3437 { 3438 /* convert input arguments to C version */ 3439 Mat mat = *matin; 3440 PetscInt m = *min, n = *nin; 3441 InsertMode addv = *addvin; 3442 3443 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 3444 const MatScalar *value; 3445 MatScalar *barray = baij->barray; 3446 PetscBool roworiented = baij->roworiented; 3447 PetscInt i, j, ii, jj, row, col, rstart = baij->rstartbs; 3448 PetscInt rend = baij->rendbs, cstart = baij->cstartbs, stepval; 3449 PetscInt cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2; 3450 3451 PetscFunctionBegin; 3452 /* tasks normally handled by MatSetValuesBlocked() */ 3453 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 3454 else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values"); 3455 PetscCheck(!mat->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix"); 3456 if (mat->assembled) { 3457 mat->was_assembled = PETSC_TRUE; 3458 mat->assembled = PETSC_FALSE; 3459 } 3460 PetscCall(PetscLogEventBegin(MAT_SetValues, mat, 0, 0, 0)); 3461 3462 if (!barray) { 3463 PetscCall(PetscMalloc1(bs2, &barray)); 3464 baij->barray = barray; 3465 } 3466 3467 if (roworiented) stepval = (n - 1) * bs; 3468 else stepval = (m - 1) * bs; 3469 3470 for (i = 0; i < m; i++) { 3471 if (im[i] < 0) continue; 3472 PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large, row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1); 3473 if (im[i] >= rstart && im[i] < rend) { 3474 row = im[i] - rstart; 3475 for (j = 0; j < n; j++) { 3476 /* If NumCol = 1 then a copy is not required */ 3477 if ((roworiented) && (n == 1)) { 3478 barray = (MatScalar *)v + i * bs2; 3479 } else if ((!roworiented) && (m == 1)) { 3480 barray = (MatScalar *)v + j * bs2; 3481 } else { /* Here a copy is required */ 3482 if (roworiented) { 3483 value = v + i * (stepval + bs) * bs + j * bs; 3484 } else { 3485 value = v + j * (stepval + bs) * bs + i * bs; 3486 } 3487 for (ii = 0; ii < bs; ii++, value += stepval) { 3488 for (jj = 0; jj < bs; jj++) *barray++ = *value++; 3489 } 3490 barray -= bs2; 3491 } 3492 3493 if (in[j] >= cstart && in[j] < cend) { 3494 col = in[j] - cstart; 3495 PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j])); 3496 } else if (in[j] < 0) { 3497 continue; 3498 } else { 3499 PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large, col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1); 3500 if (mat->was_assembled) { 3501 if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 3502 3503 #if defined(PETSC_USE_DEBUG) 3504 #if defined(PETSC_USE_CTABLE) 3505 { 3506 PetscInt data; 3507 PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &data)); 3508 PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap"); 3509 } 3510 #else 3511 PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap"); 3512 #endif 3513 #endif 3514 #if defined(PETSC_USE_CTABLE) 3515 PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col)); 3516 col = (col - 1) / bs; 3517 #else 3518 col = (baij->colmap[in[j]] - 1) / bs; 3519 #endif 3520 if (col < 0 && !((Mat_SeqBAIJ 
*)(baij->A->data))->nonew) { 3521 PetscCall(MatDisAssemble_MPIBAIJ(mat)); 3522 col = in[j]; 3523 } 3524 } else col = in[j]; 3525 PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j])); 3526 } 3527 } 3528 } else { 3529 if (!baij->donotstash) { 3530 if (roworiented) { 3531 PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 3532 } else { 3533 PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 3534 } 3535 } 3536 } 3537 } 3538 3539 /* task normally handled by MatSetValuesBlocked() */ 3540 PetscCall(PetscLogEventEnd(MAT_SetValues, mat, 0, 0, 0)); 3541 PetscFunctionReturn(PETSC_SUCCESS); 3542 } 3543 3544 /*@ 3545 MatCreateMPIBAIJWithArrays - creates a `MATMPIBAIJ` matrix using arrays that contain the local rows in standard block 3546 CSR format. 3547 3548 Collective 3549 3550 Input Parameters: 3551 + comm - MPI communicator 3552 . bs - the block size, only a block size of 1 is supported 3553 . m - number of local rows (Cannot be `PETSC_DECIDE`) 3554 . n - number of local columns (or `PETSC_DECIDE` to have 3555 calculated if N is given). This value should be the same as the local size used in creating the 3556 x vector for the matrix-vector product y = Ax. For square matrices n is almost always m. 3557 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 3558 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 3559 . i - row indices; that is, i[0] = 0 and i[row] = i[row-1] + the number of nonzero blocks in block row row-1 of the matrix 3560 . j - column indices 3561 - a - matrix values 3562 3563 Output Parameter: 3564 . mat - the matrix 3565 3566 Level: intermediate 3567 3568 Notes: 3569 The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc; 3570 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3571 called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays. 3572 3573 The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 3574 the same as a three-dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 3575 block, followed by the second column of the first block, and so on. That is, the blocks are contiguous in memory 3576 with column-major ordering within blocks. 3577 3578 The `i` and `j` indices are 0-based, and the entries of `i` are offsets into the local `j` array.
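
   Example usage:
   A minimal sketch with illustrative values; with bs = 1 (the only supported block size) each block is a
   single entry, so the arrays are ordinary CSR data.
.vb
   PetscInt    i[] = {0, 2, 3};        /* local row 0 holds 2 entries, local row 1 holds 1 entry */
   PetscInt    j[] = {0, 2, 1};        /* global column indices */
   PetscScalar a[] = {1.0, 2.0, 3.0};  /* one bs*bs block of values per entry of j */
   Mat         A;

   PetscCall(MatCreateMPIBAIJWithArrays(PETSC_COMM_WORLD, 1, 2, PETSC_DECIDE, PETSC_DETERMINE, 3, i, j, a, &A));
.ve
   This contributes the two local rows [1 0 2] and [0 3 0] on each rank; since the arrays are copied,
   they may be freed or reused once the routine returns.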
3579 3580 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 3581 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()` 3582 @*/ 3583 PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat) 3584 { 3585 PetscFunctionBegin; 3586 PetscCheck(!i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 3587 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3588 PetscCall(MatCreate(comm, mat)); 3589 PetscCall(MatSetSizes(*mat, m, n, M, N)); 3590 PetscCall(MatSetType(*mat, MATMPIBAIJ)); 3591 PetscCall(MatSetBlockSize(*mat, bs)); 3592 PetscCall(MatSetUp(*mat)); 3593 PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_FALSE)); 3594 PetscCall(MatMPIBAIJSetPreallocationCSR(*mat, bs, i, j, a)); 3595 PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_TRUE)); 3596 PetscFunctionReturn(PETSC_SUCCESS); 3597 } 3598 3599 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 3600 { 3601 PetscInt m, N, i, rstart, nnz, Ii, bs, cbs; 3602 PetscInt *indx; 3603 PetscScalar *values; 3604 3605 PetscFunctionBegin; 3606 PetscCall(MatGetSize(inmat, &m, &N)); 3607 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 3608 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)inmat->data; 3609 PetscInt *dnz, *onz, mbs, Nbs, nbs; 3610 PetscInt *bindx, rmax = a->rmax, j; 3611 PetscMPIInt rank, size; 3612 3613 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 3614 mbs = m / bs; 3615 Nbs = N / cbs; 3616 if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnershipBlock(comm, cbs, &n, &N)); 3617 nbs = n / cbs; 3618 3619 PetscCall(PetscMalloc1(rmax, &bindx)); 3620 MatPreallocateBegin(comm, mbs, nbs, dnz, onz); /* inline function, output __end and __rstart are used below */ 3621 3622 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 3623 PetscCallMPI(MPI_Comm_size(comm, &size)); 3624 if (rank == size - 1) { 3625 /* Check sum(nbs) = Nbs */ 3626 PetscCheck(__end == Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT, __end, Nbs); 3627 } 3628 3629 rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */ 3630 for (i = 0; i < mbs; i++) { 3631 PetscCall(MatGetRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL)); /* non-blocked nnz and indx */ 3632 nnz = nnz / bs; 3633 for (j = 0; j < nnz; j++) bindx[j] = indx[j * bs] / bs; 3634 PetscCall(MatPreallocateSet(i + rstart, nnz, bindx, dnz, onz)); 3635 PetscCall(MatRestoreRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL)); 3636 } 3637 PetscCall(PetscFree(bindx)); 3638 3639 PetscCall(MatCreate(comm, outmat)); 3640 PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 3641 PetscCall(MatSetBlockSizes(*outmat, bs, cbs)); 3642 PetscCall(MatSetType(*outmat, MATBAIJ)); /* only the preallocation call matching the actual type (seq on one process, MPI otherwise) takes effect */ 3643 PetscCall(MatSeqBAIJSetPreallocation(*outmat, bs, 0, dnz)); 3644 PetscCall(MatMPIBAIJSetPreallocation(*outmat, bs, 0, dnz, 0, onz)); 3645 MatPreallocateEnd(dnz, onz); 3646 PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 3647 } 3648 3649 /* numeric phase */ 3650 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 3651 PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL)); 3652 3653 /* copy each row of the sequential matrix into the corresponding global row of the parallel matrix */ for (i = 0; i < m; i++) { 3654 
PetscCall(MatGetRow_SeqBAIJ(inmat, i, &nnz, &indx, &values)); 3655 Ii = i + rstart; 3656 PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES)); 3657 PetscCall(MatRestoreRow_SeqBAIJ(inmat, i, &nnz, &indx, &values)); 3658 } 3659 PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY)); 3660 PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY)); 3661 PetscFunctionReturn(PETSC_SUCCESS); 3662 } 3663