1 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/ 2 3 #include <petsc/private/hashseti.h> 4 #include <petscblaslapack.h> 5 #include <petscsf.h> 6 7 static PetscErrorCode MatDestroy_MPIBAIJ(Mat mat) 8 { 9 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 10 11 PetscFunctionBegin; 12 PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N)); 13 PetscCall(MatStashDestroy_Private(&mat->stash)); 14 PetscCall(MatStashDestroy_Private(&mat->bstash)); 15 PetscCall(MatDestroy(&baij->A)); 16 PetscCall(MatDestroy(&baij->B)); 17 #if defined(PETSC_USE_CTABLE) 18 PetscCall(PetscHMapIDestroy(&baij->colmap)); 19 #else 20 PetscCall(PetscFree(baij->colmap)); 21 #endif 22 PetscCall(PetscFree(baij->garray)); 23 PetscCall(VecDestroy(&baij->lvec)); 24 PetscCall(VecScatterDestroy(&baij->Mvctx)); 25 PetscCall(PetscFree2(baij->rowvalues, baij->rowindices)); 26 PetscCall(PetscFree(baij->barray)); 27 PetscCall(PetscFree2(baij->hd, baij->ht)); 28 PetscCall(PetscFree(baij->rangebs)); 29 PetscCall(PetscFree(mat->data)); 30 31 PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL)); 32 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL)); 33 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL)); 34 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocation_C", NULL)); 35 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocationCSR_C", NULL)); 36 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL)); 37 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetHashTableFactor_C", NULL)); 38 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpisbaij_C", NULL)); 39 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiadj_C", NULL)); 40 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiaij_C", 
NULL)); 41 #if defined(PETSC_HAVE_HYPRE) 42 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_hypre_C", NULL)); 43 #endif 44 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_is_C", NULL)); 45 PetscFunctionReturn(PETSC_SUCCESS); 46 } 47 48 /* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and MatAssemblyEnd_MPI_Hash() */ 49 #define TYPE BAIJ 50 #include "../src/mat/impls/aij/mpi/mpihashmat.h" 51 #undef TYPE 52 53 #if defined(PETSC_HAVE_HYPRE) 54 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *); 55 #endif 56 57 static PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A, Vec v, PetscInt idx[]) 58 { 59 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 60 PetscInt i, *idxb = NULL, m = A->rmap->n, bs = A->cmap->bs; 61 PetscScalar *va, *vv; 62 Vec vB, vA; 63 const PetscScalar *vb; 64 65 PetscFunctionBegin; 66 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vA)); 67 PetscCall(MatGetRowMaxAbs(a->A, vA, idx)); 68 69 PetscCall(VecGetArrayWrite(vA, &va)); 70 if (idx) { 71 for (i = 0; i < m; i++) { 72 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 73 } 74 } 75 76 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vB)); 77 PetscCall(PetscMalloc1(m, &idxb)); 78 PetscCall(MatGetRowMaxAbs(a->B, vB, idxb)); 79 80 PetscCall(VecGetArrayWrite(v, &vv)); 81 PetscCall(VecGetArrayRead(vB, &vb)); 82 for (i = 0; i < m; i++) { 83 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 84 vv[i] = vb[i]; 85 if (idx) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs); 86 } else { 87 vv[i] = va[i]; 88 if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > bs * a->garray[idxb[i] / bs] + (idxb[i] % bs)) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs); 89 } 90 } 91 PetscCall(VecRestoreArrayWrite(vA, &vv)); 92 PetscCall(VecRestoreArrayWrite(vA, &va)); 93 PetscCall(VecRestoreArrayRead(vB, &vb)); 94 PetscCall(PetscFree(idxb)); 95 PetscCall(VecDestroy(&vA)); 96 PetscCall(VecDestroy(&vB)); 
97 PetscFunctionReturn(PETSC_SUCCESS); 98 } 99 100 static PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat) 101 { 102 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 103 104 PetscFunctionBegin; 105 PetscCall(MatStoreValues(aij->A)); 106 PetscCall(MatStoreValues(aij->B)); 107 PetscFunctionReturn(PETSC_SUCCESS); 108 } 109 110 static PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat) 111 { 112 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 113 114 PetscFunctionBegin; 115 PetscCall(MatRetrieveValues(aij->A)); 116 PetscCall(MatRetrieveValues(aij->B)); 117 PetscFunctionReturn(PETSC_SUCCESS); 118 } 119 120 /* 121 Local utility routine that creates a mapping from the global column 122 number to the local number in the off-diagonal part of the local 123 storage of the matrix. This is done in a non scalable way since the 124 length of colmap equals the global matrix length. 125 */ 126 PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat) 127 { 128 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 129 Mat_SeqBAIJ *B = (Mat_SeqBAIJ *)baij->B->data; 130 PetscInt nbs = B->nbs, i, bs = mat->rmap->bs; 131 132 PetscFunctionBegin; 133 #if defined(PETSC_USE_CTABLE) 134 PetscCall(PetscHMapICreateWithSize(baij->nbs, &baij->colmap)); 135 for (i = 0; i < nbs; i++) PetscCall(PetscHMapISet(baij->colmap, baij->garray[i] + 1, i * bs + 1)); 136 #else 137 PetscCall(PetscCalloc1(baij->Nbs + 1, &baij->colmap)); 138 for (i = 0; i < nbs; i++) baij->colmap[baij->garray[i]] = i * bs + 1; 139 #endif 140 PetscFunctionReturn(PETSC_SUCCESS); 141 } 142 143 #define MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, orow, ocol) \ 144 do { \ 145 brow = row / bs; \ 146 rp = PetscSafePointerPlusOffset(aj, ai[brow]); \ 147 ap = PetscSafePointerPlusOffset(aa, bs2 * ai[brow]); \ 148 rmax = aimax[brow]; \ 149 nrow = ailen[brow]; \ 150 bcol = col / bs; \ 151 ridx = row % bs; \ 152 cidx = col % bs; \ 153 low = 0; \ 154 high = nrow; \ 155 while (high - low > 3) { \ 156 t = (low + high) / 2; \ 157 if (rp[t] > bcol) high = t; 
/* Insert a single (point) value into the off-diagonal block B; mirror of
   MatSetValues_SeqBAIJ_A_Private() operating on B's CSR arrays */
#define MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, orow, ocol) \
  do { \
    brow = row / bs; \
    rp   = PetscSafePointerPlusOffset(bj, bi[brow]); \
    ap   = PetscSafePointerPlusOffset(ba, bs2 * bi[brow]); \
    rmax = bimax[brow]; \
    nrow = bilen[brow]; \
    bcol = col / bs; \
    ridx = row % bs; \
    cidx = col % bs; \
    low  = 0; \
    high = nrow; \
    while (high - low > 3) { \
      t = (low + high) / 2; \
      if (rp[t] > bcol) high = t; \
      else low = t; \
    } \
    for (_i = low; _i < high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2 * _i + bs * cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else *bap = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    PetscCheck(b->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B, b->mbs, bs2, nrow, brow, bcol, rmax, ba, bi, bj, rp, ap, bimax, b->nonew, MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
    PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
    rp[_i]                          = bcol; \
    ap[bs2 * _i + bs * cidx + ridx] = value; \
  b_noinsert:; \
    bilen[brow] = nrow; \
  } while (0)

/*
  Point-wise MatSetValues() for MPIBAIJ. Locally owned entries are routed to
  the diagonal block A or the off-diagonal block B (via the macros above);
  off-process entries are placed in the stash for communication at assembly.
*/
PetscErrorCode MatSetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIBAIJ *baij        = (Mat_MPIBAIJ *)mat->data;
  MatScalar    value;
  PetscBool    roworiented = baij->roworiented;
  PetscInt     i, j, row, col;
  PetscInt     rstart_orig = mat->rmap->rstart;
  PetscInt     rend_orig = mat->rmap->rend, cstart_orig = mat->cmap->rstart;
  PetscInt     cend_orig = mat->cmap->rend, bs = mat->rmap->bs;

  /* Some Variables required in the macro */
  Mat          A     = baij->A;
  Mat_SeqBAIJ *a     = (Mat_SeqBAIJ *)(A)->data;
  PetscInt    *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
  MatScalar   *aa    = a->a;

  Mat          B     = baij->B;
  Mat_SeqBAIJ *b     = (Mat_SeqBAIJ *)(B)->data;
  PetscInt    *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j;
  MatScalar   *ba    = b->a;

  PetscInt  *rp, ii, nrow, _i, rmax, N, brow, bcol;
  PetscInt   low, high, t, ridx, cidx, bs2 = a->bs2;
  MatScalar *ap, *bap;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue; /* negative rows are silently skipped */
    PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      /* locally owned row */
      row = im[i] - rstart_orig;
      for (j = 0; j < n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig) {
          /* column lies in the diagonal block */
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, im[i], in[j]);
        } else if (in[j] < 0) {
          continue;
        } else {
          PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          if (mat->was_assembled) {
            /* translate the global column to B's compressed local numbering */
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] / bs + 1, 0, &col));
            col = col - 1;
#else
            col = baij->colmap[in[j] / bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ *)(baij->B->data))->nonew) {
              /* new off-diagonal block column: fall back to global numbering */
              PetscCall(MatDisAssemble_MPIBAIJ(mat));
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B     = baij->B;
              b     = (Mat_SeqBAIJ *)(B)->data;
              bimax = b->imax;
              bi    = b->i;
              bilen = b->ilen;
              bj    = b->j;
              ba    = b->a;
            } else {
              PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
              col += in[j] % bs;
            }
          } else col = in[j];
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, im[i], in[j]);
          /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */
        }
      }
    } else {
      /* off-process row: stash for assembly-time communication */
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE));
        }
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Inserts one bs-by-bs block into a SeqBAIJ matrix at block position
  (row, col); honors both insertion modes and both value orientations,
  reallocating storage when a new block must be created.
  (orow, ocol) are the original global block indices, used only in errors.
*/
static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol)
{
  Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
  PetscInt          *rp, low, high, t, ii, jj, nrow, i, rmax, N;
  PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
  PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs;
  PetscBool          roworiented = a->roworiented;
  const PetscScalar *value = v;
  MatScalar         *ap, *aa = a->a, *bap;

  PetscFunctionBegin;
  rp    = aj + ai[row];
  ap    = aa + bs2 * ai[row];
  rmax  = imax[row];
  nrow  = ailen[row];
  value = v;
  low   = 0;
  high  = nrow;
  /* binary search narrows the range; final locate is a linear scan */
  while (high - low > 7) {
    t = (low + high) / 2;
    if (rp[t] > col) high = t;
    else low = t;
  }
  for (i = low; i < high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) {
      /* block exists: accumulate or overwrite in column-major block storage */
      bap = ap + bs2 * i;
      if (roworiented) {
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
          }
        } else {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
          }
        }
      } else {
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++, value += bs) {
            for (jj = 0; jj < bs; jj++) bap[jj] += value[jj];
            bap += bs;
          }
        } else {
          for (ii = 0; ii < bs; ii++, value += bs) {
            for (jj = 0; jj < bs; jj++) bap[jj] = value[jj];
            bap += bs;
          }
        }
      }
      goto noinsert2;
    }
  }
  if (nonew == 1) goto noinsert2; /* new blocks silently ignored */
  PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
  N = nrow++ - 1;
  high++;
  /* shift up all the later entries in this row */
  PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
  PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
  rp[i] = col;
  bap   = ap + bs2 * i;
  if (roworiented) {
    for (ii = 0; ii < bs; ii++) {
      for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
    }
  } else {
    for (ii = 0; ii < bs; ii++) {
      for (jj = 0; jj < bs; jj++) *bap++ = *value++;
    }
  }
noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(PETSC_SUCCESS);
}
/*
   This routine should be optimized so that the block copy at ** Here a copy is required ** below is not needed
   by passing additional stride information into the MatSetValuesBlocked_SeqBAIJ_Inlined() routine
*/
static PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIBAIJ       *baij        = (Mat_MPIBAIJ *)mat->data;
  const PetscScalar *value;
  MatScalar         *barray      = baij->barray;
  PetscBool          roworiented = baij->roworiented;
  PetscInt           i, j, ii, jj, row, col, rstart = baij->rstartbs;
  PetscInt           rend = baij->rendbs, cstart = baij->cstartbs, stepval;
  PetscInt           cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;

  PetscFunctionBegin;
  /* lazily allocated bs x bs staging buffer for repacking one block */
  if (!barray) {
    PetscCall(PetscMalloc1(bs2, &barray));
    baij->barray = barray;
  }

  /* stride (in scalars) between consecutive block rows of v */
  if (roworiented) stepval = (n - 1) * bs;
  else stepval = (m - 1) * bs;

  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue; /* negative block rows are skipped */
    PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j = 0; j < n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar *)v + i * bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar *)v + j * bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + (i * (stepval + bs) + j) * bs;
          } else {
            value = v + (j * (stepval + bs) + i) * bs;
          }
          for (ii = 0; ii < bs; ii++, value += bs + stepval) {
            for (jj = 0; jj < bs; jj++) barray[jj] = value[jj];
            barray += bs;
          }
          barray -= bs2; /* rewind to the start of the staged block */
        }

        if (in[j] >= cstart && in[j] < cend) {
          /* block column in the diagonal part */
          col = in[j] - cstart;
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
        } else if (in[j] < 0) {
          continue;
        } else {
          PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
          if (mat->was_assembled) {
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));

            /* colmap stores point (not block) indices shifted by one; verify alignment in debug mode */
#if defined(PETSC_USE_DEBUG)
  #if defined(PETSC_USE_CTABLE)
            {
              PetscInt data;
              PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &data));
              PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
            }
  #else
            PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
  #endif
#endif
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col));
            col = (col - 1) / bs;
#else
            col = (baij->colmap[in[j]] - 1) / bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ *)(baij->B->data))->nonew) {
              PetscCall(MatDisAssemble_MPIBAIJ(mat));
              col = in[j];
            } else PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new blocked indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
          } else col = in[j];
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
        }
      }
    } else {
      /* off-process block row: stash for assembly */
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        } else {
          PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        }
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Multiplicative (golden-ratio) hash used to locate existing blocks once the
   matrix structure is frozen and the hash table has been built */
#define HASH_KEY 0.6180339887
#define HASH(size, key, tmp) (tmp = (key)*HASH_KEY, (PetscInt)((size) * (tmp - (PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/*
  Point-wise MatSetValues() used after the hash table has been created:
  only existing nonzero locations may be set; lookups use linear probing
  with wrap-around, and probe counts are tallied in debug builds.
*/
static PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIBAIJ *baij        = (Mat_MPIBAIJ *)mat->data;
  PetscBool    roworiented = baij->roworiented;
  PetscInt     i, j, row, col;
  PetscInt     rstart_orig = mat->rmap->rstart;
  PetscInt     rend_orig = mat->rmap->rend, Nbs = baij->Nbs;
  PetscInt     h1, key, size = baij->ht_size, bs = mat->rmap->bs, *HT = baij->ht, idx;
  PetscReal    tmp;
  MatScalar  **HD = baij->hd, value;
  PetscInt     total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (PetscDefined(USE_DEBUG)) {
      PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row");
      PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    }
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j = 0; j < n; j++) {
        col = in[j];
        if (roworiented) value = v[i * n + j];
        else value = v[i + j * m];
        /* Look up into the Hash Table; keys are 1-based so 0 means empty */
        key = (row / bs) * Nbs + (col / bs) + 1;
        h1  = HASH(size, key, tmp);

        idx = h1;
        if (PetscDefined(USE_DEBUG)) {
          insert_ct++;
          total_ct++;
          if (HT[idx] != key) {
            /* linear probe forward, then wrap to the beginning */
            for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++)
              ;
            if (idx == size) {
              for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++)
                ;
              PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
            }
          }
        } else if (HT[idx] != key) {
          for (idx = h1; (idx < size) && (HT[idx] != key); idx++)
            ;
          if (idx == size) {
            for (idx = 0; (idx < h1) && (HT[idx] != key); idx++)
              ;
            PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
          }
        }
        /* A HASH table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx] + (col % bs) * bs + (row % bs)) += value;
        else *(HD[idx] + (col % bs) * bs + (row % bs)) = value;
      }
    } else if (!baij->donotstash) {
      if (roworiented) {
        PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE));
      } else {
        PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE));
      }
    }
  }
  if (PetscDefined(USE_DEBUG)) {
    baij->ht_total_ct += total_ct;
    baij->ht_insert_ct += insert_ct;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}
/*
  Block-wise MatSetValues() used after the hash table has been created:
  only existing nonzero blocks may be set (see MatSetValues_MPIBAIJ_HT()).
*/
static PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIBAIJ       *baij        = (Mat_MPIBAIJ *)mat->data;
  PetscBool          roworiented = baij->roworiented;
  PetscInt           i, j, ii, jj, row, col;
  PetscInt           rstart = baij->rstartbs;
  /* NOTE(review): rstart is a BLOCK index (rstartbs) but rend is the POINT
     index mat->rmap->rend; since rendbs <= rend the ownership test below may
     accept off-process block rows — confirm against baij->rendbs */
  PetscInt           rend = mat->rmap->rend, stepval, bs = mat->rmap->bs, bs2 = baij->bs2, nbs2 = n * bs2;
  PetscInt           h1, key, size = baij->ht_size, idx, *HT = baij->ht, Nbs = baij->Nbs;
  PetscReal          tmp;
  MatScalar        **HD = baij->hd, *baij_a;
  const PetscScalar *v_t, *value;
  PetscInt           total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct;

  PetscFunctionBegin;
  if (roworiented) stepval = (n - 1) * bs;
  else stepval = (m - 1) * bs;

  for (i = 0; i < m; i++) {
    if (PetscDefined(USE_DEBUG)) {
      PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row: %" PetscInt_FMT, im[i]);
      PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
    }
    row = im[i];
    v_t = v + i * nbs2;
    if (row >= rstart && row < rend) {
      for (j = 0; j < n; j++) {
        col = in[j];

        /* Look up into the Hash Table; keys are 1-based block coordinates */
        key = row * Nbs + col + 1;
        h1  = HASH(size, key, tmp);

        idx = h1;
        if (PetscDefined(USE_DEBUG)) {
          total_ct++;
          insert_ct++;
          if (HT[idx] != key) {
            /* linear probe forward, then wrap around */
            for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++)
              ;
            if (idx == size) {
              for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++)
                ;
              PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
            }
          }
        } else if (HT[idx] != key) {
          for (idx = h1; (idx < size) && (HT[idx] != key); idx++)
            ;
          if (idx == size) {
            for (idx = 0; (idx < h1) && (HT[idx] != key); idx++)
              ;
            PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
          }
        }
        baij_a = HD[idx];
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t += bs;
          if (addv == ADD_VALUES) {
            for (ii = 0; ii < bs; ii++, value += stepval) {
              for (jj = ii; jj < bs2; jj += bs) baij_a[jj] += *value++;
            }
          } else {
            for (ii = 0; ii < bs; ii++, value += stepval) {
              for (jj = ii; jj < bs2; jj += bs) baij_a[jj] = *value++;
            }
          }
        } else {
          value = v + j * (stepval + bs) * bs + i * bs;
          if (addv == ADD_VALUES) {
            for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) {
              for (jj = 0; jj < bs; jj++) baij_a[jj] += *value++;
            }
          } else {
            for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) {
              for (jj = 0; jj < bs; jj++) baij_a[jj] = *value++;
            }
          }
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        } else {
          PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        }
      }
    }
  }
  if (PetscDefined(USE_DEBUG)) {
    baij->ht_total_ct += total_ct;
    baij->ht_insert_ct += insert_ct;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Retrieves matrix entries at the given global indices; only locally owned
  rows are supported. Entries in unowned/absent off-diagonal positions
  return 0.0.
*/
static PetscErrorCode MatGetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  PetscInt     bs = mat->rmap->bs, i, j, bsrstart = mat->rmap->rstart, bsrend = mat->rmap->rend;
  PetscInt     bscstart = mat->cmap->rstart, bscend = mat->cmap->rend, row, col, data;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j = 0; j < n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          /* diagonal block */
          col = idxn[j] - bscstart;
          PetscCall(MatGetValues_SeqBAIJ(baij->A, 1, &row, 1, &col, v + i * n + j));
        } else {
          /* off-diagonal block: translate via colmap (entries are 1-shifted) */
          if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
          PetscCall(PetscHMapIGetWithDefault(baij->colmap, idxn[j] / bs + 1, 0, &data));
          data--;
#else
          data = baij->colmap[idxn[j] / bs] - 1;
#endif
          if ((data < 0) || (baij->garray[data / bs] != idxn[j] / bs)) *(v + i * n + j) = 0.0;
          else {
            col = data + idxn[j] % bs;
            PetscCall(MatGetValues_SeqBAIJ(baij->B, 1, &row, 1, &col, v + i * n + j));
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Computes the Frobenius, 1- (max column sum) or infinity- (max row sum)
  norm of the parallel matrix by combining contributions from A and B.
*/
static PetscErrorCode MatNorm_MPIBAIJ(Mat mat, NormType type, PetscReal *nrm)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ *amat = (Mat_SeqBAIJ *)baij->A->data, *bmat = (Mat_SeqBAIJ *)baij->B->data;
  PetscInt     i, j, bs2 = baij->bs2, bs = baij->A->rmap->bs, nz, row, col;
  PetscReal    sum = 0.0;
  MatScalar   *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    /* single process: defer entirely to the sequential implementation */
    PetscCall(MatNorm(baij->A, type, nrm));
  } else {
    if (type == NORM_FROBENIUS) {
      v  = amat->a;
      nz = amat->nz * bs2;
      for (i = 0; i < nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      v  = bmat->a;
      nz = bmat->nz * bs2;
      for (i = 0; i < nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      *nrm = PetscSqrtReal(*nrm);
    } else if (type == NORM_1) { /* max column sum */
      PetscReal *tmp, *tmp2;
      /* NOTE(review): cstart is taken from rstartbs, not cstartbs — correct
         only when the row and column block layouts coincide; verify */
      PetscInt *jj, *garray = baij->garray, cstart = baij->rstartbs;
      PetscCall(PetscCalloc1(mat->cmap->N, &tmp));
      PetscCall(PetscMalloc1(mat->cmap->N, &tmp2));
      v  = amat->a;
      jj = amat->j;
      for (i = 0; i < amat->nz; i++) {
        for (j = 0; j < bs; j++) {
          col = bs * (cstart + *jj) + j; /* column index */
          for (row = 0; row < bs; row++) {
            tmp[col] += PetscAbsScalar(*v);
            v++;
          }
        }
        jj++;
      }
      v  = bmat->a;
      jj = bmat->j;
      for (i = 0; i < bmat->nz; i++) {
        for (j = 0; j < bs; j++) {
          col = bs * garray[*jj] + j;
          for (row = 0; row < bs; row++) {
            tmp[col] += PetscAbsScalar(*v);
            v++;
          }
        }
        jj++;
      }
      PetscCall(MPIU_Allreduce(tmp, tmp2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      *nrm = 0.0;
      for (j = 0; j < mat->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      PetscCall(PetscFree(tmp));
      PetscCall(PetscFree(tmp2));
    } else if (type == NORM_INFINITY) { /* max row sum */
      PetscReal *sums;
      PetscCall(PetscMalloc1(bs, &sums));
      sum = 0.0;
      for (j = 0; j < amat->mbs; j++) {
        for (row = 0; row < bs; row++) sums[row] = 0.0;
        v  = amat->a + bs2 * amat->i[j];
        nz = amat->i[j + 1] - amat->i[j];
        for (i = 0; i < nz; i++) {
          for (col = 0; col < bs; col++) {
            for (row = 0; row < bs; row++) {
              sums[row] += PetscAbsScalar(*v);
              v++;
            }
          }
        }
        v  = bmat->a + bs2 * bmat->i[j];
        nz = bmat->i[j + 1] - bmat->i[j];
        for (i = 0; i < nz; i++) {
          for (col = 0; col < bs; col++) {
            for (row = 0; row < bs; row++) {
              sums[row] += PetscAbsScalar(*v);
              v++;
            }
          }
        }
        for (row = 0; row < bs; row++) {
          if (sums[row] > sum) sum = sums[row];
        }
      }
      PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat)));
      PetscCall(PetscFree(sums));
    } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for this norm yet");
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Creates the hash table, and sets the table
  This table is created only once.
  If new entries need to be added to the matrix
  then the hash table has to be destroyed and
  recreated.
*/
795 If new entried need to be added to the matrix 796 then the hash table has to be destroyed and 797 recreated. 798 */ 799 static PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat, PetscReal factor) 800 { 801 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 802 Mat A = baij->A, B = baij->B; 803 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data; 804 PetscInt i, j, k, nz = a->nz + b->nz, h1, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j; 805 PetscInt ht_size, bs2 = baij->bs2, rstart = baij->rstartbs; 806 PetscInt cstart = baij->cstartbs, *garray = baij->garray, row, col, Nbs = baij->Nbs; 807 PetscInt *HT, key; 808 MatScalar **HD; 809 PetscReal tmp; 810 #if defined(PETSC_USE_INFO) 811 PetscInt ct = 0, max = 0; 812 #endif 813 814 PetscFunctionBegin; 815 if (baij->ht) PetscFunctionReturn(PETSC_SUCCESS); 816 817 baij->ht_size = (PetscInt)(factor * nz); 818 ht_size = baij->ht_size; 819 820 /* Allocate Memory for Hash Table */ 821 PetscCall(PetscCalloc2(ht_size, &baij->hd, ht_size, &baij->ht)); 822 HD = baij->hd; 823 HT = baij->ht; 824 825 /* Loop Over A */ 826 for (i = 0; i < a->mbs; i++) { 827 for (j = ai[i]; j < ai[i + 1]; j++) { 828 row = i + rstart; 829 col = aj[j] + cstart; 830 831 key = row * Nbs + col + 1; 832 h1 = HASH(ht_size, key, tmp); 833 for (k = 0; k < ht_size; k++) { 834 if (!HT[(h1 + k) % ht_size]) { 835 HT[(h1 + k) % ht_size] = key; 836 HD[(h1 + k) % ht_size] = a->a + j * bs2; 837 break; 838 #if defined(PETSC_USE_INFO) 839 } else { 840 ct++; 841 #endif 842 } 843 } 844 #if defined(PETSC_USE_INFO) 845 if (k > max) max = k; 846 #endif 847 } 848 } 849 /* Loop Over B */ 850 for (i = 0; i < b->mbs; i++) { 851 for (j = bi[i]; j < bi[i + 1]; j++) { 852 row = i + rstart; 853 col = garray[bj[j]]; 854 key = row * Nbs + col + 1; 855 h1 = HASH(ht_size, key, tmp); 856 for (k = 0; k < ht_size; k++) { 857 if (!HT[(h1 + k) % ht_size]) { 858 HT[(h1 + k) % ht_size] = key; 859 HD[(h1 + k) % ht_size] = b->a + j * bs2; 860 break; 861 #if 
defined(PETSC_USE_INFO) 862 } else { 863 ct++; 864 #endif 865 } 866 } 867 #if defined(PETSC_USE_INFO) 868 if (k > max) max = k; 869 #endif 870 } 871 } 872 873 /* Print Summary */ 874 #if defined(PETSC_USE_INFO) 875 for (i = 0, j = 0; i < ht_size; i++) { 876 if (HT[i]) j++; 877 } 878 PetscCall(PetscInfo(mat, "Average Search = %5.2g,max search = %" PetscInt_FMT "\n", (!j) ? (double)0.0 : (double)(((PetscReal)(ct + j)) / (double)j), max)); 879 #endif 880 PetscFunctionReturn(PETSC_SUCCESS); 881 } 882 883 static PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat, MatAssemblyType mode) 884 { 885 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 886 PetscInt nstash, reallocs; 887 888 PetscFunctionBegin; 889 if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS); 890 891 PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range)); 892 PetscCall(MatStashScatterBegin_Private(mat, &mat->bstash, baij->rangebs)); 893 PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs)); 894 PetscCall(PetscInfo(mat, "Stash has %" PetscInt_FMT " entries,uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs)); 895 PetscCall(MatStashGetInfo_Private(&mat->bstash, &nstash, &reallocs)); 896 PetscCall(PetscInfo(mat, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs)); 897 PetscFunctionReturn(PETSC_SUCCESS); 898 } 899 900 static PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat, MatAssemblyType mode) 901 { 902 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 903 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)baij->A->data; 904 PetscInt i, j, rstart, ncols, flg, bs2 = baij->bs2; 905 PetscInt *row, *col; 906 PetscBool r1, r2, r3, other_disassembled; 907 MatScalar *val; 908 PetscMPIInt n; 909 910 PetscFunctionBegin; 911 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 912 if (!baij->donotstash && !mat->nooffprocentries) { 913 while (1) { 914 PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, 
&row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ *)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;

    (((Mat_SeqBAIJ *)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->bstash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* each stashed block carries bs2 values, hence the val + i*bs2 offset */
        PetscCall(MatSetValuesBlocked_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i * bs2, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->bstash));

    baij->roworiented = r1;
    a->roworiented    = r2;

    ((Mat_SeqBAIJ *)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  PetscCall(MatAssemblyBegin(baij->A, mode));
  PetscCall(MatAssemblyEnd(baij->A, mode));

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ *)baij->B->data)->nonew) {
    PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !other_disassembled) PetscCall(MatDisAssemble_MPIBAIJ(mat));
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIBAIJ(mat));
  PetscCall(MatAssemblyBegin(baij->B, mode));
  PetscCall(MatAssemblyEnd(baij->B, mode));

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    PetscCall(PetscInfo(mat, "Average Hash Table Search in MatSetValues = %5.2f\n", (double)((PetscReal)baij->ht_total_ct) / baij->ht_insert_ct));

    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  /* on the first final assembly after MAT_USE_HASH_TABLE was requested, build
     the table and switch the setvalues kernels to their hash-table variants */
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    PetscCall(MatCreateHashTable_MPIBAIJ_Private(mat, baij->ht_fact));

    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));

  baij->rowvalues = NULL;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ *)(baij->A->data))->nonew) {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

extern PetscErrorCode MatView_SeqBAIJ(Mat, PetscViewer);
#include <petscdraw.h>
/*
   Viewer for ASCII, draw and socket viewers.  The summary ASCII formats print
   per-rank information only; otherwise the whole matrix is gathered onto
   rank 0 and viewed there through a sub-viewer.
*/
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
{
  Mat_MPIBAIJ      *baij =
(Mat_MPIBAIJ *)mat->data;
  PetscMPIInt       rank = baij->rank;
  PetscInt          bs   = mat->rmap->bs;
  PetscBool         iascii, isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  if (iascii) {
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* per-rank summary only; no gathering of the matrix itself */
      MatInfo info;
      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                   mat->rmap->bs, (double)info.memory));
      PetscCall(MatGetInfo(baij->A, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(MatGetInfo(baij->B, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(baij->Mvctx, viewer));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscCall(PetscViewerASCIIPrintf(viewer, " block size is %" PetscInt_FMT "\n", bs));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(PETSC_SUCCESS);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    Mat_SeqBAIJ *Aloc;
    PetscInt     M = mat->rmap->N, N = mat->cmap->N, *ai, *aj, col, i, j, k, *rvals, mbs = baij->mbs;
    MatScalar   *a;
    const char  *matname;

    /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
    if (rank == 0) {
      PetscCall(MatSetSizes(A, M, N, M, N));
    } else {
      PetscCall(MatSetSizes(A, 0, 0, M, N));
    }
    PetscCall(MatSetType(A, MATMPIBAIJ));
    PetscCall(MatMPIBAIJSetPreallocation(A, mat->rmap->bs, 0, NULL, 0, NULL));
    PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));

    /* copy over the A part: rvals holds the bs global row indices of block row i */
    Aloc = (Mat_SeqBAIJ *)baij->A->data;
    ai   = Aloc->i;
    aj   = Aloc->j;
    a    = Aloc->a;
    PetscCall(PetscMalloc1(bs, &rvals));

    for (i = 0; i < mbs; i++) {
      rvals[0] = bs * (baij->rstartbs + i);
      for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
      for (j = ai[i]; j < ai[i + 1]; j++) {
        col = (baij->cstartbs + aj[j]) * bs;
        /* each pass inserts one column of the bs x bs block (values stored column-major) */
        for (k = 0; k < bs; k++) {
          PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
          col++;
          a += bs;
        }
      }
    }
    /* copy over the B part: columns are mapped to global indices through garray */
    Aloc = (Mat_SeqBAIJ *)baij->B->data;
    ai   = Aloc->i;
    aj   = Aloc->j;
    a    = Aloc->a;
    for (i = 0; i < mbs; i++) {
      rvals[0] = bs * (baij->rstartbs + i);
      for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
      for (j = ai[i]; j < ai[i + 1]; j++) {
        col = baij->garray[aj[j]] * bs;
        for (k = 0; k < bs; k++) {
          PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
          col++;
          a += bs;
        }
      }
    }
    PetscCall(PetscFree(rvals));
    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    if (((PetscObject)mat)->name) PetscCall(PetscObjectGetName((PetscObject)mat, &matname));
    if (rank == 0) {
      if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIBAIJ *)(A->data))->A, matname));
      PetscCall(MatView_SeqBAIJ(((Mat_MPIBAIJ *)(A->data))->A, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Used for both MPIBAIJ and MPISBAIJ matrices */
/*
   Binary viewer: writes the classid/size header, then per-row nonzero counts,
   global column indices, and values through collective WriteAll calls.
   Within each row, off-diagonal entries whose global column precedes the
   diagonal block are emitted first so columns come out in ascending order.
*/
PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat, PetscViewer viewer)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ    *A   = (Mat_SeqBAIJ *)aij->A->data;
  Mat_SeqBAIJ    *B   = (Mat_SeqBAIJ *)aij->B->data;
  const PetscInt *garray = aij->garray;
  PetscInt        header[4], M, N, m, rs, cs, bs, cnt, i, j, ja, jb, k, l;
  PetscInt64      nz, hnz;
  PetscInt       *rowlens, *colidxs;
  PetscScalar    *matvals;
  PetscMPIInt     rank;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  M  = mat->rmap->N;
  N  = mat->cmap->N;
  m  = mat->rmap->n;
  rs = mat->rmap->rstart;
  cs = mat->cmap->rstart;
  bs = mat->rmap->bs;
  nz = bs * bs * (A->nz + B->nz); /* scalar (not block) nonzeros on this rank */

  /* write matrix header */
  header[0] = MAT_FILE_CLASSID;
  header[1] = M;
  header[2] = N;
  PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_INT64, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
  /* only rank 0 writes, so only rank 0 needs the global count in the header */
  if (rank == 0) PetscCall(PetscIntCast(hnz, &header[3]));
  PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));

  /* fill in and store row lengths */
  PetscCall(PetscMalloc1(m, &rowlens));
  for (cnt = 0, i = 0; i < A->mbs; i++)
    for (j = 0; j < bs; j++) rowlens[cnt++] = bs * (A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i]);
  PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
  PetscCall(PetscFree(rowlens));

  /* fill in and store column indices */
  PetscCall(PetscMalloc1(nz, &colidxs));
  for (cnt = 0, i = 0; i < A->mbs; i++) {
    for (k = 0; k < bs; k++) {
      /* B entries left of the diagonal block first, then A, then remaining B:
         keeps each scalar row's columns ascending */
      for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
        if (garray[B->j[jb]] > cs / bs) break;
        for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l;
      }
      for (ja = A->i[i]; ja < A->i[i + 1]; ja++)
        for (l = 0; l < bs; l++) colidxs[cnt++] = bs * A->j[ja] + l + cs;
      for (; jb < B->i[i + 1]; jb++)
        for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l;
    }
  }
  PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
  PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_INT));
  PetscCall(PetscFree(colidxs));

  /* fill in and store nonzero values (blocks are stored column-major, hence the
     bs*(bs*jb + l) + k indexing to pick scalar row k, column l of block jb) */
  PetscCall(PetscMalloc1(nz, &matvals));
  for (cnt = 0, i = 0; i < A->mbs; i++) {
    for (k = 0; k < bs; k++) {
      for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
        if (garray[B->j[jb]] > cs / bs) break;
        for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k];
      }
      for (ja = A->i[i]; ja < A->i[i + 1]; ja++)
        for (l = 0; l < bs; l++) matvals[cnt++] = A->a[bs * (bs * ja + l) + k];
      for (; jb < B->i[i + 1]; jb++)
        for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k];
    }
  }
  PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_SCALAR));
  PetscCall(PetscFree(matvals));

  /* write block size option to the viewer's .info file */
  PetscCall(MatView_Binary_BlockSizes(mat, viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Dispatches MatView over the supported viewer types */
PetscErrorCode MatView_MPIBAIJ(Mat mat, PetscViewer viewer)
{
  PetscBool iascii, isdraw, issocket, isbinary;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  if (iascii || isdraw || issocket) {
    PetscCall(MatView_MPIBAIJ_ASCIIorDraworSocket(mat, viewer));
  } else if (isbinary) PetscCall(MatView_MPIBAIJ_Binary(mat, viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   yy = A xx: overlaps the scatter of off-process xx values with the local
   (diagonal block) product, then adds the off-diagonal contribution.
*/
static PetscErrorCode MatMult_MPIBAIJ(Mat A, Vec xx, Vec yy)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
  PetscInt     nt;

  PetscFunctionBegin;
  PetscCall(VecGetLocalSize(xx, &nt));
  PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A and xx");
  PetscCall(VecGetLocalSize(yy, &nt));
  PetscCheck(nt == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A and yy");
  PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*a->A->ops->mult)(a->A, xx, yy));
  PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*a->B->ops->multadd)(a->B, a->lvec, yy, yy));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* zz = yy + A xx, with the same communication/computation overlap as MatMult */
static PetscErrorCode MatMultAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIBAIJ *a =
(Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
  PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* yy = A^T xx: local transpose products, then reverse-scatter-add the B^T part */
static PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A, Vec xx, Vec yy)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  /* do nondiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
  /* do local part */
  PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
  /* add partial results together */
  PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
  PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* zz = yy + A^T xx */
static PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  /* do nondiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
  /* do local part */
  PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
  /* add partial results together */
  PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
  PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
static PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A, Vec v)
{
  PetscFunctionBegin;
  PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
  PetscCall(MatGetDiagonal(((Mat_MPIBAIJ *)A->data)->A, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Scales both local parts (diagonal A and off-diagonal B) by aa */
static PetscErrorCode MatScale_MPIBAIJ(Mat A, PetscScalar aa)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatScale(a->A, aa));
  PetscCall(MatScale(a->B, aa));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   Returns one locally-owned (global) row in global column indexing, merging
   the rows of the local A and B parts into ascending column order.  Work
   arrays sized for the longest local row are allocated lazily on first use
   and freed at assembly time.
*/
PetscErrorCode MatGetRow_MPIBAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data;
  PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
  PetscInt     bs = matin->rmap->bs, bs2 = mat->bs2, i, *cworkA, *cworkB, **pcA, **pcB;
  PetscInt     nztot, nzA, nzB, lrow, brstart = matin->rmap->rstart, brend = matin->rmap->rend;
  PetscInt    *cmap, *idx_p, cstart = mat->cstartbs;

  PetscFunctionBegin;
  PetscCheck(row >= brstart && row < brend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local rows");
  PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *)mat->A->data, *Ba = (Mat_SeqBAIJ *)mat->B->data;
    PetscInt     max = 1, mbs = mat->mbs, tmp;
    for (i = 0; i < mbs; i++) {
      tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    PetscCall(PetscMalloc2(max * bs2, &mat->rowvalues, max * bs2, &mat->rowindices));
  }
  lrow = row - brstart;

  /* only request from A/B what the caller asked for */
  pvA = &vworkA;
  pcA = &cworkA;
  pvB = &vworkB;
  pcB = &cworkB;
  if (!v) {
    pvA = NULL;
    pvB = NULL;
  }
  if (!idx) {
    pcA = NULL;
    if (!v) pcB = NULL;
  }
  PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        /* B entries whose global block column precedes the diagonal block come first */
        for (i = 0; i < nzB; i++) {
          if (cmap[cworkB[i] / bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
        for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        /* imark (the A/B split point) may already be known from the value pass */
        if (imark > -1) {
          for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
        } else {
          for (i = 0; i < nzB; i++) {
            if (cmap[cworkB[i] / bs] < cstart) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
            else break;
          }
          imark = i;
        }
        for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart * bs + cworkA[i];
        for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
      }
    } else {
      if (idx) *idx = NULL;
      if (v) *v = NULL;
    }
  }
  *nz = nztot;
  PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->restorerow)(mat->B, lrow,
&nzB, pcB, pvB));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Clears the "row active" flag set by MatGetRow_MPIBAIJ(); work arrays are kept for reuse */
PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCheck(baij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow not called");
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Zeros all stored entries of both local parts; the nonzero pattern is kept */
static PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatZeroEntries(l->A));
  PetscCall(MatZeroEntries(l->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   Sums the statistics of the local A and B parts, then combines them across
   ranks according to flag (local, global max, or global sum).
*/
static PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin, MatInfoType flag, MatInfo *info)
{
  Mat_MPIBAIJ   *a = (Mat_MPIBAIJ *)matin->data;
  Mat            A = a->A, B = a->B;
  PetscLogDouble isend[5], irecv[5];

  PetscFunctionBegin;
  info->block_size = (PetscReal)matin->rmap->bs;

  PetscCall(MatGetInfo(A, MAT_LOCAL, info));

  isend[0] = info->nz_used;
  isend[1] = info->nz_allocated;
  isend[2] = info->nz_unneeded;
  isend[3] = info->memory;
  isend[4] = info->mallocs;

  PetscCall(MatGetInfo(B, MAT_LOCAL, info));

  isend[0] += info->nz_used;
  isend[1] += info->nz_allocated;
  isend[2] += info->nz_unneeded;
  isend[3] += info->memory;
  isend[4] += info->mallocs;

  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_ARG_WRONG, "Unknown MatInfoType argument %d", (int)flag);
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Applies a matrix option to both local parts, or records it in the MPI-level struct */
static PetscErrorCode MatSetOption_MPIBAIJ(Mat A, MatOption op, PetscBool flg)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    /* options handled entirely by the sequential parts */
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A, 1);
    a->roworiented = flg;

    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_SORTED_FULL:
    PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_USE_HASH_TABLE:
    a->ht_flag = flg;
    a->ht_fact = 1.39;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SUBMAT_SINGLEIS:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
  case MAT_SPD_ETERNAL:
    /* if the diagonal matrix is square it inherits some of the properties above */
    break;
default:
    SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "unknown option %d", op);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   Forms the explicit transpose.  No preallocation information for the result
   is available, so entries are inserted with MatSetValues() one block column
   at a time.
*/
static PetscErrorCode MatTranspose_MPIBAIJ(Mat A, MatReuse reuse, Mat *matout)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)A->data;
  Mat_SeqBAIJ *Aloc;
  Mat          B;
  PetscInt     M = A->rmap->N, N = A->cmap->N, *ai, *aj, i, *rvals, j, k, col;
  PetscInt     bs = A->rmap->bs, mbs = baij->mbs;
  MatScalar   *a;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
    PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
    /* Do not know preallocation information, but must set block size */
    PetscCall(MatMPIBAIJSetPreallocation(B, A->rmap->bs, PETSC_DECIDE, NULL, PETSC_DECIDE, NULL));
  } else {
    B = *matout;
  }

  /* copy over the A part: rvals holds the bs global row indices of block row i,
     which become column indices of the transpose */
  Aloc = (Mat_SeqBAIJ *)baij->A->data;
  ai   = Aloc->i;
  aj   = Aloc->j;
  a    = Aloc->a;
  PetscCall(PetscMalloc1(bs, &rvals));

  for (i = 0; i < mbs; i++) {
    rvals[0] = bs * (baij->rstartbs + i);
    for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
    for (j = ai[i]; j < ai[i + 1]; j++) {
      col = (baij->cstartbs + aj[j]) * bs;
      for (k = 0; k < bs; k++) {
        /* one column of the block becomes one row of the transpose */
        PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES));

        col++;
        a += bs;
      }
    }
  }
  /* copy over the B part: columns are mapped to global indices through garray */
  Aloc = (Mat_SeqBAIJ *)baij->B->data;
  ai   = Aloc->i;
  aj   = Aloc->j;
  a    = Aloc->a;
  for (i = 0; i < mbs; i++) {
    rvals[0] = bs * (baij->rstartbs + i);
    for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
    for (j = ai[i]; j < ai[i + 1]; j++) {
      col = baij->garray[aj[j]] * bs;
      for (k = 0; k < bs; k++) {
        PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES));
        col++;
        a += bs;
      }
    }
  }
  PetscCall(PetscFree(rvals));
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));

  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) *matout = B;
  else PetscCall(MatHeaderMerge(A, &B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   Computes diag(ll) * mat * diag(rr); either vector may be NULL.  The scatter
   of rr's off-process entries is overlapped with the left scaling and the
   diagonal-block scaling.
*/
static PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat, Vec ll, Vec rr)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat          a = baij->A, b = baij->B;
  PetscInt     s1, s2, s3;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(mat, &s2, &s3));
  if (rr) {
    PetscCall(VecGetLocalSize(rr, &s1));
    PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size");
    /* Overlap communication with computation. */
    PetscCall(VecScatterBegin(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
  }
  if (ll) {
    PetscCall(VecGetLocalSize(ll, &s1));
    PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size");
    PetscUseTypeMethod(b, diagonalscale, ll, NULL);
  }
  /* scale the diagonal block */
  PetscUseTypeMethod(a, diagonalscale, ll, rr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    PetscCall(VecScatterEnd(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscUseTypeMethod(b, diagonalscale, NULL, baij->lvec);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   Zeros the locally-owned rows listed in rows[] (global indices), optionally
   placing diag on the diagonal and fixing the right-hand side b = diag*x for
   those rows.
*/
static PetscErrorCode MatZeroRows_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data;
  PetscInt    *lrows;
  PetscInt     r, len;
  PetscBool    cong;

  PetscFunctionBegin;
  /* get locally owned rows */
PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows));
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArray(b, &bb));
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArray(b, &bb));
  }

  /* actually zap the local rows */
  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
  PetscCall(MatZeroRows_SeqBAIJ(l->B, len, lrows, 0.0, NULL, NULL));
  PetscCall(MatHasCongruentLayouts(A, &cong));
  if ((diag != 0.0) && cong) {
    PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, diag, NULL, NULL));
  } else if (diag != 0.0) {
    /* non-congruent layouts: the diagonal entry may fall outside the diagonal
       block, so it must be inserted through the generic MatSetValues() path */
    PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL));
    PetscCheck(!((Mat_SeqBAIJ *)l->A->data)->nonew, PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
    }
    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
  } else {
    PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL));
  }
  PetscCall(PetscFree(lrows));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqBAIJ *)(l->A->data))->keepnonzeropattern || !((Mat_SeqBAIJ *)(l->A->data))->nonew) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   Zeros both the rows and the columns listed in rows[] (global indices).
   Row zeroing is local; column zeroing requires a mask of the zeroed rows
   scattered to every rank that references those columns through its B part.
*/
static PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIBAIJ       *l = (Mat_MPIBAIJ *)A->data;
  PetscMPIInt        n = A->rmap->n, p = 0;
  PetscInt           i, j, k, r, len = 0, row, col, count;
  PetscInt          *lrows, *owners = A->rmap->range;
  PetscSFNode       *rrows;
  PetscSF            sf;
  const PetscScalar *xx;
  PetscScalar       *bb, *mask;
  Vec                xmask, lmask;
  Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ *)l->B->data;
  PetscInt           bs = A->rmap->bs, bs2 = baij->bs2;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  PetscCall(PetscMalloc1(n, &lrows));
  for (r = 0; r < n; ++r) lrows[r] = -1;
  PetscCall(PetscMalloc1(N, &rrows));
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N);
    if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */
      PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p));
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
  /* Collect flags for rows to be zeroed */
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFDestroy(&sf));
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r)
    if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b));
  /* handle off-diagonal part of matrix: build a 0/1 mask of zeroed rows and
     scatter it so every rank can identify the corresponding ghost columns */
  PetscCall(MatCreateVecs(A, &xmask, NULL));
  PetscCall(VecDuplicate(l->lvec, &lmask));
  PetscCall(VecGetArray(xmask, &bb));
  for (i = 0; i < len; i++) bb[lrows[i]] = 1;
  PetscCall(VecRestoreArray(xmask, &bb));
  PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecDestroy(&xmask));
  if (x) {
    /* ghost values of x are needed to update b for the eliminated columns */
    PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecGetArrayRead(l->lvec, &xx));
    PetscCall(VecGetArray(b, &bb));
  }
  PetscCall(VecGetArray(lmask, &mask));
  /* remove zeroed rows of off-diagonal matrix: walk scalar row (row%bs) of
     every block in block row row/bs, stride bs between scalars */
  for (i = 0; i < len; ++i) {
    row   = lrows[i];
    count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs;
    aa    = ((MatScalar *)(baij->a)) + baij->i[row / bs] * bs2 + (row % bs);
    for (k = 0; k < count; ++k) {
      aa[0] = 0.0;
      aa += bs;
    }
  }
  /* loop over all elements of off process part of matrix zeroing removed columns*/
  for (i = 0; i < l->B->rmap->N; ++i) {
    row = i / bs;
    for (j = baij->i[row]; j < baij->i[row + 1]; ++j) {
      for (k = 0; k < bs; ++k) {
        col = bs * baij->j[j] + k;
        if (PetscAbsScalar(mask[col])) {
          aa = ((MatScalar *)(baij->a)) + j * bs2 + (i % bs) + bs * k;
          if (x) bb[i] -= aa[0] * xx[col];
          aa[0] = 0.0;
        }
      }
    }
  }
  if (x) {
    PetscCall(VecRestoreArray(b, &bb));
    PetscCall(VecRestoreArrayRead(l->lvec, &xx));
  }
  PetscCall(VecRestoreArray(lmask, &mask));
1753 PetscCall(VecDestroy(&lmask)); 1754 PetscCall(PetscFree(lrows)); 1755 1756 /* only change matrix nonzero state if pattern was allowed to be changed */ 1757 if (!((Mat_SeqBAIJ *)(l->A->data))->nonew) { 1758 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1759 PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A))); 1760 } 1761 PetscFunctionReturn(PETSC_SUCCESS); 1762 } 1763 1764 static PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1765 { 1766 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1767 1768 PetscFunctionBegin; 1769 PetscCall(MatSetUnfactored(a->A)); 1770 PetscFunctionReturn(PETSC_SUCCESS); 1771 } 1772 1773 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat, MatDuplicateOption, Mat *); 1774 1775 static PetscErrorCode MatEqual_MPIBAIJ(Mat A, Mat B, PetscBool *flag) 1776 { 1777 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ *)B->data, *matA = (Mat_MPIBAIJ *)A->data; 1778 Mat a, b, c, d; 1779 PetscBool flg; 1780 1781 PetscFunctionBegin; 1782 a = matA->A; 1783 b = matA->B; 1784 c = matB->A; 1785 d = matB->B; 1786 1787 PetscCall(MatEqual(a, c, &flg)); 1788 if (flg) PetscCall(MatEqual(b, d, &flg)); 1789 PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A))); 1790 PetscFunctionReturn(PETSC_SUCCESS); 1791 } 1792 1793 static PetscErrorCode MatCopy_MPIBAIJ(Mat A, Mat B, MatStructure str) 1794 { 1795 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1796 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 1797 1798 PetscFunctionBegin; 1799 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
*/
  if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
    /* fall back to the generic element-wise copy */
    PetscCall(MatCopy_Basic(A, B, str));
  } else {
    /* fast path: copy the diagonal and off-diagonal blocks directly */
    PetscCall(MatCopy(a->A, b->A, str));
    PetscCall(MatCopy(a->B, b->B, str));
  }
  PetscCall(PetscObjectStateIncrease((PetscObject)B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Compute per-row nonzero counts for Y+X by merging the two local CSR structures (delegates to the shared MPIX helper) */
PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz)
{
  PetscInt     bs = Y->rmap->bs, m = Y->rmap->N / bs;
  Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data;
  Mat_SeqBAIJ *y = (Mat_SeqBAIJ *)Y->data;

  PetscFunctionBegin;
  PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Y = a*X + Y; uses a direct BLAS axpy on the value arrays when the nonzero patterns match exactly */
static PetscErrorCode MatAXPY_MPIBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str)
{
  Mat_MPIBAIJ *xx = (Mat_MPIBAIJ *)X->data, *yy = (Mat_MPIBAIJ *)Y->data;
  PetscBLASInt bnz, one = 1;
  Mat_SeqBAIJ *x, *y;
  PetscInt     bs2 = Y->rmap->bs * Y->rmap->bs;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscScalar alpha = a;
    x = (Mat_SeqBAIJ *)xx->A->data;
    y = (Mat_SeqBAIJ *)yy->A->data;
    PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz));
    PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one));
    x = (Mat_SeqBAIJ *)xx->B->data;
    y = (Mat_SeqBAIJ *)yy->B->data;
    PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz));
    PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one));
    PetscCall(PetscObjectStateIncrease((PetscObject)Y));
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    PetscCall(MatAXPY_Basic(Y, a, X, str));
  } else {
    /* patterns differ: build a new matrix with the merged pattern, then merge its header into Y */
    Mat       B;
    PetscInt *nnz_d, *nnz_o, bs = Y->rmap->bs;
    PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d));
    PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o));
    PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B));
    PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name));
    PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N));
    PetscCall(MatSetBlockSizesFromMats(B, Y, Y));
    PetscCall(MatSetType(B, MATMPIBAIJ));
    PetscCall(MatAXPYGetPreallocation_SeqBAIJ(yy->A, xx->A, nnz_d));
    PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o));
    PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, nnz_d, 0, nnz_o));
    /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! */
    PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str));
    PetscCall(MatHeaderMerge(Y, &B));
    PetscCall(PetscFree(nnz_d));
    PetscCall(PetscFree(nnz_o));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Complex conjugate of all entries; no-op for real builds */
static PetscErrorCode MatConjugate_MPIBAIJ(Mat mat)
{
  PetscFunctionBegin;
  if (PetscDefined(USE_COMPLEX)) {
    Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)mat->data;

    PetscCall(MatConjugate_SeqBAIJ(a->A));
    PetscCall(MatConjugate_SeqBAIJ(a->B));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Keep only the real part of every entry (applied to both local blocks) */
static PetscErrorCode MatRealPart_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatRealPart(a->A));
  PetscCall(MatRealPart(a->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Keep only the imaginary part of every entry (applied to both local blocks) */
static PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatImaginaryPart(a->A));
  PetscCall(MatImaginaryPart(a->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Extract a parallel submatrix; the column IS is gathered onto every process and cached
  on the result (composed as "ISAllGather") so a MAT_REUSE_MATRIX call can find it again.
*/
static PetscErrorCode MatCreateSubMatrix_MPIBAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat)
{
  IS       iscol_local;
  PetscInt csize;

  PetscFunctionBegin;
  PetscCall(ISGetLocalSize(iscol, &csize));
  if (call ==
MAT_REUSE_MATRIX) {
    /* retrieve the gathered column IS cached by the MAT_INITIAL_MATRIX call */
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local));
    PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
  } else {
    PetscCall(ISAllGather(iscol, &iscol_local));
  }
  PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, isrow, iscol_local, csize, call, newmat, PETSC_FALSE));
  if (call == MAT_INITIAL_MATRIX) {
    PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local));
    PetscCall(ISDestroy(&iscol_local));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Not great since it makes two copies of the submatrix, first a SeqBAIJ
  in local and then by concatenating the local matrices the end result.
  Writing it directly would be much like MatCreateSubMatrices_MPIBAIJ().
  This routine is used for BAIJ and SBAIJ matrices (unfortunate dependency).
*/
PetscErrorCode MatCreateSubMatrix_MPIBAIJ_Private(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat, PetscBool sym)
{
  PetscMPIInt  rank, size;
  PetscInt     i, m, n, rstart, row, rend, nz, *cwork, j, bs;
  PetscInt    *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal;
  Mat          M, Mreuse;
  MatScalar   *vwork, *aa;
  MPI_Comm     comm;
  IS           isrow_new, iscol_new;
  Mat_SeqBAIJ *aij;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  /* The compression and expansion should be avoided. Doesn't point
     out errors, might change the indices, hence buggy */
  PetscCall(ISCompressIndicesGeneral(mat->rmap->N, mat->rmap->n, mat->rmap->bs, 1, &isrow, &isrow_new));
  if (isrow == iscol) {
    iscol_new = isrow_new;
    PetscCall(PetscObjectReference((PetscObject)iscol_new));
  } else PetscCall(ISCompressIndicesGeneral(mat->cmap->N, mat->cmap->n, mat->cmap->bs, 1, &iscol, &iscol_new));

  if (call == MAT_REUSE_MATRIX) {
    /* the sequential submatrix from the initial call is cached on *newmat as "SubMatrix" */
    PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse));
    PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse");
    PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_REUSE_MATRIX, &Mreuse, sym));
  } else {
    PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_INITIAL_MATRIX, &Mreuse, sym));
  }
  PetscCall(ISDestroy(&isrow_new));
  PetscCall(ISDestroy(&iscol_new));
  /*
      m - number of local rows
      n - number of columns (same on all processors)
      rstart - first row in new global matrix generated
  */
  PetscCall(MatGetBlockSize(mat, &bs));
  PetscCall(MatGetSize(Mreuse, &m, &n));
  m = m / bs;
  n = n / bs;

  if (call == MAT_INITIAL_MATRIX) {
    aij = (Mat_SeqBAIJ *)(Mreuse)->data;
    ii  = aij->i;
    jj  = aij->j;

    /*
        Determine the number of non-zeros in the diagonal and off-diagonal
        portions of the matrix in order to do correct preallocation
    */

    /* first get start and end of "diagonal" columns */
    if (csize == PETSC_DECIDE) {
      PetscCall(ISGetSize(isrow, &mglobal));
      if (mglobal == n * bs) { /* square matrix */
        nlocal = m;
      } else {
        nlocal = n / size + ((n % size) > rank);
      }
    } else {
      nlocal = csize / bs;
    }
    PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm));
    rstart = rend - nlocal;
    PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n);

    /* next, compute all the lengths */
    PetscCall(PetscMalloc2(m + 1, &dlens, m + 1, &olens));
    for (i = 0; i < m; i++) {
      jend = ii[i + 1] - ii[i];
      olen = 0;
      dlen = 0;
      for (j = 0; j < jend; j++) {
        if (*jj < rstart || *jj >= rend) olen++;
        else dlen++;
        jj++;
      }
      olens[i] = olen;
      dlens[i] = dlen;
    }
    PetscCall(MatCreate(comm, &M));
    PetscCall(MatSetSizes(M, bs * m, bs * nlocal, PETSC_DECIDE, bs * n));
    PetscCall(MatSetType(M, sym ? ((PetscObject)mat)->type_name : MATMPIBAIJ));
    /* both preallocation calls are made; only the one matching M's type takes effect */
    PetscCall(MatMPIBAIJSetPreallocation(M, bs, 0, dlens, 0, olens));
    PetscCall(MatMPISBAIJSetPreallocation(M, bs, 0, dlens, 0, olens));
    PetscCall(PetscFree2(dlens, olens));
  } else {
    PetscInt ml, nl;

    M = *newmat;
    PetscCall(MatGetLocalSize(M, &ml, &nl));
    PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request");
    PetscCall(MatZeroEntries(M));
    /*
         The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly,
       rather than the slower MatSetValues().
    */
    M->was_assembled = PETSC_TRUE;
    M->assembled     = PETSC_FALSE;
  }
  PetscCall(MatSetOption(M, MAT_ROW_ORIENTED, PETSC_FALSE));
  PetscCall(MatGetOwnershipRange(M, &rstart, &rend));
  aij = (Mat_SeqBAIJ *)(Mreuse)->data;
  ii  = aij->i;
  jj  = aij->j;
  aa  = aij->a;
  for (i = 0; i < m; i++) {
    row   = rstart / bs + i;
    nz    = ii[i + 1] - ii[i];
    cwork = jj;
    jj    = PetscSafePointerPlusOffset(jj, nz);
    vwork = aa;
    aa    = PetscSafePointerPlusOffset(aa, nz * bs * bs);
    PetscCall(MatSetValuesBlocked_MPIBAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES));
  }

  PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY));
  *newmat = M;

  /* save submatrix used in processor for next request */
  if (call == MAT_INITIAL_MATRIX) {
    PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse));
    PetscCall(PetscObjectDereference((PetscObject)Mreuse));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Permute rows and columns by building collective/gathered versions of the ISs and extracting a submatrix */
static PetscErrorCode MatPermute_MPIBAIJ(Mat A, IS rowp, IS colp, Mat *B)
{
  MPI_Comm        comm, pcomm;
  PetscInt        clocal_size, nrows;
  const PetscInt *rows;
  PetscMPIInt     size;
  IS              crowp, lcolp;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
  /* make a collective version of 'rowp' */
  PetscCall(PetscObjectGetComm((PetscObject)rowp, &pcomm));
  if (pcomm == comm) {
    crowp = rowp;
  } else {
    PetscCall(ISGetSize(rowp, &nrows));
    PetscCall(ISGetIndices(rowp, &rows));
    PetscCall(ISCreateGeneral(comm, nrows, rows, PETSC_COPY_VALUES, &crowp));
    PetscCall(ISRestoreIndices(rowp, &rows));
  }
  PetscCall(ISSetPermutation(crowp));
  /* make a local version of 'colp' */
  PetscCall(PetscObjectGetComm((PetscObject)colp, &pcomm));
  PetscCallMPI(MPI_Comm_size(pcomm, &size));
  if (size == 1) {
    lcolp = colp;
  } else {
    PetscCall(ISAllGather(colp, &lcolp));
  }
  PetscCall(ISSetPermutation(lcolp));
  /* now we just get the submatrix */
  PetscCall(MatGetLocalSize(A, NULL, &clocal_size));
  PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(A, crowp, lcolp, clocal_size, MAT_INITIAL_MATRIX, B, PETSC_FALSE));
  /* clean up */
  if (pcomm != comm) PetscCall(ISDestroy(&crowp));
  if (size > 1) PetscCall(ISDestroy(&lcolp));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Report the global block-column indices (garray) of the off-process part, and their count */
static PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[])
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ *B    = (Mat_SeqBAIJ *)baij->B->data;

  PetscFunctionBegin;
  if (nghosts) *nghosts = B->nbs;
  if (ghosts) *ghosts = baij->garray;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Gather the full (block-compressed) nonzero structure of the parallel matrix onto every
  process as a sequential AIJ matrix; only the pattern is meaningful, numerical values are not set.
*/
static PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A, Mat *newmat)
{
  Mat          B;
  Mat_MPIBAIJ *a  = (Mat_MPIBAIJ *)A->data;
  Mat_SeqBAIJ *ad = (Mat_SeqBAIJ *)a->A->data, *bd = (Mat_SeqBAIJ *)a->B->data;
  Mat_SeqAIJ  *b;
  PetscMPIInt  size, rank, *recvcounts = NULL, *displs = NULL;
  PetscInt     sendcount, i, *rstarts = A->rmap->range, n, cnt, j, bs = A->rmap->bs;
  PetscInt     m, *garray = a->garray, *lens, *jsendbuf, *a_jsendbuf, *b_jsendbuf;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));

  /* Tell every processor the number of nonzeros per row */
  PetscCall(PetscMalloc1(A->rmap->N / bs, &lens));
  for (i = A->rmap->rstart / bs; i < A->rmap->rend / bs; i++) lens[i] = ad->i[i - A->rmap->rstart / bs + 1] - ad->i[i - A->rmap->rstart / bs] + bd->i[i - A->rmap->rstart / bs + 1] - bd->i[i - A->rmap->rstart / bs];
  PetscCall(PetscMalloc1(2 * size, &recvcounts));
  displs = recvcounts + size; /* second half of the allocation holds the displacements */
  for (i = 0; i < size; i++) {
    recvcounts[i] = A->rmap->range[i + 1] / bs - A->rmap->range[i] / bs;
    displs[i]     = A->rmap->range[i] / bs;
  }
  PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, lens, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A)));
  /* Create the sequential matrix of the same type as the local block diagonal */
  PetscCall(MatCreate(PETSC_COMM_SELF, &B));
  PetscCall(MatSetSizes(B, A->rmap->N / bs, A->cmap->N / bs, PETSC_DETERMINE, PETSC_DETERMINE));
  PetscCall(MatSetType(B, MATSEQAIJ));
  PetscCall(MatSeqAIJSetPreallocation(B, 0, lens));
  b = (Mat_SeqAIJ *)B->data;

  /* Copy my part of matrix column indices over, writing directly into B's j array */
  sendcount  = ad->nz + bd->nz;
  jsendbuf   = b->j + b->i[rstarts[rank] / bs];
  a_jsendbuf = ad->j;
  b_jsendbuf = bd->j;
  n          = A->rmap->rend / bs - A->rmap->rstart / bs;
  cnt        = 0;
  for (i = 0; i < n; i++) {
    /* put in lower diagonal portion */
    m = bd->i[i + 1] - bd->i[i];
    while (m > 0) {
      /* is it above diagonal (in bd (compressed) numbering) */
      if (garray[*b_jsendbuf] > A->rmap->rstart / bs + i) break;
      jsendbuf[cnt++] = garray[*b_jsendbuf++];
      m--;
    }

    /* put in diagonal portion */
    for (j = ad->i[i]; j < ad->i[i + 1]; j++) jsendbuf[cnt++] = A->rmap->rstart / bs + *a_jsendbuf++;

    /* put in upper diagonal portion */
    while (m-- > 0) jsendbuf[cnt++] = garray[*b_jsendbuf++];
  }
  PetscCheck(cnt == sendcount, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Corrupted PETSc matrix: nz given %" PetscInt_FMT " actual nz %" PetscInt_FMT, sendcount, cnt);

  /* Gather all column indices to all processors */
  for (i = 0; i < size; i++) {
    recvcounts[i] = 0;
    for (j = A->rmap->range[i] / bs; j < A->rmap->range[i + 1] / bs; j++) recvcounts[i] += lens[j];
  }
  displs[0] = 0;
  for (i = 1; i < size; i++) displs[i] = displs[i - 1] + recvcounts[i - 1];
  PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, b->j, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A)));
  /* Assemble the matrix into usable form (note numerical values not yet set) */
  /* set the b->ilen (length of each row) values */
  PetscCall(PetscArraycpy(b->ilen, lens, A->rmap->N / bs));
  /* set the b->i indices */
  b->i[0] = 0;
  for (i = 1; i <= A->rmap->N / bs; i++) b->i[i] = b->i[i - 1] + lens[i - 1];
  PetscCall(PetscFree(lens));
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  PetscCall(PetscFree(recvcounts));

  PetscCall(MatPropagateSymmetryOptions(A, B));
  *newmat = B;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  SOR/Gauss-Seidel relaxation; only the "local" sweep variants are supported in parallel.
  Each outer iteration scatters the current solution to ghost values, forms bb1 = bb - B*x
  for the off-process coupling, then runs the sequential sweep on the diagonal block.
*/
static PetscErrorCode MatSOR_MPIBAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data;
  Vec          bb1 = NULL;

  PetscFunctionBegin;
  if (flag == SOR_APPLY_UPPER) {
    PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
    PetscFunctionReturn(PETSC_SUCCESS);
  }

  /* bb1 only needed when more than one outer iteration (or a nonzero initial guess) is used */
  if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) PetscCall(VecDuplicate(bb, &bb1));

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }

    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_LOCAL_FORWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx));
      its--;
    }
    while (its--) {
      PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD));

      /* update rhs: bb1 = bb - B*x */
      PetscCall(VecScale(mat->lvec, -1.0));
      PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1));

      /* local sweep */
      PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx));
    }
  } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel version of SOR requested not supported");

  PetscCall(VecDestroy(&bb1));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Column-wise reductions (norms, sums, means) over the global matrix: each rank accumulates
  its local contributions into a dense work array of length N, then an Allreduce combines them.
*/
static PetscErrorCode MatGetColumnReductions_MPIBAIJ(Mat A, PetscInt type, PetscReal *reductions)
{
  Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)A->data;
  PetscInt     m, N, i, *garray = aij->garray;
  PetscInt     ib, jb, bs = A->rmap->bs;
  Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ *)aij->A->data;
  MatScalar   *a_val = a_aij->a;
  Mat_SeqBAIJ *b_aij = (Mat_SeqBAIJ *)aij->B->data;
  MatScalar   *b_val = b_aij->a;
  PetscReal
*work; 2260 2261 PetscFunctionBegin; 2262 PetscCall(MatGetSize(A, &m, &N)); 2263 PetscCall(PetscCalloc1(N, &work)); 2264 if (type == NORM_2) { 2265 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2266 for (jb = 0; jb < bs; jb++) { 2267 for (ib = 0; ib < bs; ib++) { 2268 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val); 2269 a_val++; 2270 } 2271 } 2272 } 2273 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2274 for (jb = 0; jb < bs; jb++) { 2275 for (ib = 0; ib < bs; ib++) { 2276 work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val); 2277 b_val++; 2278 } 2279 } 2280 } 2281 } else if (type == NORM_1) { 2282 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2283 for (jb = 0; jb < bs; jb++) { 2284 for (ib = 0; ib < bs; ib++) { 2285 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val); 2286 a_val++; 2287 } 2288 } 2289 } 2290 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2291 for (jb = 0; jb < bs; jb++) { 2292 for (ib = 0; ib < bs; ib++) { 2293 work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val); 2294 b_val++; 2295 } 2296 } 2297 } 2298 } else if (type == NORM_INFINITY) { 2299 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2300 for (jb = 0; jb < bs; jb++) { 2301 for (ib = 0; ib < bs; ib++) { 2302 int col = A->cmap->rstart + a_aij->j[i] * bs + jb; 2303 work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]); 2304 a_val++; 2305 } 2306 } 2307 } 2308 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2309 for (jb = 0; jb < bs; jb++) { 2310 for (ib = 0; ib < bs; ib++) { 2311 int col = garray[b_aij->j[i]] * bs + jb; 2312 work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]); 2313 b_val++; 2314 } 2315 } 2316 } 2317 } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) { 2318 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2319 for (jb = 0; jb < bs; jb++) { 2320 
for (ib = 0; ib < bs; ib++) { 2321 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val); 2322 a_val++; 2323 } 2324 } 2325 } 2326 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2327 for (jb = 0; jb < bs; jb++) { 2328 for (ib = 0; ib < bs; ib++) { 2329 work[garray[b_aij->j[i]] * bs + jb] += PetscRealPart(*b_val); 2330 b_val++; 2331 } 2332 } 2333 } 2334 } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) { 2335 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2336 for (jb = 0; jb < bs; jb++) { 2337 for (ib = 0; ib < bs; ib++) { 2338 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val); 2339 a_val++; 2340 } 2341 } 2342 } 2343 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2344 for (jb = 0; jb < bs; jb++) { 2345 for (ib = 0; ib < bs; ib++) { 2346 work[garray[b_aij->j[i]] * bs + jb] += PetscImaginaryPart(*b_val); 2347 b_val++; 2348 } 2349 } 2350 } 2351 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type"); 2352 if (type == NORM_INFINITY) { 2353 PetscCall(MPIU_Allreduce(work, reductions, N, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A))); 2354 } else { 2355 PetscCall(MPIU_Allreduce(work, reductions, N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A))); 2356 } 2357 PetscCall(PetscFree(work)); 2358 if (type == NORM_2) { 2359 for (i = 0; i < N; i++) reductions[i] = PetscSqrtReal(reductions[i]); 2360 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 2361 for (i = 0; i < N; i++) reductions[i] /= m; 2362 } 2363 PetscFunctionReturn(PETSC_SUCCESS); 2364 } 2365 2366 static PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A, const PetscScalar **values) 2367 { 2368 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2369 2370 PetscFunctionBegin; 2371 PetscCall(MatInvertBlockDiagonal(a->A, values)); 2372 A->factorerrortype = a->A->factorerrortype; 2373 
A->factorerror_zeropivot_value = a->A->factorerror_zeropivot_value; 2374 A->factorerror_zeropivot_row = a->A->factorerror_zeropivot_row; 2375 PetscFunctionReturn(PETSC_SUCCESS); 2376 } 2377 2378 static PetscErrorCode MatShift_MPIBAIJ(Mat Y, PetscScalar a) 2379 { 2380 Mat_MPIBAIJ *maij = (Mat_MPIBAIJ *)Y->data; 2381 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)maij->A->data; 2382 2383 PetscFunctionBegin; 2384 if (!Y->preallocated) { 2385 PetscCall(MatMPIBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL, 0, NULL)); 2386 } else if (!aij->nz) { 2387 PetscInt nonew = aij->nonew; 2388 PetscCall(MatSeqBAIJSetPreallocation(maij->A, Y->rmap->bs, 1, NULL)); 2389 aij->nonew = nonew; 2390 } 2391 PetscCall(MatShift_Basic(Y, a)); 2392 PetscFunctionReturn(PETSC_SUCCESS); 2393 } 2394 2395 static PetscErrorCode MatMissingDiagonal_MPIBAIJ(Mat A, PetscBool *missing, PetscInt *d) 2396 { 2397 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2398 2399 PetscFunctionBegin; 2400 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices"); 2401 PetscCall(MatMissingDiagonal(a->A, missing, d)); 2402 if (d) { 2403 PetscInt rstart; 2404 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 2405 *d += rstart / A->rmap->bs; 2406 } 2407 PetscFunctionReturn(PETSC_SUCCESS); 2408 } 2409 2410 static PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A, Mat *a) 2411 { 2412 PetscFunctionBegin; 2413 *a = ((Mat_MPIBAIJ *)A->data)->A; 2414 PetscFunctionReturn(PETSC_SUCCESS); 2415 } 2416 2417 static PetscErrorCode MatEliminateZeros_MPIBAIJ(Mat A, PetscBool keep) 2418 { 2419 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2420 2421 PetscFunctionBegin; 2422 PetscCall(MatEliminateZeros_SeqBAIJ(a->A, keep)); // possibly keep zero diagonal coefficients 2423 PetscCall(MatEliminateZeros_SeqBAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients 2424 PetscFunctionReturn(PETSC_SUCCESS); 2425 } 2426 2427 static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ, 2428 MatGetRow_MPIBAIJ, 2429 
MatRestoreRow_MPIBAIJ, 2430 MatMult_MPIBAIJ, 2431 /* 4*/ MatMultAdd_MPIBAIJ, 2432 MatMultTranspose_MPIBAIJ, 2433 MatMultTransposeAdd_MPIBAIJ, 2434 NULL, 2435 NULL, 2436 NULL, 2437 /*10*/ NULL, 2438 NULL, 2439 NULL, 2440 MatSOR_MPIBAIJ, 2441 MatTranspose_MPIBAIJ, 2442 /*15*/ MatGetInfo_MPIBAIJ, 2443 MatEqual_MPIBAIJ, 2444 MatGetDiagonal_MPIBAIJ, 2445 MatDiagonalScale_MPIBAIJ, 2446 MatNorm_MPIBAIJ, 2447 /*20*/ MatAssemblyBegin_MPIBAIJ, 2448 MatAssemblyEnd_MPIBAIJ, 2449 MatSetOption_MPIBAIJ, 2450 MatZeroEntries_MPIBAIJ, 2451 /*24*/ MatZeroRows_MPIBAIJ, 2452 NULL, 2453 NULL, 2454 NULL, 2455 NULL, 2456 /*29*/ MatSetUp_MPI_Hash, 2457 NULL, 2458 NULL, 2459 MatGetDiagonalBlock_MPIBAIJ, 2460 NULL, 2461 /*34*/ MatDuplicate_MPIBAIJ, 2462 NULL, 2463 NULL, 2464 NULL, 2465 NULL, 2466 /*39*/ MatAXPY_MPIBAIJ, 2467 MatCreateSubMatrices_MPIBAIJ, 2468 MatIncreaseOverlap_MPIBAIJ, 2469 MatGetValues_MPIBAIJ, 2470 MatCopy_MPIBAIJ, 2471 /*44*/ NULL, 2472 MatScale_MPIBAIJ, 2473 MatShift_MPIBAIJ, 2474 NULL, 2475 MatZeroRowsColumns_MPIBAIJ, 2476 /*49*/ NULL, 2477 NULL, 2478 NULL, 2479 NULL, 2480 NULL, 2481 /*54*/ MatFDColoringCreate_MPIXAIJ, 2482 NULL, 2483 MatSetUnfactored_MPIBAIJ, 2484 MatPermute_MPIBAIJ, 2485 MatSetValuesBlocked_MPIBAIJ, 2486 /*59*/ MatCreateSubMatrix_MPIBAIJ, 2487 MatDestroy_MPIBAIJ, 2488 MatView_MPIBAIJ, 2489 NULL, 2490 NULL, 2491 /*64*/ NULL, 2492 NULL, 2493 NULL, 2494 NULL, 2495 NULL, 2496 /*69*/ MatGetRowMaxAbs_MPIBAIJ, 2497 NULL, 2498 NULL, 2499 NULL, 2500 NULL, 2501 /*74*/ NULL, 2502 MatFDColoringApply_BAIJ, 2503 NULL, 2504 NULL, 2505 NULL, 2506 /*79*/ NULL, 2507 NULL, 2508 NULL, 2509 NULL, 2510 MatLoad_MPIBAIJ, 2511 /*84*/ NULL, 2512 NULL, 2513 NULL, 2514 NULL, 2515 NULL, 2516 /*89*/ NULL, 2517 NULL, 2518 NULL, 2519 NULL, 2520 NULL, 2521 /*94*/ NULL, 2522 NULL, 2523 NULL, 2524 NULL, 2525 NULL, 2526 /*99*/ NULL, 2527 NULL, 2528 NULL, 2529 MatConjugate_MPIBAIJ, 2530 NULL, 2531 /*104*/ NULL, 2532 MatRealPart_MPIBAIJ, 2533 MatImaginaryPart_MPIBAIJ, 2534 NULL, 2535 
                                       /*109*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatMissingDiagonal_MPIBAIJ,
                                       /*114*/ MatGetSeqNonzeroStructure_MPIBAIJ,
                                       NULL,
                                       MatGetGhosts_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       /*119*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       MatGetMultiProcBlock_MPIBAIJ,
                                       /*124*/ NULL,
                                       MatGetColumnReductions_MPIBAIJ,
                                       MatInvertBlockDiagonal_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       /*129*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*134*/ NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*139*/ MatSetBlockSizes_Default,
                                       NULL,
                                       NULL,
                                       MatFDColoringSetUp_MPIXAIJ,
                                       NULL,
                                       /*144*/ MatCreateMPIMatConcatenateSeqMat_MPIBAIJ,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       NULL,
                                       /*150*/ NULL,
                                       MatEliminateZeros_MPIBAIJ,
                                       NULL};

PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *);
PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *);

/*
  Preallocate and fill the matrix from a block-compressed CSR description (ii/jj/V);
  after assembly the nonzero pattern is locked (MAT_NEW_NONZERO_LOCATION_ERR).
*/
static PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[])
{
  PetscInt        m, rstart, cstart, cend;
  PetscInt        i, j, dlen, olen, nz, nz_max = 0, *d_nnz = NULL, *o_nnz = NULL;
  const PetscInt *JJ     = NULL;
  PetscScalar    *values = NULL;
  PetscBool       roworiented = ((Mat_MPIBAIJ *)B->data)->roworiented;
  PetscBool       nooffprocentries;

  PetscFunctionBegin;
  PetscCall(PetscLayoutSetBlockSize(B->rmap, bs));
  PetscCall(PetscLayoutSetBlockSize(B->cmap, bs));
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));
  /* all sizes below are in block (not scalar) units */
  m      = B->rmap->n / bs;
  rstart = B->rmap->rstart / bs;
  cstart = B->cmap->rstart / bs;
  cend   = B->cmap->rend / bs;

  PetscCheck(!ii[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]);
  PetscCall(PetscMalloc2(m, &d_nnz, m, &o_nnz));
  /* count diagonal vs off-diagonal block columns per row for preallocation */
  for (i = 0; i < m; i++) {
    nz = ii[i + 1] - ii[i];
    PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz);
    nz_max = PetscMax(nz_max, nz);
    dlen   = 0;
    olen   = 0;
    JJ     = jj + ii[i];
    for (j = 0; j < nz; j++) {
      if (*JJ < cstart || *JJ >= cend) olen++;
      else dlen++;
      JJ++;
    }
    d_nnz[i] = dlen;
    o_nnz[i] = olen;
  }
  PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, d_nnz, 0, o_nnz));
  PetscCall(PetscFree2(d_nnz, o_nnz));

  values = (PetscScalar *)V;
  if (!values) PetscCall(PetscCalloc1(bs * bs * nz_max, &values)); /* scratch zeros when no values given */
  for (i = 0; i < m; i++) {
    PetscInt        row   = i + rstart;
    PetscInt        ncols = ii[i + 1] - ii[i];
    const PetscInt *icols = jj + ii[i];
    if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
      const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0);
      PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, ncols, icols, svals, INSERT_VALUES));
    } else { /* block ordering does not match so we can only insert one block at a time. */
      PetscInt j;
      for (j = 0; j < ncols; j++) {
        const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0);
        PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, 1, &icols[j], svals, INSERT_VALUES));
      }
    }
  }

  if (!V) PetscCall(PetscFree(values));
  /* all inserted entries are local, so skip the off-process stash communication during assembly */
  nooffprocentries    = B->nooffprocentries;
  B->nooffprocentries = PETSC_TRUE;
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  B->nooffprocentries = nooffprocentries;

  PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@C
  MatMPIBAIJSetPreallocationCSR - Creates a sparse parallel matrix in `MATBAIJ` format using the given nonzero structure and (optional) numerical values

  Collective

  Input Parameters:
+ B  - the matrix
. bs - the block size
. i  - the indices into `j` for the start of each local row (starts with zero)
. j  - the column indices for each local row (starts with zero) these must be sorted for each row
- v  - optional values in the matrix

  Level: advanced

  Notes:
  The order of the entries in values is specified by the `MatOption` `MAT_ROW_ORIENTED`. For example, C programs
  may want to use the default `MAT_ROW_ORIENTED` with value `PETSC_TRUE` and use an array v[nnz][bs][bs] where the second index is
  over rows within a block and the last index is over columns within a block row. Fortran programs will likely set
  `MAT_ROW_ORIENTED` with value `PETSC_FALSE` and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
  block column and the second index is over columns within a block.

  Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well

.seealso: `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MATMPIAIJ`, `MatCreateMPIBAIJWithArrays()`, `MATMPIBAIJ`
@*/
PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidType(B, 1);
  PetscValidLogicalCollectiveInt(B, bs, 2);
  PetscTryMethod(B, "MatMPIBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Type-specific preallocation; validates per-row counts and (re)creates the local blocks (continues beyond this excerpt) */
PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz)
{
  Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data;
  PetscInt     i;
  PetscMPIInt  size;

  PetscFunctionBegin;
  if (B->hash_active) {
    /* leave hash-based MatSetValues mode: restore the cached ops table */
    B->ops[0]      = b->cops;
    B->hash_active = PETSC_FALSE;
  }
  if (!B->preallocated) PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), bs, &B->bstash));
  PetscCall(MatSetBlockSize(B, PetscAbs(bs)));
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));

  if (d_nnz) {
    for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(d_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "d_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, d_nnz[i]);
  }
  if (o_nnz) {
    for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(o_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "o_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, o_nnz[i]);
  }
2711 2712 b->bs2 = bs * bs; 2713 b->mbs = B->rmap->n / bs; 2714 b->nbs = B->cmap->n / bs; 2715 b->Mbs = B->rmap->N / bs; 2716 b->Nbs = B->cmap->N / bs; 2717 2718 for (i = 0; i <= b->size; i++) b->rangebs[i] = B->rmap->range[i] / bs; 2719 b->rstartbs = B->rmap->rstart / bs; 2720 b->rendbs = B->rmap->rend / bs; 2721 b->cstartbs = B->cmap->rstart / bs; 2722 b->cendbs = B->cmap->rend / bs; 2723 2724 #if defined(PETSC_USE_CTABLE) 2725 PetscCall(PetscHMapIDestroy(&b->colmap)); 2726 #else 2727 PetscCall(PetscFree(b->colmap)); 2728 #endif 2729 PetscCall(PetscFree(b->garray)); 2730 PetscCall(VecDestroy(&b->lvec)); 2731 PetscCall(VecScatterDestroy(&b->Mvctx)); 2732 2733 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 2734 PetscCall(MatDestroy(&b->B)); 2735 PetscCall(MatCreate(PETSC_COMM_SELF, &b->B)); 2736 PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0)); 2737 PetscCall(MatSetType(b->B, MATSEQBAIJ)); 2738 2739 PetscCall(MatDestroy(&b->A)); 2740 PetscCall(MatCreate(PETSC_COMM_SELF, &b->A)); 2741 PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n)); 2742 PetscCall(MatSetType(b->A, MATSEQBAIJ)); 2743 2744 PetscCall(MatSeqBAIJSetPreallocation(b->A, bs, d_nz, d_nnz)); 2745 PetscCall(MatSeqBAIJSetPreallocation(b->B, bs, o_nz, o_nnz)); 2746 B->preallocated = PETSC_TRUE; 2747 B->was_assembled = PETSC_FALSE; 2748 B->assembled = PETSC_FALSE; 2749 PetscFunctionReturn(PETSC_SUCCESS); 2750 } 2751 2752 extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat, Vec); 2753 extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat, PetscReal); 2754 2755 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype, MatReuse reuse, Mat *adj) 2756 { 2757 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 2758 Mat_SeqBAIJ *d = (Mat_SeqBAIJ *)b->A->data, *o = (Mat_SeqBAIJ *)b->B->data; 2759 PetscInt M = B->rmap->n / B->rmap->bs, i, *ii, *jj, cnt, j, k, rstart = B->rmap->rstart / B->rmap->bs; 
2760 const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray; 2761 2762 PetscFunctionBegin; 2763 PetscCall(PetscMalloc1(M + 1, &ii)); 2764 ii[0] = 0; 2765 for (i = 0; i < M; i++) { 2766 PetscCheck((id[i + 1] - id[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, id[i], id[i + 1]); 2767 PetscCheck((io[i + 1] - io[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, io[i], io[i + 1]); 2768 ii[i + 1] = ii[i] + id[i + 1] - id[i] + io[i + 1] - io[i]; 2769 /* remove one from count of matrix has diagonal */ 2770 for (j = id[i]; j < id[i + 1]; j++) { 2771 if (jd[j] == i) { 2772 ii[i + 1]--; 2773 break; 2774 } 2775 } 2776 } 2777 PetscCall(PetscMalloc1(ii[M], &jj)); 2778 cnt = 0; 2779 for (i = 0; i < M; i++) { 2780 for (j = io[i]; j < io[i + 1]; j++) { 2781 if (garray[jo[j]] > rstart) break; 2782 jj[cnt++] = garray[jo[j]]; 2783 } 2784 for (k = id[i]; k < id[i + 1]; k++) { 2785 if (jd[k] != i) jj[cnt++] = rstart + jd[k]; 2786 } 2787 for (; j < io[i + 1]; j++) jj[cnt++] = garray[jo[j]]; 2788 } 2789 PetscCall(MatCreateMPIAdj(PetscObjectComm((PetscObject)B), M, B->cmap->N / B->rmap->bs, ii, jj, NULL, adj)); 2790 PetscFunctionReturn(PETSC_SUCCESS); 2791 } 2792 2793 #include <../src/mat/impls/aij/mpi/mpiaij.h> 2794 2795 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType, MatReuse, Mat *); 2796 2797 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 2798 { 2799 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2800 Mat_MPIAIJ *b; 2801 Mat B; 2802 2803 PetscFunctionBegin; 2804 PetscCheck(A->assembled, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Matrix must be assembled"); 2805 2806 if (reuse == MAT_REUSE_MATRIX) { 2807 B = *newmat; 2808 } else { 2809 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 2810 PetscCall(MatSetType(B, MATMPIAIJ)); 2811 
PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 2812 PetscCall(MatSetBlockSizes(B, A->rmap->bs, A->cmap->bs)); 2813 PetscCall(MatSeqAIJSetPreallocation(B, 0, NULL)); 2814 PetscCall(MatMPIAIJSetPreallocation(B, 0, NULL, 0, NULL)); 2815 } 2816 b = (Mat_MPIAIJ *)B->data; 2817 2818 if (reuse == MAT_REUSE_MATRIX) { 2819 PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_REUSE_MATRIX, &b->A)); 2820 PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_REUSE_MATRIX, &b->B)); 2821 } else { 2822 PetscInt *garray = a->garray; 2823 Mat_SeqAIJ *bB; 2824 PetscInt bs, nnz; 2825 PetscCall(MatDestroy(&b->A)); 2826 PetscCall(MatDestroy(&b->B)); 2827 /* just clear out the data structure */ 2828 PetscCall(MatDisAssemble_MPIAIJ(B)); 2829 PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A)); 2830 PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B)); 2831 2832 /* Global numbering for b->B columns */ 2833 bB = (Mat_SeqAIJ *)b->B->data; 2834 bs = A->rmap->bs; 2835 nnz = bB->i[A->rmap->n]; 2836 for (PetscInt k = 0; k < nnz; k++) { 2837 PetscInt bj = bB->j[k] / bs; 2838 PetscInt br = bB->j[k] % bs; 2839 bB->j[k] = garray[bj] * bs + br; 2840 } 2841 } 2842 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 2843 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 2844 2845 if (reuse == MAT_INPLACE_MATRIX) { 2846 PetscCall(MatHeaderReplace(A, &B)); 2847 } else { 2848 *newmat = B; 2849 } 2850 PetscFunctionReturn(PETSC_SUCCESS); 2851 } 2852 2853 /*MC 2854 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 2855 2856 Options Database Keys: 2857 + -mat_type mpibaij - sets the matrix type to `MATMPIBAIJ` during a call to `MatSetFromOptions()` 2858 . -mat_block_size <bs> - set the blocksize used to store the matrix 2859 . 
-mat_baij_mult_version version - indicate the version of the matrix-vector product to use (0 often indicates using BLAS)
-  -mat_use_hash_table <fact> - set hash table factor

  Level: beginner

  Note:
  `MatSetOption(A, MAT_STRUCTURE_ONLY, PETSC_TRUE)` may be called for this matrix type. In this case no
  space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored

.seealso: `Mat`, `MATBAIJ`, `MATSEQBAIJ`, `MatCreateBAIJ`
M*/

PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat, MatType, MatReuse, Mat *);

/* Type constructor for MATMPIBAIJ: allocates and zero-initializes the implementation data,
   installs the operations table, and registers the type-specific methods and conversions */
PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B)
{
  Mat_MPIBAIJ *b;
  PetscBool    flg = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PetscNew(&b));
  B->data      = (void *)b;
  B->ops[0]    = MatOps_Values;
  B->assembled = PETSC_FALSE;

  B->insertmode = NOT_SET_VALUES;
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &b->size));

  /* build local table of row and column ownerships */
  PetscCall(PetscMalloc1(b->size + 1, &b->rangebs));

  /* build cache for off array entries formed */
  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL;
  b->garray      = NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used in block assembly */
  b->barray = NULL;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = NULL;
  b->rowvalues    = NULL;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = NULL;
  b->hd           = NULL;
  b->ht_size      = 0;
  b->ht_flag      = PETSC_FALSE;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
  b->ijonly = PETSC_FALSE;

  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiadj_C", MatConvert_MPIBAIJ_MPIAdj));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiaij_C", MatConvert_MPIBAIJ_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpisbaij_C", MatConvert_MPIBAIJ_MPISBAIJ));
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_hypre_C", MatConvert_AIJ_HYPRE));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocation_C", MatMPIBAIJSetPreallocation_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocationCSR_C", MatMPIBAIJSetPreallocationCSR_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetHashTableFactor_C", MatSetHashTableFactor_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_is_C", MatConvert_XAIJ_IS));
  PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIBAIJ));

  PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Options for loading MPIBAIJ matrix 1", "Mat");
  PetscCall(PetscOptionsName("-mat_use_hash_table", "Use hash table to save time in constructing matrix", "MatSetOption", &flg));
  if (flg) {
    PetscReal fact = 1.39;
    PetscCall(MatSetOption(B, MAT_USE_HASH_TABLE, PETSC_TRUE));
    PetscCall(PetscOptionsReal("-mat_use_hash_table", "Use hash table factor", "MatMPIBAIJSetHashTableFactor", fact, &fact, NULL));
    if (fact <= 1.0) fact = 1.39; /* the factor must exceed 1; fall back to the default */
    PetscCall(MatMPIBAIJSetHashTableFactor(B, fact));
    PetscCall(PetscInfo(B, "Hash table Factor used %5.2g\n", (double)fact));
  }
  PetscOptionsEnd();
  PetscFunctionReturn(PETSC_SUCCESS);
}

// PetscClangLinter pragma disable: -fdoc-section-header-unknown
/*MC
   MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.

   This matrix type is identical to `MATSEQBAIJ` when constructed with a single process communicator,
   and `MATMPIBAIJ` otherwise.

   Options Database Keys:
.  -mat_type baij - sets the matrix type to `MATBAIJ` during a call to `MatSetFromOptions()`

  Level: beginner

.seealso: `Mat`, `MatCreateBAIJ()`, `MATSEQBAIJ`, `MATMPIBAIJ`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()`
M*/

/*@C
  MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in `MATMPIBAIJ` format
  (block compressed row).

  Collective

  Input Parameters:
+ B     - the matrix
. bs    - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
          blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
. d_nz  - number of block nonzeros per block row in diagonal portion of local
          submatrix (same for all local rows)
. d_nnz - array containing the number of block nonzeros in the various block rows
          of the in diagonal portion of the local (possibly different for each block
          row) or `NULL`. If you plan to factor the matrix you must leave room for the diagonal entry and
          set it even if it is zero.
. o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
          submatrix (same for all local rows).
2985 - o_nnz - array containing the number of nonzeros in the various block rows of the 2986 off-diagonal portion of the local submatrix (possibly different for 2987 each block row) or `NULL`. 2988 2989 If the *_nnz parameter is given then the *_nz parameter is ignored 2990 2991 Options Database Keys: 2992 + -mat_block_size - size of the blocks to use 2993 - -mat_use_hash_table <fact> - set hash table factor 2994 2995 Level: intermediate 2996 2997 Notes: 2998 For good matrix assembly performance 2999 the user should preallocate the matrix storage by setting the parameters 3000 `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). By setting these parameters accurately, 3001 performance can be increased by more than a factor of 50. 3002 3003 If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor 3004 than it must be used on all processors that share the object for that argument. 3005 3006 Storage Information: 3007 For a square global matrix we define each processor's diagonal portion 3008 to be its local rows and the corresponding columns (a square submatrix); 3009 each processor's off-diagonal portion encompasses the remainder of the 3010 local matrix (a rectangular submatrix). 3011 3012 The user can specify preallocated storage for the diagonal part of 3013 the local submatrix with either `d_nz` or `d_nnz` (not both). Set 3014 `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic 3015 memory allocation. Likewise, specify preallocated storage for the 3016 off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both). 3017 3018 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3019 the figure below we depict these three local rows and all columns (0-11). 
3020 3021 .vb 3022 0 1 2 3 4 5 6 7 8 9 10 11 3023 -------------------------- 3024 row 3 |o o o d d d o o o o o o 3025 row 4 |o o o d d d o o o o o o 3026 row 5 |o o o d d d o o o o o o 3027 -------------------------- 3028 .ve 3029 3030 Thus, any entries in the d locations are stored in the d (diagonal) 3031 submatrix, and any entries in the o locations are stored in the 3032 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3033 stored simply in the `MATSEQBAIJ` format for compressed row storage. 3034 3035 Now `d_nz` should indicate the number of block nonzeros per row in the d matrix, 3036 and `o_nz` should indicate the number of block nonzeros per row in the o matrix. 3037 In general, for PDE problems in which most nonzeros are near the diagonal, 3038 one expects `d_nz` >> `o_nz`. 3039 3040 You can call `MatGetInfo()` to get information on how effective the preallocation was; 3041 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3042 You can also run with the option `-info` and look for messages with the string 3043 malloc in them to see if additional memory allocation was needed. 
3044 3045 .seealso: `Mat`, `MATMPIBAIJ`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatMPIBAIJSetPreallocationCSR()`, `PetscSplitOwnership()` 3046 @*/ 3047 PetscErrorCode MatMPIBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[]) 3048 { 3049 PetscFunctionBegin; 3050 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 3051 PetscValidType(B, 1); 3052 PetscValidLogicalCollectiveInt(B, bs, 2); 3053 PetscTryMethod(B, "MatMPIBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, bs, d_nz, d_nnz, o_nz, o_nnz)); 3054 PetscFunctionReturn(PETSC_SUCCESS); 3055 } 3056 3057 // PetscClangLinter pragma disable: -fdoc-section-header-unknown 3058 /*@C 3059 MatCreateBAIJ - Creates a sparse parallel matrix in `MATBAIJ` format 3060 (block compressed row). 3061 3062 Collective 3063 3064 Input Parameters: 3065 + comm - MPI communicator 3066 . bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row 3067 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()` 3068 . m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given) 3069 This value should be the same as the local size used in creating the 3070 y vector for the matrix-vector product y = Ax. 3071 . n - number of local columns (or `PETSC_DECIDE` to have calculated if N is given) 3072 This value should be the same as the local size used in creating the 3073 x vector for the matrix-vector product y = Ax. 3074 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 3075 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 3076 . d_nz - number of nonzero blocks per block row in diagonal portion of local 3077 submatrix (same for all local rows) 3078 . 
d_nnz - array containing the number of nonzero blocks in the various block rows 3079 of the in diagonal portion of the local (possibly different for each block 3080 row) or NULL. If you plan to factor the matrix you must leave room for the diagonal entry 3081 and set it even if it is zero. 3082 . o_nz - number of nonzero blocks per block row in the off-diagonal portion of local 3083 submatrix (same for all local rows). 3084 - o_nnz - array containing the number of nonzero blocks in the various block rows of the 3085 off-diagonal portion of the local submatrix (possibly different for 3086 each block row) or NULL. 3087 3088 Output Parameter: 3089 . A - the matrix 3090 3091 Options Database Keys: 3092 + -mat_block_size - size of the blocks to use 3093 - -mat_use_hash_table <fact> - set hash table factor 3094 3095 Level: intermediate 3096 3097 Notes: 3098 It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, 3099 MatXXXXSetPreallocation() paradigm instead of this routine directly. 3100 [MatXXXXSetPreallocation() is, for example, `MatSeqBAIJSetPreallocation()`] 3101 3102 For good matrix assembly performance 3103 the user should preallocate the matrix storage by setting the parameters 3104 `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). By setting these parameters accurately, 3105 performance can be increased by more than a factor of 50. 3106 3107 If the *_nnz parameter is given then the *_nz parameter is ignored 3108 3109 A nonzero block is any block that as 1 or more nonzeros in it 3110 3111 The user MUST specify either the local or global matrix dimensions 3112 (possibly both). 3113 3114 If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor 3115 than it must be used on all processors that share the object for that argument. 
3116 3117 Storage Information: 3118 For a square global matrix we define each processor's diagonal portion 3119 to be its local rows and the corresponding columns (a square submatrix); 3120 each processor's off-diagonal portion encompasses the remainder of the 3121 local matrix (a rectangular submatrix). 3122 3123 The user can specify preallocated storage for the diagonal part of 3124 the local submatrix with either d_nz or d_nnz (not both). Set 3125 `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic 3126 memory allocation. Likewise, specify preallocated storage for the 3127 off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both). 3128 3129 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3130 the figure below we depict these three local rows and all columns (0-11). 3131 3132 .vb 3133 0 1 2 3 4 5 6 7 8 9 10 11 3134 -------------------------- 3135 row 3 |o o o d d d o o o o o o 3136 row 4 |o o o d d d o o o o o o 3137 row 5 |o o o d d d o o o o o o 3138 -------------------------- 3139 .ve 3140 3141 Thus, any entries in the d locations are stored in the d (diagonal) 3142 submatrix, and any entries in the o locations are stored in the 3143 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3144 stored simply in the `MATSEQBAIJ` format for compressed row storage. 3145 3146 Now `d_nz` should indicate the number of block nonzeros per row in the d matrix, 3147 and `o_nz` should indicate the number of block nonzeros per row in the o matrix. 3148 In general, for PDE problems in which most nonzeros are near the diagonal, 3149 one expects `d_nz` >> `o_nz`. 
.seealso: `Mat`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()`
@*/
PetscErrorCode MatCreateBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
{
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCall(MatCreate(comm, A));
  PetscCall(MatSetSizes(*A, m, n, M, N));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  /* a one-process communicator gets the sequential format; otherwise the MPI format */
  if (size > 1) {
    PetscCall(MatSetType(*A, MATMPIBAIJ));
    PetscCall(MatMPIBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz));
  } else {
    PetscCall(MatSetType(*A, MATSEQBAIJ));
    PetscCall(MatSeqBAIJSetPreallocation(*A, bs, d_nz, d_nnz));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Duplicates a MATMPIBAIJ matrix, copying the numerical values if cpvalues requests it */
static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
{
  Mat          mat;
  Mat_MPIBAIJ *a, *oldmat = (Mat_MPIBAIJ *)matin->data;
  PetscInt     len = 0;

  PetscFunctionBegin;
  *newmat = NULL;
  PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
  PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
  PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));

  PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
  PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));
  if (matin->hash_active) {
    /* source has not been preallocated yet; just set up the copy */
    PetscCall(MatSetUp(mat));
  } else {
    mat->factortype   = matin->factortype;
    mat->preallocated = PETSC_TRUE;
    mat->assembled    = PETSC_TRUE;
    mat->insertmode   = NOT_SET_VALUES;

    /* copy layout/block bookkeeping; per-instance caches (GetRow buffers, hash table) start empty */
    a             = (Mat_MPIBAIJ *)mat->data;
    mat->rmap->bs = matin->rmap->bs;
    a->bs2        = oldmat->bs2;
    a->mbs        = oldmat->mbs;
    a->nbs        = oldmat->nbs;
    a->Mbs        = oldmat->Mbs;
    a->Nbs        = oldmat->Nbs;

    a->size = oldmat->size;
    a->rank = oldmat->rank;
    a->donotstash   = oldmat->donotstash;
    a->roworiented  = oldmat->roworiented;
    a->rowindices   = NULL;
    a->rowvalues    = NULL;
    a->getrowactive = PETSC_FALSE;
    a->barray       = NULL;
    a->rstartbs     = oldmat->rstartbs;
    a->rendbs       = oldmat->rendbs;
    a->cstartbs     = oldmat->cstartbs;
    a->cendbs       = oldmat->cendbs;

    /* hash table stuff */
    a->ht           = NULL;
    a->hd           = NULL;
    a->ht_size      = 0;
    a->ht_flag      = oldmat->ht_flag;
    a->ht_fact      = oldmat->ht_fact;
    a->ht_total_ct  = 0;
    a->ht_insert_ct = 0;

    PetscCall(PetscArraycpy(a->rangebs, oldmat->rangebs, a->size + 1));
    if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
      PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
#else
      PetscCall(PetscMalloc1(a->Nbs, &a->colmap));
      PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, a->Nbs));
#endif
    } else a->colmap = NULL;

    if (oldmat->garray && (len = ((Mat_SeqBAIJ *)(oldmat->B->data))->nbs)) {
      PetscCall(PetscMalloc1(len, &a->garray));
      PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
    } else a->garray = NULL;

    PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin), matin->rmap->bs, &mat->bstash));
    PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
    PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx));

    PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
    PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
  }
  PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
  *newmat = mat;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Used for both MPIBAIJ and MPISBAIJ matrices */
PetscErrorCode MatLoad_MPIBAIJ_Binary(Mat mat, PetscViewer viewer)
{
  PetscInt     header[4], M, N, nz, bs, m, n, mbs, nbs, rows, cols, sum, i, j, k;
  PetscInt    *rowidxs, *colidxs, rs, cs, ce;
  PetscScalar *matvals;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  /* read in matrix header */
  PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
  PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
  M  = header[1];
  N  = header[2];
  nz = header[3];
  PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
  PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
  PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIBAIJ");

  /* set block sizes from the viewer's .info file */
  PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
  /* set local sizes if not set already */
  if (mat->rmap->n < 0 && M == N) mat->rmap->n = mat->cmap->n;
  if (mat->cmap->n < 0 && M == N) mat->cmap->n = mat->rmap->n;
  /* set global sizes if not set already */
  if (mat->rmap->N < 0) mat->rmap->N = M;
  if (mat->cmap->N < 0) mat->cmap->N = N;
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));

  /* check if the matrix sizes are correct */
  PetscCall(MatGetSize(mat, &rows, &cols));
  PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);
  PetscCall(MatGetBlockSize(mat, &bs));
  PetscCall(MatGetLocalSize(mat, &m, &n));
  PetscCall(PetscLayoutGetRange(mat->rmap, &rs, NULL));
  PetscCall(PetscLayoutGetRange(mat->cmap, &cs, &ce));
  mbs = m / bs;
  nbs = n / bs;

  /* read in row lengths and build row indices (prefix-sum into a local CSR row pointer) */
  PetscCall(PetscMalloc1(m + 1, &rowidxs));
  PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT));
  rowidxs[0] = 0;
  for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
  PetscCall(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer)));
  PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);

  /* read in column indices and matrix values */
  PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals));
  PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
  PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));

  { /* preallocate matrix storage */
    PetscBT    bt; /* helper bit set to count diagonal nonzeros */
    PetscHSetI ht; /* helper hash set to count off-diagonal nonzeros */
    PetscBool  sbaij, done;
    PetscInt  *d_nnz, *o_nnz;

    PetscCall(PetscBTCreate(nbs, &bt));
    PetscCall(PetscHSetICreate(&ht));
    PetscCall(PetscCalloc2(mbs, &d_nnz, mbs, &o_nnz));
    PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPISBAIJ, &sbaij));
    for (i = 0; i < mbs; i++) {
      PetscCall(PetscBTMemzero(nbs, bt));
      PetscCall(PetscHSetIClear(ht));
      for (k = 0; k < bs; k++) {
        PetscInt row = bs * i + k;
        for (j = rowidxs[row]; j < rowidxs[row + 1]; j++) {
          PetscInt col = colidxs[j];
          /* for SBAIJ only the upper triangle is stored; count each block column once per block row */
          if (!sbaij || col >= row) {
            if (col >= cs && col < ce) {
              if (!PetscBTLookupSet(bt, (col - cs) / bs)) d_nnz[i]++;
            } else {
              PetscCall(PetscHSetIQueryAdd(ht, col / bs, &done));
              if (done) o_nnz[i]++;
            }
          }
        }
      }
    }
    PetscCall(PetscBTDestroy(&bt));
    PetscCall(PetscHSetIDestroy(&ht));
    /* both calls use PetscTryMethod, so only the one matching the matrix type takes effect */
    PetscCall(MatMPIBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz));
    PetscCall(MatMPISBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz));
    PetscCall(PetscFree2(d_nnz, o_nnz));
  }

  /* store matrix values */
  for (i = 0; i < m; i++) {
    PetscInt row = rs + i, s = rowidxs[i], e = rowidxs[i + 1];
    PetscUseTypeMethod(mat, setvalues, 1, &row, e - s, colidxs + s, matvals + s, INSERT_VALUES);
  }

  PetscCall(PetscFree(rowidxs));
  PetscCall(PetscFree2(colidxs, matvals));
  PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* MATMPIBAIJ loader: only binary viewers are supported */
PetscErrorCode MatLoad_MPIBAIJ(Mat mat, PetscViewer viewer)
{
  PetscBool isbinary;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name);
  PetscCall(MatLoad_MPIBAIJ_Binary(mat, viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the matrices hash table

  Input Parameters:
+ mat  - the matrix
- fact - factor

  Options Database Key:
.
-mat_use_hash_table <fact> - provide the factor 3372 3373 Level: advanced 3374 3375 .seealso: `Mat`, `MATMPIBAIJ`, `MatSetOption()` 3376 @*/ 3377 PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat, PetscReal fact) 3378 { 3379 PetscFunctionBegin; 3380 PetscTryMethod(mat, "MatSetHashTableFactor_C", (Mat, PetscReal), (mat, fact)); 3381 PetscFunctionReturn(PETSC_SUCCESS); 3382 } 3383 3384 PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat, PetscReal fact) 3385 { 3386 Mat_MPIBAIJ *baij; 3387 3388 PetscFunctionBegin; 3389 baij = (Mat_MPIBAIJ *)mat->data; 3390 baij->ht_fact = fact; 3391 PetscFunctionReturn(PETSC_SUCCESS); 3392 } 3393 3394 PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[]) 3395 { 3396 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 3397 PetscBool flg; 3398 3399 PetscFunctionBegin; 3400 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIBAIJ, &flg)); 3401 PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIBAIJ matrix as input"); 3402 if (Ad) *Ad = a->A; 3403 if (Ao) *Ao = a->B; 3404 if (colmap) *colmap = a->garray; 3405 PetscFunctionReturn(PETSC_SUCCESS); 3406 } 3407 3408 /* 3409 Special version for direct calls from Fortran (to eliminate two function call overheads 3410 */ 3411 #if defined(PETSC_HAVE_FORTRAN_CAPS) 3412 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED 3413 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 3414 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked 3415 #endif 3416 3417 // PetscClangLinter pragma disable: -fdoc-synopsis-matching-symbol-name 3418 /*@C 3419 MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to `MatSetValuesBlocked()` 3420 3421 Collective 3422 3423 Input Parameters: 3424 + matin - the matrix 3425 . min - number of input rows 3426 . im - input rows 3427 . nin - number of input columns 3428 . in - input columns 3429 . 
v - numerical values input
- addvin - `INSERT_VALUES` or `ADD_VALUES`

  Level: advanced

  Developer Notes:
  This has a complete copy of `MatSetValuesBlocked_MPIBAIJ()` which is terrible code un-reuse.

.seealso: `Mat`, `MatSetValuesBlocked()`
@*/
PETSC_EXTERN PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin, PetscInt *min, const PetscInt im[], PetscInt *nin, const PetscInt in[], const MatScalar v[], InsertMode *addvin)
{
  /* convert input arguments to C version */
  Mat        mat  = *matin;
  PetscInt   m    = *min, n = *nin;
  InsertMode addv = *addvin;

  Mat_MPIBAIJ     *baij        = (Mat_MPIBAIJ *)mat->data;
  const MatScalar *value;
  MatScalar       *barray      = baij->barray; /* bs x bs scratch block, owned by the matrix */
  PetscBool        roworiented = baij->roworiented;
  PetscInt         i, j, ii, jj, row, col, rstart = baij->rstartbs;
  PetscInt         rend = baij->rendbs, cstart = baij->cstartbs, stepval;
  PetscInt         cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;

  PetscFunctionBegin;
  /* tasks normally handled by MatSetValuesBlocked() */
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
  else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values");
  PetscCheck(!mat->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix");
  if (mat->assembled) {
    mat->was_assembled = PETSC_TRUE;
    mat->assembled     = PETSC_FALSE;
  }
  PetscCall(PetscLogEventBegin(MAT_SetValues, mat, 0, 0, 0));

  /* lazily allocate the bs2-scalar scratch block used when the input block must be copied */
  if (!barray) {
    PetscCall(PetscMalloc1(bs2, &barray));
    baij->barray = barray;
  }

  /* stride (in scalars) separating consecutive rows (row-oriented input) or
     columns (column-oriented input) of one bs x bs block inside v */
  if (roworiented) stepval = (n - 1) * bs;
  else stepval = (m - 1) * bs;

  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue; /* negative row indices are silently ignored */
    PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large, row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
    if (im[i] >= rstart && im[i] < rend) { /* block row owned by this process */
      row = im[i] - rstart;
      for (j = 0; j < n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          /* the i-th block is already contiguous in v; point barray at it directly */
          barray = (MatScalar *)v + i * bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar *)v + j * bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i * (stepval + bs) * bs + j * bs;
          } else {
            value = v + j * (stepval + bs) * bs + i * bs;
          }
          /* gather one bs x bs block out of the strided layout into the contiguous scratch */
          for (ii = 0; ii < bs; ii++, value += stepval) {
            for (jj = 0; jj < bs; jj++) *barray++ = *value++;
          }
          barray -= bs2; /* rewind to the start of the scratch block */
        }

        if (in[j] >= cstart && in[j] < cend) {
          /* block column in the diagonal portion A */
          col = in[j] - cstart;
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
        } else if (in[j] < 0) {
          continue; /* negative column indices are silently ignored */
        } else {
          /* block column in the off-diagonal portion B: translate the global block
             column to B's local numbering via the colmap */
          PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large, col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
          if (mat->was_assembled) {
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));

            /* colmap stores 1-based point (non-block) indices; entries must be multiples of bs */
#if defined(PETSC_USE_DEBUG)
  #if defined(PETSC_USE_CTABLE)
            {
              PetscInt data;
              PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &data));
              PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
            }
  #else
            PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
  #endif
#endif
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col));
            col = (col - 1) / bs;
#else
            col = (baij->colmap[in[j]] - 1) / bs;
#endif
            /* col < 0 means this global block column is not yet present in B: disassemble and
               fall back to global indexing.
               NOTE(review): the nonew flag is read from baij->A here while the insertion below
               goes into baij->B — confirm against MatSetValuesBlocked_MPIBAIJ whether B's flag
               was intended */
            if (col < 0 && !((Mat_SeqBAIJ *)(baij->A->data))->nonew) {
              PetscCall(MatDisAssemble_MPIBAIJ(mat));
              col = in[j];
            }
          } else col = in[j]; /* matrix not yet assembled: B still uses global block columns */
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
        }
      }
    } else {
      /* off-process block row: stash for communication during assembly */
      if (!baij->donotstash) {
        if (roworiented) {
          PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        } else {
          PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        }
      }
    }
  }

  /* task normally handled by MatSetValuesBlocked() */
  PetscCall(PetscLogEventEnd(MAT_SetValues, mat, 0, 0, 0));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatCreateMPIBAIJWithArrays - creates a `MATMPIBAIJ` matrix using arrays that contain in standard block
  CSR format the local rows.

  Collective

  Input Parameters:
+ comm - MPI communicator
. bs   - the block size, only a block size of 1 is supported
. m    - number of local rows (Cannot be `PETSC_DECIDE`)
. n    - This value should be the same as the local size used in creating the
         x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
         calculated if N is given) For square matrices n is almost always m.
. M    - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
. N    - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
. i    - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that rowth block row of the matrix
. j    - column indices
- a    - matrix values

  Output Parameter:
. mat - the matrix

  Level: intermediate

  Notes:
  The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc;
  thus you CANNOT change the matrix entries by changing the values of a[] after you have
  called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.
3575 3576 The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 3577 the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 3578 block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory 3579 with column-major ordering within blocks. 3580 3581 The `i` and `j` indices are 0 based, and i indices are indices corresponding to the local `j` array. 3582 3583 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 3584 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()` 3585 @*/ 3586 PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat) 3587 { 3588 PetscFunctionBegin; 3589 PetscCheck(!i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 3590 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3591 PetscCall(MatCreate(comm, mat)); 3592 PetscCall(MatSetSizes(*mat, m, n, M, N)); 3593 PetscCall(MatSetType(*mat, MATMPIBAIJ)); 3594 PetscCall(MatSetBlockSize(*mat, bs)); 3595 PetscCall(MatSetUp(*mat)); 3596 PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_FALSE)); 3597 PetscCall(MatMPIBAIJSetPreallocationCSR(*mat, bs, i, j, a)); 3598 PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_TRUE)); 3599 PetscFunctionReturn(PETSC_SUCCESS); 3600 } 3601 3602 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 3603 { 3604 PetscInt m, N, i, rstart, nnz, Ii, bs, cbs; 3605 PetscInt *indx; 3606 PetscScalar *values; 3607 3608 PetscFunctionBegin; 3609 PetscCall(MatGetSize(inmat, &m, &N)); 3610 if (scall == 
MAT_INITIAL_MATRIX) { /* symbolic phase */ 3611 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)inmat->data; 3612 PetscInt *dnz, *onz, mbs, Nbs, nbs; 3613 PetscInt *bindx, rmax = a->rmax, j; 3614 PetscMPIInt rank, size; 3615 3616 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 3617 mbs = m / bs; 3618 Nbs = N / cbs; 3619 if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnershipBlock(comm, cbs, &n, &N)); 3620 nbs = n / cbs; 3621 3622 PetscCall(PetscMalloc1(rmax, &bindx)); 3623 MatPreallocateBegin(comm, mbs, nbs, dnz, onz); /* inline function, output __end and __rstart are used below */ 3624 3625 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 3626 PetscCallMPI(MPI_Comm_rank(comm, &size)); 3627 if (rank == size - 1) { 3628 /* Check sum(nbs) = Nbs */ 3629 PetscCheck(__end == Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT, __end, Nbs); 3630 } 3631 3632 rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */ 3633 for (i = 0; i < mbs; i++) { 3634 PetscCall(MatGetRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL)); /* non-blocked nnz and indx */ 3635 nnz = nnz / bs; 3636 for (j = 0; j < nnz; j++) bindx[j] = indx[j * bs] / bs; 3637 PetscCall(MatPreallocateSet(i + rstart, nnz, bindx, dnz, onz)); 3638 PetscCall(MatRestoreRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL)); 3639 } 3640 PetscCall(PetscFree(bindx)); 3641 3642 PetscCall(MatCreate(comm, outmat)); 3643 PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 3644 PetscCall(MatSetBlockSizes(*outmat, bs, cbs)); 3645 PetscCall(MatSetType(*outmat, MATBAIJ)); 3646 PetscCall(MatSeqBAIJSetPreallocation(*outmat, bs, 0, dnz)); 3647 PetscCall(MatMPIBAIJSetPreallocation(*outmat, bs, 0, dnz, 0, onz)); 3648 MatPreallocateEnd(dnz, onz); 3649 PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 3650 } 3651 3652 /* numeric phase */ 3653 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 3654 
PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL)); 3655 3656 for (i = 0; i < m; i++) { 3657 PetscCall(MatGetRow_SeqBAIJ(inmat, i, &nnz, &indx, &values)); 3658 Ii = i + rstart; 3659 PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES)); 3660 PetscCall(MatRestoreRow_SeqBAIJ(inmat, i, &nnz, &indx, &values)); 3661 } 3662 PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY)); 3663 PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY)); 3664 PetscFunctionReturn(PETSC_SUCCESS); 3665 } 3666