1 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/ 2 3 #include <petsc/private/hashseti.h> 4 #include <petscblaslapack.h> 5 #include <petscsf.h> 6 7 static PetscErrorCode MatDestroy_MPIBAIJ(Mat mat) 8 { 9 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 10 11 PetscFunctionBegin; 12 PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N)); 13 PetscCall(MatStashDestroy_Private(&mat->stash)); 14 PetscCall(MatStashDestroy_Private(&mat->bstash)); 15 PetscCall(MatDestroy(&baij->A)); 16 PetscCall(MatDestroy(&baij->B)); 17 #if defined(PETSC_USE_CTABLE) 18 PetscCall(PetscHMapIDestroy(&baij->colmap)); 19 #else 20 PetscCall(PetscFree(baij->colmap)); 21 #endif 22 PetscCall(PetscFree(baij->garray)); 23 PetscCall(VecDestroy(&baij->lvec)); 24 PetscCall(VecScatterDestroy(&baij->Mvctx)); 25 PetscCall(PetscFree2(baij->rowvalues, baij->rowindices)); 26 PetscCall(PetscFree(baij->barray)); 27 PetscCall(PetscFree2(baij->hd, baij->ht)); 28 PetscCall(PetscFree(baij->rangebs)); 29 PetscCall(PetscFree(mat->data)); 30 31 PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL)); 32 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL)); 33 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL)); 34 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocation_C", NULL)); 35 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocationCSR_C", NULL)); 36 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL)); 37 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetHashTableFactor_C", NULL)); 38 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpisbaij_C", NULL)); 39 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiadj_C", NULL)); 40 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiaij_C", 
NULL)); 41 #if defined(PETSC_HAVE_HYPRE) 42 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_hypre_C", NULL)); 43 #endif 44 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_is_C", NULL)); 45 PetscFunctionReturn(PETSC_SUCCESS); 46 } 47 48 /* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and MatAssemblyEnd_MPI_Hash() */ 49 #define TYPE BAIJ 50 #include "../src/mat/impls/aij/mpi/mpihashmat.h" 51 #undef TYPE 52 53 #if defined(PETSC_HAVE_HYPRE) 54 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *); 55 #endif 56 57 static PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A, Vec v, PetscInt idx[]) 58 { 59 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 60 PetscInt i, *idxb = NULL, m = A->rmap->n, bs = A->cmap->bs; 61 PetscScalar *va, *vv; 62 Vec vB, vA; 63 const PetscScalar *vb; 64 65 PetscFunctionBegin; 66 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vA)); 67 PetscCall(MatGetRowMaxAbs(a->A, vA, idx)); 68 69 PetscCall(VecGetArrayWrite(vA, &va)); 70 if (idx) { 71 for (i = 0; i < m; i++) { 72 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 73 } 74 } 75 76 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vB)); 77 PetscCall(PetscMalloc1(m, &idxb)); 78 PetscCall(MatGetRowMaxAbs(a->B, vB, idxb)); 79 80 PetscCall(VecGetArrayWrite(v, &vv)); 81 PetscCall(VecGetArrayRead(vB, &vb)); 82 for (i = 0; i < m; i++) { 83 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 84 vv[i] = vb[i]; 85 if (idx) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs); 86 } else { 87 vv[i] = va[i]; 88 if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > bs * a->garray[idxb[i] / bs] + (idxb[i] % bs)) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs); 89 } 90 } 91 PetscCall(VecRestoreArrayWrite(vA, &vv)); 92 PetscCall(VecRestoreArrayWrite(vA, &va)); 93 PetscCall(VecRestoreArrayRead(vB, &vb)); 94 PetscCall(PetscFree(idxb)); 95 PetscCall(VecDestroy(&vA)); 96 PetscCall(VecDestroy(&vB)); 
97 PetscFunctionReturn(PETSC_SUCCESS); 98 } 99 100 static PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat) 101 { 102 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 103 104 PetscFunctionBegin; 105 PetscCall(MatStoreValues(aij->A)); 106 PetscCall(MatStoreValues(aij->B)); 107 PetscFunctionReturn(PETSC_SUCCESS); 108 } 109 110 static PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat) 111 { 112 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 113 114 PetscFunctionBegin; 115 PetscCall(MatRetrieveValues(aij->A)); 116 PetscCall(MatRetrieveValues(aij->B)); 117 PetscFunctionReturn(PETSC_SUCCESS); 118 } 119 120 /* 121 Local utility routine that creates a mapping from the global column 122 number to the local number in the off-diagonal part of the local 123 storage of the matrix. This is done in a non scalable way since the 124 length of colmap equals the global matrix length. 125 */ 126 PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat) 127 { 128 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 129 Mat_SeqBAIJ *B = (Mat_SeqBAIJ *)baij->B->data; 130 PetscInt nbs = B->nbs, i, bs = mat->rmap->bs; 131 132 PetscFunctionBegin; 133 #if defined(PETSC_USE_CTABLE) 134 PetscCall(PetscHMapICreateWithSize(baij->nbs, &baij->colmap)); 135 for (i = 0; i < nbs; i++) PetscCall(PetscHMapISet(baij->colmap, baij->garray[i] + 1, i * bs + 1)); 136 #else 137 PetscCall(PetscCalloc1(baij->Nbs + 1, &baij->colmap)); 138 for (i = 0; i < nbs; i++) baij->colmap[baij->garray[i]] = i * bs + 1; 139 #endif 140 PetscFunctionReturn(PETSC_SUCCESS); 141 } 142 143 #define MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, orow, ocol) \ 144 do { \ 145 brow = row / bs; \ 146 rp = PetscSafePointerPlusOffset(aj, ai[brow]); \ 147 ap = PetscSafePointerPlusOffset(aa, bs2 * ai[brow]); \ 148 rmax = aimax[brow]; \ 149 nrow = ailen[brow]; \ 150 bcol = col / bs; \ 151 ridx = row % bs; \ 152 cidx = col % bs; \ 153 low = 0; \ 154 high = nrow; \ 155 while (high - low > 3) { \ 156 t = (low + high) / 2; \ 157 if (rp[t] > bcol) high = t; 
\ 158 else low = t; \ 159 } \ 160 for (_i = low; _i < high; _i++) { \ 161 if (rp[_i] > bcol) break; \ 162 if (rp[_i] == bcol) { \ 163 bap = ap + bs2 * _i + bs * cidx + ridx; \ 164 if (addv == ADD_VALUES) *bap += value; \ 165 else *bap = value; \ 166 goto a_noinsert; \ 167 } \ 168 } \ 169 if (a->nonew == 1) goto a_noinsert; \ 170 PetscCheck(a->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 171 MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, aimax, a->nonew, MatScalar); \ 172 N = nrow++ - 1; \ 173 /* shift up all the later entries in this row */ \ 174 PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \ 175 PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \ 176 PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \ 177 rp[_i] = bcol; \ 178 ap[bs2 * _i + bs * cidx + ridx] = value; \ 179 a_noinsert:; \ 180 ailen[brow] = nrow; \ 181 } while (0) 182 183 #define MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, orow, ocol) \ 184 do { \ 185 brow = row / bs; \ 186 rp = PetscSafePointerPlusOffset(bj, bi[brow]); \ 187 ap = PetscSafePointerPlusOffset(ba, bs2 * bi[brow]); \ 188 rmax = bimax[brow]; \ 189 nrow = bilen[brow]; \ 190 bcol = col / bs; \ 191 ridx = row % bs; \ 192 cidx = col % bs; \ 193 low = 0; \ 194 high = nrow; \ 195 while (high - low > 3) { \ 196 t = (low + high) / 2; \ 197 if (rp[t] > bcol) high = t; \ 198 else low = t; \ 199 } \ 200 for (_i = low; _i < high; _i++) { \ 201 if (rp[_i] > bcol) break; \ 202 if (rp[_i] == bcol) { \ 203 bap = ap + bs2 * _i + bs * cidx + ridx; \ 204 if (addv == ADD_VALUES) *bap += value; \ 205 else *bap = value; \ 206 goto b_noinsert; \ 207 } \ 208 } \ 209 if (b->nonew == 1) goto b_noinsert; \ 210 PetscCheck(b->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" 
PetscInt_FMT ") into matrix", orow, ocol); \ 211 MatSeqXAIJReallocateAIJ(B, b->mbs, bs2, nrow, brow, bcol, rmax, ba, bi, bj, rp, ap, bimax, b->nonew, MatScalar); \ 212 N = nrow++ - 1; \ 213 /* shift up all the later entries in this row */ \ 214 PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \ 215 PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \ 216 PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \ 217 rp[_i] = bcol; \ 218 ap[bs2 * _i + bs * cidx + ridx] = value; \ 219 b_noinsert:; \ 220 bilen[brow] = nrow; \ 221 } while (0) 222 223 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) 224 { 225 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 226 MatScalar value; 227 PetscBool roworiented = baij->roworiented; 228 PetscInt i, j, row, col; 229 PetscInt rstart_orig = mat->rmap->rstart; 230 PetscInt rend_orig = mat->rmap->rend, cstart_orig = mat->cmap->rstart; 231 PetscInt cend_orig = mat->cmap->rend, bs = mat->rmap->bs; 232 233 /* Some Variables required in the macro */ 234 Mat A = baij->A; 235 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)(A)->data; 236 PetscInt *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j; 237 MatScalar *aa = a->a; 238 239 Mat B = baij->B; 240 Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)(B)->data; 241 PetscInt *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j; 242 MatScalar *ba = b->a; 243 244 PetscInt *rp, ii, nrow, _i, rmax, N, brow, bcol; 245 PetscInt low, high, t, ridx, cidx, bs2 = a->bs2; 246 MatScalar *ap, *bap; 247 248 PetscFunctionBegin; 249 for (i = 0; i < m; i++) { 250 if (im[i] < 0) continue; 251 PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1); 252 if (im[i] >= rstart_orig && im[i] < rend_orig) { 253 row = im[i] - rstart_orig; 254 for (j = 0; j < n; j++) { 255 if (in[j] >= cstart_orig && 
in[j] < cend_orig) { 256 col = in[j] - cstart_orig; 257 if (roworiented) value = v[i * n + j]; 258 else value = v[i + j * m]; 259 MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, im[i], in[j]); 260 } else if (in[j] < 0) { 261 continue; 262 } else { 263 PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1); 264 if (mat->was_assembled) { 265 if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 266 #if defined(PETSC_USE_CTABLE) 267 PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] / bs + 1, 0, &col)); 268 col = col - 1; 269 #else 270 col = baij->colmap[in[j] / bs] - 1; 271 #endif 272 if (col < 0 && !((Mat_SeqBAIJ *)(baij->B->data))->nonew) { 273 PetscCall(MatDisAssemble_MPIBAIJ(mat)); 274 col = in[j]; 275 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 276 B = baij->B; 277 b = (Mat_SeqBAIJ *)(B)->data; 278 bimax = b->imax; 279 bi = b->i; 280 bilen = b->ilen; 281 bj = b->j; 282 ba = b->a; 283 } else { 284 PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]); 285 col += in[j] % bs; 286 } 287 } else col = in[j]; 288 if (roworiented) value = v[i * n + j]; 289 else value = v[i + j * m]; 290 MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, im[i], in[j]); 291 /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */ 292 } 293 } 294 } else { 295 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]); 296 if (!baij->donotstash) { 297 mat->assembled = PETSC_FALSE; 298 if (roworiented) { 299 PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE)); 300 } else { 301 PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v 
+ i, m, PETSC_FALSE)); 302 } 303 } 304 } 305 } 306 PetscFunctionReturn(PETSC_SUCCESS); 307 } 308 309 static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol) 310 { 311 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 312 PetscInt *rp, low, high, t, ii, jj, nrow, i, rmax, N; 313 PetscInt *imax = a->imax, *ai = a->i, *ailen = a->ilen; 314 PetscInt *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs; 315 PetscBool roworiented = a->roworiented; 316 const PetscScalar *value = v; 317 MatScalar *ap, *aa = a->a, *bap; 318 319 PetscFunctionBegin; 320 rp = aj + ai[row]; 321 ap = aa + bs2 * ai[row]; 322 rmax = imax[row]; 323 nrow = ailen[row]; 324 value = v; 325 low = 0; 326 high = nrow; 327 while (high - low > 7) { 328 t = (low + high) / 2; 329 if (rp[t] > col) high = t; 330 else low = t; 331 } 332 for (i = low; i < high; i++) { 333 if (rp[i] > col) break; 334 if (rp[i] == col) { 335 bap = ap + bs2 * i; 336 if (roworiented) { 337 if (is == ADD_VALUES) { 338 for (ii = 0; ii < bs; ii++) { 339 for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++; 340 } 341 } else { 342 for (ii = 0; ii < bs; ii++) { 343 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 344 } 345 } 346 } else { 347 if (is == ADD_VALUES) { 348 for (ii = 0; ii < bs; ii++, value += bs) { 349 for (jj = 0; jj < bs; jj++) bap[jj] += value[jj]; 350 bap += bs; 351 } 352 } else { 353 for (ii = 0; ii < bs; ii++, value += bs) { 354 for (jj = 0; jj < bs; jj++) bap[jj] = value[jj]; 355 bap += bs; 356 } 357 } 358 } 359 goto noinsert2; 360 } 361 } 362 if (nonew == 1) goto noinsert2; 363 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol); 364 MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar); 365 N = nrow++ - 1; 366 high++; 
367 /* shift up all the later entries in this row */ 368 PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1)); 369 PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1))); 370 rp[i] = col; 371 bap = ap + bs2 * i; 372 if (roworiented) { 373 for (ii = 0; ii < bs; ii++) { 374 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 375 } 376 } else { 377 for (ii = 0; ii < bs; ii++) { 378 for (jj = 0; jj < bs; jj++) *bap++ = *value++; 379 } 380 } 381 noinsert2:; 382 ailen[row] = nrow; 383 PetscFunctionReturn(PETSC_SUCCESS); 384 } 385 386 /* 387 This routine should be optimized so that the block copy at ** Here a copy is required ** below is not needed 388 by passing additional stride information into the MatSetValuesBlocked_SeqBAIJ_Inlined() routine 389 */ 390 static PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) 391 { 392 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 393 const PetscScalar *value; 394 MatScalar *barray = baij->barray; 395 PetscBool roworiented = baij->roworiented; 396 PetscInt i, j, ii, jj, row, col, rstart = baij->rstartbs; 397 PetscInt rend = baij->rendbs, cstart = baij->cstartbs, stepval; 398 PetscInt cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2; 399 400 PetscFunctionBegin; 401 if (!barray) { 402 PetscCall(PetscMalloc1(bs2, &barray)); 403 baij->barray = barray; 404 } 405 406 if (roworiented) stepval = (n - 1) * bs; 407 else stepval = (m - 1) * bs; 408 409 for (i = 0; i < m; i++) { 410 if (im[i] < 0) continue; 411 PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1); 412 if (im[i] >= rstart && im[i] < rend) { 413 row = im[i] - rstart; 414 for (j = 0; j < n; j++) { 415 /* If NumCol = 1 then a copy is not required */ 416 if ((roworiented) && (n == 1)) { 417 barray = (MatScalar *)v + i * bs2; 418 } 
else if ((!roworiented) && (m == 1)) { 419 barray = (MatScalar *)v + j * bs2; 420 } else { /* Here a copy is required */ 421 if (roworiented) { 422 value = v + (i * (stepval + bs) + j) * bs; 423 } else { 424 value = v + (j * (stepval + bs) + i) * bs; 425 } 426 for (ii = 0; ii < bs; ii++, value += bs + stepval) { 427 for (jj = 0; jj < bs; jj++) barray[jj] = value[jj]; 428 barray += bs; 429 } 430 barray -= bs2; 431 } 432 433 if (in[j] >= cstart && in[j] < cend) { 434 col = in[j] - cstart; 435 PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j])); 436 } else if (in[j] < 0) { 437 continue; 438 } else { 439 PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1); 440 if (mat->was_assembled) { 441 if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 442 443 #if defined(PETSC_USE_DEBUG) 444 #if defined(PETSC_USE_CTABLE) 445 { 446 PetscInt data; 447 PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &data)); 448 PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap"); 449 } 450 #else 451 PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap"); 452 #endif 453 #endif 454 #if defined(PETSC_USE_CTABLE) 455 PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col)); 456 col = (col - 1) / bs; 457 #else 458 col = (baij->colmap[in[j]] - 1) / bs; 459 #endif 460 if (col < 0 && !((Mat_SeqBAIJ *)(baij->B->data))->nonew) { 461 PetscCall(MatDisAssemble_MPIBAIJ(mat)); 462 col = in[j]; 463 } else PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new blocked indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]); 464 } else col = in[j]; 465 PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j])); 466 } 467 } 468 } else { 469 
PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]); 470 if (!baij->donotstash) { 471 if (roworiented) { 472 PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 473 } else { 474 PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 475 } 476 } 477 } 478 } 479 PetscFunctionReturn(PETSC_SUCCESS); 480 } 481 482 #define HASH_KEY 0.6180339887 483 #define HASH(size, key, tmp) (tmp = (key)*HASH_KEY, (PetscInt)((size) * (tmp - (PetscInt)tmp))) 484 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 485 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 486 static PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) 487 { 488 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 489 PetscBool roworiented = baij->roworiented; 490 PetscInt i, j, row, col; 491 PetscInt rstart_orig = mat->rmap->rstart; 492 PetscInt rend_orig = mat->rmap->rend, Nbs = baij->Nbs; 493 PetscInt h1, key, size = baij->ht_size, bs = mat->rmap->bs, *HT = baij->ht, idx; 494 PetscReal tmp; 495 MatScalar **HD = baij->hd, value; 496 PetscInt total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct; 497 498 PetscFunctionBegin; 499 for (i = 0; i < m; i++) { 500 if (PetscDefined(USE_DEBUG)) { 501 PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row"); 502 PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1); 503 } 504 row = im[i]; 505 if (row >= rstart_orig && row < rend_orig) { 506 for (j = 0; j < n; j++) { 507 col = in[j]; 508 if (roworiented) value = v[i * n + j]; 509 else value = v[i + j * m]; 510 /* Look up 
PetscInto the Hash Table */ 511 key = (row / bs) * Nbs + (col / bs) + 1; 512 h1 = HASH(size, key, tmp); 513 514 idx = h1; 515 if (PetscDefined(USE_DEBUG)) { 516 insert_ct++; 517 total_ct++; 518 if (HT[idx] != key) { 519 for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++) 520 ; 521 if (idx == size) { 522 for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++) 523 ; 524 PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col); 525 } 526 } 527 } else if (HT[idx] != key) { 528 for (idx = h1; (idx < size) && (HT[idx] != key); idx++) 529 ; 530 if (idx == size) { 531 for (idx = 0; (idx < h1) && (HT[idx] != key); idx++) 532 ; 533 PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col); 534 } 535 } 536 /* A HASH table entry is found, so insert the values at the correct address */ 537 if (addv == ADD_VALUES) *(HD[idx] + (col % bs) * bs + (row % bs)) += value; 538 else *(HD[idx] + (col % bs) * bs + (row % bs)) = value; 539 } 540 } else if (!baij->donotstash) { 541 if (roworiented) { 542 PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE)); 543 } else { 544 PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE)); 545 } 546 } 547 } 548 if (PetscDefined(USE_DEBUG)) { 549 baij->ht_total_ct += total_ct; 550 baij->ht_insert_ct += insert_ct; 551 } 552 PetscFunctionReturn(PETSC_SUCCESS); 553 } 554 555 static PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) 556 { 557 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 558 PetscBool roworiented = baij->roworiented; 559 PetscInt i, j, ii, jj, row, col; 560 PetscInt rstart = baij->rstartbs; 561 PetscInt rend = mat->rmap->rend, stepval, bs = mat->rmap->bs, bs2 = 
baij->bs2, nbs2 = n * bs2; 562 PetscInt h1, key, size = baij->ht_size, idx, *HT = baij->ht, Nbs = baij->Nbs; 563 PetscReal tmp; 564 MatScalar **HD = baij->hd, *baij_a; 565 const PetscScalar *v_t, *value; 566 PetscInt total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct; 567 568 PetscFunctionBegin; 569 if (roworiented) stepval = (n - 1) * bs; 570 else stepval = (m - 1) * bs; 571 572 for (i = 0; i < m; i++) { 573 if (PetscDefined(USE_DEBUG)) { 574 PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row: %" PetscInt_FMT, im[i]); 575 PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1); 576 } 577 row = im[i]; 578 v_t = v + i * nbs2; 579 if (row >= rstart && row < rend) { 580 for (j = 0; j < n; j++) { 581 col = in[j]; 582 583 /* Look up into the Hash Table */ 584 key = row * Nbs + col + 1; 585 h1 = HASH(size, key, tmp); 586 587 idx = h1; 588 if (PetscDefined(USE_DEBUG)) { 589 total_ct++; 590 insert_ct++; 591 if (HT[idx] != key) { 592 for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++) 593 ; 594 if (idx == size) { 595 for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++) 596 ; 597 PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col); 598 } 599 } 600 } else if (HT[idx] != key) { 601 for (idx = h1; (idx < size) && (HT[idx] != key); idx++) 602 ; 603 if (idx == size) { 604 for (idx = 0; (idx < h1) && (HT[idx] != key); idx++) 605 ; 606 PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col); 607 } 608 } 609 baij_a = HD[idx]; 610 if (roworiented) { 611 /*value = v + i*(stepval+bs)*bs + j*bs;*/ 612 /* value = v + (i*(stepval+bs)+j)*bs; */ 613 value = v_t; 614 v_t += bs; 615 if (addv == ADD_VALUES) { 616 for (ii = 0; ii < bs; ii++, value += 
stepval) { 617 for (jj = ii; jj < bs2; jj += bs) baij_a[jj] += *value++; 618 } 619 } else { 620 for (ii = 0; ii < bs; ii++, value += stepval) { 621 for (jj = ii; jj < bs2; jj += bs) baij_a[jj] = *value++; 622 } 623 } 624 } else { 625 value = v + j * (stepval + bs) * bs + i * bs; 626 if (addv == ADD_VALUES) { 627 for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) { 628 for (jj = 0; jj < bs; jj++) baij_a[jj] += *value++; 629 } 630 } else { 631 for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) { 632 for (jj = 0; jj < bs; jj++) baij_a[jj] = *value++; 633 } 634 } 635 } 636 } 637 } else { 638 if (!baij->donotstash) { 639 if (roworiented) { 640 PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 641 } else { 642 PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 643 } 644 } 645 } 646 } 647 if (PetscDefined(USE_DEBUG)) { 648 baij->ht_total_ct += total_ct; 649 baij->ht_insert_ct += insert_ct; 650 } 651 PetscFunctionReturn(PETSC_SUCCESS); 652 } 653 654 static PetscErrorCode MatGetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[]) 655 { 656 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 657 PetscInt bs = mat->rmap->bs, i, j, bsrstart = mat->rmap->rstart, bsrend = mat->rmap->rend; 658 PetscInt bscstart = mat->cmap->rstart, bscend = mat->cmap->rend, row, col, data; 659 660 PetscFunctionBegin; 661 for (i = 0; i < m; i++) { 662 if (idxm[i] < 0) continue; /* negative row */ 663 PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1); 664 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 665 row = idxm[i] - bsrstart; 666 for (j = 0; j < n; j++) { 667 if (idxn[j] < 0) continue; /* negative column */ 668 PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" 
PetscInt_FMT, idxn[j], mat->cmap->N - 1); 669 if (idxn[j] >= bscstart && idxn[j] < bscend) { 670 col = idxn[j] - bscstart; 671 PetscCall(MatGetValues_SeqBAIJ(baij->A, 1, &row, 1, &col, v + i * n + j)); 672 } else { 673 if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 674 #if defined(PETSC_USE_CTABLE) 675 PetscCall(PetscHMapIGetWithDefault(baij->colmap, idxn[j] / bs + 1, 0, &data)); 676 data--; 677 #else 678 data = baij->colmap[idxn[j] / bs] - 1; 679 #endif 680 if ((data < 0) || (baij->garray[data / bs] != idxn[j] / bs)) *(v + i * n + j) = 0.0; 681 else { 682 col = data + idxn[j] % bs; 683 PetscCall(MatGetValues_SeqBAIJ(baij->B, 1, &row, 1, &col, v + i * n + j)); 684 } 685 } 686 } 687 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported"); 688 } 689 PetscFunctionReturn(PETSC_SUCCESS); 690 } 691 692 static PetscErrorCode MatNorm_MPIBAIJ(Mat mat, NormType type, PetscReal *nrm) 693 { 694 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 695 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ *)baij->A->data, *bmat = (Mat_SeqBAIJ *)baij->B->data; 696 PetscInt i, j, bs2 = baij->bs2, bs = baij->A->rmap->bs, nz, row, col; 697 PetscReal sum = 0.0; 698 MatScalar *v; 699 700 PetscFunctionBegin; 701 if (baij->size == 1) { 702 PetscCall(MatNorm(baij->A, type, nrm)); 703 } else { 704 if (type == NORM_FROBENIUS) { 705 v = amat->a; 706 nz = amat->nz * bs2; 707 for (i = 0; i < nz; i++) { 708 sum += PetscRealPart(PetscConj(*v) * (*v)); 709 v++; 710 } 711 v = bmat->a; 712 nz = bmat->nz * bs2; 713 for (i = 0; i < nz; i++) { 714 sum += PetscRealPart(PetscConj(*v) * (*v)); 715 v++; 716 } 717 PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 718 *nrm = PetscSqrtReal(*nrm); 719 } else if (type == NORM_1) { /* max column sum */ 720 PetscReal *tmp, *tmp2; 721 PetscInt *jj, *garray = baij->garray, cstart = baij->rstartbs; 722 PetscCall(PetscCalloc1(mat->cmap->N, &tmp)); 723 PetscCall(PetscMalloc1(mat->cmap->N, 
&tmp2)); 724 v = amat->a; 725 jj = amat->j; 726 for (i = 0; i < amat->nz; i++) { 727 for (j = 0; j < bs; j++) { 728 col = bs * (cstart + *jj) + j; /* column index */ 729 for (row = 0; row < bs; row++) { 730 tmp[col] += PetscAbsScalar(*v); 731 v++; 732 } 733 } 734 jj++; 735 } 736 v = bmat->a; 737 jj = bmat->j; 738 for (i = 0; i < bmat->nz; i++) { 739 for (j = 0; j < bs; j++) { 740 col = bs * garray[*jj] + j; 741 for (row = 0; row < bs; row++) { 742 tmp[col] += PetscAbsScalar(*v); 743 v++; 744 } 745 } 746 jj++; 747 } 748 PetscCall(MPIU_Allreduce(tmp, tmp2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 749 *nrm = 0.0; 750 for (j = 0; j < mat->cmap->N; j++) { 751 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 752 } 753 PetscCall(PetscFree(tmp)); 754 PetscCall(PetscFree(tmp2)); 755 } else if (type == NORM_INFINITY) { /* max row sum */ 756 PetscReal *sums; 757 PetscCall(PetscMalloc1(bs, &sums)); 758 sum = 0.0; 759 for (j = 0; j < amat->mbs; j++) { 760 for (row = 0; row < bs; row++) sums[row] = 0.0; 761 v = amat->a + bs2 * amat->i[j]; 762 nz = amat->i[j + 1] - amat->i[j]; 763 for (i = 0; i < nz; i++) { 764 for (col = 0; col < bs; col++) { 765 for (row = 0; row < bs; row++) { 766 sums[row] += PetscAbsScalar(*v); 767 v++; 768 } 769 } 770 } 771 v = bmat->a + bs2 * bmat->i[j]; 772 nz = bmat->i[j + 1] - bmat->i[j]; 773 for (i = 0; i < nz; i++) { 774 for (col = 0; col < bs; col++) { 775 for (row = 0; row < bs; row++) { 776 sums[row] += PetscAbsScalar(*v); 777 v++; 778 } 779 } 780 } 781 for (row = 0; row < bs; row++) { 782 if (sums[row] > sum) sum = sums[row]; 783 } 784 } 785 PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat))); 786 PetscCall(PetscFree(sums)); 787 } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for this norm yet"); 788 } 789 PetscFunctionReturn(PETSC_SUCCESS); 790 } 791 792 /* 793 Creates the hash table, and sets the table 794 This table is created only once. 
795 If new entried need to be added to the matrix 796 then the hash table has to be destroyed and 797 recreated. 798 */ 799 static PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat, PetscReal factor) 800 { 801 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 802 Mat A = baij->A, B = baij->B; 803 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data; 804 PetscInt i, j, k, nz = a->nz + b->nz, h1, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j; 805 PetscInt ht_size, bs2 = baij->bs2, rstart = baij->rstartbs; 806 PetscInt cstart = baij->cstartbs, *garray = baij->garray, row, col, Nbs = baij->Nbs; 807 PetscInt *HT, key; 808 MatScalar **HD; 809 PetscReal tmp; 810 #if defined(PETSC_USE_INFO) 811 PetscInt ct = 0, max = 0; 812 #endif 813 814 PetscFunctionBegin; 815 if (baij->ht) PetscFunctionReturn(PETSC_SUCCESS); 816 817 baij->ht_size = (PetscInt)(factor * nz); 818 ht_size = baij->ht_size; 819 820 /* Allocate Memory for Hash Table */ 821 PetscCall(PetscCalloc2(ht_size, &baij->hd, ht_size, &baij->ht)); 822 HD = baij->hd; 823 HT = baij->ht; 824 825 /* Loop Over A */ 826 for (i = 0; i < a->mbs; i++) { 827 for (j = ai[i]; j < ai[i + 1]; j++) { 828 row = i + rstart; 829 col = aj[j] + cstart; 830 831 key = row * Nbs + col + 1; 832 h1 = HASH(ht_size, key, tmp); 833 for (k = 0; k < ht_size; k++) { 834 if (!HT[(h1 + k) % ht_size]) { 835 HT[(h1 + k) % ht_size] = key; 836 HD[(h1 + k) % ht_size] = a->a + j * bs2; 837 break; 838 #if defined(PETSC_USE_INFO) 839 } else { 840 ct++; 841 #endif 842 } 843 } 844 #if defined(PETSC_USE_INFO) 845 if (k > max) max = k; 846 #endif 847 } 848 } 849 /* Loop Over B */ 850 for (i = 0; i < b->mbs; i++) { 851 for (j = bi[i]; j < bi[i + 1]; j++) { 852 row = i + rstart; 853 col = garray[bj[j]]; 854 key = row * Nbs + col + 1; 855 h1 = HASH(ht_size, key, tmp); 856 for (k = 0; k < ht_size; k++) { 857 if (!HT[(h1 + k) % ht_size]) { 858 HT[(h1 + k) % ht_size] = key; 859 HD[(h1 + k) % ht_size] = b->a + j * bs2; 860 break; 861 #if 
defined(PETSC_USE_INFO) 862 } else { 863 ct++; 864 #endif 865 } 866 } 867 #if defined(PETSC_USE_INFO) 868 if (k > max) max = k; 869 #endif 870 } 871 } 872 873 /* Print Summary */ 874 #if defined(PETSC_USE_INFO) 875 for (i = 0, j = 0; i < ht_size; i++) { 876 if (HT[i]) j++; 877 } 878 PetscCall(PetscInfo(mat, "Average Search = %5.2g,max search = %" PetscInt_FMT "\n", (!j) ? (double)0.0 : (double)(((PetscReal)(ct + j)) / (double)j), max)); 879 #endif 880 PetscFunctionReturn(PETSC_SUCCESS); 881 } 882 883 static PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat, MatAssemblyType mode) 884 { 885 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 886 PetscInt nstash, reallocs; 887 888 PetscFunctionBegin; 889 if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS); 890 891 PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range)); 892 PetscCall(MatStashScatterBegin_Private(mat, &mat->bstash, baij->rangebs)); 893 PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs)); 894 PetscCall(PetscInfo(mat, "Stash has %" PetscInt_FMT " entries,uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs)); 895 PetscCall(MatStashGetInfo_Private(&mat->bstash, &nstash, &reallocs)); 896 PetscCall(PetscInfo(mat, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs)); 897 PetscFunctionReturn(PETSC_SUCCESS); 898 } 899 900 static PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat, MatAssemblyType mode) 901 { 902 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 903 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)baij->A->data; 904 PetscInt i, j, rstart, ncols, flg, bs2 = baij->bs2; 905 PetscInt *row, *col; 906 PetscBool r1, r2, r3, other_disassembled; 907 MatScalar *val; 908 PetscMPIInt n; 909 910 PetscFunctionBegin; 911 /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */ 912 if (!baij->donotstash && !mat->nooffprocentries) { 913 while (1) { 914 PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, 
&row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the row-oriented flag to column-oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ *)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;

    (((Mat_SeqBAIJ *)baij->B->data))->roworiented = PETSC_FALSE;
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->bstash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* each stashed block carries bs2 values, hence the i*bs2 offset */
        PetscCall(MatSetValuesBlocked_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i * bs2, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->bstash));

    baij->roworiented = r1;
    a->roworiented    = r2;

    ((Mat_SeqBAIJ *)baij->B->data)->roworiented = r3;
  }

  PetscCall(MatAssemblyBegin(baij->A, mode));
  PetscCall(MatAssemblyEnd(baij->A, mode));

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ *)baij->B->data)->nonew) {
    /* collective: every rank must agree on whether a reassembly cycle is needed */
    PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !other_disassembled) PetscCall(MatDisAssemble_MPIBAIJ(mat));
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIBAIJ(mat));
  PetscCall(MatAssemblyBegin(baij->B, mode));
  PetscCall(MatAssemblyEnd(baij->B, mode));

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    PetscCall(PetscInfo(mat, "Average Hash Table Search in MatSetValues = %5.2f\n", (double)((PetscReal)baij->ht_total_ct) / baij->ht_insert_ct));

    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  /* user requested the hash-table fast path: build it and switch the vtable */
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    PetscCall(MatCreateHashTable_MPIBAIJ_Private(mat, baij->ht_fact));

    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  /* release the MatGetRow() work arrays; they are rebuilt on demand */
  PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));

  baij->rowvalues = NULL;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ *)(baij->A->data))->nonew) {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

extern PetscErrorCode MatView_SeqBAIJ(Mat, PetscViewer);
#include <petscdraw.h>
/*
   Viewer back end for ASCII, draw, and socket viewers. For non-info formats the
   whole matrix is gathered onto rank 0 as a temporary MPIBAIJ and viewed there.
*/
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
{
  Mat_MPIBAIJ      *baij =
(Mat_MPIBAIJ *)mat->data;
  PetscMPIInt       rank = baij->rank;
  PetscInt          bs   = mat->rmap->bs;
  PetscBool         iascii, isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  if (iascii) {
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* per-rank summary (synchronized prints), then the VecScatter info */
      MatInfo info;
      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                   mat->rmap->bs, (double)info.memory));
      PetscCall(MatGetInfo(baij->A, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(MatGetInfo(baij->B, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(baij->Mvctx, viewer));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscCall(PetscViewerASCIIPrintf(viewer, " block size is %" PetscInt_FMT "\n", bs));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(PETSC_SUCCESS);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    Mat_SeqBAIJ *Aloc;
    PetscInt     M = mat->rmap->N, N = mat->cmap->N, *ai, *aj, col, i, j, k, *rvals, mbs = baij->mbs;
    MatScalar   *a;
    const char  *matname;

    /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
    if (rank == 0) {
      /* rank 0 owns every row/column of the gathered copy */
      PetscCall(MatSetSizes(A, M, N, M, N));
    } else {
      PetscCall(MatSetSizes(A, 0, 0, M, N));
    }
    PetscCall(MatSetType(A, MATMPIBAIJ));
    PetscCall(MatMPIBAIJSetPreallocation(A, mat->rmap->bs, 0, NULL, 0, NULL));
    PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ *)baij->A->data;
    ai   = Aloc->i;
    aj   = Aloc->j;
    a    = Aloc->a;
    PetscCall(PetscMalloc1(bs, &rvals));

    for (i = 0; i < mbs; i++) {
      /* rvals holds the bs consecutive global point-row indices of block row i */
      rvals[0] = bs * (baij->rstartbs + i);
      for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
      for (j = ai[i]; j < ai[i + 1]; j++) {
        col = (baij->cstartbs + aj[j]) * bs;
        for (k = 0; k < bs; k++) {
          PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
          col++;
          a += bs;
        }
      }
    }
    /* copy over the B part (global block columns come from garray) */
    Aloc = (Mat_SeqBAIJ *)baij->B->data;
    ai   = Aloc->i;
    aj   = Aloc->j;
    a    = Aloc->a;
    for (i = 0; i < mbs; i++) {
      rvals[0] = bs * (baij->rstartbs + i);
      for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
      for (j = ai[i]; j < ai[i + 1]; j++) {
        col = baij->garray[aj[j]] * bs;
        for (k = 0; k < bs; k++) {
PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
          col++;
          a += bs;
        }
      }
    }
    PetscCall(PetscFree(rvals));
    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    if (((PetscObject)mat)->name) PetscCall(PetscObjectGetName((PetscObject)mat, &matname));
    if (rank == 0) {
      /* only rank 0 holds the gathered matrix; view its sequential diagonal piece */
      if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIBAIJ *)(A->data))->A, matname));
      PetscCall(MatView_SeqBAIJ(((Mat_MPIBAIJ *)(A->data))->A, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Used for both MPIBAIJ and MPISBAIJ matrices */
/*
   Writes the matrix in PETSc binary format: header (classid, M, N, total nz),
   per-point-row lengths, column indices, then values; entries of each point
   row are emitted with off-process columns left of the diagonal block first,
   then the diagonal block, then the remaining off-process columns.
*/
PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat, PetscViewer viewer)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ    *A   = (Mat_SeqBAIJ *)aij->A->data;
  Mat_SeqBAIJ    *B   = (Mat_SeqBAIJ *)aij->B->data;
  const PetscInt *garray = aij->garray;
  PetscInt        header[4], M, N, m, rs, cs, bs, cnt, i, j, ja, jb, k, l;
  PetscInt64      nz, hnz;
  PetscInt       *rowlens, *colidxs;
  PetscScalar    *matvals;
  PetscMPIInt     rank;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  M  = mat->rmap->N;
  N  = mat->cmap->N;
  m  = mat->rmap->n;
  rs = mat->rmap->rstart;
  cs = mat->cmap->rstart;
  bs = mat->rmap->bs;
  nz = bs * bs * (A->nz + B->nz); /* local point-entry count */

  /* write matrix header */
  header[0] = MAT_FILE_CLASSID;
  header[1] = M;
  header[2] = N;
  PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_INT64, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
  if (rank == 0) PetscCall(PetscIntCast(hnz, &header[3]));
  PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));

  /* fill in and store row lengths */
  PetscCall(PetscMalloc1(m, &rowlens));
  for (cnt = 0, i = 0; i < A->mbs; i++)
    for (j = 0; j < bs; j++) rowlens[cnt++] = bs * (A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i]);
  PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
  PetscCall(PetscFree(rowlens));

  /* fill in and store column indices */
  PetscCall(PetscMalloc1(nz, &colidxs));
  for (cnt = 0, i = 0; i < A->mbs; i++) {
    for (k = 0; k < bs; k++) {
      /* off-process columns that precede this rank's diagonal block */
      for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
        if (garray[B->j[jb]] > cs / bs) break;
        for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l;
      }
      /* diagonal-block columns (shift local index by the column start cs) */
      for (ja = A->i[i]; ja < A->i[i + 1]; ja++)
        for (l = 0; l < bs; l++) colidxs[cnt++] = bs * A->j[ja] + l + cs;
      /* remaining off-process columns */
      for (; jb < B->i[i + 1]; jb++)
        for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l;
    }
  }
  PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
  PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_INT));
  PetscCall(PetscFree(colidxs));

  /* fill in and store nonzero values (same traversal order as the indices) */
  PetscCall(PetscMalloc1(nz, &matvals));
  for (cnt = 0, i = 0; i < A->mbs; i++) {
    for (k = 0; k < bs; k++) {
      for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
        if (garray[B->j[jb]] > cs / bs) break;
        for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k];
      }
      for (ja = A->i[i]; ja < A->i[i + 1]; ja++)
        for (l = 0; l < bs; l++) matvals[cnt++] = A->a[bs * (bs * ja + l) + k];
      for (; jb < B->i[i + 1]; jb++)
        for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l)
+ k]; 1200 } 1201 } 1202 PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_SCALAR)); 1203 PetscCall(PetscFree(matvals)); 1204 1205 /* write block size option to the viewer's .info file */ 1206 PetscCall(MatView_Binary_BlockSizes(mat, viewer)); 1207 PetscFunctionReturn(PETSC_SUCCESS); 1208 } 1209 1210 PetscErrorCode MatView_MPIBAIJ(Mat mat, PetscViewer viewer) 1211 { 1212 PetscBool iascii, isdraw, issocket, isbinary; 1213 1214 PetscFunctionBegin; 1215 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii)); 1216 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw)); 1217 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket)); 1218 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 1219 if (iascii || isdraw || issocket) { 1220 PetscCall(MatView_MPIBAIJ_ASCIIorDraworSocket(mat, viewer)); 1221 } else if (isbinary) PetscCall(MatView_MPIBAIJ_Binary(mat, viewer)); 1222 PetscFunctionReturn(PETSC_SUCCESS); 1223 } 1224 1225 static PetscErrorCode MatMult_MPIBAIJ(Mat A, Vec xx, Vec yy) 1226 { 1227 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1228 PetscInt nt; 1229 1230 PetscFunctionBegin; 1231 PetscCall(VecGetLocalSize(xx, &nt)); 1232 PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A and xx"); 1233 PetscCall(VecGetLocalSize(yy, &nt)); 1234 PetscCheck(nt == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A and yy"); 1235 PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1236 PetscCall((*a->A->ops->mult)(a->A, xx, yy)); 1237 PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1238 PetscCall((*a->B->ops->multadd)(a->B, a->lvec, yy, yy)); 1239 PetscFunctionReturn(PETSC_SUCCESS); 1240 } 1241 1242 static PetscErrorCode MatMultAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz) 1243 { 1244 Mat_MPIBAIJ *a = 
(Mat_MPIBAIJ *)A->data; 1245 1246 PetscFunctionBegin; 1247 PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1248 PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz)); 1249 PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1250 PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz)); 1251 PetscFunctionReturn(PETSC_SUCCESS); 1252 } 1253 1254 static PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A, Vec xx, Vec yy) 1255 { 1256 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1257 1258 PetscFunctionBegin; 1259 /* do nondiagonal part */ 1260 PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec)); 1261 /* do local part */ 1262 PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy)); 1263 /* add partial results together */ 1264 PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE)); 1265 PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE)); 1266 PetscFunctionReturn(PETSC_SUCCESS); 1267 } 1268 1269 static PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz) 1270 { 1271 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1272 1273 PetscFunctionBegin; 1274 /* do nondiagonal part */ 1275 PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec)); 1276 /* do local part */ 1277 PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz)); 1278 /* add partial results together */ 1279 PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE)); 1280 PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE)); 1281 PetscFunctionReturn(PETSC_SUCCESS); 1282 } 1283 1284 /* 1285 This only works correctly for square matrices where the subblock A->A is the 1286 diagonal block 1287 */ 1288 static PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A, Vec v) 1289 { 1290 PetscFunctionBegin; 1291 PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block"); 1292 
PetscCall(MatGetDiagonal(((Mat_MPIBAIJ *)A->data)->A, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Scales both the diagonal and off-diagonal local pieces by aa */
static PetscErrorCode MatScale_MPIBAIJ(Mat A, PetscScalar aa)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatScale(a->A, aa));
  PetscCall(MatScale(a->B, aa));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   Returns one locally-owned global point row, merging the entries of the
   diagonal (A) and off-diagonal (B) pieces into globally increasing column
   order. Must be paired with MatRestoreRow_MPIBAIJ().
*/
PetscErrorCode MatGetRow_MPIBAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data;
  PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
  PetscInt     bs = matin->rmap->bs, bs2 = mat->bs2, i, *cworkA, *cworkB, **pcA, **pcB;
  PetscInt     nztot, nzA, nzB, lrow, brstart = matin->rmap->rstart, brend = matin->rmap->rend;
  PetscInt    *cmap, *idx_p, cstart = mat->cstartbs;

  PetscFunctionBegin;
  PetscCheck(row >= brstart && row < brend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local rows");
  PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *)mat->A->data, *Ba = (Mat_SeqBAIJ *)mat->B->data;
    PetscInt     max = 1, mbs = mat->mbs, tmp;
    for (i = 0; i < mbs; i++) {
      tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    PetscCall(PetscMalloc2(max * bs2, &mat->rowvalues, max * bs2, &mat->rowindices));
  }
  lrow = row - brstart;

  /* only request from A/B what the caller actually asked for */
  pvA = &vworkA;
  pcA = &cworkA;
  pvB = &vworkB;
  pcB = &cworkB;
  if (!v) {
    pvA = NULL;
    pvB = NULL;
  }
  if (!idx) {
    pcA = NULL;
    if (!v) pcB = NULL;
  }
  PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      /* imark = number of B entries whose global column precedes the diagonal block */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i = 0; i < nzB; i++) {
          if (cmap[cworkB[i] / bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
        for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
        } else {
          for (i = 0; i < nzB; i++) {
            if (cmap[cworkB[i] / bs] < cstart) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
            else break;
          }
          imark = i;
        }
        for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart * bs + cworkA[i];
        for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
      }
    } else {
      if (idx) *idx = NULL;
      if (v) *v = NULL;
    }
  }
  *nz = nztot;
  PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->restorerow)(mat->B, lrow,
&nzB, pcB, pvB)); 1386 PetscFunctionReturn(PETSC_SUCCESS); 1387 } 1388 1389 PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) 1390 { 1391 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 1392 1393 PetscFunctionBegin; 1394 PetscCheck(baij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow not called"); 1395 baij->getrowactive = PETSC_FALSE; 1396 PetscFunctionReturn(PETSC_SUCCESS); 1397 } 1398 1399 static PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A) 1400 { 1401 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data; 1402 1403 PetscFunctionBegin; 1404 PetscCall(MatZeroEntries(l->A)); 1405 PetscCall(MatZeroEntries(l->B)); 1406 PetscFunctionReturn(PETSC_SUCCESS); 1407 } 1408 1409 static PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin, MatInfoType flag, MatInfo *info) 1410 { 1411 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)matin->data; 1412 Mat A = a->A, B = a->B; 1413 PetscLogDouble isend[5], irecv[5]; 1414 1415 PetscFunctionBegin; 1416 info->block_size = (PetscReal)matin->rmap->bs; 1417 1418 PetscCall(MatGetInfo(A, MAT_LOCAL, info)); 1419 1420 isend[0] = info->nz_used; 1421 isend[1] = info->nz_allocated; 1422 isend[2] = info->nz_unneeded; 1423 isend[3] = info->memory; 1424 isend[4] = info->mallocs; 1425 1426 PetscCall(MatGetInfo(B, MAT_LOCAL, info)); 1427 1428 isend[0] += info->nz_used; 1429 isend[1] += info->nz_allocated; 1430 isend[2] += info->nz_unneeded; 1431 isend[3] += info->memory; 1432 isend[4] += info->mallocs; 1433 1434 if (flag == MAT_LOCAL) { 1435 info->nz_used = isend[0]; 1436 info->nz_allocated = isend[1]; 1437 info->nz_unneeded = isend[2]; 1438 info->memory = isend[3]; 1439 info->mallocs = isend[4]; 1440 } else if (flag == MAT_GLOBAL_MAX) { 1441 PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin))); 1442 1443 info->nz_used = irecv[0]; 1444 info->nz_allocated = irecv[1]; 1445 info->nz_unneeded = irecv[2]; 1446 info->memory = irecv[3]; 1447 info->mallocs = 
irecv[4]; 1448 } else if (flag == MAT_GLOBAL_SUM) { 1449 PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin))); 1450 1451 info->nz_used = irecv[0]; 1452 info->nz_allocated = irecv[1]; 1453 info->nz_unneeded = irecv[2]; 1454 info->memory = irecv[3]; 1455 info->mallocs = irecv[4]; 1456 } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_ARG_WRONG, "Unknown MatInfoType argument %d", (int)flag); 1457 info->fill_ratio_given = 0; /* no parallel LU/ILU/Cholesky */ 1458 info->fill_ratio_needed = 0; 1459 info->factor_mallocs = 0; 1460 PetscFunctionReturn(PETSC_SUCCESS); 1461 } 1462 1463 static PetscErrorCode MatSetOption_MPIBAIJ(Mat A, MatOption op, PetscBool flg) 1464 { 1465 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1466 1467 PetscFunctionBegin; 1468 switch (op) { 1469 case MAT_NEW_NONZERO_LOCATIONS: 1470 case MAT_NEW_NONZERO_ALLOCATION_ERR: 1471 case MAT_UNUSED_NONZERO_LOCATION_ERR: 1472 case MAT_KEEP_NONZERO_PATTERN: 1473 case MAT_NEW_NONZERO_LOCATION_ERR: 1474 MatCheckPreallocated(A, 1); 1475 PetscCall(MatSetOption(a->A, op, flg)); 1476 PetscCall(MatSetOption(a->B, op, flg)); 1477 break; 1478 case MAT_ROW_ORIENTED: 1479 MatCheckPreallocated(A, 1); 1480 a->roworiented = flg; 1481 1482 PetscCall(MatSetOption(a->A, op, flg)); 1483 PetscCall(MatSetOption(a->B, op, flg)); 1484 break; 1485 case MAT_FORCE_DIAGONAL_ENTRIES: 1486 case MAT_SORTED_FULL: 1487 PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); 1488 break; 1489 case MAT_IGNORE_OFF_PROC_ENTRIES: 1490 a->donotstash = flg; 1491 break; 1492 case MAT_USE_HASH_TABLE: 1493 a->ht_flag = flg; 1494 a->ht_fact = 1.39; 1495 break; 1496 case MAT_SYMMETRIC: 1497 case MAT_STRUCTURALLY_SYMMETRIC: 1498 case MAT_HERMITIAN: 1499 case MAT_SUBMAT_SINGLEIS: 1500 case MAT_SYMMETRY_ETERNAL: 1501 case MAT_STRUCTURAL_SYMMETRY_ETERNAL: 1502 case MAT_SPD_ETERNAL: 1503 /* if the diagonal matrix is square it inherits some of the properties above */ 1504 break; 1505 
default: 1506 SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "unknown option %d", op); 1507 } 1508 PetscFunctionReturn(PETSC_SUCCESS); 1509 } 1510 1511 static PetscErrorCode MatTranspose_MPIBAIJ(Mat A, MatReuse reuse, Mat *matout) 1512 { 1513 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)A->data; 1514 Mat_SeqBAIJ *Aloc; 1515 Mat B; 1516 PetscInt M = A->rmap->N, N = A->cmap->N, *ai, *aj, i, *rvals, j, k, col; 1517 PetscInt bs = A->rmap->bs, mbs = baij->mbs; 1518 MatScalar *a; 1519 1520 PetscFunctionBegin; 1521 if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout)); 1522 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) { 1523 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 1524 PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M)); 1525 PetscCall(MatSetType(B, ((PetscObject)A)->type_name)); 1526 /* Do not know preallocation information, but must set block size */ 1527 PetscCall(MatMPIBAIJSetPreallocation(B, A->rmap->bs, PETSC_DECIDE, NULL, PETSC_DECIDE, NULL)); 1528 } else { 1529 B = *matout; 1530 } 1531 1532 /* copy over the A part */ 1533 Aloc = (Mat_SeqBAIJ *)baij->A->data; 1534 ai = Aloc->i; 1535 aj = Aloc->j; 1536 a = Aloc->a; 1537 PetscCall(PetscMalloc1(bs, &rvals)); 1538 1539 for (i = 0; i < mbs; i++) { 1540 rvals[0] = bs * (baij->rstartbs + i); 1541 for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1; 1542 for (j = ai[i]; j < ai[i + 1]; j++) { 1543 col = (baij->cstartbs + aj[j]) * bs; 1544 for (k = 0; k < bs; k++) { 1545 PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES)); 1546 1547 col++; 1548 a += bs; 1549 } 1550 } 1551 } 1552 /* copy over the B part */ 1553 Aloc = (Mat_SeqBAIJ *)baij->B->data; 1554 ai = Aloc->i; 1555 aj = Aloc->j; 1556 a = Aloc->a; 1557 for (i = 0; i < mbs; i++) { 1558 rvals[0] = bs * (baij->rstartbs + i); 1559 for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1; 1560 for (j = ai[i]; j < ai[i + 1]; j++) { 1561 col = baij->garray[aj[j]] * bs; 1562 for (k = 
0; k < bs; k++) { 1563 PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES)); 1564 col++; 1565 a += bs; 1566 } 1567 } 1568 } 1569 PetscCall(PetscFree(rvals)); 1570 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 1571 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 1572 1573 if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) *matout = B; 1574 else PetscCall(MatHeaderMerge(A, &B)); 1575 PetscFunctionReturn(PETSC_SUCCESS); 1576 } 1577 1578 static PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat, Vec ll, Vec rr) 1579 { 1580 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 1581 Mat a = baij->A, b = baij->B; 1582 PetscInt s1, s2, s3; 1583 1584 PetscFunctionBegin; 1585 PetscCall(MatGetLocalSize(mat, &s2, &s3)); 1586 if (rr) { 1587 PetscCall(VecGetLocalSize(rr, &s1)); 1588 PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size"); 1589 /* Overlap communication with computation. */ 1590 PetscCall(VecScatterBegin(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1591 } 1592 if (ll) { 1593 PetscCall(VecGetLocalSize(ll, &s1)); 1594 PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size"); 1595 PetscUseTypeMethod(b, diagonalscale, ll, NULL); 1596 } 1597 /* scale the diagonal block */ 1598 PetscUseTypeMethod(a, diagonalscale, ll, rr); 1599 1600 if (rr) { 1601 /* Do a scatter end and then right scale the off-diagonal block */ 1602 PetscCall(VecScatterEnd(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1603 PetscUseTypeMethod(b, diagonalscale, NULL, baij->lvec); 1604 } 1605 PetscFunctionReturn(PETSC_SUCCESS); 1606 } 1607 1608 static PetscErrorCode MatZeroRows_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 1609 { 1610 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data; 1611 PetscInt *lrows; 1612 PetscInt r, len; 1613 PetscBool cong; 1614 1615 PetscFunctionBegin; 1616 /* get locally owned rows */ 1617 
PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows)); 1618 /* fix right hand side if needed */ 1619 if (x && b) { 1620 const PetscScalar *xx; 1621 PetscScalar *bb; 1622 1623 PetscCall(VecGetArrayRead(x, &xx)); 1624 PetscCall(VecGetArray(b, &bb)); 1625 for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]]; 1626 PetscCall(VecRestoreArrayRead(x, &xx)); 1627 PetscCall(VecRestoreArray(b, &bb)); 1628 } 1629 1630 /* actually zap the local rows */ 1631 /* 1632 Zero the required rows. If the "diagonal block" of the matrix 1633 is square and the user wishes to set the diagonal we use separate 1634 code so that MatSetValues() is not called for each diagonal allocating 1635 new memory, thus calling lots of mallocs and slowing things down. 1636 1637 */ 1638 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1639 PetscCall(MatZeroRows_SeqBAIJ(l->B, len, lrows, 0.0, NULL, NULL)); 1640 PetscCall(MatHasCongruentLayouts(A, &cong)); 1641 if ((diag != 0.0) && cong) { 1642 PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, diag, NULL, NULL)); 1643 } else if (diag != 0.0) { 1644 PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL)); 1645 PetscCheck(!((Mat_SeqBAIJ *)l->A->data)->nonew, PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\ 1646 MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR"); 1647 for (r = 0; r < len; ++r) { 1648 const PetscInt row = lrows[r] + A->rmap->rstart; 1649 PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES)); 1650 } 1651 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 1652 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 1653 } else { 1654 PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL)); 1655 } 1656 PetscCall(PetscFree(lrows)); 1657 1658 /* only change matrix nonzero state if pattern was allowed to be changed */ 1659 if (!((Mat_SeqBAIJ 
*)(l->A->data))->keepnonzeropattern || !((Mat_SeqBAIJ *)(l->A->data))->nonew) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   Zeros both the rows and the columns listed in rows[] (global point indices,
   possibly non-local). Row/column ownership is resolved with a PetscSF; the
   zeroed columns are flagged through a scattered mask vector so the
   off-diagonal piece can be cleaned up and b adjusted.
*/
static PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIBAIJ       *l = (Mat_MPIBAIJ *)A->data;
  PetscMPIInt        n = A->rmap->n, p = 0;
  PetscInt           i, j, k, r, len = 0, row, col, count;
  PetscInt          *lrows, *owners = A->rmap->range;
  PetscSFNode       *rrows;
  PetscSF            sf;
  const PetscScalar *xx;
  PetscScalar       *bb, *mask;
  Vec                xmask, lmask;
  Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ *)l->B->data;
  PetscInt           bs = A->rmap->bs, bs2 = baij->bs2;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  PetscCall(PetscMalloc1(n, &lrows));
  for (r = 0; r < n; ++r) lrows[r] = -1;
  PetscCall(PetscMalloc1(N, &rrows));
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N);
    if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */
      PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p));
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
  /* Collect flags for rows to be zeroed */
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFDestroy(&sf));
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r)
    if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b));
  /* handle off-diagonal part of matrix */
  /* build a 0/1 mask marking the zeroed columns, then scatter it to the
     ghost layout used by the off-diagonal piece */
  PetscCall(MatCreateVecs(A, &xmask, NULL));
  PetscCall(VecDuplicate(l->lvec, &lmask));
  PetscCall(VecGetArray(xmask, &bb));
  for (i = 0; i < len; i++) bb[lrows[i]] = 1;
  PetscCall(VecRestoreArray(xmask, &bb));
  PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecDestroy(&xmask));
  if (x) {
    /* need the ghost values of x to adjust b below */
    PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecGetArrayRead(l->lvec, &xx));
    PetscCall(VecGetArray(b, &bb));
  }
  PetscCall(VecGetArray(lmask, &mask));
  /* remove zeroed rows of off-diagonal matrix */
  for (i = 0; i < len; ++i) {
    row   = lrows[i];
    count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs;
    aa    = ((MatScalar *)(baij->a)) + baij->i[row / bs] * bs2 + (row % bs);
    /* values are stored column-major inside each bs x bs block, so stepping
       by bs walks along one point row */
    for (k = 0; k < count; ++k) {
      aa[0] = 0.0;
      aa += bs;
    }
  }
  /* loop over all elements of off process part of matrix zeroing removed columns*/
  for (i = 0; i < l->B->rmap->N; ++i) {
    row = i / bs;
    for (j = baij->i[row]; j < baij->i[row + 1]; ++j) {
      for (k = 0; k < bs; ++k) {
        col = bs * baij->j[j] + k;
        if (PetscAbsScalar(mask[col])) {
          aa = ((MatScalar *)(baij->a)) + j * bs2 + (i % bs) + bs * k;
          if (x) bb[i] -= aa[0] * xx[col];
          aa[0] = 0.0;
        }
      }
    }
  }
  if (x) {
    PetscCall(VecRestoreArray(b, &bb));
    PetscCall(VecRestoreArrayRead(l->lvec, &xx));
  }
  PetscCall(VecRestoreArray(lmask, &mask));
1751 PetscCall(VecDestroy(&lmask)); 1752 PetscCall(PetscFree(lrows)); 1753 1754 /* only change matrix nonzero state if pattern was allowed to be changed */ 1755 if (!((Mat_SeqBAIJ *)(l->A->data))->nonew) { 1756 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1757 PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A))); 1758 } 1759 PetscFunctionReturn(PETSC_SUCCESS); 1760 } 1761 1762 static PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1763 { 1764 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1765 1766 PetscFunctionBegin; 1767 PetscCall(MatSetUnfactored(a->A)); 1768 PetscFunctionReturn(PETSC_SUCCESS); 1769 } 1770 1771 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat, MatDuplicateOption, Mat *); 1772 1773 static PetscErrorCode MatEqual_MPIBAIJ(Mat A, Mat B, PetscBool *flag) 1774 { 1775 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ *)B->data, *matA = (Mat_MPIBAIJ *)A->data; 1776 Mat a, b, c, d; 1777 PetscBool flg; 1778 1779 PetscFunctionBegin; 1780 a = matA->A; 1781 b = matA->B; 1782 c = matB->A; 1783 d = matB->B; 1784 1785 PetscCall(MatEqual(a, c, &flg)); 1786 if (flg) PetscCall(MatEqual(b, d, &flg)); 1787 PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A))); 1788 PetscFunctionReturn(PETSC_SUCCESS); 1789 } 1790 1791 static PetscErrorCode MatCopy_MPIBAIJ(Mat A, Mat B, MatStructure str) 1792 { 1793 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1794 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 1795 1796 PetscFunctionBegin; 1797 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
*/ 1798 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1799 PetscCall(MatCopy_Basic(A, B, str)); 1800 } else { 1801 PetscCall(MatCopy(a->A, b->A, str)); 1802 PetscCall(MatCopy(a->B, b->B, str)); 1803 } 1804 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 1805 PetscFunctionReturn(PETSC_SUCCESS); 1806 } 1807 1808 PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz) 1809 { 1810 PetscInt bs = Y->rmap->bs, m = Y->rmap->N / bs; 1811 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data; 1812 Mat_SeqBAIJ *y = (Mat_SeqBAIJ *)Y->data; 1813 1814 PetscFunctionBegin; 1815 PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz)); 1816 PetscFunctionReturn(PETSC_SUCCESS); 1817 } 1818 1819 static PetscErrorCode MatAXPY_MPIBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str) 1820 { 1821 Mat_MPIBAIJ *xx = (Mat_MPIBAIJ *)X->data, *yy = (Mat_MPIBAIJ *)Y->data; 1822 PetscBLASInt bnz, one = 1; 1823 Mat_SeqBAIJ *x, *y; 1824 PetscInt bs2 = Y->rmap->bs * Y->rmap->bs; 1825 1826 PetscFunctionBegin; 1827 if (str == SAME_NONZERO_PATTERN) { 1828 PetscScalar alpha = a; 1829 x = (Mat_SeqBAIJ *)xx->A->data; 1830 y = (Mat_SeqBAIJ *)yy->A->data; 1831 PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz)); 1832 PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one)); 1833 x = (Mat_SeqBAIJ *)xx->B->data; 1834 y = (Mat_SeqBAIJ *)yy->B->data; 1835 PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz)); 1836 PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one)); 1837 PetscCall(PetscObjectStateIncrease((PetscObject)Y)); 1838 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1839 PetscCall(MatAXPY_Basic(Y, a, X, str)); 1840 } else { 1841 Mat B; 1842 PetscInt *nnz_d, *nnz_o, bs = Y->rmap->bs; 1843 PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d)); 1844 PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o)); 1845 
PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B)); 1846 PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name)); 1847 PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N)); 1848 PetscCall(MatSetBlockSizesFromMats(B, Y, Y)); 1849 PetscCall(MatSetType(B, MATMPIBAIJ)); 1850 PetscCall(MatAXPYGetPreallocation_SeqBAIJ(yy->A, xx->A, nnz_d)); 1851 PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o)); 1852 PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, nnz_d, 0, nnz_o)); 1853 /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! */ 1854 PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str)); 1855 PetscCall(MatHeaderMerge(Y, &B)); 1856 PetscCall(PetscFree(nnz_d)); 1857 PetscCall(PetscFree(nnz_o)); 1858 } 1859 PetscFunctionReturn(PETSC_SUCCESS); 1860 } 1861 1862 static PetscErrorCode MatConjugate_MPIBAIJ(Mat mat) 1863 { 1864 PetscFunctionBegin; 1865 if (PetscDefined(USE_COMPLEX)) { 1866 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)mat->data; 1867 1868 PetscCall(MatConjugate_SeqBAIJ(a->A)); 1869 PetscCall(MatConjugate_SeqBAIJ(a->B)); 1870 } 1871 PetscFunctionReturn(PETSC_SUCCESS); 1872 } 1873 1874 static PetscErrorCode MatRealPart_MPIBAIJ(Mat A) 1875 { 1876 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1877 1878 PetscFunctionBegin; 1879 PetscCall(MatRealPart(a->A)); 1880 PetscCall(MatRealPart(a->B)); 1881 PetscFunctionReturn(PETSC_SUCCESS); 1882 } 1883 1884 static PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A) 1885 { 1886 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1887 1888 PetscFunctionBegin; 1889 PetscCall(MatImaginaryPart(a->A)); 1890 PetscCall(MatImaginaryPart(a->B)); 1891 PetscFunctionReturn(PETSC_SUCCESS); 1892 } 1893 1894 static PetscErrorCode MatCreateSubMatrix_MPIBAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat) 1895 { 1896 IS iscol_local; 1897 PetscInt csize; 1898 1899 PetscFunctionBegin; 1900 PetscCall(ISGetLocalSize(iscol, &csize)); 1901 if (call == 
MAT_REUSE_MATRIX) { 1902 PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local)); 1903 PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 1904 } else { 1905 PetscCall(ISAllGather(iscol, &iscol_local)); 1906 } 1907 PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, isrow, iscol_local, csize, call, newmat, PETSC_FALSE)); 1908 if (call == MAT_INITIAL_MATRIX) { 1909 PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local)); 1910 PetscCall(ISDestroy(&iscol_local)); 1911 } 1912 PetscFunctionReturn(PETSC_SUCCESS); 1913 } 1914 1915 /* 1916 Not great since it makes two copies of the submatrix, first an SeqBAIJ 1917 in local and then by concatenating the local matrices the end result. 1918 Writing it directly would be much like MatCreateSubMatrices_MPIBAIJ(). 1919 This routine is used for BAIJ and SBAIJ matrices (unfortunate dependency). 1920 */ 1921 PetscErrorCode MatCreateSubMatrix_MPIBAIJ_Private(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat, PetscBool sym) 1922 { 1923 PetscMPIInt rank, size; 1924 PetscInt i, m, n, rstart, row, rend, nz, *cwork, j, bs; 1925 PetscInt *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal; 1926 Mat M, Mreuse; 1927 MatScalar *vwork, *aa; 1928 MPI_Comm comm; 1929 IS isrow_new, iscol_new; 1930 Mat_SeqBAIJ *aij; 1931 1932 PetscFunctionBegin; 1933 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 1934 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 1935 PetscCallMPI(MPI_Comm_size(comm, &size)); 1936 /* The compression and expansion should be avoided. 
Doesn't point 1937 out errors, might change the indices, hence buggey */ 1938 PetscCall(ISCompressIndicesGeneral(mat->rmap->N, mat->rmap->n, mat->rmap->bs, 1, &isrow, &isrow_new)); 1939 if (isrow == iscol) { 1940 iscol_new = isrow_new; 1941 PetscCall(PetscObjectReference((PetscObject)iscol_new)); 1942 } else PetscCall(ISCompressIndicesGeneral(mat->cmap->N, mat->cmap->n, mat->cmap->bs, 1, &iscol, &iscol_new)); 1943 1944 if (call == MAT_REUSE_MATRIX) { 1945 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse)); 1946 PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 1947 PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_REUSE_MATRIX, &Mreuse, sym)); 1948 } else { 1949 PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_INITIAL_MATRIX, &Mreuse, sym)); 1950 } 1951 PetscCall(ISDestroy(&isrow_new)); 1952 PetscCall(ISDestroy(&iscol_new)); 1953 /* 1954 m - number of local rows 1955 n - number of columns (same on all processors) 1956 rstart - first row in new global matrix generated 1957 */ 1958 PetscCall(MatGetBlockSize(mat, &bs)); 1959 PetscCall(MatGetSize(Mreuse, &m, &n)); 1960 m = m / bs; 1961 n = n / bs; 1962 1963 if (call == MAT_INITIAL_MATRIX) { 1964 aij = (Mat_SeqBAIJ *)(Mreuse)->data; 1965 ii = aij->i; 1966 jj = aij->j; 1967 1968 /* 1969 Determine the number of non-zeros in the diagonal and off-diagonal 1970 portions of the matrix in order to do correct preallocation 1971 */ 1972 1973 /* first get start and end of "diagonal" columns */ 1974 if (csize == PETSC_DECIDE) { 1975 PetscCall(ISGetSize(isrow, &mglobal)); 1976 if (mglobal == n * bs) { /* square matrix */ 1977 nlocal = m; 1978 } else { 1979 nlocal = n / size + ((n % size) > rank); 1980 } 1981 } else { 1982 nlocal = csize / bs; 1983 } 1984 PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm)); 1985 rstart = rend - nlocal; 1986 
PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n); 1987 1988 /* next, compute all the lengths */ 1989 PetscCall(PetscMalloc2(m + 1, &dlens, m + 1, &olens)); 1990 for (i = 0; i < m; i++) { 1991 jend = ii[i + 1] - ii[i]; 1992 olen = 0; 1993 dlen = 0; 1994 for (j = 0; j < jend; j++) { 1995 if (*jj < rstart || *jj >= rend) olen++; 1996 else dlen++; 1997 jj++; 1998 } 1999 olens[i] = olen; 2000 dlens[i] = dlen; 2001 } 2002 PetscCall(MatCreate(comm, &M)); 2003 PetscCall(MatSetSizes(M, bs * m, bs * nlocal, PETSC_DECIDE, bs * n)); 2004 PetscCall(MatSetType(M, sym ? ((PetscObject)mat)->type_name : MATMPIBAIJ)); 2005 PetscCall(MatMPIBAIJSetPreallocation(M, bs, 0, dlens, 0, olens)); 2006 PetscCall(MatMPISBAIJSetPreallocation(M, bs, 0, dlens, 0, olens)); 2007 PetscCall(PetscFree2(dlens, olens)); 2008 } else { 2009 PetscInt ml, nl; 2010 2011 M = *newmat; 2012 PetscCall(MatGetLocalSize(M, &ml, &nl)); 2013 PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request"); 2014 PetscCall(MatZeroEntries(M)); 2015 /* 2016 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 2017 rather than the slower MatSetValues(). 
2018 */ 2019 M->was_assembled = PETSC_TRUE; 2020 M->assembled = PETSC_FALSE; 2021 } 2022 PetscCall(MatSetOption(M, MAT_ROW_ORIENTED, PETSC_FALSE)); 2023 PetscCall(MatGetOwnershipRange(M, &rstart, &rend)); 2024 aij = (Mat_SeqBAIJ *)(Mreuse)->data; 2025 ii = aij->i; 2026 jj = aij->j; 2027 aa = aij->a; 2028 for (i = 0; i < m; i++) { 2029 row = rstart / bs + i; 2030 nz = ii[i + 1] - ii[i]; 2031 cwork = jj; 2032 jj = PetscSafePointerPlusOffset(jj, nz); 2033 vwork = aa; 2034 aa = PetscSafePointerPlusOffset(aa, nz * bs * bs); 2035 PetscCall(MatSetValuesBlocked_MPIBAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES)); 2036 } 2037 2038 PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY)); 2039 PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY)); 2040 *newmat = M; 2041 2042 /* save submatrix used in processor for next request */ 2043 if (call == MAT_INITIAL_MATRIX) { 2044 PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse)); 2045 PetscCall(PetscObjectDereference((PetscObject)Mreuse)); 2046 } 2047 PetscFunctionReturn(PETSC_SUCCESS); 2048 } 2049 2050 static PetscErrorCode MatPermute_MPIBAIJ(Mat A, IS rowp, IS colp, Mat *B) 2051 { 2052 MPI_Comm comm, pcomm; 2053 PetscInt clocal_size, nrows; 2054 const PetscInt *rows; 2055 PetscMPIInt size; 2056 IS crowp, lcolp; 2057 2058 PetscFunctionBegin; 2059 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 2060 /* make a collective version of 'rowp' */ 2061 PetscCall(PetscObjectGetComm((PetscObject)rowp, &pcomm)); 2062 if (pcomm == comm) { 2063 crowp = rowp; 2064 } else { 2065 PetscCall(ISGetSize(rowp, &nrows)); 2066 PetscCall(ISGetIndices(rowp, &rows)); 2067 PetscCall(ISCreateGeneral(comm, nrows, rows, PETSC_COPY_VALUES, &crowp)); 2068 PetscCall(ISRestoreIndices(rowp, &rows)); 2069 } 2070 PetscCall(ISSetPermutation(crowp)); 2071 /* make a local version of 'colp' */ 2072 PetscCall(PetscObjectGetComm((PetscObject)colp, &pcomm)); 2073 PetscCallMPI(MPI_Comm_size(pcomm, &size)); 2074 if (size == 1) { 2075 lcolp = colp; 
2076 } else { 2077 PetscCall(ISAllGather(colp, &lcolp)); 2078 } 2079 PetscCall(ISSetPermutation(lcolp)); 2080 /* now we just get the submatrix */ 2081 PetscCall(MatGetLocalSize(A, NULL, &clocal_size)); 2082 PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(A, crowp, lcolp, clocal_size, MAT_INITIAL_MATRIX, B, PETSC_FALSE)); 2083 /* clean up */ 2084 if (pcomm != comm) PetscCall(ISDestroy(&crowp)); 2085 if (size > 1) PetscCall(ISDestroy(&lcolp)); 2086 PetscFunctionReturn(PETSC_SUCCESS); 2087 } 2088 2089 static PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[]) 2090 { 2091 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 2092 Mat_SeqBAIJ *B = (Mat_SeqBAIJ *)baij->B->data; 2093 2094 PetscFunctionBegin; 2095 if (nghosts) *nghosts = B->nbs; 2096 if (ghosts) *ghosts = baij->garray; 2097 PetscFunctionReturn(PETSC_SUCCESS); 2098 } 2099 2100 static PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A, Mat *newmat) 2101 { 2102 Mat B; 2103 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2104 Mat_SeqBAIJ *ad = (Mat_SeqBAIJ *)a->A->data, *bd = (Mat_SeqBAIJ *)a->B->data; 2105 Mat_SeqAIJ *b; 2106 PetscMPIInt size, rank, *recvcounts = NULL, *displs = NULL; 2107 PetscInt sendcount, i, *rstarts = A->rmap->range, n, cnt, j, bs = A->rmap->bs; 2108 PetscInt m, *garray = a->garray, *lens, *jsendbuf, *a_jsendbuf, *b_jsendbuf; 2109 2110 PetscFunctionBegin; 2111 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 2112 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank)); 2113 2114 /* Tell every processor the number of nonzeros per row */ 2115 PetscCall(PetscMalloc1(A->rmap->N / bs, &lens)); 2116 for (i = A->rmap->rstart / bs; i < A->rmap->rend / bs; i++) lens[i] = ad->i[i - A->rmap->rstart / bs + 1] - ad->i[i - A->rmap->rstart / bs] + bd->i[i - A->rmap->rstart / bs + 1] - bd->i[i - A->rmap->rstart / bs]; 2117 PetscCall(PetscMalloc1(2 * size, &recvcounts)); 2118 displs = recvcounts + size; 2119 for (i = 0; i < size; i++) { 2120 
recvcounts[i] = A->rmap->range[i + 1] / bs - A->rmap->range[i] / bs; 2121 displs[i] = A->rmap->range[i] / bs; 2122 } 2123 PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, lens, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A))); 2124 /* Create the sequential matrix of the same type as the local block diagonal */ 2125 PetscCall(MatCreate(PETSC_COMM_SELF, &B)); 2126 PetscCall(MatSetSizes(B, A->rmap->N / bs, A->cmap->N / bs, PETSC_DETERMINE, PETSC_DETERMINE)); 2127 PetscCall(MatSetType(B, MATSEQAIJ)); 2128 PetscCall(MatSeqAIJSetPreallocation(B, 0, lens)); 2129 b = (Mat_SeqAIJ *)B->data; 2130 2131 /* Copy my part of matrix column indices over */ 2132 sendcount = ad->nz + bd->nz; 2133 jsendbuf = b->j + b->i[rstarts[rank] / bs]; 2134 a_jsendbuf = ad->j; 2135 b_jsendbuf = bd->j; 2136 n = A->rmap->rend / bs - A->rmap->rstart / bs; 2137 cnt = 0; 2138 for (i = 0; i < n; i++) { 2139 /* put in lower diagonal portion */ 2140 m = bd->i[i + 1] - bd->i[i]; 2141 while (m > 0) { 2142 /* is it above diagonal (in bd (compressed) numbering) */ 2143 if (garray[*b_jsendbuf] > A->rmap->rstart / bs + i) break; 2144 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2145 m--; 2146 } 2147 2148 /* put in diagonal portion */ 2149 for (j = ad->i[i]; j < ad->i[i + 1]; j++) jsendbuf[cnt++] = A->rmap->rstart / bs + *a_jsendbuf++; 2150 2151 /* put in upper diagonal portion */ 2152 while (m-- > 0) jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2153 } 2154 PetscCheck(cnt == sendcount, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Corrupted PETSc matrix: nz given %" PetscInt_FMT " actual nz %" PetscInt_FMT, sendcount, cnt); 2155 2156 /* Gather all column indices to all processors */ 2157 for (i = 0; i < size; i++) { 2158 recvcounts[i] = 0; 2159 for (j = A->rmap->range[i] / bs; j < A->rmap->range[i + 1] / bs; j++) recvcounts[i] += lens[j]; 2160 } 2161 displs[0] = 0; 2162 for (i = 1; i < size; i++) displs[i] = displs[i - 1] + recvcounts[i - 1]; 2163 PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, 
MPI_DATATYPE_NULL, b->j, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A))); 2164 /* Assemble the matrix into usable form (note numerical values not yet set) */ 2165 /* set the b->ilen (length of each row) values */ 2166 PetscCall(PetscArraycpy(b->ilen, lens, A->rmap->N / bs)); 2167 /* set the b->i indices */ 2168 b->i[0] = 0; 2169 for (i = 1; i <= A->rmap->N / bs; i++) b->i[i] = b->i[i - 1] + lens[i - 1]; 2170 PetscCall(PetscFree(lens)); 2171 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 2172 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 2173 PetscCall(PetscFree(recvcounts)); 2174 2175 PetscCall(MatPropagateSymmetryOptions(A, B)); 2176 *newmat = B; 2177 PetscFunctionReturn(PETSC_SUCCESS); 2178 } 2179 2180 static PetscErrorCode MatSOR_MPIBAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) 2181 { 2182 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data; 2183 Vec bb1 = NULL; 2184 2185 PetscFunctionBegin; 2186 if (flag == SOR_APPLY_UPPER) { 2187 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2188 PetscFunctionReturn(PETSC_SUCCESS); 2189 } 2190 2191 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) PetscCall(VecDuplicate(bb, &bb1)); 2192 2193 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2194 if (flag & SOR_ZERO_INITIAL_GUESS) { 2195 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2196 its--; 2197 } 2198 2199 while (its--) { 2200 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2201 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2202 2203 /* update rhs: bb1 = bb - B*x */ 2204 PetscCall(VecScale(mat->lvec, -1.0)); 2205 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 2206 2207 /* local sweep */ 2208 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx)); 2209 } 2210 } else if (flag & 
SOR_LOCAL_FORWARD_SWEEP) { 2211 if (flag & SOR_ZERO_INITIAL_GUESS) { 2212 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2213 its--; 2214 } 2215 while (its--) { 2216 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2217 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2218 2219 /* update rhs: bb1 = bb - B*x */ 2220 PetscCall(VecScale(mat->lvec, -1.0)); 2221 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 2222 2223 /* local sweep */ 2224 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx)); 2225 } 2226 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 2227 if (flag & SOR_ZERO_INITIAL_GUESS) { 2228 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2229 its--; 2230 } 2231 while (its--) { 2232 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2233 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2234 2235 /* update rhs: bb1 = bb - B*x */ 2236 PetscCall(VecScale(mat->lvec, -1.0)); 2237 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 2238 2239 /* local sweep */ 2240 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx)); 2241 } 2242 } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel version of SOR requested not supported"); 2243 2244 PetscCall(VecDestroy(&bb1)); 2245 PetscFunctionReturn(PETSC_SUCCESS); 2246 } 2247 2248 static PetscErrorCode MatGetColumnReductions_MPIBAIJ(Mat A, PetscInt type, PetscReal *reductions) 2249 { 2250 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)A->data; 2251 PetscInt m, N, i, *garray = aij->garray; 2252 PetscInt ib, jb, bs = A->rmap->bs; 2253 Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ *)aij->A->data; 2254 MatScalar *a_val = a_aij->a; 2255 Mat_SeqBAIJ *b_aij = (Mat_SeqBAIJ *)aij->B->data; 2256 MatScalar *b_val = b_aij->a; 2257 PetscReal 
*work; 2258 2259 PetscFunctionBegin; 2260 PetscCall(MatGetSize(A, &m, &N)); 2261 PetscCall(PetscCalloc1(N, &work)); 2262 if (type == NORM_2) { 2263 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2264 for (jb = 0; jb < bs; jb++) { 2265 for (ib = 0; ib < bs; ib++) { 2266 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val); 2267 a_val++; 2268 } 2269 } 2270 } 2271 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2272 for (jb = 0; jb < bs; jb++) { 2273 for (ib = 0; ib < bs; ib++) { 2274 work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val); 2275 b_val++; 2276 } 2277 } 2278 } 2279 } else if (type == NORM_1) { 2280 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2281 for (jb = 0; jb < bs; jb++) { 2282 for (ib = 0; ib < bs; ib++) { 2283 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val); 2284 a_val++; 2285 } 2286 } 2287 } 2288 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2289 for (jb = 0; jb < bs; jb++) { 2290 for (ib = 0; ib < bs; ib++) { 2291 work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val); 2292 b_val++; 2293 } 2294 } 2295 } 2296 } else if (type == NORM_INFINITY) { 2297 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2298 for (jb = 0; jb < bs; jb++) { 2299 for (ib = 0; ib < bs; ib++) { 2300 int col = A->cmap->rstart + a_aij->j[i] * bs + jb; 2301 work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]); 2302 a_val++; 2303 } 2304 } 2305 } 2306 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2307 for (jb = 0; jb < bs; jb++) { 2308 for (ib = 0; ib < bs; ib++) { 2309 int col = garray[b_aij->j[i]] * bs + jb; 2310 work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]); 2311 b_val++; 2312 } 2313 } 2314 } 2315 } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) { 2316 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2317 for (jb = 0; jb < bs; jb++) { 2318 
for (ib = 0; ib < bs; ib++) { 2319 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val); 2320 a_val++; 2321 } 2322 } 2323 } 2324 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2325 for (jb = 0; jb < bs; jb++) { 2326 for (ib = 0; ib < bs; ib++) { 2327 work[garray[b_aij->j[i]] * bs + jb] += PetscRealPart(*b_val); 2328 b_val++; 2329 } 2330 } 2331 } 2332 } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) { 2333 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2334 for (jb = 0; jb < bs; jb++) { 2335 for (ib = 0; ib < bs; ib++) { 2336 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val); 2337 a_val++; 2338 } 2339 } 2340 } 2341 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2342 for (jb = 0; jb < bs; jb++) { 2343 for (ib = 0; ib < bs; ib++) { 2344 work[garray[b_aij->j[i]] * bs + jb] += PetscImaginaryPart(*b_val); 2345 b_val++; 2346 } 2347 } 2348 } 2349 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type"); 2350 if (type == NORM_INFINITY) { 2351 PetscCall(MPIU_Allreduce(work, reductions, N, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A))); 2352 } else { 2353 PetscCall(MPIU_Allreduce(work, reductions, N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A))); 2354 } 2355 PetscCall(PetscFree(work)); 2356 if (type == NORM_2) { 2357 for (i = 0; i < N; i++) reductions[i] = PetscSqrtReal(reductions[i]); 2358 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 2359 for (i = 0; i < N; i++) reductions[i] /= m; 2360 } 2361 PetscFunctionReturn(PETSC_SUCCESS); 2362 } 2363 2364 static PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A, const PetscScalar **values) 2365 { 2366 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2367 2368 PetscFunctionBegin; 2369 PetscCall(MatInvertBlockDiagonal(a->A, values)); 2370 A->factorerrortype = a->A->factorerrortype; 2371 
A->factorerror_zeropivot_value = a->A->factorerror_zeropivot_value; 2372 A->factorerror_zeropivot_row = a->A->factorerror_zeropivot_row; 2373 PetscFunctionReturn(PETSC_SUCCESS); 2374 } 2375 2376 static PetscErrorCode MatShift_MPIBAIJ(Mat Y, PetscScalar a) 2377 { 2378 Mat_MPIBAIJ *maij = (Mat_MPIBAIJ *)Y->data; 2379 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)maij->A->data; 2380 2381 PetscFunctionBegin; 2382 if (!Y->preallocated) { 2383 PetscCall(MatMPIBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL, 0, NULL)); 2384 } else if (!aij->nz) { 2385 PetscInt nonew = aij->nonew; 2386 PetscCall(MatSeqBAIJSetPreallocation(maij->A, Y->rmap->bs, 1, NULL)); 2387 aij->nonew = nonew; 2388 } 2389 PetscCall(MatShift_Basic(Y, a)); 2390 PetscFunctionReturn(PETSC_SUCCESS); 2391 } 2392 2393 static PetscErrorCode MatMissingDiagonal_MPIBAIJ(Mat A, PetscBool *missing, PetscInt *d) 2394 { 2395 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2396 2397 PetscFunctionBegin; 2398 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices"); 2399 PetscCall(MatMissingDiagonal(a->A, missing, d)); 2400 if (d) { 2401 PetscInt rstart; 2402 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 2403 *d += rstart / A->rmap->bs; 2404 } 2405 PetscFunctionReturn(PETSC_SUCCESS); 2406 } 2407 2408 static PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A, Mat *a) 2409 { 2410 PetscFunctionBegin; 2411 *a = ((Mat_MPIBAIJ *)A->data)->A; 2412 PetscFunctionReturn(PETSC_SUCCESS); 2413 } 2414 2415 static PetscErrorCode MatEliminateZeros_MPIBAIJ(Mat A, PetscBool keep) 2416 { 2417 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2418 2419 PetscFunctionBegin; 2420 PetscCall(MatEliminateZeros_SeqBAIJ(a->A, keep)); // possibly keep zero diagonal coefficients 2421 PetscCall(MatEliminateZeros_SeqBAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients 2422 PetscFunctionReturn(PETSC_SUCCESS); 2423 } 2424 2425 static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ, 2426 MatGetRow_MPIBAIJ, 2427 
MatRestoreRow_MPIBAIJ, 2428 MatMult_MPIBAIJ, 2429 /* 4*/ MatMultAdd_MPIBAIJ, 2430 MatMultTranspose_MPIBAIJ, 2431 MatMultTransposeAdd_MPIBAIJ, 2432 NULL, 2433 NULL, 2434 NULL, 2435 /*10*/ NULL, 2436 NULL, 2437 NULL, 2438 MatSOR_MPIBAIJ, 2439 MatTranspose_MPIBAIJ, 2440 /*15*/ MatGetInfo_MPIBAIJ, 2441 MatEqual_MPIBAIJ, 2442 MatGetDiagonal_MPIBAIJ, 2443 MatDiagonalScale_MPIBAIJ, 2444 MatNorm_MPIBAIJ, 2445 /*20*/ MatAssemblyBegin_MPIBAIJ, 2446 MatAssemblyEnd_MPIBAIJ, 2447 MatSetOption_MPIBAIJ, 2448 MatZeroEntries_MPIBAIJ, 2449 /*24*/ MatZeroRows_MPIBAIJ, 2450 NULL, 2451 NULL, 2452 NULL, 2453 NULL, 2454 /*29*/ MatSetUp_MPI_Hash, 2455 NULL, 2456 NULL, 2457 MatGetDiagonalBlock_MPIBAIJ, 2458 NULL, 2459 /*34*/ MatDuplicate_MPIBAIJ, 2460 NULL, 2461 NULL, 2462 NULL, 2463 NULL, 2464 /*39*/ MatAXPY_MPIBAIJ, 2465 MatCreateSubMatrices_MPIBAIJ, 2466 MatIncreaseOverlap_MPIBAIJ, 2467 MatGetValues_MPIBAIJ, 2468 MatCopy_MPIBAIJ, 2469 /*44*/ NULL, 2470 MatScale_MPIBAIJ, 2471 MatShift_MPIBAIJ, 2472 NULL, 2473 MatZeroRowsColumns_MPIBAIJ, 2474 /*49*/ NULL, 2475 NULL, 2476 NULL, 2477 NULL, 2478 NULL, 2479 /*54*/ MatFDColoringCreate_MPIXAIJ, 2480 NULL, 2481 MatSetUnfactored_MPIBAIJ, 2482 MatPermute_MPIBAIJ, 2483 MatSetValuesBlocked_MPIBAIJ, 2484 /*59*/ MatCreateSubMatrix_MPIBAIJ, 2485 MatDestroy_MPIBAIJ, 2486 MatView_MPIBAIJ, 2487 NULL, 2488 NULL, 2489 /*64*/ NULL, 2490 NULL, 2491 NULL, 2492 NULL, 2493 NULL, 2494 /*69*/ MatGetRowMaxAbs_MPIBAIJ, 2495 NULL, 2496 NULL, 2497 NULL, 2498 NULL, 2499 /*74*/ NULL, 2500 MatFDColoringApply_BAIJ, 2501 NULL, 2502 NULL, 2503 NULL, 2504 /*79*/ NULL, 2505 NULL, 2506 NULL, 2507 NULL, 2508 MatLoad_MPIBAIJ, 2509 /*84*/ NULL, 2510 NULL, 2511 NULL, 2512 NULL, 2513 NULL, 2514 /*89*/ NULL, 2515 NULL, 2516 NULL, 2517 NULL, 2518 NULL, 2519 /*94*/ NULL, 2520 NULL, 2521 NULL, 2522 NULL, 2523 NULL, 2524 /*99*/ NULL, 2525 NULL, 2526 NULL, 2527 MatConjugate_MPIBAIJ, 2528 NULL, 2529 /*104*/ NULL, 2530 MatRealPart_MPIBAIJ, 2531 MatImaginaryPart_MPIBAIJ, 2532 NULL, 2533 
NULL, 2534 /*109*/ NULL, 2535 NULL, 2536 NULL, 2537 NULL, 2538 MatMissingDiagonal_MPIBAIJ, 2539 /*114*/ MatGetSeqNonzeroStructure_MPIBAIJ, 2540 NULL, 2541 MatGetGhosts_MPIBAIJ, 2542 NULL, 2543 NULL, 2544 /*119*/ NULL, 2545 NULL, 2546 NULL, 2547 NULL, 2548 MatGetMultiProcBlock_MPIBAIJ, 2549 /*124*/ NULL, 2550 MatGetColumnReductions_MPIBAIJ, 2551 MatInvertBlockDiagonal_MPIBAIJ, 2552 NULL, 2553 NULL, 2554 /*129*/ NULL, 2555 NULL, 2556 NULL, 2557 NULL, 2558 NULL, 2559 /*134*/ NULL, 2560 NULL, 2561 NULL, 2562 NULL, 2563 NULL, 2564 /*139*/ MatSetBlockSizes_Default, 2565 NULL, 2566 NULL, 2567 MatFDColoringSetUp_MPIXAIJ, 2568 NULL, 2569 /*144*/ MatCreateMPIMatConcatenateSeqMat_MPIBAIJ, 2570 NULL, 2571 NULL, 2572 NULL, 2573 NULL, 2574 NULL, 2575 /*150*/ NULL, 2576 MatEliminateZeros_MPIBAIJ, 2577 NULL}; 2578 2579 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *); 2580 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *); 2581 2582 static PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[]) 2583 { 2584 PetscInt m, rstart, cstart, cend; 2585 PetscInt i, j, dlen, olen, nz, nz_max = 0, *d_nnz = NULL, *o_nnz = NULL; 2586 const PetscInt *JJ = NULL; 2587 PetscScalar *values = NULL; 2588 PetscBool roworiented = ((Mat_MPIBAIJ *)B->data)->roworiented; 2589 PetscBool nooffprocentries; 2590 2591 PetscFunctionBegin; 2592 PetscCall(PetscLayoutSetBlockSize(B->rmap, bs)); 2593 PetscCall(PetscLayoutSetBlockSize(B->cmap, bs)); 2594 PetscCall(PetscLayoutSetUp(B->rmap)); 2595 PetscCall(PetscLayoutSetUp(B->cmap)); 2596 PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 2597 m = B->rmap->n / bs; 2598 rstart = B->rmap->rstart / bs; 2599 cstart = B->cmap->rstart / bs; 2600 cend = B->cmap->rend / bs; 2601 2602 PetscCheck(!ii[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]); 2603 PetscCall(PetscMalloc2(m, 
&d_nnz, m, &o_nnz)); 2604 for (i = 0; i < m; i++) { 2605 nz = ii[i + 1] - ii[i]; 2606 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz); 2607 nz_max = PetscMax(nz_max, nz); 2608 dlen = 0; 2609 olen = 0; 2610 JJ = jj + ii[i]; 2611 for (j = 0; j < nz; j++) { 2612 if (*JJ < cstart || *JJ >= cend) olen++; 2613 else dlen++; 2614 JJ++; 2615 } 2616 d_nnz[i] = dlen; 2617 o_nnz[i] = olen; 2618 } 2619 PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, d_nnz, 0, o_nnz)); 2620 PetscCall(PetscFree2(d_nnz, o_nnz)); 2621 2622 values = (PetscScalar *)V; 2623 if (!values) PetscCall(PetscCalloc1(bs * bs * nz_max, &values)); 2624 for (i = 0; i < m; i++) { 2625 PetscInt row = i + rstart; 2626 PetscInt ncols = ii[i + 1] - ii[i]; 2627 const PetscInt *icols = jj + ii[i]; 2628 if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */ 2629 const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0); 2630 PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, ncols, icols, svals, INSERT_VALUES)); 2631 } else { /* block ordering does not match so we can only insert one block at a time. */ 2632 PetscInt j; 2633 for (j = 0; j < ncols; j++) { 2634 const PetscScalar *svals = values + (V ? 
(bs * bs * (ii[i] + j)) : 0); 2635 PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, 1, &icols[j], svals, INSERT_VALUES)); 2636 } 2637 } 2638 } 2639 2640 if (!V) PetscCall(PetscFree(values)); 2641 nooffprocentries = B->nooffprocentries; 2642 B->nooffprocentries = PETSC_TRUE; 2643 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 2644 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 2645 B->nooffprocentries = nooffprocentries; 2646 2647 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 2648 PetscFunctionReturn(PETSC_SUCCESS); 2649 } 2650 2651 /*@C 2652 MatMPIBAIJSetPreallocationCSR - Creates a sparse parallel matrix in `MATBAIJ` format using the given nonzero structure and (optional) numerical values 2653 2654 Collective 2655 2656 Input Parameters: 2657 + B - the matrix 2658 . bs - the block size 2659 . i - the indices into `j` for the start of each local row (starts with zero) 2660 . j - the column indices for each local row (starts with zero) these must be sorted for each row 2661 - v - optional values in the matrix, use `NULL` if not provided 2662 2663 Level: advanced 2664 2665 Notes: 2666 The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc; 2667 thus you CANNOT change the matrix entries by changing the values of `v` after you have 2668 called this routine. 2669 2670 The order of the entries in values is specified by the `MatOption` `MAT_ROW_ORIENTED`. For example, C programs 2671 may want to use the default `MAT_ROW_ORIENTED` with value `PETSC_TRUE` and use an array v[nnz][bs][bs] where the second index is 2672 over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 2673 `MAT_ROW_ORIENTED` with value `PETSC_FALSE` and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 2674 block column and the second index is over columns within a block. 
2675 2676 Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well 2677 2678 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MATMPIAIJ`, `MatCreateMPIBAIJWithArrays()`, `MATMPIBAIJ` 2679 @*/ 2680 PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[]) 2681 { 2682 PetscFunctionBegin; 2683 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 2684 PetscValidType(B, 1); 2685 PetscValidLogicalCollectiveInt(B, bs, 2); 2686 PetscTryMethod(B, "MatMPIBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v)); 2687 PetscFunctionReturn(PETSC_SUCCESS); 2688 } 2689 2690 PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz) 2691 { 2692 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 2693 PetscInt i; 2694 PetscMPIInt size; 2695 2696 PetscFunctionBegin; 2697 if (B->hash_active) { 2698 B->ops[0] = b->cops; 2699 B->hash_active = PETSC_FALSE; 2700 } 2701 if (!B->preallocated) PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), bs, &B->bstash)); 2702 PetscCall(MatSetBlockSize(B, PetscAbs(bs))); 2703 PetscCall(PetscLayoutSetUp(B->rmap)); 2704 PetscCall(PetscLayoutSetUp(B->cmap)); 2705 PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 2706 2707 if (d_nnz) { 2708 for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(d_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "d_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, d_nnz[i]); 2709 } 2710 if (o_nnz) { 2711 for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(o_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "o_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, o_nnz[i]); 2712 } 
2713 2714 b->bs2 = bs * bs; 2715 b->mbs = B->rmap->n / bs; 2716 b->nbs = B->cmap->n / bs; 2717 b->Mbs = B->rmap->N / bs; 2718 b->Nbs = B->cmap->N / bs; 2719 2720 for (i = 0; i <= b->size; i++) b->rangebs[i] = B->rmap->range[i] / bs; 2721 b->rstartbs = B->rmap->rstart / bs; 2722 b->rendbs = B->rmap->rend / bs; 2723 b->cstartbs = B->cmap->rstart / bs; 2724 b->cendbs = B->cmap->rend / bs; 2725 2726 #if defined(PETSC_USE_CTABLE) 2727 PetscCall(PetscHMapIDestroy(&b->colmap)); 2728 #else 2729 PetscCall(PetscFree(b->colmap)); 2730 #endif 2731 PetscCall(PetscFree(b->garray)); 2732 PetscCall(VecDestroy(&b->lvec)); 2733 PetscCall(VecScatterDestroy(&b->Mvctx)); 2734 2735 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 2736 2737 MatSeqXAIJGetOptions_Private(b->B); 2738 PetscCall(MatDestroy(&b->B)); 2739 PetscCall(MatCreate(PETSC_COMM_SELF, &b->B)); 2740 PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? B->cmap->N : 0)); 2741 PetscCall(MatSetType(b->B, MATSEQBAIJ)); 2742 MatSeqXAIJRestoreOptions_Private(b->B); 2743 2744 MatSeqXAIJGetOptions_Private(b->A); 2745 PetscCall(MatDestroy(&b->A)); 2746 PetscCall(MatCreate(PETSC_COMM_SELF, &b->A)); 2747 PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n)); 2748 PetscCall(MatSetType(b->A, MATSEQBAIJ)); 2749 MatSeqXAIJRestoreOptions_Private(b->A); 2750 2751 PetscCall(MatSeqBAIJSetPreallocation(b->A, bs, d_nz, d_nnz)); 2752 PetscCall(MatSeqBAIJSetPreallocation(b->B, bs, o_nz, o_nnz)); 2753 B->preallocated = PETSC_TRUE; 2754 B->was_assembled = PETSC_FALSE; 2755 B->assembled = PETSC_FALSE; 2756 PetscFunctionReturn(PETSC_SUCCESS); 2757 } 2758 2759 extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat, Vec); 2760 extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat, PetscReal); 2761 2762 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype, MatReuse reuse, Mat *adj) 2763 { 2764 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 2765 
  Mat_SeqBAIJ     *d = (Mat_SeqBAIJ *)b->A->data, *o = (Mat_SeqBAIJ *)b->B->data;
  PetscInt         M = B->rmap->n / B->rmap->bs, i, *ii, *jj, cnt, j, k, rstart = B->rmap->rstart / B->rmap->bs;
  const PetscInt  *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;

  PetscFunctionBegin;
  /* ii/jj: CSR (block) adjacency built by merging the diagonal (A) and off-diagonal (B) pieces */
  PetscCall(PetscMalloc1(M + 1, &ii));
  ii[0] = 0;
  for (i = 0; i < M; i++) {
    PetscCheck((id[i + 1] - id[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, id[i], id[i + 1]);
    PetscCheck((io[i + 1] - io[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, io[i], io[i + 1]);
    ii[i + 1] = ii[i] + id[i + 1] - id[i] + io[i + 1] - io[i];
    /* remove one from the count when this row of the diagonal piece stores its diagonal entry,
       since it is skipped when filling jj below */
    for (j = id[i]; j < id[i + 1]; j++) {
      if (jd[j] == i) {
        ii[i + 1]--;
        break;
      }
    }
  }
  PetscCall(PetscMalloc1(ii[M], &jj));
  cnt = 0;
  for (i = 0; i < M; i++) {
    /* off-diagonal columns that come before this process's column range */
    for (j = io[i]; j < io[i + 1]; j++) {
      if (garray[jo[j]] > rstart) break;
      jj[cnt++] = garray[jo[j]];
    }
    /* diagonal-piece columns (shifted to global numbering), skipping the diagonal entry itself */
    for (k = id[i]; k < id[i + 1]; k++) {
      if (jd[k] != i) jj[cnt++] = rstart + jd[k];
    }
    /* remaining off-diagonal columns after the local range */
    for (; j < io[i + 1]; j++) jj[cnt++] = garray[jo[j]];
  }
  /* MatCreateMPIAdj takes ownership of ii and jj */
  PetscCall(MatCreateMPIAdj(PetscObjectComm((PetscObject)B), M, B->cmap->N / B->rmap->bs, ii, jj, NULL, adj));
  PetscFunctionReturn(PETSC_SUCCESS);
}

#include <../src/mat/impls/aij/mpi/mpiaij.h>

PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType, MatReuse, Mat *);

/* Convert a MATMPIBAIJ matrix to MATMPIAIJ by converting its two sequential pieces
   and renumbering the off-diagonal piece's columns to global point indices. */
PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
  Mat_MPIAIJ  *b;
  Mat          B;

  PetscFunctionBegin;
  PetscCheck(A->assembled, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Matrix must be assembled");

  if (reuse == MAT_REUSE_MATRIX) {
    B = *newmat;
  } else {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetType(B, MATMPIAIJ));
    PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
    PetscCall(MatSetBlockSizes(B, A->rmap->bs, A->cmap->bs));
    PetscCall(MatSeqAIJSetPreallocation(B, 0, NULL));
    PetscCall(MatMPIAIJSetPreallocation(B, 0, NULL, 0, NULL));
  }
  b = (Mat_MPIAIJ *)B->data;

  if (reuse == MAT_REUSE_MATRIX) {
    /* reuse the existing sequential pieces in place */
    PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_REUSE_MATRIX, &b->A));
    PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_REUSE_MATRIX, &b->B));
  } else {
    PetscInt   *garray = a->garray;
    Mat_SeqAIJ *bB;
    PetscInt    bs, nnz;
    PetscCall(MatDestroy(&b->A));
    PetscCall(MatDestroy(&b->B));
    /* just clear out the data structure */
    PetscCall(MatDisAssemble_MPIAIJ(B));
    PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A));
    PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B));

    /* Global numbering for b->B columns: map each compressed block column through garray,
       keeping the offset within the block */
    bB  = (Mat_SeqAIJ *)b->B->data;
    bs  = A->rmap->bs;
    nnz = bB->i[A->rmap->n];
    for (PetscInt k = 0; k < nnz; k++) {
      PetscInt bj = bB->j[k] / bs;
      PetscInt br = bB->j[k] % bs;
      bB->j[k]    = garray[bj] * bs + br;
    }
  }
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));

  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &B));
  } else {
    *newmat = B;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
   MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.

   Options Database Keys:
+ -mat_type mpibaij - sets the matrix type to `MATMPIBAIJ` during a call to `MatSetFromOptions()`
.
-mat_block_size <bs> - set the blocksize used to store the matrix
. -mat_baij_mult_version version - indicate the version of the matrix-vector product to use (0 often indicates using BLAS)
- -mat_use_hash_table <fact> - set hash table factor

  Level: beginner

  Note:
  `MatSetOption(A, MAT_STRUCTURE_ONLY, PETSC_TRUE)` may be called for this matrix type. In this case no
  space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored

.seealso: `Mat`, `MATBAIJ`, `MATSEQBAIJ`, `MatCreateBAIJ`
M*/

PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat, MatType, MatReuse, Mat *);

/* Type constructor for MATMPIBAIJ: allocates the implementation struct, initializes all
   fields to their empty state, and registers the type-specific methods and conversions. */
PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B)
{
  Mat_MPIBAIJ *b;
  PetscBool    flg = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PetscNew(&b));
  B->data      = (void *)b;
  B->ops[0]    = MatOps_Values;
  B->assembled = PETSC_FALSE;

  B->insertmode = NOT_SET_VALUES;
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &b->size));

  /* build local table of row and column ownerships */
  PetscCall(PetscMalloc1(b->size + 1, &b->rangebs));

  /* build cache for off array entries formed */
  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL;
  b->garray      = NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used in block assembly */
  b->barray = NULL;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = NULL;
  b->rowvalues    = NULL;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = NULL;
  b->hd           = NULL;
  b->ht_size      = 0;
  b->ht_flag      = PETSC_FALSE;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
  b->ijonly = PETSC_FALSE;

  /* register type-specific methods looked up by PetscTryMethod()/PetscUseMethod() */
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiadj_C", MatConvert_MPIBAIJ_MPIAdj));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiaij_C", MatConvert_MPIBAIJ_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpisbaij_C", MatConvert_MPIBAIJ_MPISBAIJ));
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_hypre_C", MatConvert_AIJ_HYPRE));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocation_C", MatMPIBAIJSetPreallocation_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocationCSR_C", MatMPIBAIJSetPreallocationCSR_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetHashTableFactor_C", MatSetHashTableFactor_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_is_C", MatConvert_XAIJ_IS));
  PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIBAIJ));

  PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Options for loading MPIBAIJ matrix 1", "Mat");
  PetscCall(PetscOptionsName("-mat_use_hash_table", "Use hash table to save time in constructing matrix", "MatSetOption", &flg));
  if (flg) {
    PetscReal fact = 1.39;
    PetscCall(MatSetOption(B, MAT_USE_HASH_TABLE, PETSC_TRUE));
    PetscCall(PetscOptionsReal("-mat_use_hash_table", "Use hash table factor", "MatMPIBAIJSetHashTableFactor", fact, &fact, NULL));
    /* factors <= 1.0 are not meaningful; fall back to the default */
    if (fact <= 1.0) fact = 1.39;
    PetscCall(MatMPIBAIJSetHashTableFactor(B, fact));
    PetscCall(PetscInfo(B, "Hash table Factor used %5.2g\n", (double)fact));
  }
  PetscOptionsEnd();
  PetscFunctionReturn(PETSC_SUCCESS);
}

// PetscClangLinter pragma disable: -fdoc-section-header-unknown
/*MC
   MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.

   This matrix type is identical to `MATSEQBAIJ` when constructed with a single process communicator,
   and `MATMPIBAIJ` otherwise.

   Options Database Keys:
. -mat_type baij - sets the matrix type to `MATBAIJ` during a call to `MatSetFromOptions()`

  Level: beginner

.seealso: `Mat`, `MatCreateBAIJ()`, `MATSEQBAIJ`, `MATMPIBAIJ`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()`
M*/

/*@C
   MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in `MATMPIBAIJ` format
   (block compressed row).

   Collective

   Input Parameters:
+  B     - the matrix
.  bs    - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
           blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
.  d_nz  - number of block nonzeros per block row in diagonal portion of local
           submatrix (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           in the diagonal portion of the local submatrix (possibly different for each block
           row) or `NULL`. If you plan to factor the matrix you must leave room for the diagonal entry and
           set it even if it is zero.
.  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
           submatrix (same for all local rows).
- o_nnz - array containing the number of nonzeros in the various block rows of the
          off-diagonal portion of the local submatrix (possibly different for
          each block row) or `NULL`.

  If the *_nnz parameter is given then the *_nz parameter is ignored

  Options Database Keys:
+ -mat_block_size - size of the blocks to use
- -mat_use_hash_table <fact> - set hash table factor

  Level: intermediate

  Notes:
  For good matrix assembly performance
  the user should preallocate the matrix storage by setting the parameters
  `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). By setting these parameters accurately,
  performance can be increased by more than a factor of 50.

  If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor
  then it must be used on all processors that share the object for that argument.

  Storage Information:
  For a square global matrix we define each processor's diagonal portion
  to be its local rows and the corresponding columns (a square submatrix);
  each processor's off-diagonal portion encompasses the remainder of the
  local matrix (a rectangular submatrix).

  The user can specify preallocated storage for the diagonal part of
  the local submatrix with either `d_nz` or `d_nnz` (not both). Set
  `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic
  memory allocation. Likewise, specify preallocated storage for the
  off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both).

  Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
  the figure below we depict these three local rows and all columns (0-11).
3027 3028 .vb 3029 0 1 2 3 4 5 6 7 8 9 10 11 3030 -------------------------- 3031 row 3 |o o o d d d o o o o o o 3032 row 4 |o o o d d d o o o o o o 3033 row 5 |o o o d d d o o o o o o 3034 -------------------------- 3035 .ve 3036 3037 Thus, any entries in the d locations are stored in the d (diagonal) 3038 submatrix, and any entries in the o locations are stored in the 3039 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3040 stored simply in the `MATSEQBAIJ` format for compressed row storage. 3041 3042 Now `d_nz` should indicate the number of block nonzeros per row in the d matrix, 3043 and `o_nz` should indicate the number of block nonzeros per row in the o matrix. 3044 In general, for PDE problems in which most nonzeros are near the diagonal, 3045 one expects `d_nz` >> `o_nz`. 3046 3047 You can call `MatGetInfo()` to get information on how effective the preallocation was; 3048 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3049 You can also run with the option `-info` and look for messages with the string 3050 malloc in them to see if additional memory allocation was needed. 

.seealso: `Mat`, `MATMPIBAIJ`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatMPIBAIJSetPreallocationCSR()`, `PetscSplitOwnership()`
@*/
PetscErrorCode MatMPIBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidType(B, 1);
  PetscValidLogicalCollectiveInt(B, bs, 2);
  /* Dispatch to the type-specific implementation; no-op when the matrix type did not register it */
  PetscTryMethod(B, "MatMPIBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, bs, d_nz, d_nnz, o_nz, o_nnz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

// PetscClangLinter pragma disable: -fdoc-section-header-unknown
/*@C
   MatCreateBAIJ - Creates a sparse parallel matrix in `MATBAIJ` format
   (block compressed row).

   Collective

   Input Parameters:
+  comm - MPI communicator
.  bs   - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
          blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
.  m    - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
          This value should be the same as the local size used in creating the
          y vector for the matrix-vector product y = Ax.
.  n    - number of local columns (or `PETSC_DECIDE` to have calculated if N is given)
          This value should be the same as the local size used in creating the
          x vector for the matrix-vector product y = Ax.
.  M    - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
.  N    - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
.  d_nz - number of nonzero blocks per block row in diagonal portion of local
          submatrix (same for all local rows)
.
d_nnz - array containing the number of nonzero blocks in the various block rows
        in the diagonal portion of the local submatrix (possibly different for each block
        row) or NULL. If you plan to factor the matrix you must leave room for the diagonal entry
        and set it even if it is zero.
. o_nz  - number of nonzero blocks per block row in the off-diagonal portion of local
          submatrix (same for all local rows).
- o_nnz - array containing the number of nonzero blocks in the various block rows of the
          off-diagonal portion of the local submatrix (possibly different for
          each block row) or NULL.

  Output Parameter:
. A - the matrix

  Options Database Keys:
+ -mat_block_size - size of the blocks to use
- -mat_use_hash_table <fact> - set hash table factor

  Level: intermediate

  Notes:
  It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
  MatXXXXSetPreallocation() paradigm instead of this routine directly.
  [MatXXXXSetPreallocation() is, for example, `MatSeqBAIJSetPreallocation()`]

  For good matrix assembly performance
  the user should preallocate the matrix storage by setting the parameters
  `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). By setting these parameters accurately,
  performance can be increased by more than a factor of 50.

  If the *_nnz parameter is given then the *_nz parameter is ignored

  A nonzero block is any block that has 1 or more nonzeros in it

  The user MUST specify either the local or global matrix dimensions
  (possibly both).

  If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor
  then it must be used on all processors that share the object for that argument.
3123 3124 Storage Information: 3125 For a square global matrix we define each processor's diagonal portion 3126 to be its local rows and the corresponding columns (a square submatrix); 3127 each processor's off-diagonal portion encompasses the remainder of the 3128 local matrix (a rectangular submatrix). 3129 3130 The user can specify preallocated storage for the diagonal part of 3131 the local submatrix with either d_nz or d_nnz (not both). Set 3132 `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic 3133 memory allocation. Likewise, specify preallocated storage for the 3134 off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both). 3135 3136 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3137 the figure below we depict these three local rows and all columns (0-11). 3138 3139 .vb 3140 0 1 2 3 4 5 6 7 8 9 10 11 3141 -------------------------- 3142 row 3 |o o o d d d o o o o o o 3143 row 4 |o o o d d d o o o o o o 3144 row 5 |o o o d d d o o o o o o 3145 -------------------------- 3146 .ve 3147 3148 Thus, any entries in the d locations are stored in the d (diagonal) 3149 submatrix, and any entries in the o locations are stored in the 3150 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3151 stored simply in the `MATSEQBAIJ` format for compressed row storage. 3152 3153 Now `d_nz` should indicate the number of block nonzeros per row in the d matrix, 3154 and `o_nz` should indicate the number of block nonzeros per row in the o matrix. 3155 In general, for PDE problems in which most nonzeros are near the diagonal, 3156 one expects `d_nz` >> `o_nz`. 

.seealso: `Mat`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()`
@*/
PetscErrorCode MatCreateBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
{
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCall(MatCreate(comm, A));
  PetscCall(MatSetSizes(*A, m, n, M, N));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  /* one process: plain sequential BAIJ; otherwise the parallel type with both preallocations */
  if (size > 1) {
    PetscCall(MatSetType(*A, MATMPIBAIJ));
    PetscCall(MatMPIBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz));
  } else {
    PetscCall(MatSetType(*A, MATSEQBAIJ));
    PetscCall(MatSeqBAIJSetPreallocation(*A, bs, d_nz, d_nnz));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Duplicate a MATMPIBAIJ matrix: copies the layout and implementation state, duplicates
   the colmap/garray lookup structures and the communication objects, then duplicates the
   two sequential pieces (copying values or not according to cpvalues). */
static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
{
  Mat          mat;
  Mat_MPIBAIJ *a, *oldmat = (Mat_MPIBAIJ *)matin->data;
  PetscInt     len = 0;

  PetscFunctionBegin;
  *newmat = NULL;
  PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
  PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
  PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));

  PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
  PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));
  if (matin->hash_active) {
    /* hash-based insertion mode: nothing beyond a plain setup to copy */
    PetscCall(MatSetUp(mat));
  } else {
    mat->factortype   = matin->factortype;
    mat->preallocated = PETSC_TRUE;
    mat->assembled    = PETSC_TRUE;
    mat->insertmode   = NOT_SET_VALUES;

    a             = (Mat_MPIBAIJ *)mat->data;
    mat->rmap->bs = matin->rmap->bs;
    a->bs2        = oldmat->bs2;
    a->mbs        = oldmat->mbs;
    a->nbs        = oldmat->nbs;
    a->Mbs        = oldmat->Mbs;
    a->Nbs        = oldmat->Nbs;

    a->size = oldmat->size;
    a->rank = oldmat->rank;

    a->donotstash   = oldmat->donotstash;
    a->roworiented  = oldmat->roworiented;
    a->rowindices   = NULL;
    a->rowvalues    = NULL;
    a->getrowactive = PETSC_FALSE;
    a->barray       = NULL;
    a->rstartbs     = oldmat->rstartbs;
    a->rendbs       = oldmat->rendbs;
    a->cstartbs     = oldmat->cstartbs;
    a->cendbs       = oldmat->cendbs;

    /* hash table stuff: counts reset, configuration copied */
    a->ht           = NULL;
    a->hd           = NULL;
    a->ht_size      = 0;
    a->ht_flag      = oldmat->ht_flag;
    a->ht_fact      = oldmat->ht_fact;
    a->ht_total_ct  = 0;
    a->ht_insert_ct = 0;

    PetscCall(PetscArraycpy(a->rangebs, oldmat->rangebs, a->size + 1));
    if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
      PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
#else
      PetscCall(PetscMalloc1(a->Nbs, &a->colmap));
      PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, a->Nbs));
#endif
    } else a->colmap = NULL;

    if (oldmat->garray && (len = ((Mat_SeqBAIJ *)(oldmat->B->data))->nbs)) {
      PetscCall(PetscMalloc1(len, &a->garray));
      PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
    } else a->garray = NULL;

    PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin), matin->rmap->bs, &mat->bstash));
    PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
    PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx));

    PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
    PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
  }
  PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
  *newmat = mat;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Used for both MPIBAIJ and MPISBAIJ matrices */
PetscErrorCode MatLoad_MPIBAIJ_Binary(Mat mat, PetscViewer viewer)
{
  PetscInt     header[4], M, N, nz, bs, m, n, mbs, nbs, rows, cols, sum, i, j, k;
  PetscInt    *rowidxs, *colidxs, rs, cs, ce;
  PetscScalar *matvals;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  /* read in matrix header */
  PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT));
  PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file");
  M  = header[1];
  N  = header[2];
  nz = header[3];
  PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M);
  PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N);
  PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIBAIJ");

  /* set block sizes from the viewer's .info file */
  PetscCall(MatLoad_Binary_BlockSizes(mat, viewer));
  /* set local sizes if not set already */
  if (mat->rmap->n < 0 && M == N) mat->rmap->n = mat->cmap->n;
  if (mat->cmap->n < 0 && M == N) mat->cmap->n = mat->rmap->n;
  /* set global sizes if not set already */
  if (mat->rmap->N < 0) mat->rmap->N = M;
  if (mat->cmap->N < 0) mat->cmap->N = N;
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));

  /* check if the matrix sizes are correct */
  PetscCall(MatGetSize(mat, &rows, &cols));
  PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);
  PetscCall(MatGetBlockSize(mat, &bs));
  PetscCall(MatGetLocalSize(mat, &m, &n));
  PetscCall(PetscLayoutGetRange(mat->rmap, &rs, NULL));
  PetscCall(PetscLayoutGetRange(mat->cmap, &cs, &ce));
  mbs = m / bs;
  nbs = n / bs;

  /* read in row lengths and build row indices (prefix sum turns lengths into CSR offsets) */
  PetscCall(PetscMalloc1(m + 1, &rowidxs));
  PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT));
  rowidxs[0] = 0;
  for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
  PetscCall(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer)));
  PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);

  /* read in column indices and matrix values */
  PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals));
  PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
  PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));

  { /* preallocate matrix storage */
    PetscBT    bt; /* helper bit set to count diagonal nonzeros */
    PetscHSetI ht; /* helper hash set to count off-diagonal nonzeros */
    PetscBool  sbaij, done;
    PetscInt  *d_nnz, *o_nnz;

    PetscCall(PetscBTCreate(nbs, &bt));
    PetscCall(PetscHSetICreate(&ht));
    PetscCall(PetscCalloc2(mbs, &d_nnz, mbs, &o_nnz));
    PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPISBAIJ, &sbaij));
    for (i = 0; i < mbs; i++) {
      PetscCall(PetscBTMemzero(nbs, bt));
      PetscCall(PetscHSetIClear(ht));
      for (k = 0; k < bs; k++) {
        PetscInt row = bs * i + k;
        for (j = rowidxs[row]; j < rowidxs[row + 1]; j++) {
          PetscInt col = colidxs[j];
          /* for SBAIJ only the upper triangle (col >= row) is stored */
          if (!sbaij || col >= row) {
            if (col >= cs && col < ce) {
              if (!PetscBTLookupSet(bt, (col - cs) / bs)) d_nnz[i]++;
            } else {
              PetscCall(PetscHSetIQueryAdd(ht, col / bs, &done));
              if (done) o_nnz[i]++;
            }
          }
        }
      }
    }
    PetscCall(PetscBTDestroy(&bt));
    PetscCall(PetscHSetIDestroy(&ht));
    /* this loader is shared by MPIBAIJ and MPISBAIJ; each call below dispatches via
       PetscTryMethod so only the one matching the matrix type takes effect */
    PetscCall(MatMPIBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz));
    PetscCall(MatMPISBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz));
    PetscCall(PetscFree2(d_nnz, o_nnz));
  }

  /* store matrix values */
  for (i = 0; i < m; i++) {
    PetscInt row = rs + i, s = rowidxs[i], e = rowidxs[i + 1];
    PetscUseTypeMethod(mat, setvalues, 1, &row, e - s, colidxs + s, matvals + s, INSERT_VALUES);
  }

  PetscCall(PetscFree(rowidxs));
  PetscCall(PetscFree2(colidxs, matvals));
  PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* Generic MatLoad for MATMPIBAIJ: currently only binary viewers are supported */
PetscErrorCode MatLoad_MPIBAIJ(Mat mat, PetscViewer viewer)
{
  PetscBool isbinary;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name);
  PetscCall(MatLoad_MPIBAIJ_Binary(mat, viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*@
  MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the matrix's hash table

  Input Parameters:
+ mat  - the matrix
- fact - factor

  Options Database Key:
.
-mat_use_hash_table <fact> - provide the factor 3379 3380 Level: advanced 3381 3382 .seealso: `Mat`, `MATMPIBAIJ`, `MatSetOption()` 3383 @*/ 3384 PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat, PetscReal fact) 3385 { 3386 PetscFunctionBegin; 3387 PetscTryMethod(mat, "MatSetHashTableFactor_C", (Mat, PetscReal), (mat, fact)); 3388 PetscFunctionReturn(PETSC_SUCCESS); 3389 } 3390 3391 PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat, PetscReal fact) 3392 { 3393 Mat_MPIBAIJ *baij; 3394 3395 PetscFunctionBegin; 3396 baij = (Mat_MPIBAIJ *)mat->data; 3397 baij->ht_fact = fact; 3398 PetscFunctionReturn(PETSC_SUCCESS); 3399 } 3400 3401 PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[]) 3402 { 3403 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 3404 PetscBool flg; 3405 3406 PetscFunctionBegin; 3407 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIBAIJ, &flg)); 3408 PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIBAIJ matrix as input"); 3409 if (Ad) *Ad = a->A; 3410 if (Ao) *Ao = a->B; 3411 if (colmap) *colmap = a->garray; 3412 PetscFunctionReturn(PETSC_SUCCESS); 3413 } 3414 3415 /* 3416 Special version for direct calls from Fortran (to eliminate two function call overheads 3417 */ 3418 #if defined(PETSC_HAVE_FORTRAN_CAPS) 3419 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED 3420 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 3421 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked 3422 #endif 3423 3424 // PetscClangLinter pragma disable: -fdoc-synopsis-matching-symbol-name 3425 /*@C 3426 MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to `MatSetValuesBlocked()` 3427 3428 Collective 3429 3430 Input Parameters: 3431 + matin - the matrix 3432 . min - number of input rows 3433 . im - input rows 3434 . nin - number of input columns 3435 . in - input columns 3436 . 
v - numerical values input 3437 - addvin - `INSERT_VALUES` or `ADD_VALUES` 3438 3439 Level: advanced 3440 3441 Developer Notes: 3442 This has a complete copy of `MatSetValuesBlocked_MPIBAIJ()` which is terrible code un-reuse. 3443 3444 .seealso: `Mat`, `MatSetValuesBlocked()` 3445 @*/ 3446 PETSC_EXTERN PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin, PetscInt *min, const PetscInt im[], PetscInt *nin, const PetscInt in[], const MatScalar v[], InsertMode *addvin) 3447 { 3448 /* convert input arguments to C version */ 3449 Mat mat = *matin; 3450 PetscInt m = *min, n = *nin; 3451 InsertMode addv = *addvin; 3452 3453 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 3454 const MatScalar *value; 3455 MatScalar *barray = baij->barray; 3456 PetscBool roworiented = baij->roworiented; 3457 PetscInt i, j, ii, jj, row, col, rstart = baij->rstartbs; 3458 PetscInt rend = baij->rendbs, cstart = baij->cstartbs, stepval; 3459 PetscInt cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2; 3460 3461 PetscFunctionBegin; 3462 /* tasks normally handled by MatSetValuesBlocked() */ 3463 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 3464 else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values"); 3465 PetscCheck(!mat->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix"); 3466 if (mat->assembled) { 3467 mat->was_assembled = PETSC_TRUE; 3468 mat->assembled = PETSC_FALSE; 3469 } 3470 PetscCall(PetscLogEventBegin(MAT_SetValues, mat, 0, 0, 0)); 3471 3472 if (!barray) { 3473 PetscCall(PetscMalloc1(bs2, &barray)); 3474 baij->barray = barray; 3475 } 3476 3477 if (roworiented) stepval = (n - 1) * bs; 3478 else stepval = (m - 1) * bs; 3479 3480 for (i = 0; i < m; i++) { 3481 if (im[i] < 0) continue; 3482 PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large, row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1); 3483 if (im[i] >= rstart 
&& im[i] < rend) { 3484 row = im[i] - rstart; 3485 for (j = 0; j < n; j++) { 3486 /* If NumCol = 1 then a copy is not required */ 3487 if ((roworiented) && (n == 1)) { 3488 barray = (MatScalar *)v + i * bs2; 3489 } else if ((!roworiented) && (m == 1)) { 3490 barray = (MatScalar *)v + j * bs2; 3491 } else { /* Here a copy is required */ 3492 if (roworiented) { 3493 value = v + i * (stepval + bs) * bs + j * bs; 3494 } else { 3495 value = v + j * (stepval + bs) * bs + i * bs; 3496 } 3497 for (ii = 0; ii < bs; ii++, value += stepval) { 3498 for (jj = 0; jj < bs; jj++) *barray++ = *value++; 3499 } 3500 barray -= bs2; 3501 } 3502 3503 if (in[j] >= cstart && in[j] < cend) { 3504 col = in[j] - cstart; 3505 PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j])); 3506 } else if (in[j] < 0) { 3507 continue; 3508 } else { 3509 PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large, col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1); 3510 if (mat->was_assembled) { 3511 if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 3512 3513 #if defined(PETSC_USE_DEBUG) 3514 #if defined(PETSC_USE_CTABLE) 3515 { 3516 PetscInt data; 3517 PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &data)); 3518 PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap"); 3519 } 3520 #else 3521 PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap"); 3522 #endif 3523 #endif 3524 #if defined(PETSC_USE_CTABLE) 3525 PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col)); 3526 col = (col - 1) / bs; 3527 #else 3528 col = (baij->colmap[in[j]] - 1) / bs; 3529 #endif 3530 if (col < 0 && !((Mat_SeqBAIJ *)(baij->A->data))->nonew) { 3531 PetscCall(MatDisAssemble_MPIBAIJ(mat)); 3532 col = in[j]; 3533 } 3534 } else col = in[j]; 3535 PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, 
im[i], in[j])); 3536 } 3537 } 3538 } else { 3539 if (!baij->donotstash) { 3540 if (roworiented) { 3541 PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 3542 } else { 3543 PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 3544 } 3545 } 3546 } 3547 } 3548 3549 /* task normally handled by MatSetValuesBlocked() */ 3550 PetscCall(PetscLogEventEnd(MAT_SetValues, mat, 0, 0, 0)); 3551 PetscFunctionReturn(PETSC_SUCCESS); 3552 } 3553 3554 /*@ 3555 MatCreateMPIBAIJWithArrays - creates a `MATMPIBAIJ` matrix using arrays that contain in standard block CSR format for the local rows. 3556 3557 Collective 3558 3559 Input Parameters: 3560 + comm - MPI communicator 3561 . bs - the block size, only a block size of 1 is supported 3562 . m - number of local rows (Cannot be `PETSC_DECIDE`) 3563 . n - This value should be the same as the local size used in creating the 3564 x vector for the matrix-vector product $ y = Ax $. (or `PETSC_DECIDE` to have 3565 calculated if `N` is given) For square matrices `n` is almost always `m`. 3566 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given) 3567 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given) 3568 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that rowth block row of the matrix 3569 . j - column indices 3570 - a - matrix values 3571 3572 Output Parameter: 3573 . mat - the matrix 3574 3575 Level: intermediate 3576 3577 Notes: 3578 The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc; 3579 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3580 called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays. 
3581 3582 The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 3583 the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 3584 block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory 3585 with column-major ordering within blocks. 3586 3587 The `i` and `j` indices are 0 based, and `i` indices are indices corresponding to the local `j` array. 3588 3589 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 3590 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()` 3591 @*/ 3592 PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat) 3593 { 3594 PetscFunctionBegin; 3595 PetscCheck(!i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 3596 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3597 PetscCall(MatCreate(comm, mat)); 3598 PetscCall(MatSetSizes(*mat, m, n, M, N)); 3599 PetscCall(MatSetType(*mat, MATMPIBAIJ)); 3600 PetscCall(MatSetBlockSize(*mat, bs)); 3601 PetscCall(MatSetUp(*mat)); 3602 PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_FALSE)); 3603 PetscCall(MatMPIBAIJSetPreallocationCSR(*mat, bs, i, j, a)); 3604 PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_TRUE)); 3605 PetscFunctionReturn(PETSC_SUCCESS); 3606 } 3607 3608 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 3609 { 3610 PetscInt m, N, i, rstart, nnz, Ii, bs, cbs; 3611 PetscInt *indx; 3612 PetscScalar *values; 3613 3614 PetscFunctionBegin; 3615 PetscCall(MatGetSize(inmat, &m, &N)); 3616 if (scall == 
MAT_INITIAL_MATRIX) { /* symbolic phase */ 3617 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)inmat->data; 3618 PetscInt *dnz, *onz, mbs, Nbs, nbs; 3619 PetscInt *bindx, rmax = a->rmax, j; 3620 PetscMPIInt rank, size; 3621 3622 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 3623 mbs = m / bs; 3624 Nbs = N / cbs; 3625 if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnershipBlock(comm, cbs, &n, &N)); 3626 nbs = n / cbs; 3627 3628 PetscCall(PetscMalloc1(rmax, &bindx)); 3629 MatPreallocateBegin(comm, mbs, nbs, dnz, onz); /* inline function, output __end and __rstart are used below */ 3630 3631 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 3632 PetscCallMPI(MPI_Comm_rank(comm, &size)); 3633 if (rank == size - 1) { 3634 /* Check sum(nbs) = Nbs */ 3635 PetscCheck(__end == Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT, __end, Nbs); 3636 } 3637 3638 rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */ 3639 for (i = 0; i < mbs; i++) { 3640 PetscCall(MatGetRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL)); /* non-blocked nnz and indx */ 3641 nnz = nnz / bs; 3642 for (j = 0; j < nnz; j++) bindx[j] = indx[j * bs] / bs; 3643 PetscCall(MatPreallocateSet(i + rstart, nnz, bindx, dnz, onz)); 3644 PetscCall(MatRestoreRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL)); 3645 } 3646 PetscCall(PetscFree(bindx)); 3647 3648 PetscCall(MatCreate(comm, outmat)); 3649 PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 3650 PetscCall(MatSetBlockSizes(*outmat, bs, cbs)); 3651 PetscCall(MatSetType(*outmat, MATBAIJ)); 3652 PetscCall(MatSeqBAIJSetPreallocation(*outmat, bs, 0, dnz)); 3653 PetscCall(MatMPIBAIJSetPreallocation(*outmat, bs, 0, dnz, 0, onz)); 3654 MatPreallocateEnd(dnz, onz); 3655 PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 3656 } 3657 3658 /* numeric phase */ 3659 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 3660 
PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL)); 3661 3662 for (i = 0; i < m; i++) { 3663 PetscCall(MatGetRow_SeqBAIJ(inmat, i, &nnz, &indx, &values)); 3664 Ii = i + rstart; 3665 PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES)); 3666 PetscCall(MatRestoreRow_SeqBAIJ(inmat, i, &nnz, &indx, &values)); 3667 } 3668 PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY)); 3669 PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY)); 3670 PetscFunctionReturn(PETSC_SUCCESS); 3671 } 3672