#include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/

#include <petsc/private/hashseti.h>
#include <petscblaslapack.h>
#include <petscsf.h>

static PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N));
  PetscCall(MatStashDestroy_Private(&mat->stash));
  PetscCall(MatStashDestroy_Private(&mat->bstash));
  PetscCall(MatDestroy(&baij->A));
  PetscCall(MatDestroy(&baij->B));
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&baij->colmap));
#else
  PetscCall(PetscFree(baij->colmap));
#endif
  PetscCall(PetscFree(baij->garray));
  PetscCall(VecDestroy(&baij->lvec));
  PetscCall(VecScatterDestroy(&baij->Mvctx));
  PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));
  PetscCall(PetscFree(baij->barray));
  PetscCall(PetscFree2(baij->hd, baij->ht));
  PetscCall(PetscFree(baij->rangebs));
  PetscCall(PetscFree(mat->data));

  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocation_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocationCSR_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetHashTableFactor_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpisbaij_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiadj_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiaij_C", NULL));
#if defined(PETSC_HAVE_HYPRE)
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_hypre_C", NULL));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_is_C", NULL));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* defines MatSetValues_MPI_Hash(), MatAssemblyBegin_MPI_Hash(), and MatAssemblyEnd_MPI_Hash() */
#define TYPE BAIJ
#include "../src/mat/impls/aij/mpi/mpihashmat.h"
#undef TYPE

#if defined(PETSC_HAVE_HYPRE)
PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *);
#endif

static PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A, Vec v, PetscInt idx[])
{
  Mat_MPIBAIJ       *a = (Mat_MPIBAIJ *)A->data;
  PetscInt           i, *idxb = NULL, m = A->rmap->n, bs = A->cmap->bs;
  PetscScalar       *va, *vv;
  Vec                vB, vA;
  const PetscScalar *vb;

  PetscFunctionBegin;
  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vA));
  PetscCall(MatGetRowMaxAbs(a->A, vA, idx));

  PetscCall(VecGetArrayWrite(vA, &va));
  if (idx) {
    for (i = 0; i < m; i++) {
      if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart;
    }
  }

  PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vB));
  PetscCall(PetscMalloc1(m, &idxb));
  PetscCall(MatGetRowMaxAbs(a->B, vB, idxb));

  PetscCall(VecGetArrayWrite(v, &vv));
  PetscCall(VecGetArrayRead(vB, &vb));
  for (i = 0; i < m; i++) {
    if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) {
      vv[i] = vb[i];
      if (idx) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs);
    } else {
      vv[i] = va[i];
      if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > bs * a->garray[idxb[i] / bs] + (idxb[i] % bs)) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs);
    }
  }
  PetscCall(VecRestoreArrayWrite(v, &vv));
  PetscCall(VecRestoreArrayWrite(vA, &va));
  PetscCall(VecRestoreArrayRead(vB, &vb));
  PetscCall(PetscFree(idxb));
  PetscCall(VecDestroy(&vA));
  PetscCall(VecDestroy(&vB));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatStoreValues(aij->A));
  PetscCall(MatStoreValues(aij->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatRetrieveValues(aij->A));
  PetscCall(MatRetrieveValues(aij->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix. This is done in a non-scalable way since the
   length of colmap equals the global matrix length.
*/
PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ *B    = (Mat_SeqBAIJ *)baij->B->data;
  PetscInt     nbs  = B->nbs, i, bs = mat->rmap->bs;

  PetscFunctionBegin;
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapICreateWithSize(baij->nbs, &baij->colmap));
  for (i = 0; i < nbs; i++) PetscCall(PetscHMapISet(baij->colmap, baij->garray[i] + 1, i * bs + 1));
#else
  PetscCall(PetscCalloc1(baij->Nbs + 1, &baij->colmap));
  for (i = 0; i < nbs; i++) baij->colmap[baij->garray[i]] = i * bs + 1;
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

#define MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, orow, ocol) \
  do { \
    brow = row / bs; \
    rp   = PetscSafePointerPlusOffset(aj, ai[brow]); \
    ap   = PetscSafePointerPlusOffset(aa, bs2 * ai[brow]); \
    rmax = aimax[brow]; \
    nrow = ailen[brow]; \
    bcol = col / bs; \
    ridx = row % bs; \
    cidx = col % bs; \
    low  = 0; \
    high = nrow; \
    while (high - low > 3) { \
      t = (low + high) / 2; \
      if (rp[t] > bcol) high = t; \
      else low = t; \
    } \
    for (_i = low; _i < high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2 * _i + bs * cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else *bap = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    PetscCheck(a->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, aimax, a->nonew, MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
    PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
    rp[_i]                          = bcol; \
    ap[bs2 * _i + bs * cidx + ridx] = value; \
  a_noinsert:; \
    ailen[brow] = nrow; \
  } while (0)

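/* MatSetValues_SeqBAIJ_B_Private() below mirrors the macro above but targets the off-diagonal (B)
   block: the same binary-search-then-insert logic is applied to bi/bj/ba, and the row is grown with
   MatSeqXAIJReallocateAIJ() when a new block column has to be inserted. */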
#define MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, orow, ocol) \
  do { \
    brow = row / bs; \
    rp   = PetscSafePointerPlusOffset(bj, bi[brow]); \
    ap   = PetscSafePointerPlusOffset(ba, bs2 * bi[brow]); \
    rmax = bimax[brow]; \
    nrow = bilen[brow]; \
    bcol = col / bs; \
    ridx = row % bs; \
    cidx = col % bs; \
    low  = 0; \
    high = nrow; \
    while (high - low > 3) { \
      t = (low + high) / 2; \
      if (rp[t] > bcol) high = t; \
      else low = t; \
    } \
    for (_i = low; _i < high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2 * _i + bs * cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else *bap = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    PetscCheck(b->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B, b->mbs, bs2, nrow, brow, bcol, rmax, ba, bi, bj, rp, ap, bimax, b->nonew, MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
    PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
    rp[_i]                          = bcol; \
    ap[bs2 * _i + bs * cidx + ridx] = value; \
  b_noinsert:; \
    bilen[brow] = nrow; \
  } while (0)

PetscErrorCode MatSetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIBAIJ *baij        = (Mat_MPIBAIJ *)mat->data;
  MatScalar    value;
  PetscBool    roworiented = baij->roworiented;
  PetscInt     i, j, row, col;
  PetscInt     rstart_orig = mat->rmap->rstart;
  PetscInt     rend_orig = mat->rmap->rend, cstart_orig = mat->cmap->rstart;
  PetscInt     cend_orig = mat->cmap->rend, bs = mat->rmap->bs;

  /* Some Variables required in the macro */
  Mat          A     = baij->A;
  Mat_SeqBAIJ *a     = (Mat_SeqBAIJ *)(A)->data;
  PetscInt    *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
  MatScalar   *aa    = a->a;

  Mat          B     = baij->B;
  Mat_SeqBAIJ *b     = (Mat_SeqBAIJ *)(B)->data;
  PetscInt    *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j;
  MatScalar   *ba    = b->a;

  PetscInt   *rp, ii, nrow, _i, rmax, N, brow, bcol;
  PetscInt    low, high, t, ridx, cidx, bs2 = a->bs2;
  MatScalar  *ap, *bap;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue;
    PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    if (im[i] >= rstart_orig && im[i] < rend_orig) {
      row = im[i] - rstart_orig;
      for (j = 0; j < n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig) {
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, im[i], in[j]);
        } else if (in[j] < 0) {
          continue;
        } else {
          PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          if (mat->was_assembled) {
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] / bs + 1, 0, &col));
            col = col - 1;
#else
            col = baij->colmap[in[j] / bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ *)baij->B->data)->nonew) {
              PetscCall(MatDisAssemble_MPIBAIJ(mat));
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B     = baij->B;
              b     = (Mat_SeqBAIJ *)(B)->data;
              bimax = b->imax;
              bi    = b->i;
              bilen = b->ilen;
              bj    = b->j;
              ba    = b->a;
            } else {
              PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
              col += in[j] % bs;
            }
          } else col = in[j];
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, im[i], in[j]);
          /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */
        }
      }
    } else {
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE));
        }
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol)
{
  Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
  PetscInt          *rp, low, high, t, ii, jj, nrow, i, rmax, N;
  PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
  PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs;
  PetscBool          roworiented = a->roworiented;
  const PetscScalar *value = v;
  MatScalar         *ap, *aa = a->a, *bap;

  PetscFunctionBegin;
  rp    = aj + ai[row];
  ap    = aa + bs2 * ai[row];
  rmax  = imax[row];
  nrow  = ailen[row];
  value = v;
  low   = 0;
  high  = nrow;
  while (high - low > 7) {
    t = (low + high) / 2;
    if (rp[t] > col) high = t;
    else low = t;
  }
  for (i = low; i < high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) {
      bap = ap + bs2 * i;
      if (roworiented) {
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
          }
        } else {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
          }
        }
      } else {
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++, value += bs) {
            for (jj = 0; jj < bs; jj++) bap[jj] += value[jj];
            bap += bs;
          }
        } else {
          for (ii = 0; ii < bs; ii++, value += bs) {
            for (jj = 0; jj < bs; jj++) bap[jj] = value[jj];
            bap += bs;
          }
        }
      }
      goto noinsert2;
    }
  }
  if (nonew == 1) goto noinsert2;
  PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
  N = nrow++ - 1;
  high++;
  /* shift up all the later entries in this row */
  PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
  PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
  rp[i] = col;
  bap   = ap + bs2 * i;
  if (roworiented) {
    for (ii = 0; ii < bs; ii++) {
      for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
    }
  } else {
    for (ii = 0; ii < bs; ii++) {
      for (jj = 0; jj < bs; jj++) *bap++ = *value++;
    }
  }
noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
 This routine should be optimized so that the block copy at ** Here a copy is required ** below is not needed
 by passing additional stride information into the MatSetValuesBlocked_SeqBAIJ_Inlined() routine
*/
static PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIBAIJ       *baij        = (Mat_MPIBAIJ *)mat->data;
  const PetscScalar *value;
  MatScalar         *barray      = baij->barray;
  PetscBool          roworiented = baij->roworiented;
  PetscInt           i, j, ii, jj, row, col, rstart = baij->rstartbs;
  PetscInt           rend = baij->rendbs, cstart = baij->cstartbs, stepval;
  PetscInt           cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    PetscCall(PetscMalloc1(bs2, &barray));
    baij->barray = barray;
  }

  if (roworiented) stepval = (n - 1) * bs;
  else stepval = (m - 1) * bs;

  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue;
    PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j = 0; j < n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar *)v + i * bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar *)v + j * bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + (i * (stepval + bs) + j) * bs;
          } else {
            value = v + (j * (stepval + bs) + i) * bs;
          }
          for (ii = 0; ii < bs; ii++, value += bs + stepval) {
            for (jj = 0; jj < bs; jj++) barray[jj] = value[jj];
            barray += bs;
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend) {
          col = in[j] - cstart;
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
        } else if (in[j] < 0) {
          continue;
        } else {
          PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
          if (mat->was_assembled) {
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));

#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col));
            col = col < 1 ? -1 : (col - 1) / bs;
#else
            col = baij->colmap[in[j]] < 1 ? -1 : (baij->colmap[in[j]] - 1) / bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ *)baij->B->data)->nonew) {
              PetscCall(MatDisAssemble_MPIBAIJ(mat));
              col = in[j];
            } else PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new blocked indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
          } else col = in[j];
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
        }
      }
    } else {
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        } else {
          PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        }
      }
    }
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

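/*
   Hash-table-based MatSetValues variants. Once the nonzero pattern is frozen after the first final
   assembly (and MAT_USE_HASH_TABLE is set), every existing block is registered in a hash table that
   maps the key (block row)*Nbs + (block col) + 1 to the address of its bs x bs values, so later
   insertions locate a block without searching the row. HASH() below is a multiplicative hash based
   on the golden-ratio constant: for a table of ht_size slots the initial probe for a key is
   (PetscInt)(ht_size * frac(key * 0.6180339887)).
*/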
#define HASH_KEY 0.6180339887
#define HASH(size, key, tmp) (tmp = (key) * HASH_KEY, (PetscInt)((size) * (tmp - (PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
static PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIBAIJ *baij        = (Mat_MPIBAIJ *)mat->data;
  PetscBool    roworiented = baij->roworiented;
  PetscInt     i, j, row, col;
  PetscInt     rstart_orig = mat->rmap->rstart;
  PetscInt     rend_orig = mat->rmap->rend, Nbs = baij->Nbs;
  PetscInt     h1, key, size = baij->ht_size, bs = mat->rmap->bs, *HT = baij->ht, idx;
  PetscReal    tmp;
  MatScalar  **HD = baij->hd, value;
  PetscInt     total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (PetscDefined(USE_DEBUG)) {
      PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row");
      PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    }
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j = 0; j < n; j++) {
        col = in[j];
        if (roworiented) value = v[i * n + j];
        else value = v[i + j * m];
        /* Look up into the Hash Table */
        key = (row / bs) * Nbs + (col / bs) + 1;
        h1  = HASH(size, key, tmp);

        idx = h1;
        if (PetscDefined(USE_DEBUG)) {
          insert_ct++;
          total_ct++;
          if (HT[idx] != key) {
            for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++);
            if (idx == size) {
              for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++);
              PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
            }
          }
        } else if (HT[idx] != key) {
          for (idx = h1; (idx < size) && (HT[idx] != key); idx++);
          if (idx == size) {
            for (idx = 0; (idx < h1) && (HT[idx] != key); idx++);
            PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
          }
        }
        /* A HASH table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx] + (col % bs) * bs + (row % bs)) += value;
        else *(HD[idx] + (col % bs) * bs + (row % bs)) = value;
      }
    } else if (!baij->donotstash) {
      if (roworiented) {
        PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE));
      } else {
        PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE));
      }
    }
  }
  if (PetscDefined(USE_DEBUG)) {
    baij->ht_total_ct += total_ct;
    baij->ht_insert_ct += insert_ct;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv)
{
  Mat_MPIBAIJ       *baij        = (Mat_MPIBAIJ *)mat->data;
  PetscBool          roworiented = baij->roworiented;
  PetscInt           i, j, ii, jj, row, col;
  PetscInt           rstart = baij->rstartbs;
  PetscInt           rend = mat->rmap->rend, stepval, bs = mat->rmap->bs, bs2 = baij->bs2, nbs2 = n * bs2;
  PetscInt           h1, key, size = baij->ht_size, idx, *HT = baij->ht, Nbs = baij->Nbs;
  PetscReal          tmp;
  MatScalar        **HD = baij->hd, *baij_a;
  const PetscScalar *v_t, *value;
  PetscInt           total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct;

  PetscFunctionBegin;
  if (roworiented) stepval = (n - 1) * bs;
  else stepval = (m - 1) * bs;

  for (i = 0; i < m; i++) {
    if (PetscDefined(USE_DEBUG)) {
      PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row: %" PetscInt_FMT, im[i]);
      PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
    }
    row = im[i];
    v_t = v + i * nbs2;
    if (row >= rstart && row < rend) {
      for (j = 0; j < n; j++) {
        col = in[j];

        /* Look up into the Hash Table */
        key = row * Nbs + col + 1;
        h1  = HASH(size, key, tmp);

        idx = h1;
        if (PetscDefined(USE_DEBUG)) {
          total_ct++;
          insert_ct++;
          if (HT[idx] != key) {
            for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++);
            if (idx == size) {
              for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++);
              PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
            }
          }
        } else if (HT[idx] != key) {
          for (idx = h1; (idx < size) && (HT[idx] != key); idx++);
          if (idx == size) {
            for (idx = 0; (idx < h1) && (HT[idx] != key); idx++);
            PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
          }
        }
        baij_a = HD[idx];
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t += bs;
          if (addv == ADD_VALUES) {
            for (ii = 0; ii < bs; ii++, value += stepval) {
              for (jj = ii; jj < bs2; jj += bs) baij_a[jj] += *value++;
            }
          } else {
            for (ii = 0; ii < bs; ii++, value += stepval) {
              for (jj = ii; jj < bs2; jj += bs) baij_a[jj] = *value++;
            }
          }
        } else {
          value = v + j * (stepval + bs) * bs + i * bs;
          if (addv == ADD_VALUES) {
            for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) {
              for (jj = 0; jj < bs; jj++) baij_a[jj] += *value++;
            }
          } else {
            for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) {
              for (jj = 0; jj < bs; jj++) baij_a[jj] = *value++;
            }
          }
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        } else {
          PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        }
      }
    }
  }
  if (PetscDefined(USE_DEBUG)) {
    baij->ht_total_ct += total_ct;
    baij->ht_insert_ct += insert_ct;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatGetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[])
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  PetscInt     bs = mat->rmap->bs, i, j, bsrstart = mat->rmap->rstart, bsrend = mat->rmap->rend;
  PetscInt     bscstart = mat->cmap->rstart, bscend = mat->cmap->rend, row, col, data;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j = 0; j < n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          col = idxn[j] - bscstart;
          PetscCall(MatGetValues_SeqBAIJ(baij->A, 1, &row, 1, &col, v + i * n + j));
        } else {
          if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
          PetscCall(PetscHMapIGetWithDefault(baij->colmap, idxn[j] / bs + 1, 0, &data));
          data--;
#else
          data = baij->colmap[idxn[j] / bs] - 1;
#endif
          if ((data < 0) || (baij->garray[data / bs] != idxn[j] / bs)) *(v + i * n + j) = 0.0;
          else {
            col = data + idxn[j] % bs;
            PetscCall(MatGetValues_SeqBAIJ(baij->B, 1, &row, 1, &col, v + i * n + j));
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatNorm_MPIBAIJ(Mat mat, NormType type, PetscReal *nrm)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ *amat = (Mat_SeqBAIJ *)baij->A->data, *bmat = (Mat_SeqBAIJ *)baij->B->data;
  PetscInt     i, j, bs2 = baij->bs2, bs = baij->A->rmap->bs, nz, row, col;
  PetscReal    sum = 0.0;
  MatScalar   *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    PetscCall(MatNorm(baij->A, type, nrm));
  } else {
    if (type == NORM_FROBENIUS) {
      v  = amat->a;
      nz = amat->nz * bs2;
      for (i = 0; i < nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      v  = bmat->a;
      nz = bmat->nz * bs2;
      for (i = 0; i < nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      *nrm = PetscSqrtReal(*nrm);
    } else if (type == NORM_1) { /* max column sum */
      PetscReal *tmp, *tmp2;
      PetscInt  *jj, *garray = baij->garray, cstart = baij->rstartbs;
      PetscCall(PetscCalloc1(mat->cmap->N, &tmp));
      PetscCall(PetscMalloc1(mat->cmap->N, &tmp2));
      v  = amat->a;
      jj = amat->j;
      for (i = 0; i < amat->nz; i++) {
        for (j = 0; j < bs; j++) {
          col = bs * (cstart + *jj) + j; /* column index */
          for (row = 0; row < bs; row++) {
            tmp[col] += PetscAbsScalar(*v);
            v++;
          }
        }
        jj++;
      }
      v  = bmat->a;
      jj = bmat->j;
      for (i = 0; i < bmat->nz; i++) {
        for (j = 0; j < bs; j++) {
          col = bs * garray[*jj] + j;
          for (row = 0; row < bs; row++) {
            tmp[col] += PetscAbsScalar(*v);
            v++;
          }
        }
        jj++;
      }
      PetscCall(MPIU_Allreduce(tmp, tmp2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      *nrm = 0.0;
      for (j = 0; j < mat->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      PetscCall(PetscFree(tmp));
      PetscCall(PetscFree(tmp2));
    } else if (type == NORM_INFINITY) { /* max row sum */
      PetscReal *sums;
      PetscCall(PetscMalloc1(bs, &sums));
      sum = 0.0;
      for (j = 0; j < amat->mbs; j++) {
        for (row = 0; row < bs; row++) sums[row] = 0.0;
        v  = amat->a + bs2 * amat->i[j];
        nz = amat->i[j + 1] - amat->i[j];
        for (i = 0; i < nz; i++) {
          for (col = 0; col < bs; col++) {
            for (row = 0; row < bs; row++) {
              sums[row] += PetscAbsScalar(*v);
              v++;
            }
          }
        }
        v  = bmat->a + bs2 * bmat->i[j];
        nz = bmat->i[j + 1] - bmat->i[j];
        for (i = 0; i < nz; i++) {
          for (col = 0; col < bs; col++) {
            for (row = 0; row < bs; row++) {
              sums[row] += PetscAbsScalar(*v);
              v++;
            }
          }
        }
        for (row = 0; row < bs; row++) {
          if (sums[row] > sum) sum = sums[row];
        }
      }
      PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat)));
      PetscCall(PetscFree(sums));
    } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for this norm yet");
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  Creates and fills the hash table.
  This table is created only once.
  If new entries need to be added to the matrix
  then the hash table has to be destroyed and
  recreated.
*/
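/* Collisions are resolved by open addressing: starting from HASH(ht_size, key, tmp) the code probes
   slots (h1 + k) % ht_size linearly until an empty one is found. The table is oversized by the
   user-settable factor (baij->ht_fact, default 1.39) relative to the number of nonzero blocks so
   that probe sequences stay short. */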
static PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat, PetscReal factor)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat          A = baij->A, B = baij->B;
  Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data;
  PetscInt     i, j, k, nz = a->nz + b->nz, h1, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j;
  PetscInt     ht_size, bs2 = baij->bs2, rstart = baij->rstartbs;
  PetscInt     cstart = baij->cstartbs, *garray = baij->garray, row, col, Nbs = baij->Nbs;
  PetscInt    *HT, key;
  MatScalar  **HD;
  PetscReal    tmp;
#if defined(PETSC_USE_INFO)
  PetscInt ct = 0, max = 0;
#endif

  PetscFunctionBegin;
  if (baij->ht) PetscFunctionReturn(PETSC_SUCCESS);

  baij->ht_size = (PetscInt)(factor * nz);
  ht_size       = baij->ht_size;

  /* Allocate Memory for Hash Table */
  PetscCall(PetscCalloc2(ht_size, &baij->hd, ht_size, &baij->ht));
  HD = baij->hd;
  HT = baij->ht;

  /* Loop Over A */
  for (i = 0; i < a->mbs; i++) {
    for (j = ai[i]; j < ai[i + 1]; j++) {
      row = i + rstart;
      col = aj[j] + cstart;

      key = row * Nbs + col + 1;
      h1  = HASH(ht_size, key, tmp);
      for (k = 0; k < ht_size; k++) {
        if (!HT[(h1 + k) % ht_size]) {
          HT[(h1 + k) % ht_size] = key;
          HD[(h1 + k) % ht_size] = a->a + j * bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }
  /* Loop Over B */
  for (i = 0; i < b->mbs; i++) {
    for (j = bi[i]; j < bi[i + 1]; j++) {
      row = i + rstart;
      col = garray[bj[j]];
      key = row * Nbs + col + 1;
      h1  = HASH(ht_size, key, tmp);
      for (k = 0; k < ht_size; k++) {
        if (!HT[(h1 + k) % ht_size]) {
          HT[(h1 + k) % ht_size] = key;
          HD[(h1 + k) % ht_size] = b->a + j * bs2;
          break;
#if defined(PETSC_USE_INFO)
        } else {
          ct++;
#endif
        }
      }
#if defined(PETSC_USE_INFO)
      if (k > max) max = k;
#endif
    }
  }

  /* Print Summary */
#if defined(PETSC_USE_INFO)
  for (i = 0, j = 0; i < ht_size; i++) {
    if (HT[i]) j++;
  }
  PetscCall(PetscInfo(mat, "Average Search = %5.2g,max search = %" PetscInt_FMT "\n", (!j) ? (double)0.0 : (double)(((PetscReal)(ct + j)) / (double)j), max));
#endif
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  PetscInt     nstash, reallocs;

  PetscFunctionBegin;
  if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(PETSC_SUCCESS);

  PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
  PetscCall(MatStashScatterBegin_Private(mat, &mat->bstash, baij->rangebs));
  PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
  PetscCall(PetscInfo(mat, "Stash has %" PetscInt_FMT " entries,uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscCall(MatStashGetInfo_Private(&mat->bstash, &nstash, &reallocs));
  PetscCall(PetscInfo(mat, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ *a    = (Mat_SeqBAIJ *)baij->A->data;
  PetscInt     i, j, rstart, ncols, flg, bs2 = baij->bs2;
  PetscInt    *row, *col;
  PetscBool    r1, r2, r3, other_disassembled;
  MatScalar   *val;
  PetscMPIInt  n;

  PetscFunctionBegin;
  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  if (!baij->donotstash && !mat->nooffprocentries) {
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
    /* Now process the block-stash. Since the values are stashed column-oriented,
       clear the row-oriented flags before calling MatSetValuesBlocked(), and
       restore the original flags afterwards */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ *)baij->B->data)->roworiented;

    baij->roworiented                           = PETSC_FALSE;
    a->roworiented                              = PETSC_FALSE;
    ((Mat_SeqBAIJ *)baij->B->data)->roworiented = PETSC_FALSE;
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->bstash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        PetscCall(MatSetValuesBlocked_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i * bs2, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->bstash));

    baij->roworiented                           = r1;
    a->roworiented                              = r2;
    ((Mat_SeqBAIJ *)baij->B->data)->roworiented = r3;
  }

  PetscCall(MatAssemblyBegin(baij->A, mode));
  PetscCall(MatAssemblyEnd(baij->A, mode));

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ *)baij->B->data)->nonew) {
    PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !other_disassembled) PetscCall(MatDisAssemble_MPIBAIJ(mat));
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIBAIJ(mat));
  PetscCall(MatAssemblyBegin(baij->B, mode));
  PetscCall(MatAssemblyEnd(baij->B, mode));

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    PetscCall(PetscInfo(mat, "Average Hash Table Search in MatSetValues = %5.2f\n", (double)((PetscReal)baij->ht_total_ct) / baij->ht_insert_ct));

    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    PetscCall(MatCreateHashTable_MPIBAIJ_Private(mat, baij->ht_fact));

    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));

  baij->rowvalues = NULL;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ *)baij->A->data)->nonew) {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

extern PetscErrorCode MatView_SeqBAIJ(Mat, PetscViewer);
#include <petscdraw.h>
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer)
{
  Mat_MPIBAIJ      *baij = (Mat_MPIBAIJ *)mat->data;
  PetscMPIInt       rank = baij->rank;
  PetscInt          bs   = mat->rmap->bs;
  PetscBool         iascii, isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  if (iascii) {
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                   mat->rmap->bs, (double)info.memory));
      PetscCall(MatGetInfo(baij->A, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(MatGetInfo(baij->B, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(baij->Mvctx, viewer));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscCall(PetscViewerASCIIPrintf(viewer, " block size is %" PetscInt_FMT "\n", bs));
      PetscFunctionReturn(PETSC_SUCCESS);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(PETSC_SUCCESS);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(PETSC_SUCCESS);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    Mat_SeqBAIJ *Aloc;
    PetscInt     M = mat->rmap->N, N = mat->cmap->N, *ai, *aj, col, i, j, k, *rvals, mbs = baij->mbs;
    MatScalar   *a;
    const char  *matname;

    /* Here we are creating a temporary matrix, so we will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
    if (rank == 0) {
      PetscCall(MatSetSizes(A, M, N, M, N));
    } else {
      PetscCall(MatSetSizes(A, 0, 0, M, N));
    }
    PetscCall(MatSetType(A, MATMPIBAIJ));
    PetscCall(MatMPIBAIJSetPreallocation(A, mat->rmap->bs, 0, NULL, 0, NULL));
    PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ *)baij->A->data;
    ai   = Aloc->i;
    aj   = Aloc->j;
    a    = Aloc->a;
    PetscCall(PetscMalloc1(bs, &rvals));

    for (i = 0; i < mbs; i++) {
      rvals[0] = bs * (baij->rstartbs + i);
      for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
      for (j = ai[i]; j < ai[i + 1]; j++) {
        col = (baij->cstartbs + aj[j]) * bs;
        for (k = 0; k < bs; k++) {
          PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
          col++;
          a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ *)baij->B->data;
    ai   = Aloc->i;
    aj   = Aloc->j;
    a    = Aloc->a;
    for (i = 0; i < mbs; i++) {
      rvals[0] = bs * (baij->rstartbs + i);
      for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
      for (j = ai[i]; j < ai[i + 1]; j++) {
        col = baij->garray[aj[j]] * bs;
        for (k = 0; k < bs; k++) {
          PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
          col++;
          a += bs;
        }
      }
    }
    PetscCall(PetscFree(rvals));
    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    if (((PetscObject)mat)->name) PetscCall(PetscObjectGetName((PetscObject)mat, &matname));
    if (rank == 0) {
      if (((PetscObject)mat)->name) PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIBAIJ *)A->data)->A, matname));
      PetscCall(MatView_SeqBAIJ(((Mat_MPIBAIJ *)A->data)->A, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

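/*
   The binary viewer below follows the usual PETSc Mat binary layout: a 4-entry header
   (MAT_FILE_CLASSID, M, N, total number of nonzeros), then the per-row nonzero counts, then all
   column indices, then all values, with each process contributing its local rows in order. For
   each row, off-diagonal (B) entries whose global column precedes the diagonal block are written
   first, then the diagonal-block (A) entries, then the remaining B entries, so the columns of each
   row come out sorted.
*/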
/* Used for both MPIBAIJ and MPISBAIJ matrices */
PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat, PetscViewer viewer)
{
  Mat_MPIBAIJ    *aij    = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ    *A      = (Mat_SeqBAIJ *)aij->A->data;
  Mat_SeqBAIJ    *B      = (Mat_SeqBAIJ *)aij->B->data;
  const PetscInt *garray = aij->garray;
  PetscInt        header[4], M, N, m, rs, cs, bs, cnt, i, j, ja, jb, k, l;
  PetscInt64      nz, hnz;
  PetscInt       *rowlens, *colidxs;
  PetscScalar    *matvals;
  PetscMPIInt     rank;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  M  = mat->rmap->N;
  N  = mat->cmap->N;
  m  = mat->rmap->n;
  rs = mat->rmap->rstart;
  cs = mat->cmap->rstart;
  bs = mat->rmap->bs;
  nz = bs * bs * (A->nz + B->nz);

  /* write matrix header */
  header[0] = MAT_FILE_CLASSID;
  header[1] = M;
  header[2] = N;
  PetscCallMPI(MPI_Reduce(&nz, &hnz, 1, MPIU_INT64, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
  if (rank == 0) PetscCall(PetscIntCast(hnz, &header[3]));
  PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));

  /* fill in and store row lengths */
  PetscCall(PetscMalloc1(m, &rowlens));
  for (cnt = 0, i = 0; i < A->mbs; i++)
    for (j = 0; j < bs; j++) rowlens[cnt++] = bs * (A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i]);
  PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
  PetscCall(PetscFree(rowlens));

  /* fill in and store column indices */
  PetscCall(PetscMalloc1(nz, &colidxs));
  for (cnt = 0, i = 0; i < A->mbs; i++) {
    for (k = 0; k < bs; k++) {
      for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
        if (garray[B->j[jb]] > cs / bs) break;
        for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l;
      }
      for (ja = A->i[i]; ja < A->i[i + 1]; ja++)
        for (l = 0; l < bs; l++) colidxs[cnt++] = bs * A->j[ja] + l + cs;
      for (; jb < B->i[i + 1]; jb++)
        for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l;
    }
  }
  PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt64_FMT, cnt, nz);
  PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_INT));
  PetscCall(PetscFree(colidxs));

  /* fill in and store nonzero values */
  PetscCall(PetscMalloc1(nz, &matvals));
  for (cnt = 0, i = 0; i < A->mbs; i++) {
    for (k = 0; k < bs; k++) {
      for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
        if (garray[B->j[jb]] > cs / bs) break;
        for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k];
      }
      for (ja = A->i[i]; ja < A->i[i + 1]; ja++)
        for (l = 0; l < bs; l++) matvals[cnt++] = A->a[bs * (bs * ja + l) + k];
      for (; jb < B->i[i + 1]; jb++)
        for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k];
    }
  }
  PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_SCALAR));
  PetscCall(PetscFree(matvals));

  /* write block size option to the viewer's .info file */
  PetscCall(MatView_Binary_BlockSizes(mat, viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatView_MPIBAIJ(Mat mat, PetscViewer viewer)
{
  PetscBool iascii, isdraw, issocket, isbinary;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  if (iascii || isdraw || issocket) {
    PetscCall(MatView_MPIBAIJ_ASCIIorDraworSocket(mat, viewer));
  } else if (isbinary) PetscCall(MatView_MPIBAIJ_Binary(mat, viewer));
  PetscFunctionReturn(PETSC_SUCCESS);
}

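/* The multiply routines below overlap communication with computation: the scatter of the needed
   off-process (ghost) entries of xx into a->lvec is started, the diagonal block a->A is applied to
   the local part of xx, and only then is the scatter completed so that the off-diagonal block a->B
   can be applied to a->lvec and added to the result. */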
static PetscErrorCode MatMult_MPIBAIJ(Mat A, Vec xx, Vec yy)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
  PetscInt     nt;

  PetscFunctionBegin;
  PetscCall(VecGetLocalSize(xx, &nt));
  PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A and xx");
  PetscCall(VecGetLocalSize(yy, &nt));
  PetscCheck(nt == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A and yy");
  PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*a->A->ops->mult)(a->A, xx, yy));
  PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*a->B->ops->multadd)(a->B, a->lvec, yy, yy));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
  PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A, Vec xx, Vec yy)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  /* do nondiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
  /* do local part */
  PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
  /* add partial results together */
  PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
  PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  /* do nondiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
  /* do local part */
  PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
  /* add partial results together */
  PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
  PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
static PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A, Vec v)
{
  PetscFunctionBegin;
  PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
  PetscCall(MatGetDiagonal(((Mat_MPIBAIJ *)A->data)->A, v));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatScale_MPIBAIJ(Mat A, PetscScalar aa)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatScale(a->A, aa));
  PetscCall(MatScale(a->B, aa));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatGetRow_MPIBAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data;
  PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
  PetscInt     bs = matin->rmap->bs, bs2 = mat->bs2, i, *cworkA, *cworkB, **pcA, **pcB;
  PetscInt     nztot, nzA, nzB, lrow, brstart = matin->rmap->rstart, brend = matin->rmap->rend;
  PetscInt    *cmap, *idx_p, cstart = mat->cstartbs;

  PetscFunctionBegin;
  PetscCheck(row >= brstart && row < brend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local rows");
  PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *)mat->A->data, *Ba = (Mat_SeqBAIJ *)mat->B->data;
    PetscInt     max = 1, mbs = mat->mbs, tmp;
    for (i = 0; i < mbs; i++) {
      tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    PetscCall(PetscMalloc2(max * bs2, &mat->rowvalues, max * bs2, &mat->rowindices));
  }
  lrow = row - brstart;

  pvA = &vworkA;
  pcA = &cworkA;
  pvB = &vworkB;
  pcB = &cworkB;
  if (!v) {
    pvA = NULL;
    pvB = NULL;
  }
  if (!idx) {
    pcA = NULL;
    if (!v) pcB = NULL;
  }
  PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i = 0; i < nzB; i++) {
          if (cmap[cworkB[i] / bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
        for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
        } else {
          for (i = 0; i < nzB; i++) {
            if (cmap[cworkB[i] / bs] < cstart) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
            else break;
          }
          imark = i;
        }
        for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart * bs + cworkA[i];
        for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
      }
    } else {
      if (idx) *idx = NULL;
      if (v) *v = NULL;
    }
  }
  *nz = nztot;
  PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
  PetscFunctionReturn(PETSC_SUCCESS);
}

PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCheck(baij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow not called");
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatZeroEntries(l->A));
  PetscCall(MatZeroEntries(l->B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin, MatInfoType flag, MatInfo *info)
{
  Mat_MPIBAIJ   *a = (Mat_MPIBAIJ *)matin->data;
  Mat            A = a->A, B = a->B;
  PetscLogDouble isend[5], irecv[5];

  PetscFunctionBegin;
  info->block_size = (PetscReal)matin->rmap->bs;

  PetscCall(MatGetInfo(A, MAT_LOCAL, info));

  isend[0] = info->nz_used;
  isend[1] = info->nz_allocated;
  isend[2] = info->nz_unneeded;
  isend[3] = info->memory;
  isend[4] = info->mallocs;

  PetscCall(MatGetInfo(B, MAT_LOCAL, info));

  isend[0] += info->nz_used;
  isend[1] += info->nz_allocated;
  isend[2] += info->nz_unneeded;
  isend[3] += info->memory;
  isend[4] += info->mallocs;

  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_ARG_WRONG, "Unknown MatInfoType argument %d", (int)flag);
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatSetOption_MPIBAIJ(Mat A, MatOption op, PetscBool flg)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A, 1);
    a->roworiented = flg;

    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_SORTED_FULL:
    PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_USE_HASH_TABLE:
    a->ht_flag = flg;
    a->ht_fact = 1.39;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SUBMAT_SINGLEIS:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
  case MAT_SPD_ETERNAL:
    /* if the diagonal matrix is square it inherits some of the properties above */
    break;
  default:
    SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "unknown option %d", op);
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

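/* MatTranspose below builds the transpose explicitly: for every bs x bs block of the A and B parts
   it calls MatSetValues_MPIBAIJ() on the new matrix with the block row and block column roles
   swapped (one column of the original block at a time), then assembles. No preallocation
   information is available for the result, so the insertion may allocate as it goes. */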
static PetscErrorCode MatTranspose_MPIBAIJ(Mat A, MatReuse reuse, Mat *matout)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)A->data;
  Mat_SeqBAIJ *Aloc;
  Mat          B;
  PetscInt     M = A->rmap->N, N = A->cmap->N, *ai, *aj, i, *rvals, j, k, col;
  PetscInt     bs = A->rmap->bs, mbs = baij->mbs;
  MatScalar   *a;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
    PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
    /* Do not know preallocation information, but must set block size */
    PetscCall(MatMPIBAIJSetPreallocation(B, A->rmap->bs, PETSC_DECIDE, NULL, PETSC_DECIDE, NULL));
  } else {
    B = *matout;
  }

  /* copy over the A part */
  Aloc = (Mat_SeqBAIJ *)baij->A->data;
  ai   = Aloc->i;
  aj   = Aloc->j;
  a    = Aloc->a;
  PetscCall(PetscMalloc1(bs, &rvals));

  for (i = 0; i < mbs; i++) {
    rvals[0] = bs * (baij->rstartbs + i);
    for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
    for (j = ai[i]; j < ai[i + 1]; j++) {
      col = (baij->cstartbs + aj[j]) * bs;
      for (k = 0; k < bs; k++) {
        PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES));

        col++;
        a += bs;
      }
    }
  }
  /* copy over the B part */
  Aloc = (Mat_SeqBAIJ *)baij->B->data;
  ai   = Aloc->i;
  aj   = Aloc->j;
  a    = Aloc->a;
  for (i = 0; i < mbs; i++) {
    rvals[0] = bs * (baij->rstartbs + i);
    for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
    for (j = ai[i]; j < ai[i + 1]; j++) {
      col = baij->garray[aj[j]] * bs;
      for (k = 0; k < bs; k++) {
        PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES));
        col++;
        a += bs;
      }
    }
  }
  PetscCall(PetscFree(rvals));
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));

  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) *matout = B;
  else PetscCall(MatHeaderMerge(A, &B));
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat, Vec ll, Vec rr)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat          a = baij->A, b = baij->B;
  PetscInt     s1, s2, s3;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(mat, &s2, &s3));
  if (rr) {
    PetscCall(VecGetLocalSize(rr, &s1));
    PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size");
    /* Overlap communication with computation. */
*/ 1571 PetscCall(VecScatterBegin(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1572 } 1573 if (ll) { 1574 PetscCall(VecGetLocalSize(ll, &s1)); 1575 PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size"); 1576 PetscUseTypeMethod(b, diagonalscale, ll, NULL); 1577 } 1578 /* scale the diagonal block */ 1579 PetscUseTypeMethod(a, diagonalscale, ll, rr); 1580 1581 if (rr) { 1582 /* Do a scatter end and then right scale the off-diagonal block */ 1583 PetscCall(VecScatterEnd(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1584 PetscUseTypeMethod(b, diagonalscale, NULL, baij->lvec); 1585 } 1586 PetscFunctionReturn(PETSC_SUCCESS); 1587 } 1588 1589 static PetscErrorCode MatZeroRows_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 1590 { 1591 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data; 1592 PetscInt *lrows; 1593 PetscInt r, len; 1594 PetscBool cong; 1595 1596 PetscFunctionBegin; 1597 /* get locally owned rows */ 1598 PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows)); 1599 /* fix right-hand side if needed */ 1600 if (x && b) { 1601 const PetscScalar *xx; 1602 PetscScalar *bb; 1603 1604 PetscCall(VecGetArrayRead(x, &xx)); 1605 PetscCall(VecGetArray(b, &bb)); 1606 for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]]; 1607 PetscCall(VecRestoreArrayRead(x, &xx)); 1608 PetscCall(VecRestoreArray(b, &bb)); 1609 } 1610 1611 /* actually zap the local rows */ 1612 /* 1613 Zero the required rows. If the "diagonal block" of the matrix 1614 is square and the user wishes to set the diagonal we use separate 1615 code so that MatSetValues() is not called for each diagonal allocating 1616 new memory, thus calling lots of mallocs and slowing things down. 
1617 1618 */ 1619 /* must zero l->B before l->A because the (diag) case below may put values into l->B*/ 1620 PetscCall(MatZeroRows_SeqBAIJ(l->B, len, lrows, 0.0, NULL, NULL)); 1621 PetscCall(MatHasCongruentLayouts(A, &cong)); 1622 if ((diag != 0.0) && cong) { 1623 PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, diag, NULL, NULL)); 1624 } else if (diag != 0.0) { 1625 PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL)); 1626 PetscCheck(!((Mat_SeqBAIJ *)l->A->data)->nonew, PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options MAT_NEW_NONZERO_LOCATIONS, MAT_NEW_NONZERO_LOCATION_ERR, and MAT_NEW_NONZERO_ALLOCATION_ERR"); 1627 for (r = 0; r < len; ++r) { 1628 const PetscInt row = lrows[r] + A->rmap->rstart; 1629 PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES)); 1630 } 1631 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 1632 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 1633 } else { 1634 PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL)); 1635 } 1636 PetscCall(PetscFree(lrows)); 1637 1638 /* only change matrix nonzero state if pattern was allowed to be changed */ 1639 if (!((Mat_SeqBAIJ *)l->A->data)->keepnonzeropattern || !((Mat_SeqBAIJ *)l->A->data)->nonew) { 1640 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1641 PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A))); 1642 } 1643 PetscFunctionReturn(PETSC_SUCCESS); 1644 } 1645 1646 static PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) 1647 { 1648 Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data; 1649 PetscMPIInt n = A->rmap->n, p = 0; 1650 PetscInt i, j, k, r, len = 0, row, col, count; 1651 PetscInt *lrows, *owners = A->rmap->range; 1652 PetscSFNode *rrows; 1653 PetscSF sf; 1654 const PetscScalar *xx; 1655 PetscScalar *bb, *mask; 1656 Vec xmask, lmask; 1657 Mat_SeqBAIJ *baij = (Mat_SeqBAIJ *)l->B->data; 1658 PetscInt bs = A->rmap->bs, bs2 = baij->bs2; 1659 PetscScalar *aa; 1660 1661 PetscFunctionBegin; 1662 /* Create SF where leaves are input rows and roots are owned rows */ 1663 PetscCall(PetscMalloc1(n, &lrows)); 1664 for (r = 0; r < n; ++r) lrows[r] = -1; 1665 PetscCall(PetscMalloc1(N, &rrows)); 1666 for (r = 0; r < N; ++r) { 1667 const PetscInt idx = rows[r]; 1668 PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N); 1669 if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */ 1670 PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p)); 1671 } 1672 rrows[r].rank = p; 1673 rrows[r].index = rows[r] - owners[p]; 1674 } 1675 PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf)); 1676 PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER)); 1677 /* Collect flags for rows to be zeroed */ 1678 PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR)); 1679 PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR)); 1680 PetscCall(PetscSFDestroy(&sf)); 1681 /* Compress and put in row numbers */ 1682 for (r = 0; r < n; ++r) 1683 if (lrows[r] >= 0) lrows[len++] = r; 1684 /* zero diagonal part of matrix */ 1685 PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b)); 1686 /* handle off-diagonal part of matrix */ 1687 PetscCall(MatCreateVecs(A, &xmask, NULL)); 1688 
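/* xmask gets a 1 in every locally owned row/column that is being zeroed; scattering it forward through Mvctx
   fills lmask, which is laid out like lvec (one entry per ghost column), so the loops below can tell which
   off-diagonal columns correspond to rows zeroed on other processes */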
PetscCall(VecDuplicate(l->lvec, &lmask)); 1689 PetscCall(VecGetArray(xmask, &bb)); 1690 for (i = 0; i < len; i++) bb[lrows[i]] = 1; 1691 PetscCall(VecRestoreArray(xmask, &bb)); 1692 PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD)); 1693 PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD)); 1694 PetscCall(VecDestroy(&xmask)); 1695 if (x) { 1696 PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1697 PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1698 PetscCall(VecGetArrayRead(l->lvec, &xx)); 1699 PetscCall(VecGetArray(b, &bb)); 1700 } 1701 PetscCall(VecGetArray(lmask, &mask)); 1702 /* remove zeroed rows of off-diagonal matrix */ 1703 for (i = 0; i < len; ++i) { 1704 row = lrows[i]; 1705 count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs; 1706 aa = ((MatScalar *)baij->a) + baij->i[row / bs] * bs2 + (row % bs); 1707 for (k = 0; k < count; ++k) { 1708 aa[0] = 0.0; 1709 aa += bs; 1710 } 1711 } 1712 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1713 for (i = 0; i < l->B->rmap->N; ++i) { 1714 row = i / bs; 1715 for (j = baij->i[row]; j < baij->i[row + 1]; ++j) { 1716 for (k = 0; k < bs; ++k) { 1717 col = bs * baij->j[j] + k; 1718 if (PetscAbsScalar(mask[col])) { 1719 aa = ((MatScalar *)baij->a) + j * bs2 + (i % bs) + bs * k; 1720 if (x) bb[i] -= aa[0] * xx[col]; 1721 aa[0] = 0.0; 1722 } 1723 } 1724 } 1725 } 1726 if (x) { 1727 PetscCall(VecRestoreArray(b, &bb)); 1728 PetscCall(VecRestoreArrayRead(l->lvec, &xx)); 1729 } 1730 PetscCall(VecRestoreArray(lmask, &mask)); 1731 PetscCall(VecDestroy(&lmask)); 1732 PetscCall(PetscFree(lrows)); 1733 1734 /* only change matrix nonzero state if pattern was allowed to be changed */ 1735 if (!((Mat_SeqBAIJ *)l->A->data)->nonew) { 1736 PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate; 1737 PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A))); 1738 } 1739 PetscFunctionReturn(PETSC_SUCCESS); 1740 } 1741 1742 static PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) 1743 { 1744 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1745 1746 PetscFunctionBegin; 1747 PetscCall(MatSetUnfactored(a->A)); 1748 PetscFunctionReturn(PETSC_SUCCESS); 1749 } 1750 1751 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat, MatDuplicateOption, Mat *); 1752 1753 static PetscErrorCode MatEqual_MPIBAIJ(Mat A, Mat B, PetscBool *flag) 1754 { 1755 Mat_MPIBAIJ *matB = (Mat_MPIBAIJ *)B->data, *matA = (Mat_MPIBAIJ *)A->data; 1756 Mat a, b, c, d; 1757 PetscBool flg; 1758 1759 PetscFunctionBegin; 1760 a = matA->A; 1761 b = matA->B; 1762 c = matB->A; 1763 d = matB->B; 1764 1765 PetscCall(MatEqual(a, c, &flg)); 1766 if (flg) PetscCall(MatEqual(b, d, &flg)); 1767 PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A))); 1768 PetscFunctionReturn(PETSC_SUCCESS); 1769 } 1770 1771 static PetscErrorCode MatCopy_MPIBAIJ(Mat A, Mat B, MatStructure str) 1772 { 1773 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1774 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 1775 1776 PetscFunctionBegin; 1777 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
*/ 1778 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1779 PetscCall(MatCopy_Basic(A, B, str)); 1780 } else { 1781 PetscCall(MatCopy(a->A, b->A, str)); 1782 PetscCall(MatCopy(a->B, b->B, str)); 1783 } 1784 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 1785 PetscFunctionReturn(PETSC_SUCCESS); 1786 } 1787 1788 PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz) 1789 { 1790 PetscInt bs = Y->rmap->bs, m = Y->rmap->N / bs; 1791 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data; 1792 Mat_SeqBAIJ *y = (Mat_SeqBAIJ *)Y->data; 1793 1794 PetscFunctionBegin; 1795 PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz)); 1796 PetscFunctionReturn(PETSC_SUCCESS); 1797 } 1798 1799 static PetscErrorCode MatAXPY_MPIBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str) 1800 { 1801 Mat_MPIBAIJ *xx = (Mat_MPIBAIJ *)X->data, *yy = (Mat_MPIBAIJ *)Y->data; 1802 PetscBLASInt bnz, one = 1; 1803 Mat_SeqBAIJ *x, *y; 1804 PetscInt bs2 = Y->rmap->bs * Y->rmap->bs; 1805 1806 PetscFunctionBegin; 1807 if (str == SAME_NONZERO_PATTERN) { 1808 PetscScalar alpha = a; 1809 x = (Mat_SeqBAIJ *)xx->A->data; 1810 y = (Mat_SeqBAIJ *)yy->A->data; 1811 PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz)); 1812 PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one)); 1813 x = (Mat_SeqBAIJ *)xx->B->data; 1814 y = (Mat_SeqBAIJ *)yy->B->data; 1815 PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz)); 1816 PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one)); 1817 PetscCall(PetscObjectStateIncrease((PetscObject)Y)); 1818 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1819 PetscCall(MatAXPY_Basic(Y, a, X, str)); 1820 } else { 1821 Mat B; 1822 PetscInt *nnz_d, *nnz_o, bs = Y->rmap->bs; 1823 PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d)); 1824 PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o)); 1825 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B)); 1826 PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name)); 1827 PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N)); 1828 PetscCall(MatSetBlockSizesFromMats(B, Y, Y)); 1829 PetscCall(MatSetType(B, MATMPIBAIJ)); 1830 PetscCall(MatAXPYGetPreallocation_SeqBAIJ(yy->A, xx->A, nnz_d)); 1831 PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o)); 1832 PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, nnz_d, 0, nnz_o)); 1833 /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! 
*/ 1834 PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str)); 1835 PetscCall(MatHeaderMerge(Y, &B)); 1836 PetscCall(PetscFree(nnz_d)); 1837 PetscCall(PetscFree(nnz_o)); 1838 } 1839 PetscFunctionReturn(PETSC_SUCCESS); 1840 } 1841 1842 static PetscErrorCode MatConjugate_MPIBAIJ(Mat mat) 1843 { 1844 PetscFunctionBegin; 1845 if (PetscDefined(USE_COMPLEX)) { 1846 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)mat->data; 1847 1848 PetscCall(MatConjugate_SeqBAIJ(a->A)); 1849 PetscCall(MatConjugate_SeqBAIJ(a->B)); 1850 } 1851 PetscFunctionReturn(PETSC_SUCCESS); 1852 } 1853 1854 static PetscErrorCode MatRealPart_MPIBAIJ(Mat A) 1855 { 1856 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1857 1858 PetscFunctionBegin; 1859 PetscCall(MatRealPart(a->A)); 1860 PetscCall(MatRealPart(a->B)); 1861 PetscFunctionReturn(PETSC_SUCCESS); 1862 } 1863 1864 static PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A) 1865 { 1866 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1867 1868 PetscFunctionBegin; 1869 PetscCall(MatImaginaryPart(a->A)); 1870 PetscCall(MatImaginaryPart(a->B)); 1871 PetscFunctionReturn(PETSC_SUCCESS); 1872 } 1873 1874 static PetscErrorCode MatCreateSubMatrix_MPIBAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat) 1875 { 1876 IS iscol_local; 1877 PetscInt csize; 1878 1879 PetscFunctionBegin; 1880 PetscCall(ISGetLocalSize(iscol, &csize)); 1881 if (call == MAT_REUSE_MATRIX) { 1882 PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local)); 1883 PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 1884 } else { 1885 PetscCall(ISAllGather(iscol, &iscol_local)); 1886 } 1887 PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, isrow, iscol_local, csize, call, newmat, PETSC_FALSE)); 1888 if (call == MAT_INITIAL_MATRIX) { 1889 PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local)); 1890 PetscCall(ISDestroy(&iscol_local)); 1891 } 1892 PetscFunctionReturn(PETSC_SUCCESS); 1893 } 1894 1895 /* 1896 Not great since it makes two copies of the submatrix, first an SeqBAIJ 1897 in local and then by concatenating the local matrices the end result. 1898 Writing it directly would be much like MatCreateSubMatrices_MPIBAIJ(). 1899 This routine is used for BAIJ and SBAIJ matrices (unfortunate dependency). 1900 */ 1901 PetscErrorCode MatCreateSubMatrix_MPIBAIJ_Private(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat, PetscBool sym) 1902 { 1903 PetscMPIInt rank, size; 1904 PetscInt i, m, n, rstart, row, rend, nz, *cwork, j, bs; 1905 PetscInt *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal; 1906 Mat M, Mreuse; 1907 MatScalar *vwork, *aa; 1908 MPI_Comm comm; 1909 IS isrow_new, iscol_new; 1910 Mat_SeqBAIJ *aij; 1911 1912 PetscFunctionBegin; 1913 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 1914 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 1915 PetscCallMPI(MPI_Comm_size(comm, &size)); 1916 /* The compression and expansion should be avoided. 
Doesn't point 1917 out errors, might change the indices, hence buggey */ 1918 PetscCall(ISCompressIndicesGeneral(mat->rmap->N, mat->rmap->n, mat->rmap->bs, 1, &isrow, &isrow_new)); 1919 if (isrow == iscol) { 1920 iscol_new = isrow_new; 1921 PetscCall(PetscObjectReference((PetscObject)iscol_new)); 1922 } else PetscCall(ISCompressIndicesGeneral(mat->cmap->N, mat->cmap->n, mat->cmap->bs, 1, &iscol, &iscol_new)); 1923 1924 if (call == MAT_REUSE_MATRIX) { 1925 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse)); 1926 PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 1927 PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_REUSE_MATRIX, &Mreuse, sym)); 1928 } else { 1929 PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_INITIAL_MATRIX, &Mreuse, sym)); 1930 } 1931 PetscCall(ISDestroy(&isrow_new)); 1932 PetscCall(ISDestroy(&iscol_new)); 1933 /* 1934 m - number of local rows 1935 n - number of columns (same on all processors) 1936 rstart - first row in new global matrix generated 1937 */ 1938 PetscCall(MatGetBlockSize(mat, &bs)); 1939 PetscCall(MatGetSize(Mreuse, &m, &n)); 1940 m = m / bs; 1941 n = n / bs; 1942 1943 if (call == MAT_INITIAL_MATRIX) { 1944 aij = (Mat_SeqBAIJ *)(Mreuse)->data; 1945 ii = aij->i; 1946 jj = aij->j; 1947 1948 /* 1949 Determine the number of non-zeros in the diagonal and off-diagonal 1950 portions of the matrix in order to do correct preallocation 1951 */ 1952 1953 /* first get start and end of "diagonal" columns */ 1954 if (csize == PETSC_DECIDE) { 1955 PetscCall(ISGetSize(isrow, &mglobal)); 1956 if (mglobal == n * bs) { /* square matrix */ 1957 nlocal = m; 1958 } else { 1959 nlocal = n / size + ((n % size) > rank); 1960 } 1961 } else { 1962 nlocal = csize / bs; 1963 } 1964 PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm)); 1965 rstart = rend - nlocal; 1966 PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n); 1967 1968 /* next, compute all the lengths */ 1969 PetscCall(PetscMalloc2(m + 1, &dlens, m + 1, &olens)); 1970 for (i = 0; i < m; i++) { 1971 jend = ii[i + 1] - ii[i]; 1972 olen = 0; 1973 dlen = 0; 1974 for (j = 0; j < jend; j++) { 1975 if (*jj < rstart || *jj >= rend) olen++; 1976 else dlen++; 1977 jj++; 1978 } 1979 olens[i] = olen; 1980 dlens[i] = dlen; 1981 } 1982 PetscCall(MatCreate(comm, &M)); 1983 PetscCall(MatSetSizes(M, bs * m, bs * nlocal, PETSC_DECIDE, bs * n)); 1984 PetscCall(MatSetType(M, sym ? ((PetscObject)mat)->type_name : MATMPIBAIJ)); 1985 PetscCall(MatMPIBAIJSetPreallocation(M, bs, 0, dlens, 0, olens)); 1986 PetscCall(MatMPISBAIJSetPreallocation(M, bs, 0, dlens, 0, olens)); 1987 PetscCall(PetscFree2(dlens, olens)); 1988 } else { 1989 PetscInt ml, nl; 1990 1991 M = *newmat; 1992 PetscCall(MatGetLocalSize(M, &ml, &nl)); 1993 PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request"); 1994 PetscCall(MatZeroEntries(M)); 1995 /* 1996 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 1997 rather than the slower MatSetValues(). 
1998 */ 1999 M->was_assembled = PETSC_TRUE; 2000 M->assembled = PETSC_FALSE; 2001 } 2002 PetscCall(MatSetOption(M, MAT_ROW_ORIENTED, PETSC_FALSE)); 2003 PetscCall(MatGetOwnershipRange(M, &rstart, &rend)); 2004 aij = (Mat_SeqBAIJ *)(Mreuse)->data; 2005 ii = aij->i; 2006 jj = aij->j; 2007 aa = aij->a; 2008 for (i = 0; i < m; i++) { 2009 row = rstart / bs + i; 2010 nz = ii[i + 1] - ii[i]; 2011 cwork = jj; 2012 jj = PetscSafePointerPlusOffset(jj, nz); 2013 vwork = aa; 2014 aa = PetscSafePointerPlusOffset(aa, nz * bs * bs); 2015 PetscCall(MatSetValuesBlocked_MPIBAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES)); 2016 } 2017 2018 PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY)); 2019 PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY)); 2020 *newmat = M; 2021 2022 /* save submatrix used in processor for next request */ 2023 if (call == MAT_INITIAL_MATRIX) { 2024 PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse)); 2025 PetscCall(PetscObjectDereference((PetscObject)Mreuse)); 2026 } 2027 PetscFunctionReturn(PETSC_SUCCESS); 2028 } 2029 2030 static PetscErrorCode MatPermute_MPIBAIJ(Mat A, IS rowp, IS colp, Mat *B) 2031 { 2032 MPI_Comm comm, pcomm; 2033 PetscInt clocal_size, nrows; 2034 const PetscInt *rows; 2035 PetscMPIInt size; 2036 IS crowp, lcolp; 2037 2038 PetscFunctionBegin; 2039 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 2040 /* make a collective version of 'rowp' */ 2041 PetscCall(PetscObjectGetComm((PetscObject)rowp, &pcomm)); 2042 if (pcomm == comm) { 2043 crowp = rowp; 2044 } else { 2045 PetscCall(ISGetSize(rowp, &nrows)); 2046 PetscCall(ISGetIndices(rowp, &rows)); 2047 PetscCall(ISCreateGeneral(comm, nrows, rows, PETSC_COPY_VALUES, &crowp)); 2048 PetscCall(ISRestoreIndices(rowp, &rows)); 2049 } 2050 PetscCall(ISSetPermutation(crowp)); 2051 /* make a local version of 'colp' */ 2052 PetscCall(PetscObjectGetComm((PetscObject)colp, &pcomm)); 2053 PetscCallMPI(MPI_Comm_size(pcomm, &size)); 2054 if (size == 1) { 2055 lcolp = colp; 2056 } else { 2057 PetscCall(ISAllGather(colp, &lcolp)); 2058 } 2059 PetscCall(ISSetPermutation(lcolp)); 2060 /* now we just get the submatrix */ 2061 PetscCall(MatGetLocalSize(A, NULL, &clocal_size)); 2062 PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(A, crowp, lcolp, clocal_size, MAT_INITIAL_MATRIX, B, PETSC_FALSE)); 2063 /* clean up */ 2064 if (pcomm != comm) PetscCall(ISDestroy(&crowp)); 2065 if (size > 1) PetscCall(ISDestroy(&lcolp)); 2066 PetscFunctionReturn(PETSC_SUCCESS); 2067 } 2068 2069 static PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[]) 2070 { 2071 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 2072 Mat_SeqBAIJ *B = (Mat_SeqBAIJ *)baij->B->data; 2073 2074 PetscFunctionBegin; 2075 if (nghosts) *nghosts = B->nbs; 2076 if (ghosts) *ghosts = baij->garray; 2077 PetscFunctionReturn(PETSC_SUCCESS); 2078 } 2079 2080 static PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A, Mat *newmat) 2081 { 2082 Mat B; 2083 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2084 Mat_SeqBAIJ *ad = (Mat_SeqBAIJ *)a->A->data, *bd = (Mat_SeqBAIJ *)a->B->data; 2085 Mat_SeqAIJ *b; 2086 PetscMPIInt size, rank, *recvcounts = NULL, *displs = NULL; 2087 PetscInt sendcount, i, *rstarts = A->rmap->range, n, cnt, j, bs = A->rmap->bs; 2088 PetscInt m, *garray = a->garray, *lens, *jsendbuf, *a_jsendbuf, *b_jsendbuf; 2089 2090 PetscFunctionBegin; 2091 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 2092 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank)); 2093 2094 /* Tell every 
processor the number of nonzeros per row */ 2095 PetscCall(PetscMalloc1(A->rmap->N / bs, &lens)); 2096 for (i = A->rmap->rstart / bs; i < A->rmap->rend / bs; i++) lens[i] = ad->i[i - A->rmap->rstart / bs + 1] - ad->i[i - A->rmap->rstart / bs] + bd->i[i - A->rmap->rstart / bs + 1] - bd->i[i - A->rmap->rstart / bs]; 2097 PetscCall(PetscMalloc1(2 * size, &recvcounts)); 2098 displs = recvcounts + size; 2099 for (i = 0; i < size; i++) { 2100 recvcounts[i] = A->rmap->range[i + 1] / bs - A->rmap->range[i] / bs; 2101 displs[i] = A->rmap->range[i] / bs; 2102 } 2103 PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, lens, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A))); 2104 /* Create the sequential matrix of the same type as the local block diagonal */ 2105 PetscCall(MatCreate(PETSC_COMM_SELF, &B)); 2106 PetscCall(MatSetSizes(B, A->rmap->N / bs, A->cmap->N / bs, PETSC_DETERMINE, PETSC_DETERMINE)); 2107 PetscCall(MatSetType(B, MATSEQAIJ)); 2108 PetscCall(MatSeqAIJSetPreallocation(B, 0, lens)); 2109 b = (Mat_SeqAIJ *)B->data; 2110 2111 /* Copy my part of matrix column indices over */ 2112 sendcount = ad->nz + bd->nz; 2113 jsendbuf = b->j + b->i[rstarts[rank] / bs]; 2114 a_jsendbuf = ad->j; 2115 b_jsendbuf = bd->j; 2116 n = A->rmap->rend / bs - A->rmap->rstart / bs; 2117 cnt = 0; 2118 for (i = 0; i < n; i++) { 2119 /* put in lower diagonal portion */ 2120 m = bd->i[i + 1] - bd->i[i]; 2121 while (m > 0) { 2122 /* is it above diagonal (in bd (compressed) numbering) */ 2123 if (garray[*b_jsendbuf] > A->rmap->rstart / bs + i) break; 2124 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2125 m--; 2126 } 2127 2128 /* put in diagonal portion */ 2129 for (j = ad->i[i]; j < ad->i[i + 1]; j++) jsendbuf[cnt++] = A->rmap->rstart / bs + *a_jsendbuf++; 2130 2131 /* put in upper diagonal portion */ 2132 while (m-- > 0) jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2133 } 2134 PetscCheck(cnt == sendcount, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Corrupted PETSc matrix: nz given %" PetscInt_FMT " actual nz %" PetscInt_FMT, sendcount, cnt); 2135 2136 /* Gather all column indices to all processors */ 2137 for (i = 0; i < size; i++) { 2138 recvcounts[i] = 0; 2139 for (j = A->rmap->range[i] / bs; j < A->rmap->range[i + 1] / bs; j++) recvcounts[i] += lens[j]; 2140 } 2141 displs[0] = 0; 2142 for (i = 1; i < size; i++) displs[i] = displs[i - 1] + recvcounts[i - 1]; 2143 PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, b->j, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A))); 2144 /* Assemble the matrix into usable form (note numerical values not yet set) */ 2145 /* set the b->ilen (length of each row) values */ 2146 PetscCall(PetscArraycpy(b->ilen, lens, A->rmap->N / bs)); 2147 /* set the b->i indices */ 2148 b->i[0] = 0; 2149 for (i = 1; i <= A->rmap->N / bs; i++) b->i[i] = b->i[i - 1] + lens[i - 1]; 2150 PetscCall(PetscFree(lens)); 2151 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 2152 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 2153 PetscCall(PetscFree(recvcounts)); 2154 2155 PetscCall(MatPropagateSymmetryOptions(A, B)); 2156 *newmat = B; 2157 PetscFunctionReturn(PETSC_SUCCESS); 2158 } 2159 2160 static PetscErrorCode MatSOR_MPIBAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) 2161 { 2162 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data; 2163 Vec bb1 = NULL; 2164 2165 PetscFunctionBegin; 2166 if (flag == SOR_APPLY_UPPER) { 2167 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2168 
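/* SOR_APPLY_UPPER is delegated entirely to the local diagonal block, so no ghost-value scatter is needed */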
PetscFunctionReturn(PETSC_SUCCESS); 2169 } 2170 2171 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) PetscCall(VecDuplicate(bb, &bb1)); 2172 2173 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2174 if (flag & SOR_ZERO_INITIAL_GUESS) { 2175 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2176 its--; 2177 } 2178 2179 while (its--) { 2180 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2181 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2182 2183 /* update rhs: bb1 = bb - B*x */ 2184 PetscCall(VecScale(mat->lvec, -1.0)); 2185 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 2186 2187 /* local sweep */ 2188 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx)); 2189 } 2190 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 2191 if (flag & SOR_ZERO_INITIAL_GUESS) { 2192 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2193 its--; 2194 } 2195 while (its--) { 2196 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2197 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2198 2199 /* update rhs: bb1 = bb - B*x */ 2200 PetscCall(VecScale(mat->lvec, -1.0)); 2201 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 2202 2203 /* local sweep */ 2204 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx)); 2205 } 2206 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 2207 if (flag & SOR_ZERO_INITIAL_GUESS) { 2208 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2209 its--; 2210 } 2211 while (its--) { 2212 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2213 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2214 2215 /* update rhs: bb1 = bb - B*x */ 2216 PetscCall(VecScale(mat->lvec, -1.0)); 2217 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 2218 2219 /* local sweep */ 2220 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx)); 2221 } 2222 } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel version of SOR requested not supported"); 2223 2224 PetscCall(VecDestroy(&bb1)); 2225 PetscFunctionReturn(PETSC_SUCCESS); 2226 } 2227 2228 static PetscErrorCode MatGetColumnReductions_MPIBAIJ(Mat A, PetscInt type, PetscReal *reductions) 2229 { 2230 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)A->data; 2231 PetscInt m, N, i, *garray = aij->garray; 2232 PetscInt ib, jb, bs = A->rmap->bs; 2233 Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ *)aij->A->data; 2234 MatScalar *a_val = a_aij->a; 2235 Mat_SeqBAIJ *b_aij = (Mat_SeqBAIJ *)aij->B->data; 2236 MatScalar *b_val = b_aij->a; 2237 PetscReal *work; 2238 2239 PetscFunctionBegin; 2240 PetscCall(MatGetSize(A, &m, &N)); 2241 PetscCall(PetscCalloc1(N, &work)); 2242 if (type == NORM_2) { 2243 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2244 for (jb = 0; jb < bs; jb++) { 2245 for (ib = 0; ib < bs; ib++) { 2246 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val); 2247 a_val++; 2248 } 2249 } 2250 } 2251 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2252 for (jb = 0; jb < bs; jb++) { 2253 for (ib = 0; ib < bs; ib++) { 2254 work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val); 2255 b_val++; 2256 } 2257 } 2258 } 2259 } else if 
(type == NORM_1) { 2260 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2261 for (jb = 0; jb < bs; jb++) { 2262 for (ib = 0; ib < bs; ib++) { 2263 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val); 2264 a_val++; 2265 } 2266 } 2267 } 2268 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2269 for (jb = 0; jb < bs; jb++) { 2270 for (ib = 0; ib < bs; ib++) { 2271 work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val); 2272 b_val++; 2273 } 2274 } 2275 } 2276 } else if (type == NORM_INFINITY) { 2277 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2278 for (jb = 0; jb < bs; jb++) { 2279 for (ib = 0; ib < bs; ib++) { 2280 int col = A->cmap->rstart + a_aij->j[i] * bs + jb; 2281 work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]); 2282 a_val++; 2283 } 2284 } 2285 } 2286 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2287 for (jb = 0; jb < bs; jb++) { 2288 for (ib = 0; ib < bs; ib++) { 2289 int col = garray[b_aij->j[i]] * bs + jb; 2290 work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]); 2291 b_val++; 2292 } 2293 } 2294 } 2295 } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) { 2296 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2297 for (jb = 0; jb < bs; jb++) { 2298 for (ib = 0; ib < bs; ib++) { 2299 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val); 2300 a_val++; 2301 } 2302 } 2303 } 2304 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2305 for (jb = 0; jb < bs; jb++) { 2306 for (ib = 0; ib < bs; ib++) { 2307 work[garray[b_aij->j[i]] * bs + jb] += PetscRealPart(*b_val); 2308 b_val++; 2309 } 2310 } 2311 } 2312 } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) { 2313 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2314 for (jb = 0; jb < bs; jb++) { 2315 for (ib = 0; ib < bs; ib++) { 2316 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val); 2317 a_val++; 2318 } 2319 } 2320 } 2321 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2322 for (jb = 0; jb < bs; jb++) { 2323 for (ib = 0; ib < bs; ib++) { 2324 work[garray[b_aij->j[i]] * bs + jb] += PetscImaginaryPart(*b_val); 2325 b_val++; 2326 } 2327 } 2328 } 2329 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type"); 2330 if (type == NORM_INFINITY) { 2331 PetscCall(MPIU_Allreduce(work, reductions, N, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A))); 2332 } else { 2333 PetscCall(MPIU_Allreduce(work, reductions, N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A))); 2334 } 2335 PetscCall(PetscFree(work)); 2336 if (type == NORM_2) { 2337 for (i = 0; i < N; i++) reductions[i] = PetscSqrtReal(reductions[i]); 2338 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 2339 for (i = 0; i < N; i++) reductions[i] /= m; 2340 } 2341 PetscFunctionReturn(PETSC_SUCCESS); 2342 } 2343 2344 static PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A, const PetscScalar **values) 2345 { 2346 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2347 2348 PetscFunctionBegin; 2349 PetscCall(MatInvertBlockDiagonal(a->A, values)); 2350 A->factorerrortype = a->A->factorerrortype; 2351 A->factorerror_zeropivot_value = a->A->factorerror_zeropivot_value; 2352 A->factorerror_zeropivot_row = a->A->factorerror_zeropivot_row; 2353 PetscFunctionReturn(PETSC_SUCCESS); 2354 } 2355 2356 static PetscErrorCode MatShift_MPIBAIJ(Mat Y, PetscScalar a) 
2357 { 2358 Mat_MPIBAIJ *maij = (Mat_MPIBAIJ *)Y->data; 2359 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)maij->A->data; 2360 2361 PetscFunctionBegin; 2362 if (!Y->preallocated) { 2363 PetscCall(MatMPIBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL, 0, NULL)); 2364 } else if (!aij->nz) { 2365 PetscInt nonew = aij->nonew; 2366 PetscCall(MatSeqBAIJSetPreallocation(maij->A, Y->rmap->bs, 1, NULL)); 2367 aij->nonew = nonew; 2368 } 2369 PetscCall(MatShift_Basic(Y, a)); 2370 PetscFunctionReturn(PETSC_SUCCESS); 2371 } 2372 2373 static PetscErrorCode MatMissingDiagonal_MPIBAIJ(Mat A, PetscBool *missing, PetscInt *d) 2374 { 2375 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2376 2377 PetscFunctionBegin; 2378 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices"); 2379 PetscCall(MatMissingDiagonal(a->A, missing, d)); 2380 if (d) { 2381 PetscInt rstart; 2382 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 2383 *d += rstart / A->rmap->bs; 2384 } 2385 PetscFunctionReturn(PETSC_SUCCESS); 2386 } 2387 2388 static PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A, Mat *a) 2389 { 2390 PetscFunctionBegin; 2391 *a = ((Mat_MPIBAIJ *)A->data)->A; 2392 PetscFunctionReturn(PETSC_SUCCESS); 2393 } 2394 2395 static PetscErrorCode MatEliminateZeros_MPIBAIJ(Mat A, PetscBool keep) 2396 { 2397 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2398 2399 PetscFunctionBegin; 2400 PetscCall(MatEliminateZeros_SeqBAIJ(a->A, keep)); // possibly keep zero diagonal coefficients 2401 PetscCall(MatEliminateZeros_SeqBAIJ(a->B, PETSC_FALSE)); // never keep zero diagonal coefficients 2402 PetscFunctionReturn(PETSC_SUCCESS); 2403 } 2404 2405 static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ, 2406 MatGetRow_MPIBAIJ, 2407 MatRestoreRow_MPIBAIJ, 2408 MatMult_MPIBAIJ, 2409 /* 4*/ MatMultAdd_MPIBAIJ, 2410 MatMultTranspose_MPIBAIJ, 2411 MatMultTransposeAdd_MPIBAIJ, 2412 NULL, 2413 NULL, 2414 NULL, 2415 /*10*/ NULL, 2416 NULL, 2417 NULL, 2418 MatSOR_MPIBAIJ, 2419 MatTranspose_MPIBAIJ, 2420 /*15*/ MatGetInfo_MPIBAIJ, 2421 MatEqual_MPIBAIJ, 2422 MatGetDiagonal_MPIBAIJ, 2423 MatDiagonalScale_MPIBAIJ, 2424 MatNorm_MPIBAIJ, 2425 /*20*/ MatAssemblyBegin_MPIBAIJ, 2426 MatAssemblyEnd_MPIBAIJ, 2427 MatSetOption_MPIBAIJ, 2428 MatZeroEntries_MPIBAIJ, 2429 /*24*/ MatZeroRows_MPIBAIJ, 2430 NULL, 2431 NULL, 2432 NULL, 2433 NULL, 2434 /*29*/ MatSetUp_MPI_Hash, 2435 NULL, 2436 NULL, 2437 MatGetDiagonalBlock_MPIBAIJ, 2438 NULL, 2439 /*34*/ MatDuplicate_MPIBAIJ, 2440 NULL, 2441 NULL, 2442 NULL, 2443 NULL, 2444 /*39*/ MatAXPY_MPIBAIJ, 2445 MatCreateSubMatrices_MPIBAIJ, 2446 MatIncreaseOverlap_MPIBAIJ, 2447 MatGetValues_MPIBAIJ, 2448 MatCopy_MPIBAIJ, 2449 /*44*/ NULL, 2450 MatScale_MPIBAIJ, 2451 MatShift_MPIBAIJ, 2452 NULL, 2453 MatZeroRowsColumns_MPIBAIJ, 2454 /*49*/ NULL, 2455 NULL, 2456 NULL, 2457 NULL, 2458 NULL, 2459 /*54*/ MatFDColoringCreate_MPIXAIJ, 2460 NULL, 2461 MatSetUnfactored_MPIBAIJ, 2462 MatPermute_MPIBAIJ, 2463 MatSetValuesBlocked_MPIBAIJ, 2464 /*59*/ MatCreateSubMatrix_MPIBAIJ, 2465 MatDestroy_MPIBAIJ, 2466 MatView_MPIBAIJ, 2467 NULL, 2468 NULL, 2469 /*64*/ NULL, 2470 NULL, 2471 NULL, 2472 NULL, 2473 NULL, 2474 /*69*/ MatGetRowMaxAbs_MPIBAIJ, 2475 NULL, 2476 NULL, 2477 NULL, 2478 NULL, 2479 /*74*/ NULL, 2480 MatFDColoringApply_BAIJ, 2481 NULL, 2482 NULL, 2483 NULL, 2484 /*79*/ NULL, 2485 NULL, 2486 NULL, 2487 NULL, 2488 MatLoad_MPIBAIJ, 2489 /*84*/ NULL, 2490 NULL, 2491 NULL, 2492 NULL, 2493 NULL, 2494 /*89*/ NULL, 2495 NULL, 2496 NULL, 2497 NULL, 2498 NULL, 2499 /*94*/ NULL, 2500 NULL, 2501 NULL, 2502 NULL, 2503 
NULL, 2504 /*99*/ NULL, 2505 NULL, 2506 NULL, 2507 MatConjugate_MPIBAIJ, 2508 NULL, 2509 /*104*/ NULL, 2510 MatRealPart_MPIBAIJ, 2511 MatImaginaryPart_MPIBAIJ, 2512 NULL, 2513 NULL, 2514 /*109*/ NULL, 2515 NULL, 2516 NULL, 2517 NULL, 2518 MatMissingDiagonal_MPIBAIJ, 2519 /*114*/ MatGetSeqNonzeroStructure_MPIBAIJ, 2520 NULL, 2521 MatGetGhosts_MPIBAIJ, 2522 NULL, 2523 NULL, 2524 /*119*/ NULL, 2525 NULL, 2526 NULL, 2527 NULL, 2528 MatGetMultiProcBlock_MPIBAIJ, 2529 /*124*/ NULL, 2530 MatGetColumnReductions_MPIBAIJ, 2531 MatInvertBlockDiagonal_MPIBAIJ, 2532 NULL, 2533 NULL, 2534 /*129*/ NULL, 2535 NULL, 2536 NULL, 2537 NULL, 2538 NULL, 2539 /*134*/ NULL, 2540 NULL, 2541 NULL, 2542 NULL, 2543 NULL, 2544 /*139*/ MatSetBlockSizes_Default, 2545 NULL, 2546 NULL, 2547 MatFDColoringSetUp_MPIXAIJ, 2548 NULL, 2549 /*144*/ MatCreateMPIMatConcatenateSeqMat_MPIBAIJ, 2550 NULL, 2551 NULL, 2552 NULL, 2553 NULL, 2554 NULL, 2555 /*150*/ NULL, 2556 MatEliminateZeros_MPIBAIJ, 2557 NULL}; 2558 2559 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *); 2560 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *); 2561 2562 static PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[]) 2563 { 2564 PetscInt m, rstart, cstart, cend; 2565 PetscInt i, j, dlen, olen, nz, nz_max = 0, *d_nnz = NULL, *o_nnz = NULL; 2566 const PetscInt *JJ = NULL; 2567 PetscScalar *values = NULL; 2568 PetscBool roworiented = ((Mat_MPIBAIJ *)B->data)->roworiented; 2569 PetscBool nooffprocentries; 2570 2571 PetscFunctionBegin; 2572 PetscCall(PetscLayoutSetBlockSize(B->rmap, bs)); 2573 PetscCall(PetscLayoutSetBlockSize(B->cmap, bs)); 2574 PetscCall(PetscLayoutSetUp(B->rmap)); 2575 PetscCall(PetscLayoutSetUp(B->cmap)); 2576 PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 2577 m = B->rmap->n / bs; 2578 rstart = B->rmap->rstart / bs; 2579 cstart = B->cmap->rstart / bs; 2580 cend = B->cmap->rend / bs; 2581 2582 PetscCheck(!ii[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]); 2583 PetscCall(PetscMalloc2(m, &d_nnz, m, &o_nnz)); 2584 for (i = 0; i < m; i++) { 2585 nz = ii[i + 1] - ii[i]; 2586 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz); 2587 nz_max = PetscMax(nz_max, nz); 2588 dlen = 0; 2589 olen = 0; 2590 JJ = jj + ii[i]; 2591 for (j = 0; j < nz; j++) { 2592 if (*JJ < cstart || *JJ >= cend) olen++; 2593 else dlen++; 2594 JJ++; 2595 } 2596 d_nnz[i] = dlen; 2597 o_nnz[i] = olen; 2598 } 2599 PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, d_nnz, 0, o_nnz)); 2600 PetscCall(PetscFree2(d_nnz, o_nnz)); 2601 2602 values = (PetscScalar *)V; 2603 if (!values) PetscCall(PetscCalloc1(bs * bs * nz_max, &values)); 2604 for (i = 0; i < m; i++) { 2605 PetscInt row = i + rstart; 2606 PetscInt ncols = ii[i + 1] - ii[i]; 2607 const PetscInt *icols = jj + ii[i]; 2608 if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */ 2609 const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0); 2610 PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, ncols, icols, svals, INSERT_VALUES)); 2611 } else { /* block ordering does not match so we can only insert one block at a time. */ 2612 PetscInt j; 2613 for (j = 0; j < ncols; j++) { 2614 const PetscScalar *svals = values + (V ? 
(bs * bs * (ii[i] + j)) : 0); 2615 PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, 1, &icols[j], svals, INSERT_VALUES)); 2616 } 2617 } 2618 } 2619 2620 if (!V) PetscCall(PetscFree(values)); 2621 nooffprocentries = B->nooffprocentries; 2622 B->nooffprocentries = PETSC_TRUE; 2623 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 2624 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 2625 B->nooffprocentries = nooffprocentries; 2626 2627 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 2628 PetscFunctionReturn(PETSC_SUCCESS); 2629 } 2630 2631 /*@C 2632 MatMPIBAIJSetPreallocationCSR - Creates a sparse parallel matrix in `MATBAIJ` format using the given nonzero structure and (optional) numerical values 2633 2634 Collective 2635 2636 Input Parameters: 2637 + B - the matrix 2638 . bs - the block size 2639 . i - the indices into `j` for the start of each local block row (starts with zero) 2640 . j - the block column indices for each local block row (starts with zero); these must be sorted within each row 2641 - v - optional values in the matrix, use `NULL` if not provided 2642 2643 Level: advanced 2644 2645 Notes: 2646 The `i`, `j`, and `v` arrays ARE copied by this routine into the internal format used by PETSc; 2647 thus you CANNOT change the matrix entries by changing the values of `v` after you have 2648 called this routine. 2649 2650 The order of the entries in `v` is specified by the `MatOption` `MAT_ROW_ORIENTED`. For example, C programs 2651 may want to use the default `MAT_ROW_ORIENTED` with value `PETSC_TRUE` and use an array v[nnz][bs][bs] where the second index is 2652 over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 2653 `MAT_ROW_ORIENTED` with value `PETSC_FALSE` and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 2654 block column and the second index is over columns within a block. 
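   As a small illustrative sketch (the block size, counts, and indices below are made up, not taken from any
   particular application), consider a process that owns two local block rows with bs = 2, where block row 0 has
   nonzero blocks in global block columns 0 and 5 and block row 1 has a single nonzero block in block column 1.
   Each entry of `j` is then matched by one bs*bs dense block of values in `v` (or `v` may be `NULL`)

.vb
   PetscInt    i[] = {0, 2, 3};
   PetscInt    j[] = {0, 5, 1};
   PetscScalar v[3 * 2 * 2];

   MatMPIBAIJSetPreallocationCSR(B, 2, i, j, v);
.ve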
2655 2656 Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well 2657 2658 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MATMPIAIJ`, `MatCreateMPIBAIJWithArrays()`, `MATMPIBAIJ` 2659 @*/ 2660 PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[]) 2661 { 2662 PetscFunctionBegin; 2663 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 2664 PetscValidType(B, 1); 2665 PetscValidLogicalCollectiveInt(B, bs, 2); 2666 PetscTryMethod(B, "MatMPIBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v)); 2667 PetscFunctionReturn(PETSC_SUCCESS); 2668 } 2669 2670 PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz) 2671 { 2672 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 2673 PetscInt i; 2674 PetscMPIInt size; 2675 2676 PetscFunctionBegin; 2677 if (B->hash_active) { 2678 B->ops[0] = b->cops; 2679 B->hash_active = PETSC_FALSE; 2680 } 2681 if (!B->preallocated) PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), bs, &B->bstash)); 2682 PetscCall(MatSetBlockSize(B, PetscAbs(bs))); 2683 PetscCall(PetscLayoutSetUp(B->rmap)); 2684 PetscCall(PetscLayoutSetUp(B->cmap)); 2685 PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 2686 2687 if (d_nnz) { 2688 for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(d_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "d_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, d_nnz[i]); 2689 } 2690 if (o_nnz) { 2691 for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(o_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "o_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, o_nnz[i]); 2692 } 2693 2694 b->bs2 = bs * bs; 2695 b->mbs = B->rmap->n / bs; 2696 b->nbs = B->cmap->n / bs; 2697 b->Mbs = B->rmap->N / bs; 2698 b->Nbs = B->cmap->N / bs; 2699 2700 for (i = 0; i <= b->size; i++) b->rangebs[i] = B->rmap->range[i] / bs; 2701 b->rstartbs = B->rmap->rstart / bs; 2702 b->rendbs = B->rmap->rend / bs; 2703 b->cstartbs = B->cmap->rstart / bs; 2704 b->cendbs = B->cmap->rend / bs; 2705 2706 #if defined(PETSC_USE_CTABLE) 2707 PetscCall(PetscHMapIDestroy(&b->colmap)); 2708 #else 2709 PetscCall(PetscFree(b->colmap)); 2710 #endif 2711 PetscCall(PetscFree(b->garray)); 2712 PetscCall(VecDestroy(&b->lvec)); 2713 PetscCall(VecScatterDestroy(&b->Mvctx)); 2714 2715 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 2716 2717 MatSeqXAIJGetOptions_Private(b->B); 2718 PetscCall(MatDestroy(&b->B)); 2719 PetscCall(MatCreate(PETSC_COMM_SELF, &b->B)); 2720 PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? 
B->cmap->N : 0)); 2721 PetscCall(MatSetType(b->B, MATSEQBAIJ)); 2722 MatSeqXAIJRestoreOptions_Private(b->B); 2723 2724 MatSeqXAIJGetOptions_Private(b->A); 2725 PetscCall(MatDestroy(&b->A)); 2726 PetscCall(MatCreate(PETSC_COMM_SELF, &b->A)); 2727 PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n)); 2728 PetscCall(MatSetType(b->A, MATSEQBAIJ)); 2729 MatSeqXAIJRestoreOptions_Private(b->A); 2730 2731 PetscCall(MatSeqBAIJSetPreallocation(b->A, bs, d_nz, d_nnz)); 2732 PetscCall(MatSeqBAIJSetPreallocation(b->B, bs, o_nz, o_nnz)); 2733 B->preallocated = PETSC_TRUE; 2734 B->was_assembled = PETSC_FALSE; 2735 B->assembled = PETSC_FALSE; 2736 PetscFunctionReturn(PETSC_SUCCESS); 2737 } 2738 2739 extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat, Vec); 2740 extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat, PetscReal); 2741 2742 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype, MatReuse reuse, Mat *adj) 2743 { 2744 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 2745 Mat_SeqBAIJ *d = (Mat_SeqBAIJ *)b->A->data, *o = (Mat_SeqBAIJ *)b->B->data; 2746 PetscInt M = B->rmap->n / B->rmap->bs, i, *ii, *jj, cnt, j, k, rstart = B->rmap->rstart / B->rmap->bs; 2747 const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray; 2748 2749 PetscFunctionBegin; 2750 PetscCall(PetscMalloc1(M + 1, &ii)); 2751 ii[0] = 0; 2752 for (i = 0; i < M; i++) { 2753 PetscCheck((id[i + 1] - id[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, id[i], id[i + 1]); 2754 PetscCheck((io[i + 1] - io[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, io[i], io[i + 1]); 2755 ii[i + 1] = ii[i] + id[i + 1] - id[i] + io[i + 1] - io[i]; 2756 /* remove one from count of matrix has diagonal */ 2757 for (j = id[i]; j < id[i + 1]; j++) { 2758 if (jd[j] == i) { 2759 ii[i + 1]--; 2760 break; 2761 } 2762 } 2763 } 2764 PetscCall(PetscMalloc1(ii[M], &jj)); 2765 cnt = 0; 2766 for (i = 0; i < M; i++) { 2767 for (j = io[i]; j < io[i + 1]; j++) { 2768 if (garray[jo[j]] > rstart) break; 2769 jj[cnt++] = garray[jo[j]]; 2770 } 2771 for (k = id[i]; k < id[i + 1]; k++) { 2772 if (jd[k] != i) jj[cnt++] = rstart + jd[k]; 2773 } 2774 for (; j < io[i + 1]; j++) jj[cnt++] = garray[jo[j]]; 2775 } 2776 PetscCall(MatCreateMPIAdj(PetscObjectComm((PetscObject)B), M, B->cmap->N / B->rmap->bs, ii, jj, NULL, adj)); 2777 PetscFunctionReturn(PETSC_SUCCESS); 2778 } 2779 2780 #include <../src/mat/impls/aij/mpi/mpiaij.h> 2781 2782 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType, MatReuse, Mat *); 2783 2784 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) 2785 { 2786 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2787 Mat_MPIAIJ *b; 2788 Mat B; 2789 2790 PetscFunctionBegin; 2791 PetscCheck(A->assembled, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Matrix must be assembled"); 2792 2793 if (reuse == MAT_REUSE_MATRIX) { 2794 B = *newmat; 2795 } else { 2796 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 2797 PetscCall(MatSetType(B, MATMPIAIJ)); 2798 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 2799 PetscCall(MatSetBlockSizes(B, A->rmap->bs, A->cmap->bs)); 2800 PetscCall(MatSeqAIJSetPreallocation(B, 0, NULL)); 2801 PetscCall(MatMPIAIJSetPreallocation(B, 0, NULL, 0, NULL)); 2802 } 2803 b = (Mat_MPIAIJ *)B->data; 2804 2805 if (reuse == MAT_REUSE_MATRIX) { 2806 
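/* reuse path: convert directly into the existing sequential blocks b->A and b->B, which assumes the nonzero
     pattern of A has not changed since the previous conversion */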
PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_REUSE_MATRIX, &b->A)); 2807 PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_REUSE_MATRIX, &b->B)); 2808 } else { 2809 PetscInt *garray = a->garray; 2810 Mat_SeqAIJ *bB; 2811 PetscInt bs, nnz; 2812 PetscCall(MatDestroy(&b->A)); 2813 PetscCall(MatDestroy(&b->B)); 2814 /* just clear out the data structure */ 2815 PetscCall(MatDisAssemble_MPIAIJ(B)); 2816 PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A)); 2817 PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B)); 2818 2819 /* Global numbering for b->B columns */ 2820 bB = (Mat_SeqAIJ *)b->B->data; 2821 bs = A->rmap->bs; 2822 nnz = bB->i[A->rmap->n]; 2823 for (PetscInt k = 0; k < nnz; k++) { 2824 PetscInt bj = bB->j[k] / bs; 2825 PetscInt br = bB->j[k] % bs; 2826 bB->j[k] = garray[bj] * bs + br; 2827 } 2828 } 2829 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 2830 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 2831 2832 if (reuse == MAT_INPLACE_MATRIX) { 2833 PetscCall(MatHeaderReplace(A, &B)); 2834 } else { 2835 *newmat = B; 2836 } 2837 PetscFunctionReturn(PETSC_SUCCESS); 2838 } 2839 2840 /*MC 2841 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 2842 2843 Options Database Keys: 2844 + -mat_type mpibaij - sets the matrix type to `MATMPIBAIJ` during a call to `MatSetFromOptions()` 2845 . -mat_block_size <bs> - set the blocksize used to store the matrix 2846 . -mat_baij_mult_version version - indicate the version of the matrix-vector product to use (0 often indicates using BLAS) 2847 - -mat_use_hash_table <fact> - set hash table factor 2848 2849 Level: beginner 2850 2851 Note: 2852 `MatSetOption(A, MAT_STRUCTURE_ONLY, PETSC_TRUE)` may be called for this matrix type. 
In this no 2853 space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored 2854 2855 .seealso: `Mat`, `MATBAIJ`, `MATSEQBAIJ`, `MatCreateBAIJ` 2856 M*/ 2857 2858 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat, MatType, MatReuse, Mat *); 2859 2860 PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B) 2861 { 2862 Mat_MPIBAIJ *b; 2863 PetscBool flg = PETSC_FALSE; 2864 2865 PetscFunctionBegin; 2866 PetscCall(PetscNew(&b)); 2867 B->data = (void *)b; 2868 B->ops[0] = MatOps_Values; 2869 B->assembled = PETSC_FALSE; 2870 2871 B->insertmode = NOT_SET_VALUES; 2872 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank)); 2873 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &b->size)); 2874 2875 /* build local table of row and column ownerships */ 2876 PetscCall(PetscMalloc1(b->size + 1, &b->rangebs)); 2877 2878 /* build cache for off array entries formed */ 2879 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash)); 2880 2881 b->donotstash = PETSC_FALSE; 2882 b->colmap = NULL; 2883 b->garray = NULL; 2884 b->roworiented = PETSC_TRUE; 2885 2886 /* stuff used in block assembly */ 2887 b->barray = NULL; 2888 2889 /* stuff used for matrix vector multiply */ 2890 b->lvec = NULL; 2891 b->Mvctx = NULL; 2892 2893 /* stuff for MatGetRow() */ 2894 b->rowindices = NULL; 2895 b->rowvalues = NULL; 2896 b->getrowactive = PETSC_FALSE; 2897 2898 /* hash table stuff */ 2899 b->ht = NULL; 2900 b->hd = NULL; 2901 b->ht_size = 0; 2902 b->ht_flag = PETSC_FALSE; 2903 b->ht_fact = 0; 2904 b->ht_total_ct = 0; 2905 b->ht_insert_ct = 0; 2906 2907 /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */ 2908 b->ijonly = PETSC_FALSE; 2909 2910 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiadj_C", MatConvert_MPIBAIJ_MPIAdj)); 2911 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiaij_C", MatConvert_MPIBAIJ_MPIAIJ)); 2912 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpisbaij_C", MatConvert_MPIBAIJ_MPISBAIJ)); 2913 #if defined(PETSC_HAVE_HYPRE) 2914 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_hypre_C", MatConvert_AIJ_HYPRE)); 2915 #endif 2916 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIBAIJ)); 2917 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIBAIJ)); 2918 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocation_C", MatMPIBAIJSetPreallocation_MPIBAIJ)); 2919 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocationCSR_C", MatMPIBAIJSetPreallocationCSR_MPIBAIJ)); 2920 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIBAIJ)); 2921 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetHashTableFactor_C", MatSetHashTableFactor_MPIBAIJ)); 2922 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_is_C", MatConvert_XAIJ_IS)); 2923 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIBAIJ)); 2924 2925 PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Options for loading MPIBAIJ matrix 1", "Mat"); 2926 PetscCall(PetscOptionsName("-mat_use_hash_table", "Use hash table to save time in constructing matrix", "MatSetOption", &flg)); 2927 if (flg) { 2928 PetscReal fact = 1.39; 2929 PetscCall(MatSetOption(B, MAT_USE_HASH_TABLE, PETSC_TRUE)); 2930 PetscCall(PetscOptionsReal("-mat_use_hash_table", "Use 
hash table factor", "MatMPIBAIJSetHashTableFactor", fact, &fact, NULL)); 2931 if (fact <= 1.0) fact = 1.39; 2932 PetscCall(MatMPIBAIJSetHashTableFactor(B, fact)); 2933 PetscCall(PetscInfo(B, "Hash table Factor used %5.2g\n", (double)fact)); 2934 } 2935 PetscOptionsEnd(); 2936 PetscFunctionReturn(PETSC_SUCCESS); 2937 } 2938 2939 // PetscClangLinter pragma disable: -fdoc-section-header-unknown 2940 /*MC 2941 MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices. 2942 2943 This matrix type is identical to `MATSEQBAIJ` when constructed with a single process communicator, 2944 and `MATMPIBAIJ` otherwise. 2945 2946 Options Database Keys: 2947 . -mat_type baij - sets the matrix type to `MATBAIJ` during a call to `MatSetFromOptions()` 2948 2949 Level: beginner 2950 2951 .seealso: `Mat`, `MatCreateBAIJ()`, `MATSEQBAIJ`, `MATMPIBAIJ`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()` 2952 M*/ 2953 2954 /*@C 2955 MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in `MATMPIBAIJ` format 2956 (block compressed row). 2957 2958 Collective 2959 2960 Input Parameters: 2961 + B - the matrix 2962 . bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row 2963 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()` 2964 . d_nz - number of block nonzeros per block row in diagonal portion of local 2965 submatrix (same for all local rows) 2966 . d_nnz - array containing the number of block nonzeros in the various block rows 2967 of the in diagonal portion of the local (possibly different for each block 2968 row) or `NULL`. If you plan to factor the matrix you must leave room for the diagonal entry and 2969 set it even if it is zero. 2970 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2971 submatrix (same for all local rows). 2972 - o_nnz - array containing the number of nonzeros in the various block rows of the 2973 off-diagonal portion of the local submatrix (possibly different for 2974 each block row) or `NULL`. 2975 2976 If the *_nnz parameter is given then the *_nz parameter is ignored 2977 2978 Options Database Keys: 2979 + -mat_block_size - size of the blocks to use 2980 - -mat_use_hash_table <fact> - set hash table factor 2981 2982 Level: intermediate 2983 2984 Notes: 2985 For good matrix assembly performance 2986 the user should preallocate the matrix storage by setting the parameters 2987 `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). By setting these parameters accurately, 2988 performance can be increased by more than a factor of 50. 2989 2990 If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor 2991 than it must be used on all processors that share the object for that argument. 2992 2993 Storage Information: 2994 For a square global matrix we define each processor's diagonal portion 2995 to be its local rows and the corresponding columns (a square submatrix); 2996 each processor's off-diagonal portion encompasses the remainder of the 2997 local matrix (a rectangular submatrix). 2998 2999 The user can specify preallocated storage for the diagonal part of 3000 the local submatrix with either `d_nz` or `d_nnz` (not both). Set 3001 `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic 3002 memory allocation. 
Likewise, specify preallocated storage for the 3003 off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both). 3004 3005 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3006 the figure below we depict these three local rows and all columns (0-11). 3007 3008 .vb 3009 0 1 2 3 4 5 6 7 8 9 10 11 3010 -------------------------- 3011 row 3 |o o o d d d o o o o o o 3012 row 4 |o o o d d d o o o o o o 3013 row 5 |o o o d d d o o o o o o 3014 -------------------------- 3015 .ve 3016 3017 Thus, any entries in the d locations are stored in the d (diagonal) 3018 submatrix, and any entries in the o locations are stored in the 3019 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3020 stored simply in the `MATSEQBAIJ` format for compressed row storage. 3021 3022 Now `d_nz` should indicate the number of block nonzeros per row in the d matrix, 3023 and `o_nz` should indicate the number of block nonzeros per row in the o matrix. 3024 In general, for PDE problems in which most nonzeros are near the diagonal, 3025 one expects `d_nz` >> `o_nz`. 3026 3027 You can call `MatGetInfo()` to get information on how effective the preallocation was; 3028 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 3029 You can also run with the option `-info` and look for messages with the string 3030 malloc in them to see if additional memory allocation was needed. 3031 3032 .seealso: `Mat`, `MATMPIBAIJ`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatMPIBAIJSetPreallocationCSR()`, `PetscSplitOwnership()` 3033 @*/ 3034 PetscErrorCode MatMPIBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[]) 3035 { 3036 PetscFunctionBegin; 3037 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 3038 PetscValidType(B, 1); 3039 PetscValidLogicalCollectiveInt(B, bs, 2); 3040 PetscTryMethod(B, "MatMPIBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, bs, d_nz, d_nnz, o_nz, o_nnz)); 3041 PetscFunctionReturn(PETSC_SUCCESS); 3042 } 3043 3044 // PetscClangLinter pragma disable: -fdoc-section-header-unknown 3045 /*@C 3046 MatCreateBAIJ - Creates a sparse parallel matrix in `MATBAIJ` format 3047 (block compressed row). 3048 3049 Collective 3050 3051 Input Parameters: 3052 + comm - MPI communicator 3053 . bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row 3054 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()` 3055 . m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given) 3056 This value should be the same as the local size used in creating the 3057 y vector for the matrix-vector product y = Ax. 3058 . n - number of local columns (or `PETSC_DECIDE` to have calculated if N is given) 3059 This value should be the same as the local size used in creating the 3060 x vector for the matrix-vector product y = Ax. 3061 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 3062 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 3063 . d_nz - number of nonzero blocks per block row in diagonal portion of local 3064 submatrix (same for all local rows) 3065 . 
d_nnz - array containing the number of nonzero blocks in the various block rows 3066 in the diagonal portion of the local submatrix (possibly different for each block 3067 row) or NULL. If you plan to factor the matrix you must leave room for the diagonal entry 3068 and set it even if it is zero. 3069 . o_nz - number of nonzero blocks per block row in the off-diagonal portion of local 3070 submatrix (same for all local rows). 3071 - o_nnz - array containing the number of nonzero blocks in the various block rows of the 3072 off-diagonal portion of the local submatrix (possibly different for 3073 each block row) or NULL. 3074 3075 Output Parameter: 3076 . A - the matrix 3077 3078 Options Database Keys: 3079 + -mat_block_size - size of the blocks to use 3080 - -mat_use_hash_table <fact> - set hash table factor 3081 3082 Level: intermediate 3083 3084 Notes: 3085 It is recommended that one use `MatCreateFromOptions()` or the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, 3086 MatXXXXSetPreallocation() paradigm instead of this routine directly. 3087 [MatXXXXSetPreallocation() is, for example, `MatSeqBAIJSetPreallocation()`] 3088 3089 For good matrix assembly performance 3090 the user should preallocate the matrix storage by setting the parameters 3091 `d_nz` (or `d_nnz`) and `o_nz` (or `o_nnz`). By setting these parameters accurately, 3092 performance can be increased by more than a factor of 50. 3093 3094 If the *_nnz parameter is given then the *_nz parameter is ignored. 3095 3096 A nonzero block is any block that has 1 or more nonzeros in it. 3097 3098 The user MUST specify either the local or global matrix dimensions 3099 (possibly both). 3100 3101 If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor 3102 then it must be used on all processors that share the object for that argument. 3103 3104 If `m` and `n` are not `PETSC_DECIDE`, then the values determine the `PetscLayout` of the matrix and the ranges returned by 3105 `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, and `MatGetOwnershipRangesColumn()`. 3106 3107 Storage Information: 3108 For a square global matrix we define each processor's diagonal portion 3109 to be its local rows and the corresponding columns (a square submatrix); 3110 each processor's off-diagonal portion encompasses the remainder of the 3111 local matrix (a rectangular submatrix). 3112 3113 The user can specify preallocated storage for the diagonal part of 3114 the local submatrix with either `d_nz` or `d_nnz` (not both). Set 3115 `d_nz` = `PETSC_DEFAULT` and `d_nnz` = `NULL` for PETSc to control dynamic 3116 memory allocation. Likewise, specify preallocated storage for the 3117 off-diagonal part of the local submatrix with `o_nz` or `o_nnz` (not both). 3118 3119 Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In 3120 the figure below we depict these three local rows and all columns (0-11). 3121 3122 .vb 3123 0 1 2 3 4 5 6 7 8 9 10 11 3124 -------------------------- 3125 row 3 |o o o d d d o o o o o o 3126 row 4 |o o o d d d o o o o o o 3127 row 5 |o o o d d d o o o o o o 3128 -------------------------- 3129 .ve 3130 3131 Thus, any entries in the d locations are stored in the d (diagonal) 3132 submatrix, and any entries in the o locations are stored in the 3133 o (off-diagonal) submatrix. Note that the d and the o submatrices are 3134 stored simply in the `MATSEQBAIJ` format for compressed row storage.
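   For illustration, if every location shown above held a nonzero and the block size were 1, the owning process (3 local rows and 3 local columns of a 12 x 12 matrix) could create and preallocate this matrix with
.vb
   Mat A;
   MatCreateBAIJ(comm, 1, 3, 3, 12, 12, 3, NULL, 9, NULL, &A);
.ve
   where `comm` stands for the MPI communicator and the counts 3 (diagonal blocks per block row) and 9 (off-diagonal blocks per block row) are simply read off the figure; the numbers are illustrative only.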
3135 3136 Now `d_nz` should indicate the number of block nonzeros per row in the d matrix, 3137 and `o_nz` should indicate the number of block nonzeros per row in the o matrix. 3138 In general, for PDE problems in which most nonzeros are near the diagonal, 3139 one expects `d_nz` >> `o_nz`. 3140 3141 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()`, 3142 `MatGetOwnershipRange()`, `MatGetOwnershipRanges()`, `MatGetOwnershipRangeColumn()`, `MatGetOwnershipRangesColumn()`, `PetscLayout` 3143 @*/ 3144 PetscErrorCode MatCreateBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A) 3145 { 3146 PetscMPIInt size; 3147 3148 PetscFunctionBegin; 3149 PetscCall(MatCreate(comm, A)); 3150 PetscCall(MatSetSizes(*A, m, n, M, N)); 3151 PetscCallMPI(MPI_Comm_size(comm, &size)); 3152 if (size > 1) { 3153 PetscCall(MatSetType(*A, MATMPIBAIJ)); 3154 PetscCall(MatMPIBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz)); 3155 } else { 3156 PetscCall(MatSetType(*A, MATSEQBAIJ)); 3157 PetscCall(MatSeqBAIJSetPreallocation(*A, bs, d_nz, d_nnz)); 3158 } 3159 PetscFunctionReturn(PETSC_SUCCESS); 3160 } 3161 3162 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat) 3163 { 3164 Mat mat; 3165 Mat_MPIBAIJ *a, *oldmat = (Mat_MPIBAIJ *)matin->data; 3166 PetscInt len = 0; 3167 3168 PetscFunctionBegin; 3169 *newmat = NULL; 3170 PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat)); 3171 PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N)); 3172 PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name)); 3173 3174 PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap)); 3175 PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap)); 3176 if (matin->hash_active) { 3177 PetscCall(MatSetUp(mat)); 3178 } else { 3179 mat->factortype = matin->factortype; 3180 mat->preallocated = PETSC_TRUE; 3181 mat->assembled = PETSC_TRUE; 3182 mat->insertmode = NOT_SET_VALUES; 3183 3184 a = (Mat_MPIBAIJ *)mat->data; 3185 mat->rmap->bs = matin->rmap->bs; 3186 a->bs2 = oldmat->bs2; 3187 a->mbs = oldmat->mbs; 3188 a->nbs = oldmat->nbs; 3189 a->Mbs = oldmat->Mbs; 3190 a->Nbs = oldmat->Nbs; 3191 3192 a->size = oldmat->size; 3193 a->rank = oldmat->rank; 3194 a->donotstash = oldmat->donotstash; 3195 a->roworiented = oldmat->roworiented; 3196 a->rowindices = NULL; 3197 a->rowvalues = NULL; 3198 a->getrowactive = PETSC_FALSE; 3199 a->barray = NULL; 3200 a->rstartbs = oldmat->rstartbs; 3201 a->rendbs = oldmat->rendbs; 3202 a->cstartbs = oldmat->cstartbs; 3203 a->cendbs = oldmat->cendbs; 3204 3205 /* hash table stuff */ 3206 a->ht = NULL; 3207 a->hd = NULL; 3208 a->ht_size = 0; 3209 a->ht_flag = oldmat->ht_flag; 3210 a->ht_fact = oldmat->ht_fact; 3211 a->ht_total_ct = 0; 3212 a->ht_insert_ct = 0; 3213 3214 PetscCall(PetscArraycpy(a->rangebs, oldmat->rangebs, a->size + 1)); 3215 if (oldmat->colmap) { 3216 #if defined(PETSC_USE_CTABLE) 3217 PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap)); 3218 #else 3219 PetscCall(PetscMalloc1(a->Nbs, &a->colmap)); 3220 PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, a->Nbs)); 3221 #endif 3222 } else a->colmap = NULL; 3223 3224 if (oldmat->garray && (len = ((Mat_SeqBAIJ *)oldmat->B->data)->nbs)) { 3225 PetscCall(PetscMalloc1(len, &a->garray)); 3226 PetscCall(PetscArraycpy(a->garray, oldmat->garray, len)); 3227 } else 
a->garray = NULL; 3228 3229 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin), matin->rmap->bs, &mat->bstash)); 3230 PetscCall(VecDuplicate(oldmat->lvec, &a->lvec)); 3231 PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx)); 3232 3233 PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A)); 3234 PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B)); 3235 } 3236 PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist)); 3237 *newmat = mat; 3238 PetscFunctionReturn(PETSC_SUCCESS); 3239 } 3240 3241 /* Used for both MPIBAIJ and MPISBAIJ matrices */ 3242 PetscErrorCode MatLoad_MPIBAIJ_Binary(Mat mat, PetscViewer viewer) 3243 { 3244 PetscInt header[4], M, N, nz, bs, m, n, mbs, nbs, rows, cols, sum, i, j, k; 3245 PetscInt *rowidxs, *colidxs, rs, cs, ce; 3246 PetscScalar *matvals; 3247 3248 PetscFunctionBegin; 3249 PetscCall(PetscViewerSetUp(viewer)); 3250 3251 /* read in matrix header */ 3252 PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT)); 3253 PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file"); 3254 M = header[1]; 3255 N = header[2]; 3256 nz = header[3]; 3257 PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M); 3258 PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N); 3259 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIBAIJ"); 3260 3261 /* set block sizes from the viewer's .info file */ 3262 PetscCall(MatLoad_Binary_BlockSizes(mat, viewer)); 3263 /* set local sizes if not set already */ 3264 if (mat->rmap->n < 0 && M == N) mat->rmap->n = mat->cmap->n; 3265 if (mat->cmap->n < 0 && M == N) mat->cmap->n = mat->rmap->n; 3266 /* set global sizes if not set already */ 3267 if (mat->rmap->N < 0) mat->rmap->N = M; 3268 if (mat->cmap->N < 0) mat->cmap->N = N; 3269 PetscCall(PetscLayoutSetUp(mat->rmap)); 3270 PetscCall(PetscLayoutSetUp(mat->cmap)); 3271 3272 /* check if the matrix sizes are correct */ 3273 PetscCall(MatGetSize(mat, &rows, &cols)); 3274 PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols); 3275 PetscCall(MatGetBlockSize(mat, &bs)); 3276 PetscCall(MatGetLocalSize(mat, &m, &n)); 3277 PetscCall(PetscLayoutGetRange(mat->rmap, &rs, NULL)); 3278 PetscCall(PetscLayoutGetRange(mat->cmap, &cs, &ce)); 3279 mbs = m / bs; 3280 nbs = n / bs; 3281 3282 /* read in row lengths and build row indices */ 3283 PetscCall(PetscMalloc1(m + 1, &rowidxs)); 3284 PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT)); 3285 rowidxs[0] = 0; 3286 for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i]; 3287 PetscCall(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer))); 3288 PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum); 3289 3290 /* read in column indices and matrix values */ 3291 PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals)); 3292 PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, 
rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 3293 PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 3294 3295 { /* preallocate matrix storage */ 3296 PetscBT bt; /* helper bit set to count diagonal nonzeros */ 3297 PetscHSetI ht; /* helper hash set to count off-diagonal nonzeros */ 3298 PetscBool sbaij, done; 3299 PetscInt *d_nnz, *o_nnz; 3300 3301 PetscCall(PetscBTCreate(nbs, &bt)); 3302 PetscCall(PetscHSetICreate(&ht)); 3303 PetscCall(PetscCalloc2(mbs, &d_nnz, mbs, &o_nnz)); 3304 PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPISBAIJ, &sbaij)); 3305 for (i = 0; i < mbs; i++) { 3306 PetscCall(PetscBTMemzero(nbs, bt)); 3307 PetscCall(PetscHSetIClear(ht)); 3308 for (k = 0; k < bs; k++) { 3309 PetscInt row = bs * i + k; 3310 for (j = rowidxs[row]; j < rowidxs[row + 1]; j++) { 3311 PetscInt col = colidxs[j]; 3312 if (!sbaij || col >= row) { 3313 if (col >= cs && col < ce) { 3314 if (!PetscBTLookupSet(bt, (col - cs) / bs)) d_nnz[i]++; 3315 } else { 3316 PetscCall(PetscHSetIQueryAdd(ht, col / bs, &done)); 3317 if (done) o_nnz[i]++; 3318 } 3319 } 3320 } 3321 } 3322 } 3323 PetscCall(PetscBTDestroy(&bt)); 3324 PetscCall(PetscHSetIDestroy(&ht)); 3325 PetscCall(MatMPIBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz)); 3326 PetscCall(MatMPISBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz)); 3327 PetscCall(PetscFree2(d_nnz, o_nnz)); 3328 } 3329 3330 /* store matrix values */ 3331 for (i = 0; i < m; i++) { 3332 PetscInt row = rs + i, s = rowidxs[i], e = rowidxs[i + 1]; 3333 PetscUseTypeMethod(mat, setvalues, 1, &row, e - s, colidxs + s, matvals + s, INSERT_VALUES); 3334 } 3335 3336 PetscCall(PetscFree(rowidxs)); 3337 PetscCall(PetscFree2(colidxs, matvals)); 3338 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 3339 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 3340 PetscFunctionReturn(PETSC_SUCCESS); 3341 } 3342 3343 PetscErrorCode MatLoad_MPIBAIJ(Mat mat, PetscViewer viewer) 3344 { 3345 PetscBool isbinary; 3346 3347 PetscFunctionBegin; 3348 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 3349 PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name); 3350 PetscCall(MatLoad_MPIBAIJ_Binary(mat, viewer)); 3351 PetscFunctionReturn(PETSC_SUCCESS); 3352 } 3353 3354 /*@ 3355 MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the matrices hash table 3356 3357 Input Parameters: 3358 + mat - the matrix 3359 - fact - factor 3360 3361 Options Database Key: 3362 . 
-mat_use_hash_table <fact> - provide the factor 3363 3364 Level: advanced 3365 3366 .seealso: `Mat`, `MATMPIBAIJ`, `MatSetOption()` 3367 @*/ 3368 PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat, PetscReal fact) 3369 { 3370 PetscFunctionBegin; 3371 PetscTryMethod(mat, "MatSetHashTableFactor_C", (Mat, PetscReal), (mat, fact)); 3372 PetscFunctionReturn(PETSC_SUCCESS); 3373 } 3374 3375 PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat, PetscReal fact) 3376 { 3377 Mat_MPIBAIJ *baij; 3378 3379 PetscFunctionBegin; 3380 baij = (Mat_MPIBAIJ *)mat->data; 3381 baij->ht_fact = fact; 3382 PetscFunctionReturn(PETSC_SUCCESS); 3383 } 3384 3385 PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[]) 3386 { 3387 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 3388 PetscBool flg; 3389 3390 PetscFunctionBegin; 3391 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIBAIJ, &flg)); 3392 PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIBAIJ matrix as input"); 3393 if (Ad) *Ad = a->A; 3394 if (Ao) *Ao = a->B; 3395 if (colmap) *colmap = a->garray; 3396 PetscFunctionReturn(PETSC_SUCCESS); 3397 } 3398 3399 /* 3400 Special version for direct calls from Fortran (to eliminate two function call overheads) 3401 */ 3402 #if defined(PETSC_HAVE_FORTRAN_CAPS) 3403 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED 3404 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 3405 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked 3406 #endif 3407 3408 // PetscClangLinter pragma disable: -fdoc-synopsis-matching-symbol-name 3409 /*@C 3410 MatMPIBAIJSetValuesBlocked - Direct Fortran call that replaces a call to `MatSetValuesBlocked()` 3411 3412 Collective 3413 3414 Input Parameters: 3415 + matin - the matrix 3416 . min - number of input rows 3417 . im - input rows 3418 . nin - number of input columns 3419 . in - input columns 3420 . v - numerical values input 3421 - addvin - `INSERT_VALUES` or `ADD_VALUES` 3422 3423 Level: advanced 3424 3425 Developer Notes: 3426 This has a complete copy of `MatSetValuesBlocked_MPIBAIJ()`, which is terrible code un-reuse.
3427 3428 .seealso: `Mat`, `MatSetValuesBlocked()` 3429 @*/ 3430 PETSC_EXTERN PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin, PetscInt *min, const PetscInt im[], PetscInt *nin, const PetscInt in[], const MatScalar v[], InsertMode *addvin) 3431 { 3432 /* convert input arguments to C version */ 3433 Mat mat = *matin; 3434 PetscInt m = *min, n = *nin; 3435 InsertMode addv = *addvin; 3436 3437 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 3438 const MatScalar *value; 3439 MatScalar *barray = baij->barray; 3440 PetscBool roworiented = baij->roworiented; 3441 PetscInt i, j, ii, jj, row, col, rstart = baij->rstartbs; 3442 PetscInt rend = baij->rendbs, cstart = baij->cstartbs, stepval; 3443 PetscInt cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2; 3444 3445 PetscFunctionBegin; 3446 /* tasks normally handled by MatSetValuesBlocked() */ 3447 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 3448 else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values"); 3449 PetscCheck(!mat->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix"); 3450 if (mat->assembled) { 3451 mat->was_assembled = PETSC_TRUE; 3452 mat->assembled = PETSC_FALSE; 3453 } 3454 PetscCall(PetscLogEventBegin(MAT_SetValues, mat, 0, 0, 0)); 3455 3456 if (!barray) { 3457 PetscCall(PetscMalloc1(bs2, &barray)); 3458 baij->barray = barray; 3459 } 3460 3461 if (roworiented) stepval = (n - 1) * bs; 3462 else stepval = (m - 1) * bs; 3463 3464 for (i = 0; i < m; i++) { 3465 if (im[i] < 0) continue; 3466 PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large, row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1); 3467 if (im[i] >= rstart && im[i] < rend) { 3468 row = im[i] - rstart; 3469 for (j = 0; j < n; j++) { 3470 /* If NumCol = 1 then a copy is not required */ 3471 if ((roworiented) && (n == 1)) { 3472 barray = (MatScalar *)v + i * bs2; 3473 } else if ((!roworiented) && (m == 1)) { 3474 barray = (MatScalar *)v + j * bs2; 3475 } else { /* Here a copy is required */ 3476 if (roworiented) { 3477 value = v + i * (stepval + bs) * bs + j * bs; 3478 } else { 3479 value = v + j * (stepval + bs) * bs + i * bs; 3480 } 3481 for (ii = 0; ii < bs; ii++, value += stepval) { 3482 for (jj = 0; jj < bs; jj++) *barray++ = *value++; 3483 } 3484 barray -= bs2; 3485 } 3486 3487 if (in[j] >= cstart && in[j] < cend) { 3488 col = in[j] - cstart; 3489 PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j])); 3490 } else if (in[j] < 0) { 3491 continue; 3492 } else { 3493 PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large, col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1); 3494 if (mat->was_assembled) { 3495 if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 3496 3497 #if defined(PETSC_USE_DEBUG) 3498 #if defined(PETSC_USE_CTABLE) 3499 { 3500 PetscInt data; 3501 PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &data)); 3502 PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap"); 3503 } 3504 #else 3505 PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap"); 3506 #endif 3507 #endif 3508 #if defined(PETSC_USE_CTABLE) 3509 PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col)); 3510 col = (col - 1) / bs; 3511 #else 3512 col = (baij->colmap[in[j]] - 1) / bs; 3513 #endif 3514 if (col < 0 && 
!((Mat_SeqBAIJ *)baij->A->data)->nonew) { 3515 PetscCall(MatDisAssemble_MPIBAIJ(mat)); 3516 col = in[j]; 3517 } 3518 } else col = in[j]; 3519 PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j])); 3520 } 3521 } 3522 } else { 3523 if (!baij->donotstash) { 3524 if (roworiented) { 3525 PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 3526 } else { 3527 PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 3528 } 3529 } 3530 } 3531 } 3532 3533 /* task normally handled by MatSetValuesBlocked() */ 3534 PetscCall(PetscLogEventEnd(MAT_SetValues, mat, 0, 0, 0)); 3535 PetscFunctionReturn(PETSC_SUCCESS); 3536 } 3537 3538 /*@ 3539 MatCreateMPIBAIJWithArrays - creates a `MATMPIBAIJ` matrix using arrays that contain the local rows in standard block compressed row (CSR) format. 3540 3541 Collective 3542 3543 Input Parameters: 3544 + comm - MPI communicator 3545 . bs - the block size, only a block size of 1 is supported 3546 . m - number of local rows (Cannot be `PETSC_DECIDE`) 3547 . n - This value should be the same as the local size used in creating the 3548 x vector for the matrix-vector product $ y = Ax $. (or `PETSC_DECIDE` to have it 3549 calculated if `N` is given) For square matrices `n` is almost always `m`. 3550 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if `m` is given) 3551 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if `n` is given) 3552 . i - row indices; i[0] = 0 and i[row] = i[row-1] + the number of block elements in block row `row-1` of the matrix 3553 . j - column indices 3554 - a - matrix values 3555 3556 Output Parameter: 3557 . mat - the matrix 3558 3559 Level: intermediate 3560 3561 Notes: 3562 The `i`, `j`, and `a` arrays ARE copied by this routine into the internal format used by PETSc; 3563 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3564 called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays. 3565 3566 The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 3567 the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 3568 block, followed by the second column of the first block, and so on. That is, the blocks are contiguous in memory 3569 with column-major ordering within blocks. 3570 3571 The `i` and `j` indices are 0-based, and the `i` entries are offsets into the local `j` (and `a`) arrays.
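   For example, with `bs` = 1, a process that owns two rows whose global nonzero columns are {0, 5} and {3} would pass (the values shown are placeholders)
.vb
   i[] = {0, 2, 3};
   j[] = {0, 5, 3};
   a[] = {v00, v05, v13};
.ve
   with one bs*bs block of values in `a` for each entry of `j`.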
3572 3573 .seealso: `Mat`, `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 3574 `MATMPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()` 3575 @*/ 3576 PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat) 3577 { 3578 PetscFunctionBegin; 3579 PetscCheck(!i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 3580 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3581 PetscCall(MatCreate(comm, mat)); 3582 PetscCall(MatSetSizes(*mat, m, n, M, N)); 3583 PetscCall(MatSetType(*mat, MATMPIBAIJ)); 3584 PetscCall(MatSetBlockSize(*mat, bs)); 3585 PetscCall(MatSetUp(*mat)); 3586 PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_FALSE)); 3587 PetscCall(MatMPIBAIJSetPreallocationCSR(*mat, bs, i, j, a)); 3588 PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_TRUE)); 3589 PetscFunctionReturn(PETSC_SUCCESS); 3590 } 3591 3592 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 3593 { 3594 PetscInt m, N, i, rstart, nnz, Ii, bs, cbs; 3595 PetscInt *indx; 3596 PetscScalar *values; 3597 3598 PetscFunctionBegin; 3599 PetscCall(MatGetSize(inmat, &m, &N)); 3600 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 3601 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)inmat->data; 3602 PetscInt *dnz, *onz, mbs, Nbs, nbs; 3603 PetscInt *bindx, rmax = a->rmax, j; 3604 PetscMPIInt rank, size; 3605 3606 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 3607 mbs = m / bs; 3608 Nbs = N / cbs; 3609 if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnershipBlock(comm, cbs, &n, &N)); 3610 nbs = n / cbs; 3611 3612 PetscCall(PetscMalloc1(rmax, &bindx)); 3613 MatPreallocateBegin(comm, mbs, nbs, dnz, onz); /* inline function, output __end and __rstart are used below */ 3614 3615 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 3616 PetscCallMPI(MPI_Comm_size(comm, &size)); 3617 if (rank == size - 1) { 3618 /* Check sum(nbs) = Nbs */ 3619 PetscCheck(__end == Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT, __end, Nbs); 3620 } 3621 3622 rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */ 3623 for (i = 0; i < mbs; i++) { 3624 PetscCall(MatGetRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL)); /* non-blocked nnz and indx */ 3625 nnz = nnz / bs; 3626 for (j = 0; j < nnz; j++) bindx[j] = indx[j * bs] / bs; 3627 PetscCall(MatPreallocateSet(i + rstart, nnz, bindx, dnz, onz)); 3628 PetscCall(MatRestoreRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL)); 3629 } 3630 PetscCall(PetscFree(bindx)); 3631 3632 PetscCall(MatCreate(comm, outmat)); 3633 PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 3634 PetscCall(MatSetBlockSizes(*outmat, bs, cbs)); 3635 PetscCall(MatSetType(*outmat, MATBAIJ)); 3636 PetscCall(MatSeqBAIJSetPreallocation(*outmat, bs, 0, dnz)); 3637 PetscCall(MatMPIBAIJSetPreallocation(*outmat, bs, 0, dnz, 0, onz)); 3638 MatPreallocateEnd(dnz, onz); 3639 PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 3640 } 3641 3642 /* numeric phase */ 3643 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 3644 PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL)); 3645 3646 for (i = 0; i < m; i++) { 3647
PetscCall(MatGetRow_SeqBAIJ(inmat, i, &nnz, &indx, &values)); 3648 Ii = i + rstart; 3649 PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES)); 3650 PetscCall(MatRestoreRow_SeqBAIJ(inmat, i, &nnz, &indx, &values)); 3651 } 3652 PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY)); 3653 PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY)); 3654 PetscFunctionReturn(PETSC_SUCCESS); 3655 } 3656
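/*
  Typical use of the preallocation and assembly interface defined above. This is an illustrative
  sketch only: the communicator, block size, local sizes, preallocation counts, and inserted values
  are arbitrary choices, and the hash-table calls are optional.

    Mat             A;
    Mat             Ad, Ao;
    const PetscInt *garray;
    PetscInt        bs = 2, mlocal = 4, rstart, rend;
    PetscScalar     vals[4] = {1.0, 0.0, 0.0, 1.0};                   // one bs x bs block, stored column-major

    PetscCall(MatCreate(PETSC_COMM_WORLD, &A));
    PetscCall(MatSetSizes(A, mlocal, mlocal, PETSC_DETERMINE, PETSC_DETERMINE));
    PetscCall(MatSetType(A, MATMPIBAIJ));
    PetscCall(MatMPIBAIJSetPreallocation(A, bs, 2, NULL, 1, NULL));   // 2 diagonal and 1 off-diagonal block per block row
    PetscCall(MatSetOption(A, MAT_USE_HASH_TABLE, PETSC_TRUE));       // optional: hash-table based assembly
    PetscCall(MatMPIBAIJSetHashTableFactor(A, 1.6));                  // optional: oversize the hash table by this factor

    PetscCall(MatGetOwnershipRange(A, &rstart, &rend));
    for (PetscInt brow = rstart / bs; brow < rend / bs; brow++) {     // insert the diagonal block of each owned block row
      PetscCall(MatSetValuesBlocked(A, 1, &brow, 1, &brow, vals, INSERT_VALUES));
    }
    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));

    PetscCall(MatMPIBAIJGetSeqBAIJ(A, &Ad, &Ao, &garray));            // access the local diagonal/off-diagonal parts
    PetscCall(MatDestroy(&A));
*/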
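/*
  Loading a MATMPIBAIJ matrix from a PETSc binary file goes through MatLoad(), which dispatches to
  MatLoad_MPIBAIJ() above. A sketch only; "matrix.dat" stands in for an existing binary file.

    Mat         B;
    PetscViewer viewer;

    PetscCall(PetscViewerBinaryOpen(PETSC_COMM_WORLD, "matrix.dat", FILE_MODE_READ, &viewer));
    PetscCall(MatCreate(PETSC_COMM_WORLD, &B));
    PetscCall(MatSetType(B, MATMPIBAIJ));
    PetscCall(MatSetBlockSize(B, 2));   // optional: otherwise the block size is taken from the file's .info companion
    PetscCall(MatLoad(B, viewer));
    PetscCall(PetscViewerDestroy(&viewer));
    PetscCall(MatDestroy(&B));
*/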