1 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/ 2 3 #include <petsc/private/hashseti.h> 4 #include <petscblaslapack.h> 5 #include <petscsf.h> 6 7 #if defined(PETSC_HAVE_HYPRE) 8 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *); 9 #endif 10 11 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A, Vec v, PetscInt idx[]) { 12 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 13 PetscInt i, *idxb = NULL, m = A->rmap->n, bs = A->cmap->bs; 14 PetscScalar *va, *vv; 15 Vec vB, vA; 16 const PetscScalar *vb; 17 18 PetscFunctionBegin; 19 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vA)); 20 PetscCall(MatGetRowMaxAbs(a->A, vA, idx)); 21 22 PetscCall(VecGetArrayWrite(vA, &va)); 23 if (idx) { 24 for (i = 0; i < m; i++) { 25 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 26 } 27 } 28 29 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vB)); 30 PetscCall(PetscMalloc1(m, &idxb)); 31 PetscCall(MatGetRowMaxAbs(a->B, vB, idxb)); 32 33 PetscCall(VecGetArrayWrite(v, &vv)); 34 PetscCall(VecGetArrayRead(vB, &vb)); 35 for (i = 0; i < m; i++) { 36 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 37 vv[i] = vb[i]; 38 if (idx) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs); 39 } else { 40 vv[i] = va[i]; 41 if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > bs * a->garray[idxb[i] / bs] + (idxb[i] % bs)) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs); 42 } 43 } 44 PetscCall(VecRestoreArrayWrite(vA, &vv)); 45 PetscCall(VecRestoreArrayWrite(vA, &va)); 46 PetscCall(VecRestoreArrayRead(vB, &vb)); 47 PetscCall(PetscFree(idxb)); 48 PetscCall(VecDestroy(&vA)); 49 PetscCall(VecDestroy(&vB)); 50 PetscFunctionReturn(0); 51 } 52 53 PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat) { 54 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 55 56 PetscFunctionBegin; 57 PetscCall(MatStoreValues(aij->A)); 58 PetscCall(MatStoreValues(aij->B)); 59 PetscFunctionReturn(0); 60 } 61 62 PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat mat) 
{
  Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCall(MatRetrieveValues(aij->A));
  PetscCall(MatRetrieveValues(aij->B));
  PetscFunctionReturn(0);
}

/*
   Local utility routine that creates a mapping from the global column
   number to the local number in the off-diagonal part of the local
   storage of the matrix. This is done in a non scalable way since the
   length of colmap equals the global matrix length.
*/
PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat) {
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ *B = (Mat_SeqBAIJ *)baij->B->data;
  PetscInt     nbs = B->nbs, i, bs = mat->rmap->bs;

  PetscFunctionBegin;
#if defined(PETSC_USE_CTABLE)
  /* hash-table variant: maps global block column+1 -> local point column+1 */
  PetscCall(PetscTableCreate(baij->nbs, baij->Nbs + 1, &baij->colmap));
  for (i = 0; i < nbs; i++) PetscCall(PetscTableAdd(baij->colmap, baij->garray[i] + 1, i * bs + 1, INSERT_VALUES));
#else
  /* dense array variant: entry 0 means "column not present" (values stored shifted by +1) */
  PetscCall(PetscCalloc1(baij->Nbs + 1, &baij->colmap));
  PetscCall(PetscLogObjectMemory((PetscObject)mat, baij->Nbs * sizeof(PetscInt)));
  for (i = 0; i < nbs; i++) baij->colmap[baij->garray[i]] = i * bs + 1;
#endif
  PetscFunctionReturn(0);
}

/* Inserts a single scalar (point row/col, not block indices) into the diagonal block A.
   Binary search within the block row, then linear scan; reallocates on new nonzeros. */
#define MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, orow, ocol) \
  { \
    brow = row / bs; \
    rp = aj + ai[brow]; \
    ap = aa + bs2 * ai[brow]; \
    rmax = aimax[brow]; \
    nrow = ailen[brow]; \
    bcol = col / bs; \
    ridx = row % bs; \
    cidx = col % bs; \
    low = 0; \
    high = nrow; \
    while (high - low > 3) { \
      t = (low + high) / 2; \
      if (rp[t] > bcol) high = t; \
      else low = t; \
    } \
    for (_i = low; _i < high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2 * _i + bs * cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else *bap = value; \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    PetscCheck(a->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, aimax, a->nonew, MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
    PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
    rp[_i] = bcol; \
    ap[bs2 * _i + bs * cidx + ridx] = value; \
    a_noinsert:; \
    ailen[brow] = nrow; \
  }

/* Same as MatSetValues_SeqBAIJ_A_Private() but for the off-diagonal block B */
#define MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, orow, ocol) \
  { \
    brow = row / bs; \
    rp = bj + bi[brow]; \
    ap = ba + bs2 * bi[brow]; \
    rmax = bimax[brow]; \
    nrow = bilen[brow]; \
    bcol = col / bs; \
    ridx = row % bs; \
    cidx = col % bs; \
    low = 0; \
    high = nrow; \
    while (high - low > 3) { \
      t = (low + high) / 2; \
      if (rp[t] > bcol) high = t; \
      else low = t; \
    } \
    for (_i = low; _i < high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2 * _i + bs * cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value; \
        else *bap = value; \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    PetscCheck(b->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \
    MatSeqXAIJReallocateAIJ(B, b->mbs, bs2, nrow, brow, bcol, rmax, ba, bi, bj, rp, ap, bimax, b->nonew, MatScalar); \
    N = nrow++ - 1; \
    /* shift up all the later entries in this row */ \
    PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \
    PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \
    PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \
    rp[_i] = bcol; \
    ap[bs2 * _i + bs * cidx + ridx] = value; \
    b_noinsert:; \
    bilen[brow] = nrow; \
  }

/* MatSetValues for MPIBAIJ using point (scalar) indices.  Locally owned rows go
   into the diagonal (A) or off-diagonal (B) block; off-process rows are stashed
   for communication during assembly. */
PetscErrorCode MatSetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) {
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  MatScalar    value;
  PetscBool    roworiented = baij->roworiented;
  PetscInt     i, j, row, col;
  PetscInt     rstart_orig = mat->rmap->rstart;
  PetscInt     rend_orig = mat->rmap->rend, cstart_orig = mat->cmap->rstart;
  PetscInt     cend_orig = mat->cmap->rend, bs = mat->rmap->bs;

  /* Some Variables required in the macro */
  Mat          A = baij->A;
  Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)(A)->data;
  PetscInt    *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j;
  MatScalar   *aa = a->a;

  Mat          B = baij->B;
  Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)(B)->data;
  PetscInt    *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j;
  MatScalar   *ba = b->a;

  PetscInt  *rp, ii, nrow, _i, rmax, N, brow, bcol;
  PetscInt   low, high, t, ridx, cidx, bs2 = a->bs2;
  MatScalar *ap, *bap;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue; /* negative rows are ignored */
    PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    if (im[i] >= rstart_orig && im[i] < rend_orig) { /* locally owned row */
      row = im[i] - rstart_orig;
      for (j = 0; j < n; j++) {
        if (in[j] >= cstart_orig && in[j] < cend_orig) { /* diagonal block */
          col = in[j] - cstart_orig;
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, im[i], in[j]);
        } else if (in[j] < 0) {
          continue;
        } else {
          PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1);
          if (mat->was_assembled) {
            /* translate the global column to B's compressed local numbering */
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscTableFind(baij->colmap, in[j] / bs + 1, &col));
            col = col - 1;
#else
            col = baij->colmap[in[j] / bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqBAIJ *)(baij->B->data))->nonew) {
              /* column not yet present: disassemble so B uses global numbering again */
              PetscCall(MatDisAssemble_MPIBAIJ(mat));
              col = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B     = baij->B;
              b     = (Mat_SeqBAIJ *)(B)->data;
              bimax = b->imax;
              bi    = b->i;
              bilen = b->ilen;
              bj    = b->j;
              ba    = b->a;
            } else {
              PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
              col += in[j] % bs; /* add the point offset within the block */
            }
          } else col = in[j];
          if (roworiented) value = v[i * n + j];
          else value = v[i + j * m];
          MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, im[i], in[j]);
          /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */
        }
      }
    } else { /* off-process row: stash unless forbidden */
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        if (roworiented) {
          PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE));
        } else {
          PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE));
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

/* Inserts one bs-by-bs dense block at block position (row,col) of a SeqBAIJ matrix.
   Handles both row- and column-oriented input blocks. */
static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol) {
  Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data;
  PetscInt          *rp, low, high, t, ii, jj, nrow, i, rmax, N;
  PetscInt          *imax = a->imax, *ai = a->i, *ailen = a->ilen;
  PetscInt          *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs;
  PetscBool          roworiented = a->roworiented;
  const PetscScalar *value = v;
  MatScalar         *ap, *aa = a->a, *bap;

  PetscFunctionBegin;
  rp    = aj + ai[row];
  ap    = aa + bs2 * ai[row];
  rmax  = imax[row];
  nrow  = ailen[row];
  value = v;
  low   = 0;
  high  = nrow;
  /* binary search down to a small window, then linear scan */
  while (high - low > 7) {
    t = (low + high) / 2;
    if (rp[t] > col) high = t;
    else low = t;
  }
  for (i = low; i < high; i++) {
    if (rp[i] > col) break;
    if (rp[i] == col) { /* block already exists: insert/add in place */
      bap = ap + bs2 * i;
      if (roworiented) {
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++;
          }
        } else {
          for (ii = 0; ii < bs; ii++) {
            for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
          }
        }
      } else {
        if (is == ADD_VALUES) {
          for (ii = 0; ii < bs; ii++, value += bs) {
            for (jj = 0; jj < bs; jj++) bap[jj] += value[jj];
            bap += bs;
          }
        } else {
          for (ii = 0; ii < bs; ii++, value += bs) {
            for (jj = 0; jj < bs; jj++) bap[jj] = value[jj];
            bap += bs;
          }
        }
      }
      goto noinsert2;
    }
  }
  if (nonew == 1) goto noinsert2;
  PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol);
  MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar);
  N = nrow++ - 1;
  high++;
  /* shift up all the later entries in this row */
  PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1));
  PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1)));
  rp[i] = col;
  bap   = ap + bs2 * i;
  if (roworiented) {
    for (ii = 0; ii < bs; ii++) {
      for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++;
    }
  } else {
    for (ii = 0; ii < bs; ii++) {
      for (jj = 0; jj < bs; jj++) *bap++ = *value++;
    }
  }
noinsert2:;
  ailen[row] = nrow;
  PetscFunctionReturn(0);
}

/*
    This routine should be optimized so that the block copy at ** Here a copy is required ** below is not needed
    by passing additional stride information into the MatSetValuesBlocked_SeqBAIJ_Inlined() routine
*/
PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) {
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ *)mat->data;
  const PetscScalar *value;
  MatScalar         *barray = baij->barray;
  PetscBool          roworiented = baij->roworiented;
  PetscInt           i, j, ii, jj, row, col, rstart = baij->rstartbs;
  PetscInt           rend = baij->rendbs, cstart = baij->cstartbs, stepval;
  PetscInt           cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    /* scratch buffer for one bs2 block, kept on the matrix for reuse */
    PetscCall(PetscMalloc1(bs2, &barray));
    baij->barray = barray;
  }

  if (roworiented) stepval = (n - 1) * bs;
  else stepval = (m - 1) * bs;

  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue;
    PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
    if (im[i] >= rstart && im[i] < rend) { /* locally owned block row */
      row = im[i] - rstart;
      for (j = 0; j < n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar *)v + i * bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar *)v + j * bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + (i * (stepval + bs) + j) * bs;
          } else {
            value = v + (j * (stepval + bs) + i) * bs;
          }
          for (ii = 0; ii < bs; ii++, value += bs + stepval) {
            for (jj = 0; jj < bs; jj++) barray[jj] = value[jj];
            barray += bs;
          }
          barray -= bs2; /* rewind to the start of the copied block */
        }

        if (in[j] >= cstart && in[j] < cend) { /* diagonal block */
          col = in[j] - cstart;
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
        } else if (in[j] < 0) {
          continue;
        } else {
          PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
          if (mat->was_assembled) {
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));

#if defined(PETSC_USE_DEBUG)
#if defined(PETSC_USE_CTABLE)
            {
              PetscInt data;
              PetscCall(PetscTableFind(baij->colmap, in[j] + 1, &data));
              PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
            }
#else
            PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
#endif
#endif
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscTableFind(baij->colmap, in[j] + 1, &col));
            col = (col - 1) / bs; /* colmap stores point columns; convert back to block column */
#else
            col = (baij->colmap[in[j]] - 1) / bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ *)(baij->B->data))->nonew) {
              PetscCall(MatDisAssemble_MPIBAIJ(mat));
              col = in[j];
            } else PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new blocked indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]);
          } else col = in[j];
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
        }
      }
    } else { /* off-process block row: stash unless forbidden */
      PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        } else {
          PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

/* multiplicative hash using the golden-ratio constant */
#define HASH_KEY 0.6180339887
#define HASH(size, key, tmp) (tmp = (key)*HASH_KEY, (PetscInt)((size) * (tmp - (PetscInt)tmp)))
/* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */
/* MatSetValues via the precomputed hash table (nonzero pattern is frozen).
   Each (block row, block col) pair maps to the address of its bs2 block. */
PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) {
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  PetscBool    roworiented = baij->roworiented;
  PetscInt     i, j, row, col;
  PetscInt     rstart_orig = mat->rmap->rstart;
  PetscInt     rend_orig = mat->rmap->rend, Nbs = baij->Nbs;
  PetscInt     h1, key, size = baij->ht_size, bs = mat->rmap->bs, *HT = baij->ht, idx;
  PetscReal    tmp;
  MatScalar  **HD = baij->hd, value;
  PetscInt     total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (PetscDefined(USE_DEBUG)) {
      PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row");
      PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1);
    }
    row = im[i];
    if (row >= rstart_orig && row < rend_orig) {
      for (j = 0; j < n; j++) {
        col = in[j];
        if (roworiented) value = v[i * n + j];
        else value = v[i + j * m];
        /* Look up into the Hash Table */
        key = (row / bs) * Nbs + (col / bs) + 1;
        h1  = HASH(size, key, tmp);

        idx = h1;
        if (PetscDefined(USE_DEBUG)) {
          /* debug variant: also count probes for the search statistics */
          insert_ct++;
          total_ct++;
          if (HT[idx] != key) {
            /* linear probe forward, wrapping once at the end of the table */
            for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++)
              ;
            if (idx == size) {
              for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++)
                ;
              PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
            }
}
        } else if (HT[idx] != key) {
          /* non-debug probe: same wrap-around linear search without counters */
          for (idx = h1; (idx < size) && (HT[idx] != key); idx++)
            ;
          if (idx == size) {
            for (idx = 0; (idx < h1) && (HT[idx] != key); idx++)
              ;
            PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
          }
        }
        /* A HASH table entry is found, so insert the values at the correct address */
        if (addv == ADD_VALUES) *(HD[idx] + (col % bs) * bs + (row % bs)) += value;
        else *(HD[idx] + (col % bs) * bs + (row % bs)) = value;
      }
    } else if (!baij->donotstash) {
      if (roworiented) {
        PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE));
      } else {
        PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE));
      }
    }
  }
  if (PetscDefined(USE_DEBUG)) {
    baij->ht_total_ct += total_ct;
    baij->ht_insert_ct += insert_ct;
  }
  PetscFunctionReturn(0);
}

/* Blocked MatSetValues via the precomputed hash table; im[]/in[] are block indices
   and v supplies one bs2 dense block per (row,col) pair. */
PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) {
  Mat_MPIBAIJ       *baij = (Mat_MPIBAIJ *)mat->data;
  PetscBool          roworiented = baij->roworiented;
  PetscInt           i, j, ii, jj, row, col;
  PetscInt           rstart = baij->rstartbs;
  PetscInt           rend = mat->rmap->rend, stepval, bs = mat->rmap->bs, bs2 = baij->bs2, nbs2 = n * bs2;
  PetscInt           h1, key, size = baij->ht_size, idx, *HT = baij->ht, Nbs = baij->Nbs;
  PetscReal          tmp;
  MatScalar        **HD = baij->hd, *baij_a;
  const PetscScalar *v_t, *value;
  PetscInt           total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct;

  PetscFunctionBegin;
  if (roworiented) stepval = (n - 1) * bs;
  else stepval = (m - 1) * bs;

  for (i = 0; i < m; i++) {
    if (PetscDefined(USE_DEBUG)) {
      PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row: %" PetscInt_FMT, im[i]);
      PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
    }
    row = im[i];
    v_t = v + i * nbs2;
    if (row >= rstart && row < rend) {
      for (j = 0; j < n; j++) {
        col = in[j];

        /* Look up into the Hash Table */
        key = row * Nbs + col + 1;
        h1  = HASH(size, key, tmp);

        idx = h1;
        if (PetscDefined(USE_DEBUG)) {
          total_ct++;
          insert_ct++;
          if (HT[idx] != key) {
            /* linear probe forward, wrapping once at the end of the table */
            for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++)
              ;
            if (idx == size) {
              for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++)
                ;
              PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
            }
          }
        } else if (HT[idx] != key) {
          for (idx = h1; (idx < size) && (HT[idx] != key); idx++)
            ;
          if (idx == size) {
            for (idx = 0; (idx < h1) && (HT[idx] != key); idx++)
              ;
            PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col);
          }
        }
        baij_a = HD[idx];
        if (roworiented) {
          /*value = v + i*(stepval+bs)*bs + j*bs;*/
          /* value = v + (i*(stepval+bs)+j)*bs; */
          value = v_t;
          v_t += bs;
          if (addv == ADD_VALUES) {
            for (ii = 0; ii < bs; ii++, value += stepval) {
              for (jj = ii; jj < bs2; jj += bs) baij_a[jj] += *value++;
            }
          } else {
            for (ii = 0; ii < bs; ii++, value += stepval) {
              for (jj = ii; jj < bs2; jj += bs) baij_a[jj] = *value++;
            }
          }
        } else {
          value = v + j * (stepval + bs) * bs + i * bs;
          if (addv == ADD_VALUES) {
            for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) {
              for (jj = 0; jj < bs; jj++) baij_a[jj] += *value++;
            }
          } else {
            for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) {
              for (jj = 0; jj < bs; jj++) baij_a[jj] = *value++;
            }
          }
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        } else {
          PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        }
      }
    }
  }
  if (PetscDefined(USE_DEBUG)) {
    baij->ht_total_ct += total_ct;
    baij->ht_insert_ct += insert_ct;
  }
  PetscFunctionReturn(0);
}

/* Retrieves individual scalar entries; only locally owned rows are supported.
   Off-diagonal columns are translated through colmap; absent entries read as 0.0. */
PetscErrorCode MatGetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[]) {
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  PetscInt     bs = mat->rmap->bs, i, j, bsrstart = mat->rmap->rstart, bsrend = mat->rmap->rend;
  PetscInt     bscstart = mat->cmap->rstart, bscend = mat->cmap->rend, row, col, data;

  PetscFunctionBegin;
  for (i = 0; i < m; i++) {
    if (idxm[i] < 0) continue; /* negative row */
    PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j = 0; j < n; j++) {
        if (idxn[j] < 0) continue; /* negative column */
        PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          col = idxn[j] - bscstart;
          PetscCall(MatGetValues_SeqBAIJ(baij->A, 1, &row, 1, &col, v + i * n + j));
        } else {
          if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));
#if defined(PETSC_USE_CTABLE)
          PetscCall(PetscTableFind(baij->colmap, idxn[j] / bs + 1, &data));
          data--;
#else
          data = baij->colmap[idxn[j] / bs] - 1;
#endif
          if ((data < 0) || (baij->garray[data / bs] != idxn[j] / bs)) *(v + i * n + j) = 0.0;
          else {
            col = data + idxn[j] % bs;
            PetscCall(MatGetValues_SeqBAIJ(baij->B, 1, &row, 1, &col, v + i * n + j));
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

/* Computes matrix norms (Frobenius, 1-norm, infinity-norm) by combining the
   local diagonal and off-diagonal blocks and reducing across all ranks. */
PetscErrorCode MatNorm_MPIBAIJ(Mat mat, NormType type, PetscReal *nrm) {
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ *amat = (Mat_SeqBAIJ *)baij->A->data, *bmat = (Mat_SeqBAIJ *)baij->B->data;
  PetscInt     i, j, bs2 = baij->bs2, bs = baij->A->rmap->bs, nz, row, col;
  PetscReal    sum = 0.0;
  MatScalar   *v;

  PetscFunctionBegin;
  if (baij->size == 1) {
    /* single-rank case: whole matrix lives in the diagonal block */
    PetscCall(MatNorm(baij->A, type, nrm));
  } else {
    if (type == NORM_FROBENIUS) {
      v  = amat->a;
      nz = amat->nz * bs2;
      for (i = 0; i < nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      v  = bmat->a;
      nz = bmat->nz * bs2;
      for (i = 0; i < nz; i++) {
        sum += PetscRealPart(PetscConj(*v) * (*v));
        v++;
      }
      PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      *nrm = PetscSqrtReal(*nrm);
    } else if (type == NORM_1) { /* max column sum */
      PetscReal *tmp, *tmp2;
      PetscInt  *jj, *garray = baij->garray, cstart = baij->rstartbs;
      PetscCall(PetscCalloc1(mat->cmap->N, &tmp));
      PetscCall(PetscMalloc1(mat->cmap->N, &tmp2));
      v  = amat->a;
      jj = amat->j;
      for (i = 0; i < amat->nz; i++) {
        for (j = 0; j < bs; j++) {
          col = bs * (cstart + *jj) + j; /* column index */
          for (row = 0; row < bs; row++) {
            tmp[col] += PetscAbsScalar(*v);
            v++;
          }
        }
        jj++;
      }
      v  = bmat->a;
      jj = bmat->j;
      for (i = 0; i < bmat->nz; i++) {
        for (j = 0; j < bs; j++) {
          col = bs * garray[*jj] + j;
          for (row = 0; row < bs; row++) {
            tmp[col] += PetscAbsScalar(*v);
            v++;
          }
        }
        jj++;
      }
      PetscCall(MPIU_Allreduce(tmp, tmp2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat)));
      *nrm = 0.0;
      for (j = 0; j < mat->cmap->N; j++) {
        if (tmp2[j] > *nrm) *nrm = tmp2[j];
      }
      PetscCall(PetscFree(tmp));
      PetscCall(PetscFree(tmp2));
    } else if (type == NORM_INFINITY) { /* max row sum */
      PetscReal *sums;
      PetscCall(PetscMalloc1(bs, &sums));
      sum = 0.0;
      for (j = 0; j < amat->mbs; j++) {
        for (row = 0; row < bs; row++) sums[row] = 0.0;
        v  = amat->a + bs2 * amat->i[j];
        nz = amat->i[j + 1] - amat->i[j];
        for (i = 0; i < nz; i++) {
          for (col = 0; col < bs; col++) {
            for (row = 0; row < bs; row++) {
              sums[row] += PetscAbsScalar(*v);
              v++;
            }
          }
        }
        v  = bmat->a + bs2 * bmat->i[j];
        nz = bmat->i[j + 1] - bmat->i[j];
        for (i = 0; i < nz; i++) {
          for (col = 0; col < bs; col++) {
            for (row = 0; row < bs; row++) {
              sums[row] += PetscAbsScalar(*v);
              v++;
            }
          }
        }
        for (row = 0; row < bs; row++) {
          if (sums[row] > sum) sum = sums[row];
        }
      }
      PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat)));
      PetscCall(PetscFree(sums));
    } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for this norm yet");
  }
  PetscFunctionReturn(0);
}

/*
  Creates the hash table, and sets the table
  This table is created only once.
  If new entries need to be added to the matrix
  then the hash table has to be destroyed and
  recreated.
742 */ 743 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat, PetscReal factor) { 744 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 745 Mat A = baij->A, B = baij->B; 746 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data; 747 PetscInt i, j, k, nz = a->nz + b->nz, h1, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j; 748 PetscInt ht_size, bs2 = baij->bs2, rstart = baij->rstartbs; 749 PetscInt cstart = baij->cstartbs, *garray = baij->garray, row, col, Nbs = baij->Nbs; 750 PetscInt *HT, key; 751 MatScalar **HD; 752 PetscReal tmp; 753 #if defined(PETSC_USE_INFO) 754 PetscInt ct = 0, max = 0; 755 #endif 756 757 PetscFunctionBegin; 758 if (baij->ht) PetscFunctionReturn(0); 759 760 baij->ht_size = (PetscInt)(factor * nz); 761 ht_size = baij->ht_size; 762 763 /* Allocate Memory for Hash Table */ 764 PetscCall(PetscCalloc2(ht_size, &baij->hd, ht_size, &baij->ht)); 765 HD = baij->hd; 766 HT = baij->ht; 767 768 /* Loop Over A */ 769 for (i = 0; i < a->mbs; i++) { 770 for (j = ai[i]; j < ai[i + 1]; j++) { 771 row = i + rstart; 772 col = aj[j] + cstart; 773 774 key = row * Nbs + col + 1; 775 h1 = HASH(ht_size, key, tmp); 776 for (k = 0; k < ht_size; k++) { 777 if (!HT[(h1 + k) % ht_size]) { 778 HT[(h1 + k) % ht_size] = key; 779 HD[(h1 + k) % ht_size] = a->a + j * bs2; 780 break; 781 #if defined(PETSC_USE_INFO) 782 } else { 783 ct++; 784 #endif 785 } 786 } 787 #if defined(PETSC_USE_INFO) 788 if (k > max) max = k; 789 #endif 790 } 791 } 792 /* Loop Over B */ 793 for (i = 0; i < b->mbs; i++) { 794 for (j = bi[i]; j < bi[i + 1]; j++) { 795 row = i + rstart; 796 col = garray[bj[j]]; 797 key = row * Nbs + col + 1; 798 h1 = HASH(ht_size, key, tmp); 799 for (k = 0; k < ht_size; k++) { 800 if (!HT[(h1 + k) % ht_size]) { 801 HT[(h1 + k) % ht_size] = key; 802 HD[(h1 + k) % ht_size] = b->a + j * bs2; 803 break; 804 #if defined(PETSC_USE_INFO) 805 } else { 806 ct++; 807 #endif 808 } 809 } 810 #if defined(PETSC_USE_INFO) 811 if (k > max) max = k; 812 #endif 813 } 814 
}

/* Tail of the hash-table creation routine that begins above this chunk:
   report occupancy/search statistics when PETSc info logging is enabled. */
#if defined(PETSC_USE_INFO)
  for (i = 0, j = 0; i < ht_size; i++) {
    if (HT[i]) j++; /* count occupied hash slots */
  }
  PetscCall(PetscInfo(mat, "Average Search = %5.2g,max search = %" PetscInt_FMT "\n", (!j) ? (double)0.0 : (double)(((PetscReal)(ct + j)) / (double)j), max));
#endif
  PetscFunctionReturn(0);
}

/*
   MatAssemblyBegin_MPIBAIJ - Starts communicating stashed off-process entries.

   Kicks off the (possibly asynchronous) scatter of both the scalar stash and
   the block stash to their owning ranks; MatAssemblyEnd_MPIBAIJ() receives
   and inserts them. Does nothing when stashing is disabled.
*/
PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat, MatAssemblyType mode) {
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  PetscInt     nstash, reallocs;

  PetscFunctionBegin;
  /* nothing stashed (or stashing disabled) => no communication needed */
  if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
  PetscCall(MatStashScatterBegin_Private(mat, &mat->bstash, baij->rangebs));
  PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
  PetscCall(PetscInfo(mat, "Stash has %" PetscInt_FMT " entries,uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscCall(MatStashGetInfo_Private(&mat->bstash, &nstash, &reallocs));
  PetscCall(PetscInfo(mat, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscFunctionReturn(0);
}

/*
   MatAssemblyEnd_MPIBAIJ - Completes assembly: receives stashed entries,
   inserts them, assembles the diagonal (A) and off-diagonal (B) blocks,
   handles collective disassembly, and (optionally) builds the hash table
   used by the _HT MatSetValues variants.
*/
PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat, MatAssemblyType mode) {
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ *a    = (Mat_SeqBAIJ *)baij->A->data;
  PetscInt     i, j, rstart, ncols, flg, bs2 = baij->bs2;
  PetscInt    *row, *col;
  PetscBool    r1, r2, r3, other_disassembled;
  MatScalar   *val;
  PetscMPIInt  n;

  PetscFunctionBegin;
  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  if (!baij->donotstash && !mat->nooffprocentries) {
    /* drain the scalar stash: each received message is a run of (row,col,val) triples */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* Now assemble all these values with a single function call */
        PetscCall(MatSetValues_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->stash));
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ *)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;

    (((Mat_SeqBAIJ *)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->bstash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j - i;
        else ncols = n - i;
        /* blocks are bs2 scalars each, hence the val + i * bs2 offset */
        PetscCall(MatSetValuesBlocked_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i * bs2, mat->insertmode));
        i = j;
      }
    }
    PetscCall(MatStashScatterEnd_Private(&mat->bstash));

    baij->roworiented = r1;
    a->roworiented    = r2;

    ((Mat_SeqBAIJ *)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  PetscCall(MatAssemblyBegin(baij->A, mode));
  PetscCall(MatAssemblyEnd(baij->A, mode));

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ *)baij->B->data)->nonew) {
    /* collective agreement: MPI_LAND over was_assembled detects any disassembled rank */
    PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
    if (mat->was_assembled && !other_disassembled) PetscCall(MatDisAssemble_MPIBAIJ(mat));
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIBAIJ(mat));
  PetscCall(MatAssemblyBegin(baij->B, mode));
  PetscCall(MatAssemblyEnd(baij->B, mode));

#if defined(PETSC_USE_INFO)
  if (baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    PetscCall(PetscInfo(mat, "Average Hash Table Search in MatSetValues = %5.2f\n", (double)((PetscReal)baij->ht_total_ct) / baij->ht_insert_ct));

    baij->ht_total_ct  = 0;
    baij->ht_insert_ct = 0;
  }
#endif
  /* lazily build the hash table on final assembly and switch to the _HT setvalues kernels */
  if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
    PetscCall(MatCreateHashTable_MPIBAIJ_Private(mat, baij->ht_fact));

    mat->ops->setvalues        = MatSetValues_MPIBAIJ_HT;
    mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT;
  }

  PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));

  baij->rowvalues = NULL;

  /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ *)(baij->A->data))->nonew) {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat)));
  }
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatView_SeqBAIJ(Mat, PetscViewer);
#include <petscdraw.h>
/*
   MatView_MPIBAIJ_ASCIIorDraworSocket - Viewer backend for ASCII, draw, and
   socket viewers. Info formats print per-rank statistics; otherwise the whole
   matrix is gathered onto rank 0 and viewed there (continues below).
*/
static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer) {
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  PetscMPIInt       rank = baij->rank;
  PetscInt          bs   = mat->rmap->bs;
  PetscBool         iascii, isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  if (iascii) {
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* detailed info: per-rank nonzero counts for the whole matrix and both local blocks */
      MatInfo info;
      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                 mat->rmap->bs, (double)info.memory));
      PetscCall(MatGetInfo(baij->A, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(MatGetInfo(baij->B, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(baij->Mvctx, viewer));
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscCall(PetscViewerASCIIPrintf(viewer, " block size is %" PetscInt_FMT "\n", bs));
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      /* nothing to print for factor info */
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    Mat_SeqBAIJ *Aloc;
    PetscInt     M = mat->rmap->N, N = mat->cmap->N, *ai, *aj, col, i, j, k, *rvals, mbs = baij->mbs;
    MatScalar   *a;
    const char  *matname;

    /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
    if (rank == 0) {
      /* rank 0 owns every row of the gathered copy */
      PetscCall(MatSetSizes(A, M, N, M, N));
    } else {
      PetscCall(MatSetSizes(A, 0, 0, M, N));
    }
    PetscCall(MatSetType(A, MATMPIBAIJ));
    PetscCall(MatMPIBAIJSetPreallocation(A, mat->rmap->bs, 0, NULL, 0, NULL));
    PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));
    PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)A));

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ *)baij->A->data;
    ai   = Aloc->i;
    aj   = Aloc->j;
    a    = Aloc->a;
    PetscCall(PetscMalloc1(bs, &rvals));

    for (i = 0; i < mbs; i++) {
      /* rvals = the bs consecutive global rows of block-row i */
      rvals[0] = bs * (baij->rstartbs + i);
      for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
      for (j = ai[i]; j < ai[i + 1]; j++) {
        col = (baij->cstartbs + aj[j]) * bs;
        for (k = 0; k < bs; k++) {
          /* insert one column of the bs x bs block; values are stored column-wise */
          PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
          col++;
          a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ *)baij->B->data;
    ai   = Aloc->i;
    aj   = Aloc->j;
    a    = Aloc->a;
    for (i = 0; i < mbs; i++) {
      rvals[0] = bs * (baij->rstartbs + i);
      for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
      for (j = ai[i]; j < ai[i + 1]; j++) {
        /* garray maps local off-diagonal block column to global block column */
        col = baij->garray[aj[j]] * bs;
        for (k = 0; k < bs; k++) {
          PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
          col++;
          a += bs;
        }
      }
    }
    PetscCall(PetscFree(rvals));
    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(PetscObjectGetName((PetscObject)mat, &matname));
    if (rank == 0) {
      /* only rank 0 holds data; view its local (sequential) diagonal block */
      PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIBAIJ *)(A->data))->A, matname));
      PetscCall(MatView_SeqBAIJ(((Mat_MPIBAIJ *)(A->data))->A, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(PetscViewerFlush(viewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(0);
}

/* Used for both MPIBAIJ and MPISBAIJ matrices */
/*
   MatView_MPIBAIJ_Binary - Writes the matrix to a binary viewer in standard
   PETSc binary format (header, row lengths, column indices, values), with
   each rank's columns emitted in globally increasing order (continues below).
*/
PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat, PetscViewer viewer) {
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ    *A   = (Mat_SeqBAIJ *)aij->A->data;
  Mat_SeqBAIJ    *B   = (Mat_SeqBAIJ *)aij->B->data;
  const PetscInt *garray = aij->garray;
  PetscInt        header[4], M, N, m, rs, cs, bs, nz, cnt, i, j, ja, jb, k, l;
  PetscInt       *rowlens, *colidxs;
  PetscScalar    *matvals;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  M  = mat->rmap->N;
  N  = mat->cmap->N;
  m  = mat->rmap->n;
  rs = mat->rmap->rstart;
  cs = mat->cmap->rstart;
  bs = mat->rmap->bs;
  nz = bs * bs * (A->nz + B->nz); /* local scalar nonzeros */

  /* write matrix header */
  header[0] = MAT_FILE_CLASSID;
  header[1] = M;
  header[2] = N;
  header[3] = nz;
  /* global nonzero count only needed (and received) on rank 0 */
  PetscCallMPI(MPI_Reduce(&nz, &header[3], 1, MPIU_INT, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
  PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));

  /* fill in and store row lengths */
  PetscCall(PetscMalloc1(m, &rowlens));
  /* each of the bs scalar rows in block-row i has the same length */
  for (cnt = 0, i = 0; i < A->mbs; i++)
    for (j = 0; j < bs; j++) rowlens[cnt++] = bs * (A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i]);
  PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
  PetscCall(PetscFree(rowlens));

  /* fill in and store column indices */
  PetscCall(PetscMalloc1(nz, &colidxs));
  for (cnt = 0, i = 0; i < A->mbs; i++) {
    for (k = 0; k < bs; k++) {
      /* emit B columns left of the diagonal block, then A, then remaining B,
         so indices come out in globally increasing order */
      for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
        if (garray[B->j[jb]] > cs / bs) break;
        for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l;
      }
      for (ja = A->i[i]; ja < A->i[i + 1]; ja++)
        for (l = 0; l < bs; l++) colidxs[cnt++] = bs * A->j[ja] + l + cs;
      for (; jb < B->i[i + 1]; jb++)
        for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l;
    }
  }
  PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz);
  PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_INT));
  PetscCall(PetscFree(colidxs));

  /* fill in and store nonzero values */
  PetscCall(PetscMalloc1(nz, &matvals));
  for (cnt = 0, i = 0; i < A->mbs; i++) {
    for (k = 0; k < bs; k++) {
      /* same B-before/A/B-after ordering as the column indices above;
         blocks are stored column-major, hence the bs*(bs*j + l) + k indexing */
      for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
        if (garray[B->j[jb]] > cs / bs) break;
        for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k];
      }
      for (ja = A->i[i]; ja < A->i[i + 1]; ja++)
        for (l = 0; l < bs; l++) matvals[cnt++] = A->a[bs * (bs * ja + l) + k];
      for (; jb = jb, jb < B->i[i + 1]; jb++)
        for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k];
    }
  }
  PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_SCALAR));
  PetscCall(PetscFree(matvals));

  /* write block size option to the viewer's .info file */
  PetscCall(MatView_Binary_BlockSizes(mat, viewer));
  PetscFunctionReturn(0);
}

/*
   MatView_MPIBAIJ - Viewer dispatch for MPIBAIJ matrices: ASCII/draw/socket
   viewers share one backend; binary viewers use the binary writer above.
   Unrecognized viewer types are silently ignored.
*/
PetscErrorCode MatView_MPIBAIJ(Mat mat, PetscViewer viewer) {
  PetscBool iascii, isdraw, issocket, isbinary;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  if (iascii || isdraw || issocket) {
    PetscCall(MatView_MPIBAIJ_ASCIIorDraworSocket(mat, viewer));
  } else if (isbinary) PetscCall(MatView_MPIBAIJ_Binary(mat, viewer));
  PetscFunctionReturn(0);
}

/*
   MatDestroy_MPIBAIJ - Releases all storage owned by the MPIBAIJ
   implementation (stashes, local blocks, scatter context, hash table, ...)
   and clears the composed functions (continues below).
*/
PetscErrorCode MatDestroy_MPIBAIJ(Mat mat) {
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N);
#endif
  PetscCall(MatStashDestroy_Private(&mat->stash));
  PetscCall(MatStashDestroy_Private(&mat->bstash));
  PetscCall(MatDestroy(&baij->A));
  PetscCall(MatDestroy(&baij->B));
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscTableDestroy(&baij->colmap));
#else
  PetscCall(PetscFree(baij->colmap));
#endif
  PetscCall(PetscFree(baij->garray));
  PetscCall(VecDestroy(&baij->lvec));
  PetscCall(VecScatterDestroy(&baij->Mvctx));
  PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));
  PetscCall(PetscFree(baij->barray));
  PetscCall(PetscFree2(baij->hd, baij->ht));
  PetscCall(PetscFree(baij->rangebs));
  PetscCall(PetscFree(mat->data));

  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL)); 1192 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocation_C", NULL)); 1193 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocationCSR_C", NULL)); 1194 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL)); 1195 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetHashTableFactor_C", NULL)); 1196 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpisbaij_C", NULL)); 1197 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiadj_C", NULL)); 1198 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiaij_C", NULL)); 1199 #if defined(PETSC_HAVE_HYPRE) 1200 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_hypre_C", NULL)); 1201 #endif 1202 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_is_C", NULL)); 1203 PetscFunctionReturn(0); 1204 } 1205 1206 PetscErrorCode MatMult_MPIBAIJ(Mat A, Vec xx, Vec yy) { 1207 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1208 PetscInt nt; 1209 1210 PetscFunctionBegin; 1211 PetscCall(VecGetLocalSize(xx, &nt)); 1212 PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A and xx"); 1213 PetscCall(VecGetLocalSize(yy, &nt)); 1214 PetscCheck(nt == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible parition of A and yy"); 1215 PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1216 PetscCall((*a->A->ops->mult)(a->A, xx, yy)); 1217 PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1218 PetscCall((*a->B->ops->multadd)(a->B, a->lvec, yy, yy)); 1219 PetscFunctionReturn(0); 1220 } 1221 1222 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz) { 1223 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1224 1225 PetscFunctionBegin; 1226 
PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1227 PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz)); 1228 PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1229 PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz)); 1230 PetscFunctionReturn(0); 1231 } 1232 1233 PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A, Vec xx, Vec yy) { 1234 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1235 1236 PetscFunctionBegin; 1237 /* do nondiagonal part */ 1238 PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec)); 1239 /* do local part */ 1240 PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy)); 1241 /* add partial results together */ 1242 PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE)); 1243 PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE)); 1244 PetscFunctionReturn(0); 1245 } 1246 1247 PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz) { 1248 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1249 1250 PetscFunctionBegin; 1251 /* do nondiagonal part */ 1252 PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec)); 1253 /* do local part */ 1254 PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz)); 1255 /* add partial results together */ 1256 PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE)); 1257 PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE)); 1258 PetscFunctionReturn(0); 1259 } 1260 1261 /* 1262 This only works correctly for square matrices where the subblock A->A is the 1263 diagonal block 1264 */ 1265 PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A, Vec v) { 1266 PetscFunctionBegin; 1267 PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block"); 1268 PetscCall(MatGetDiagonal(((Mat_MPIBAIJ *)A->data)->A, v)); 1269 PetscFunctionReturn(0); 1270 } 1271 1272 PetscErrorCode MatScale_MPIBAIJ(Mat A, PetscScalar aa) { 1273 
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatScale(a->A, aa));
  PetscCall(MatScale(a->B, aa));
  PetscFunctionReturn(0);
}

/*
   MatGetRow_MPIBAIJ - Returns one locally owned (global) row as a merged,
   column-sorted view of the diagonal (A) and off-diagonal (B) blocks.
   Results point into scratch buffers owned by the matrix; the caller must
   call MatRestoreRow between successive rows.
*/
PetscErrorCode MatGetRow_MPIBAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) {
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data;
  PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
  PetscInt     bs = matin->rmap->bs, bs2 = mat->bs2, i, *cworkA, *cworkB, **pcA, **pcB;
  PetscInt     nztot, nzA, nzB, lrow, brstart = matin->rmap->rstart, brend = matin->rmap->rend;
  PetscInt    *cmap, *idx_p, cstart = mat->cstartbs;

  PetscFunctionBegin;
  PetscCheck(row >= brstart && row < brend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local rows");
  PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *)mat->A->data, *Ba = (Mat_SeqBAIJ *)mat->B->data;
    PetscInt     max = 1, mbs = mat->mbs, tmp;
    for (i = 0; i < mbs; i++) {
      /* total block count of block-row i across both blocks */
      tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    PetscCall(PetscMalloc2(max * bs2, &mat->rowvalues, max * bs2, &mat->rowindices));
  }
  lrow = row - brstart; /* local row index */

  /* request only what the caller asked for from the sequential getrow kernels */
  pvA = &vworkA;
  pcA = &cworkA;
  pvB = &vworkB;
  pcB = &cworkB;
  if (!v) {
    pvA = NULL;
    pvB = NULL;
  }
  if (!idx) {
    pcA = NULL;
    if (!v) pcB = NULL;
  }
  PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1; /* index of first B entry right of the diagonal block */
      if (v) {
        *v = v_p = mat->rowvalues;
        /* B entries left of the diagonal block come first */
        for (i = 0; i < nzB; i++) {
          if (cmap[cworkB[i] / bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
        for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
        } else {
          /* v was not requested, so imark has not been computed yet */
          for (i = 0; i < nzB; i++) {
            if (cmap[cworkB[i] / bs] < cstart) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
            else break;
          }
          imark = i;
        }
        for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart * bs + cworkA[i];
        for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
      }
    } else {
      if (idx) *idx = NULL;
      if (v) *v = NULL;
    }
  }
  *nz = nztot;
  PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
  PetscFunctionReturn(0);
}

/* MatRestoreRow_MPIBAIJ - Clears the active-row flag set by MatGetRow. */
PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v) {
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCheck(baij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow not called");
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/* MatZeroEntries_MPIBAIJ - Zeros all stored values in both local blocks. */
PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A) {
  Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatZeroEntries(l->A));
  PetscCall(MatZeroEntries(l->B));
  PetscFunctionReturn(0);
}

/*
   MatGetInfo_MPIBAIJ - Sums the info of the two local blocks, then either
   reports the local totals or reduces them (max or sum) over the communicator
   (continues below).
*/
PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin, MatInfoType flag, MatInfo *info) {
  Mat_MPIBAIJ   *a = (Mat_MPIBAIJ *)matin->data;
  Mat            A = a->A, B = a->B;
  PetscLogDouble isend[5], irecv[5];

  PetscFunctionBegin;
  info->block_size = (PetscReal)matin->rmap->bs;

  PetscCall(MatGetInfo(A, MAT_LOCAL, info));

  isend[0] = info->nz_used;
  isend[1] = info->nz_allocated;
  isend[2] = info->nz_unneeded;
  isend[3] = info->memory;
  isend[4] = info->mallocs;

  PetscCall(MatGetInfo(B, MAT_LOCAL, info));

  isend[0] += info->nz_used;
  isend[1] += info->nz_allocated;
  isend[2] += info->nz_unneeded;
  isend[3] += info->memory;
  isend[4] += info->mallocs;

  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_ARG_WRONG, "Unknown MatInfoType argument %d", (int)flag);
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

/*
   MatSetOption_MPIBAIJ - Applies a matrix option: most options are forwarded
   to both local blocks; a few (stashing, hash table) only change flags on the
   parallel object.
*/
PetscErrorCode MatSetOption_MPIBAIJ(Mat A, MatOption op, PetscBool flg) {
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    /* forwarded verbatim to both sequential blocks */
    MatCheckPreallocated(A, 1);
    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A, 1);
    a->roworiented = flg;

    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_SORTED_FULL: PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op])); break;
  case MAT_IGNORE_OFF_PROC_ENTRIES: a->donotstash = flg; break;
  case MAT_USE_HASH_TABLE:
    a->ht_flag = flg;
    a->ht_fact = 1.39; /* hash table sizing factor; table is built at final assembly */
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SUBMAT_SINGLEIS:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
  case MAT_SPD_ETERNAL:
    /* if the diagonal matrix is square it inherits some of the properties above */
    break;
  default: SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "unknown option %d", op);
  }
  PetscFunctionReturn(0);
}

/*
   MatTranspose_MPIBAIJ - Forms A^T by inserting each bs x bs block of both
   local parts, column by column, into a new (or reused) matrix; no
   preallocation information is available for the transpose (continues below).
*/
PetscErrorCode MatTranspose_MPIBAIJ(Mat A, MatReuse reuse, Mat *matout) {
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)A->data;
  Mat_SeqBAIJ *Aloc;
  Mat          B;
  PetscInt     M = A->rmap->N, N = A->cmap->N, *ai, *aj, i, *rvals, j, k, col;
  PetscInt     bs = A->rmap->bs, mbs = baij->mbs;
  MatScalar   *a;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
    PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
    /* Do not know preallocation information, but must set block size */
    PetscCall(MatMPIBAIJSetPreallocation(B, A->rmap->bs, PETSC_DECIDE, NULL, PETSC_DECIDE, NULL));
  } else {
    B = *matout;
  }

  /* copy over the A part */
  Aloc = (Mat_SeqBAIJ *)baij->A->data;
  ai   = Aloc->i;
  aj   = Aloc->j;
  a    = Aloc->a;
  PetscCall(PetscMalloc1(bs, &rvals));

  for (i = 0; i < mbs; i++) {
    /* rvals = the bs global rows of block-row i; inserted as COLUMNS of B */
    rvals[0] = bs * (baij->rstartbs + i);
    for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
    for (j = ai[i]; j < ai[i + 1]; j++) {
      col = (baij->cstartbs + aj[j]) * bs;
      for (k = 0; k < bs; k++) {
        PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES));

        col++;
        a += bs;
      }
    }
  }
  /* copy over the B part */
  Aloc = (Mat_SeqBAIJ *)baij->B->data;
  ai   = Aloc->i;
  aj   = Aloc->j;
  a    = Aloc->a;
  for (i = 0; i < mbs; i++) {
    rvals[0] = bs * (baij->rstartbs + i);
    for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
    for (j = ai[i]; j < ai[i + 1]; j++) {
      col = baij->garray[aj[j]] * bs; /* global column of off-diagonal block */
      for (k = 0; k < bs; k++) {
        PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES));
        col++;
        a += bs;
      }
    }
  }
  PetscCall(PetscFree(rvals));
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));

  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) *matout = B;
  else PetscCall(MatHeaderMerge(A, &B)); /* MAT_INPLACE_MATRIX: replace A's innards with B's */
  PetscFunctionReturn(0);
}

/*
   MatDiagonalScale_MPIBAIJ - A = diag(ll)*A*diag(rr). The right-scaling of
   the off-diagonal block needs ghost values of rr, so the scatter is started
   first and overlapped with the left-scaling and diagonal-block work.
*/
PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat, Vec ll, Vec rr) {
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat          a = baij->A, b = baij->B;
  PetscInt     s1, s2, s3;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(mat, &s2, &s3));
  if (rr) {
    PetscCall(VecGetLocalSize(rr, &s1));
    PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size");
    /* Overlap communication with computation. */
    PetscCall(VecScatterBegin(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
  }
  if (ll) {
    PetscCall(VecGetLocalSize(ll, &s1));
    PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size");
    PetscUseTypeMethod(b, diagonalscale, ll, NULL);
  }
  /* scale the diagonal block */
  PetscUseTypeMethod(a, diagonalscale, ll, rr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    PetscCall(VecScatterEnd(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscUseTypeMethod(b, diagonalscale, NULL, baij->lvec);
  }
  PetscFunctionReturn(0);
}

/*
   MatZeroRows_MPIBAIJ - Zeros the (globally numbered) rows listed in rows[],
   optionally placing diag on the diagonal and fixing b so that x stays a
   solution for the zeroed rows (b_i = diag * x_i).
*/
PetscErrorCode MatZeroRows_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) {
  Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data;
  PetscInt    *lrows;
  PetscInt     r, len;
  PetscBool    cong;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows));
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArray(b, &bb));
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArray(b, &bb));
  }

  /* actually zap the local rows */
  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
  PetscCall(MatZeroRows_SeqBAIJ(l->B, len, lrows, 0.0, NULL, NULL));
  PetscCall(MatHasCongruentLayouts(A, &cong));
  if ((diag != 0.0) && cong) {
    /* square/congruent layout: the sequential kernel can set the diagonal directly */
    PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, diag, NULL, NULL));
  } else if (diag != 0.0) {
    /* non-congruent layouts: diagonal entries may fall outside l->A, use MatSetValues */
    PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL));
    PetscCheck(!((Mat_SeqBAIJ *)l->A->data)->nonew, PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
       MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
    }
    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
  } else {
    PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL));
  }
  PetscCall(PetscFree(lrows));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqBAIJ *)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(0);
}

/*
   MatZeroRowsColumns_MPIBAIJ - Zeros both the rows and the columns listed in
   rows[] (global numbering). Ownership of the requested rows is resolved with
   a PetscSF reduction; the diagonal part is delegated to the sequential
   MatZeroRowsColumns, and the off-diagonal part is masked out manually
   (continues below).
*/
PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b) {
  Mat_MPIBAIJ       *l = (Mat_MPIBAIJ *)A->data;
  PetscMPIInt        n = A->rmap->n, p = 0;
  PetscInt           i, j, k, r, len = 0, row, col, count;
  PetscInt          *lrows, *owners = A->rmap->range;
  PetscSFNode       *rrows;
  PetscSF            sf;
  const PetscScalar *xx;
  PetscScalar       *bb, *mask;
  Vec                xmask, lmask;
  Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ *)l->B->data;
  PetscInt           bs = A->rmap->bs, bs2 = baij->bs2;
  PetscScalar       *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  PetscCall(PetscMalloc1(n, &lrows));
  for (r = 0; r < n; ++r) lrows[r] = -1;
  PetscCall(PetscMalloc1(N, &rrows));
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N);
    if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */
      PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p));
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
  /* Collect flags for rows to be zeroed */
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFDestroy(&sf));
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r)
    if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b));
  /* handle
off diagonal part of matrix */ 1668 PetscCall(MatCreateVecs(A, &xmask, NULL)); 1669 PetscCall(VecDuplicate(l->lvec, &lmask)); 1670 PetscCall(VecGetArray(xmask, &bb)); 1671 for (i = 0; i < len; i++) bb[lrows[i]] = 1; 1672 PetscCall(VecRestoreArray(xmask, &bb)); 1673 PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD)); 1674 PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD)); 1675 PetscCall(VecDestroy(&xmask)); 1676 if (x) { 1677 PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1678 PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1679 PetscCall(VecGetArrayRead(l->lvec, &xx)); 1680 PetscCall(VecGetArray(b, &bb)); 1681 } 1682 PetscCall(VecGetArray(lmask, &mask)); 1683 /* remove zeroed rows of off diagonal matrix */ 1684 for (i = 0; i < len; ++i) { 1685 row = lrows[i]; 1686 count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs; 1687 aa = ((MatScalar *)(baij->a)) + baij->i[row / bs] * bs2 + (row % bs); 1688 for (k = 0; k < count; ++k) { 1689 aa[0] = 0.0; 1690 aa += bs; 1691 } 1692 } 1693 /* loop over all elements of off process part of matrix zeroing removed columns*/ 1694 for (i = 0; i < l->B->rmap->N; ++i) { 1695 row = i / bs; 1696 for (j = baij->i[row]; j < baij->i[row + 1]; ++j) { 1697 for (k = 0; k < bs; ++k) { 1698 col = bs * baij->j[j] + k; 1699 if (PetscAbsScalar(mask[col])) { 1700 aa = ((MatScalar *)(baij->a)) + j * bs2 + (i % bs) + bs * k; 1701 if (x) bb[i] -= aa[0] * xx[col]; 1702 aa[0] = 0.0; 1703 } 1704 } 1705 } 1706 } 1707 if (x) { 1708 PetscCall(VecRestoreArray(b, &bb)); 1709 PetscCall(VecRestoreArrayRead(l->lvec, &xx)); 1710 } 1711 PetscCall(VecRestoreArray(lmask, &mask)); 1712 PetscCall(VecDestroy(&lmask)); 1713 PetscCall(PetscFree(lrows)); 1714 1715 /* only change matrix nonzero state if pattern was allowed to be changed */ 1716 if (!((Mat_SeqBAIJ *)(l->A->data))->keepnonzeropattern) { 1717 PetscObjectState state = l->A->nonzerostate 
+ l->B->nonzerostate;
    /* nonzero state must be identical on every rank, so sum the per-rank states */
    PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(0);
}

/* Marks the matrix as unfactored; only the diagonal block carries factorization state */
PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A) {
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatSetUnfactored(a->A));
  PetscFunctionReturn(0);
}

/* forward declaration; definition appears later in this file */
static PetscErrorCode MatDuplicate_MPIBAIJ(Mat, MatDuplicateOption, Mat *);

/*
  Compares A and B for equality by comparing the local diagonal (A) and
  off-diagonal (B) blocks on each rank, then reducing with a logical AND
  so *flag is PETSC_TRUE only if every rank's local blocks match.
*/
PetscErrorCode MatEqual_MPIBAIJ(Mat A, Mat B, PetscBool *flag) {
  Mat_MPIBAIJ *matB = (Mat_MPIBAIJ *)B->data, *matA = (Mat_MPIBAIJ *)A->data;
  Mat          a, b, c, d;
  PetscBool    flg;

  PetscFunctionBegin;
  a = matA->A;
  b = matA->B;
  c = matB->A;
  d = matB->B;

  PetscCall(MatEqual(a, c, &flg));
  if (flg) PetscCall(MatEqual(b, d, &flg)); /* skip off-diagonal comparison once inequality is known */
  PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A)));
  PetscFunctionReturn(0);
}

/* Copies A into B, using the fast per-block path only when it is known to be safe */
PetscErrorCode MatCopy_MPIBAIJ(Mat A, Mat B, MatStructure str) {
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
  Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data;

  PetscFunctionBegin;
  /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. 
*/
  if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
    /* slow general path: copies entry by entry */
    PetscCall(MatCopy_Basic(A, B, str));
  } else {
    /* fast path: same nonzero pattern, so copy the sequential sub-blocks directly */
    PetscCall(MatCopy(a->A, b->A, str));
    PetscCall(MatCopy(a->B, b->B, str));
  }
  PetscCall(PetscObjectStateIncrease((PetscObject)B));
  PetscFunctionReturn(0);
}

/* Default setup: preallocate with default nonzero estimates if the user gave none */
PetscErrorCode MatSetUp_MPIBAIJ(Mat A) {
  PetscFunctionBegin;
  PetscCall(MatMPIBAIJSetPreallocation(A, A->rmap->bs, PETSC_DEFAULT, NULL, PETSC_DEFAULT, NULL));
  PetscFunctionReturn(0);
}

/*
  Computes per-block-row nonzero counts (nnz) for the union of the sparsity
  patterns of X and Y, used to preallocate the result of Y += a*X when the
  patterns differ. yltog/xltog map local off-diagonal column indices to global.
*/
PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz) {
  PetscInt     bs = Y->rmap->bs, m = Y->rmap->N / bs;
  Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data;
  Mat_SeqBAIJ *y = (Mat_SeqBAIJ *)Y->data;

  PetscFunctionBegin;
  PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz));
  PetscFunctionReturn(0);
}

/*
  Y = a*X + Y.  Three cases:
  - SAME_NONZERO_PATTERN: the stored values align one-to-one, so a raw BLAS
    axpy over the value arrays of both sub-blocks suffices.
  - SUBSET_NONZERO_PATTERN: X's nonzeros all exist in Y; use the basic kernel.
  - otherwise: build a new matrix B preallocated for the union pattern,
    accumulate into it, and replace Y's internals with B's (MatHeaderMerge).
*/
PetscErrorCode MatAXPY_MPIBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str) {
  Mat_MPIBAIJ *xx = (Mat_MPIBAIJ *)X->data, *yy = (Mat_MPIBAIJ *)Y->data;
  PetscBLASInt bnz, one = 1;
  Mat_SeqBAIJ *x, *y;
  PetscInt     bs2 = Y->rmap->bs * Y->rmap->bs; /* scalar entries per block */

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscScalar alpha = a;
    /* diagonal block: y->a += alpha * x->a over all bs2-sized blocks */
    x = (Mat_SeqBAIJ *)xx->A->data;
    y = (Mat_SeqBAIJ *)yy->A->data;
    PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz));
    PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one));
    /* off-diagonal block */
    x = (Mat_SeqBAIJ *)xx->B->data;
    y = (Mat_SeqBAIJ *)yy->B->data;
    PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz));
    PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one));
    PetscCall(PetscObjectStateIncrease((PetscObject)Y));
  } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */
    PetscCall(MatAXPY_Basic(Y, a, X, str));
  } else {
    Mat      B;
    PetscInt
*nnz_d, *nnz_o, bs = Y->rmap->bs; 1805 PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d)); 1806 PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o)); 1807 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B)); 1808 PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name)); 1809 PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N)); 1810 PetscCall(MatSetBlockSizesFromMats(B, Y, Y)); 1811 PetscCall(MatSetType(B, MATMPIBAIJ)); 1812 PetscCall(MatAXPYGetPreallocation_SeqBAIJ(yy->A, xx->A, nnz_d)); 1813 PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o)); 1814 PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, nnz_d, 0, nnz_o)); 1815 /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! */ 1816 PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str)); 1817 PetscCall(MatHeaderMerge(Y, &B)); 1818 PetscCall(PetscFree(nnz_d)); 1819 PetscCall(PetscFree(nnz_o)); 1820 } 1821 PetscFunctionReturn(0); 1822 } 1823 1824 PetscErrorCode MatConjugate_MPIBAIJ(Mat mat) { 1825 PetscFunctionBegin; 1826 if (PetscDefined(USE_COMPLEX)) { 1827 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)mat->data; 1828 1829 PetscCall(MatConjugate_SeqBAIJ(a->A)); 1830 PetscCall(MatConjugate_SeqBAIJ(a->B)); 1831 } 1832 PetscFunctionReturn(0); 1833 } 1834 1835 PetscErrorCode MatRealPart_MPIBAIJ(Mat A) { 1836 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1837 1838 PetscFunctionBegin; 1839 PetscCall(MatRealPart(a->A)); 1840 PetscCall(MatRealPart(a->B)); 1841 PetscFunctionReturn(0); 1842 } 1843 1844 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A) { 1845 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1846 1847 PetscFunctionBegin; 1848 PetscCall(MatImaginaryPart(a->A)); 1849 PetscCall(MatImaginaryPart(a->B)); 1850 PetscFunctionReturn(0); 1851 } 1852 1853 PetscErrorCode MatCreateSubMatrix_MPIBAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat) { 1854 IS iscol_local; 1855 PetscInt csize; 1856 1857 PetscFunctionBegin; 1858 
PetscCall(ISGetLocalSize(iscol, &csize)); 1859 if (call == MAT_REUSE_MATRIX) { 1860 PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local)); 1861 PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 1862 } else { 1863 PetscCall(ISAllGather(iscol, &iscol_local)); 1864 } 1865 PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, isrow, iscol_local, csize, call, newmat)); 1866 if (call == MAT_INITIAL_MATRIX) { 1867 PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local)); 1868 PetscCall(ISDestroy(&iscol_local)); 1869 } 1870 PetscFunctionReturn(0); 1871 } 1872 1873 /* 1874 Not great since it makes two copies of the submatrix, first an SeqBAIJ 1875 in local and then by concatenating the local matrices the end result. 1876 Writing it directly would be much like MatCreateSubMatrices_MPIBAIJ(). 1877 This routine is used for BAIJ and SBAIJ matrices (unfortunate dependency). 1878 */ 1879 PetscErrorCode MatCreateSubMatrix_MPIBAIJ_Private(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat) { 1880 PetscMPIInt rank, size; 1881 PetscInt i, m, n, rstart, row, rend, nz, *cwork, j, bs; 1882 PetscInt *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal; 1883 Mat M, Mreuse; 1884 MatScalar *vwork, *aa; 1885 MPI_Comm comm; 1886 IS isrow_new, iscol_new; 1887 Mat_SeqBAIJ *aij; 1888 1889 PetscFunctionBegin; 1890 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 1891 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 1892 PetscCallMPI(MPI_Comm_size(comm, &size)); 1893 /* The compression and expansion should be avoided. 
Doesn't point 1894 out errors, might change the indices, hence buggey */ 1895 PetscCall(ISCompressIndicesGeneral(mat->rmap->N, mat->rmap->n, mat->rmap->bs, 1, &isrow, &isrow_new)); 1896 PetscCall(ISCompressIndicesGeneral(mat->cmap->N, mat->cmap->n, mat->cmap->bs, 1, &iscol, &iscol_new)); 1897 1898 if (call == MAT_REUSE_MATRIX) { 1899 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse)); 1900 PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 1901 PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_REUSE_MATRIX, &Mreuse)); 1902 } else { 1903 PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_INITIAL_MATRIX, &Mreuse)); 1904 } 1905 PetscCall(ISDestroy(&isrow_new)); 1906 PetscCall(ISDestroy(&iscol_new)); 1907 /* 1908 m - number of local rows 1909 n - number of columns (same on all processors) 1910 rstart - first row in new global matrix generated 1911 */ 1912 PetscCall(MatGetBlockSize(mat, &bs)); 1913 PetscCall(MatGetSize(Mreuse, &m, &n)); 1914 m = m / bs; 1915 n = n / bs; 1916 1917 if (call == MAT_INITIAL_MATRIX) { 1918 aij = (Mat_SeqBAIJ *)(Mreuse)->data; 1919 ii = aij->i; 1920 jj = aij->j; 1921 1922 /* 1923 Determine the number of non-zeros in the diagonal and off-diagonal 1924 portions of the matrix in order to do correct preallocation 1925 */ 1926 1927 /* first get start and end of "diagonal" columns */ 1928 if (csize == PETSC_DECIDE) { 1929 PetscCall(ISGetSize(isrow, &mglobal)); 1930 if (mglobal == n * bs) { /* square matrix */ 1931 nlocal = m; 1932 } else { 1933 nlocal = n / size + ((n % size) > rank); 1934 } 1935 } else { 1936 nlocal = csize / bs; 1937 } 1938 PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm)); 1939 rstart = rend - nlocal; 1940 PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number 
of columns %" PetscInt_FMT, rend, n); 1941 1942 /* next, compute all the lengths */ 1943 PetscCall(PetscMalloc2(m + 1, &dlens, m + 1, &olens)); 1944 for (i = 0; i < m; i++) { 1945 jend = ii[i + 1] - ii[i]; 1946 olen = 0; 1947 dlen = 0; 1948 for (j = 0; j < jend; j++) { 1949 if (*jj < rstart || *jj >= rend) olen++; 1950 else dlen++; 1951 jj++; 1952 } 1953 olens[i] = olen; 1954 dlens[i] = dlen; 1955 } 1956 PetscCall(MatCreate(comm, &M)); 1957 PetscCall(MatSetSizes(M, bs * m, bs * nlocal, PETSC_DECIDE, bs * n)); 1958 PetscCall(MatSetType(M, ((PetscObject)mat)->type_name)); 1959 PetscCall(MatMPIBAIJSetPreallocation(M, bs, 0, dlens, 0, olens)); 1960 PetscCall(MatMPISBAIJSetPreallocation(M, bs, 0, dlens, 0, olens)); 1961 PetscCall(PetscFree2(dlens, olens)); 1962 } else { 1963 PetscInt ml, nl; 1964 1965 M = *newmat; 1966 PetscCall(MatGetLocalSize(M, &ml, &nl)); 1967 PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request"); 1968 PetscCall(MatZeroEntries(M)); 1969 /* 1970 The next two lines are needed so we may call MatSetValues_MPIAIJ() below directly, 1971 rather than the slower MatSetValues(). 
1972 */ 1973 M->was_assembled = PETSC_TRUE; 1974 M->assembled = PETSC_FALSE; 1975 } 1976 PetscCall(MatSetOption(M, MAT_ROW_ORIENTED, PETSC_FALSE)); 1977 PetscCall(MatGetOwnershipRange(M, &rstart, &rend)); 1978 aij = (Mat_SeqBAIJ *)(Mreuse)->data; 1979 ii = aij->i; 1980 jj = aij->j; 1981 aa = aij->a; 1982 for (i = 0; i < m; i++) { 1983 row = rstart / bs + i; 1984 nz = ii[i + 1] - ii[i]; 1985 cwork = jj; 1986 jj += nz; 1987 vwork = aa; 1988 aa += nz * bs * bs; 1989 PetscCall(MatSetValuesBlocked_MPIBAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES)); 1990 } 1991 1992 PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY)); 1993 PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY)); 1994 *newmat = M; 1995 1996 /* save submatrix used in processor for next request */ 1997 if (call == MAT_INITIAL_MATRIX) { 1998 PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse)); 1999 PetscCall(PetscObjectDereference((PetscObject)Mreuse)); 2000 } 2001 PetscFunctionReturn(0); 2002 } 2003 2004 PetscErrorCode MatPermute_MPIBAIJ(Mat A, IS rowp, IS colp, Mat *B) { 2005 MPI_Comm comm, pcomm; 2006 PetscInt clocal_size, nrows; 2007 const PetscInt *rows; 2008 PetscMPIInt size; 2009 IS crowp, lcolp; 2010 2011 PetscFunctionBegin; 2012 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 2013 /* make a collective version of 'rowp' */ 2014 PetscCall(PetscObjectGetComm((PetscObject)rowp, &pcomm)); 2015 if (pcomm == comm) { 2016 crowp = rowp; 2017 } else { 2018 PetscCall(ISGetSize(rowp, &nrows)); 2019 PetscCall(ISGetIndices(rowp, &rows)); 2020 PetscCall(ISCreateGeneral(comm, nrows, rows, PETSC_COPY_VALUES, &crowp)); 2021 PetscCall(ISRestoreIndices(rowp, &rows)); 2022 } 2023 PetscCall(ISSetPermutation(crowp)); 2024 /* make a local version of 'colp' */ 2025 PetscCall(PetscObjectGetComm((PetscObject)colp, &pcomm)); 2026 PetscCallMPI(MPI_Comm_size(pcomm, &size)); 2027 if (size == 1) { 2028 lcolp = colp; 2029 } else { 2030 PetscCall(ISAllGather(colp, &lcolp)); 2031 } 2032 
PetscCall(ISSetPermutation(lcolp));
  /* now we just get the submatrix */
  PetscCall(MatGetLocalSize(A, NULL, &clocal_size));
  PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(A, crowp, lcolp, clocal_size, MAT_INITIAL_MATRIX, B));
  /* clean up: destroy only the index sets we created above, not the caller's */
  if (pcomm != comm) PetscCall(ISDestroy(&crowp));
  if (size > 1) PetscCall(ISDestroy(&lcolp));
  PetscFunctionReturn(0);
}

/*
  Returns the ghost (off-process) block-column information of mat:
  nghosts - number of ghost block columns (nbs of the off-diagonal block)
  ghosts  - global block-column indices of the ghosts (garray); not a copy,
            owned by the matrix
*/
PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[]) {
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ *B    = (Mat_SeqBAIJ *)baij->B->data;

  PetscFunctionBegin;
  if (nghosts) *nghosts = B->nbs;
  if (ghosts) *ghosts = baij->garray;
  PetscFunctionReturn(0);
}

/*
  Builds, on every rank, a sequential AIJ matrix (block-row by block-column,
  numerical values not set) holding the full nonzero block structure of the
  parallel matrix A, gathered via two MPI_Allgatherv calls.
*/
PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A, Mat *newmat) {
  Mat          B;
  Mat_MPIBAIJ *a  = (Mat_MPIBAIJ *)A->data;
  Mat_SeqBAIJ *ad = (Mat_SeqBAIJ *)a->A->data, *bd = (Mat_SeqBAIJ *)a->B->data;
  Mat_SeqAIJ  *b;
  PetscMPIInt  size, rank, *recvcounts = NULL, *displs = NULL;
  PetscInt     sendcount, i, *rstarts = A->rmap->range, n, cnt, j, bs = A->rmap->bs;
  PetscInt     m, *garray = a->garray, *lens, *jsendbuf, *a_jsendbuf, *b_jsendbuf;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size));
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank));

  /* ----------------------------------------------------------------
     Tell every processor the number of nonzeros per row
  */
  /* fill in this rank's portion of lens (indexed by global block row):
     nonzeros per block row = diagonal-part count + off-diagonal-part count */
  PetscCall(PetscMalloc1(A->rmap->N / bs, &lens));
  for (i = A->rmap->rstart / bs; i < A->rmap->rend / bs; i++) lens[i] = ad->i[i - A->rmap->rstart / bs + 1] - ad->i[i - A->rmap->rstart / bs] + bd->i[i - A->rmap->rstart / bs + 1] - bd->i[i - A->rmap->rstart / bs];
  /* single allocation holds both recvcounts and displs (displs = second half) */
  PetscCall(PetscMalloc1(2 * size, &recvcounts));
  displs = recvcounts + size;
  for (i = 0; i < size; i++) {
    recvcounts[i] = A->rmap->range[i + 1] / bs - A->rmap->range[i]
/ bs; 2074 displs[i] = A->rmap->range[i] / bs; 2075 } 2076 PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, lens, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A))); 2077 /* --------------------------------------------------------------- 2078 Create the sequential matrix of the same type as the local block diagonal 2079 */ 2080 PetscCall(MatCreate(PETSC_COMM_SELF, &B)); 2081 PetscCall(MatSetSizes(B, A->rmap->N / bs, A->cmap->N / bs, PETSC_DETERMINE, PETSC_DETERMINE)); 2082 PetscCall(MatSetType(B, MATSEQAIJ)); 2083 PetscCall(MatSeqAIJSetPreallocation(B, 0, lens)); 2084 b = (Mat_SeqAIJ *)B->data; 2085 2086 /*-------------------------------------------------------------------- 2087 Copy my part of matrix column indices over 2088 */ 2089 sendcount = ad->nz + bd->nz; 2090 jsendbuf = b->j + b->i[rstarts[rank] / bs]; 2091 a_jsendbuf = ad->j; 2092 b_jsendbuf = bd->j; 2093 n = A->rmap->rend / bs - A->rmap->rstart / bs; 2094 cnt = 0; 2095 for (i = 0; i < n; i++) { 2096 /* put in lower diagonal portion */ 2097 m = bd->i[i + 1] - bd->i[i]; 2098 while (m > 0) { 2099 /* is it above diagonal (in bd (compressed) numbering) */ 2100 if (garray[*b_jsendbuf] > A->rmap->rstart / bs + i) break; 2101 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2102 m--; 2103 } 2104 2105 /* put in diagonal portion */ 2106 for (j = ad->i[i]; j < ad->i[i + 1]; j++) jsendbuf[cnt++] = A->rmap->rstart / bs + *a_jsendbuf++; 2107 2108 /* put in upper diagonal portion */ 2109 while (m-- > 0) jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2110 } 2111 PetscCheck(cnt == sendcount, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Corrupted PETSc matrix: nz given %" PetscInt_FMT " actual nz %" PetscInt_FMT, sendcount, cnt); 2112 2113 /*-------------------------------------------------------------------- 2114 Gather all column indices to all processors 2115 */ 2116 for (i = 0; i < size; i++) { 2117 recvcounts[i] = 0; 2118 for (j = A->rmap->range[i] / bs; j < A->rmap->range[i + 1] / bs; j++) recvcounts[i] += 
lens[j]; 2119 } 2120 displs[0] = 0; 2121 for (i = 1; i < size; i++) displs[i] = displs[i - 1] + recvcounts[i - 1]; 2122 PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, b->j, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A))); 2123 /*-------------------------------------------------------------------- 2124 Assemble the matrix into useable form (note numerical values not yet set) 2125 */ 2126 /* set the b->ilen (length of each row) values */ 2127 PetscCall(PetscArraycpy(b->ilen, lens, A->rmap->N / bs)); 2128 /* set the b->i indices */ 2129 b->i[0] = 0; 2130 for (i = 1; i <= A->rmap->N / bs; i++) b->i[i] = b->i[i - 1] + lens[i - 1]; 2131 PetscCall(PetscFree(lens)); 2132 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 2133 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 2134 PetscCall(PetscFree(recvcounts)); 2135 2136 PetscCall(MatPropagateSymmetryOptions(A, B)); 2137 *newmat = B; 2138 PetscFunctionReturn(0); 2139 } 2140 2141 PetscErrorCode MatSOR_MPIBAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) { 2142 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data; 2143 Vec bb1 = NULL; 2144 2145 PetscFunctionBegin; 2146 if (flag == SOR_APPLY_UPPER) { 2147 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2148 PetscFunctionReturn(0); 2149 } 2150 2151 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) PetscCall(VecDuplicate(bb, &bb1)); 2152 2153 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2154 if (flag & SOR_ZERO_INITIAL_GUESS) { 2155 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2156 its--; 2157 } 2158 2159 while (its--) { 2160 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2161 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2162 2163 /* update rhs: bb1 = bb - B*x */ 2164 PetscCall(VecScale(mat->lvec, -1.0)); 2165 
PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 2166 2167 /* local sweep */ 2168 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx)); 2169 } 2170 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 2171 if (flag & SOR_ZERO_INITIAL_GUESS) { 2172 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2173 its--; 2174 } 2175 while (its--) { 2176 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2177 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2178 2179 /* update rhs: bb1 = bb - B*x */ 2180 PetscCall(VecScale(mat->lvec, -1.0)); 2181 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 2182 2183 /* local sweep */ 2184 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx)); 2185 } 2186 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 2187 if (flag & SOR_ZERO_INITIAL_GUESS) { 2188 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2189 its--; 2190 } 2191 while (its--) { 2192 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2193 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2194 2195 /* update rhs: bb1 = bb - B*x */ 2196 PetscCall(VecScale(mat->lvec, -1.0)); 2197 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 2198 2199 /* local sweep */ 2200 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx)); 2201 } 2202 } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel version of SOR requested not supported"); 2203 2204 PetscCall(VecDestroy(&bb1)); 2205 PetscFunctionReturn(0); 2206 } 2207 2208 PetscErrorCode MatGetColumnReductions_MPIBAIJ(Mat A, PetscInt type, PetscReal *reductions) { 2209 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)A->data; 2210 PetscInt m, N, i, *garray = aij->garray; 2211 PetscInt ib, jb, bs = 
A->rmap->bs; 2212 Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ *)aij->A->data; 2213 MatScalar *a_val = a_aij->a; 2214 Mat_SeqBAIJ *b_aij = (Mat_SeqBAIJ *)aij->B->data; 2215 MatScalar *b_val = b_aij->a; 2216 PetscReal *work; 2217 2218 PetscFunctionBegin; 2219 PetscCall(MatGetSize(A, &m, &N)); 2220 PetscCall(PetscCalloc1(N, &work)); 2221 if (type == NORM_2) { 2222 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2223 for (jb = 0; jb < bs; jb++) { 2224 for (ib = 0; ib < bs; ib++) { 2225 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val); 2226 a_val++; 2227 } 2228 } 2229 } 2230 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2231 for (jb = 0; jb < bs; jb++) { 2232 for (ib = 0; ib < bs; ib++) { 2233 work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val); 2234 b_val++; 2235 } 2236 } 2237 } 2238 } else if (type == NORM_1) { 2239 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2240 for (jb = 0; jb < bs; jb++) { 2241 for (ib = 0; ib < bs; ib++) { 2242 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val); 2243 a_val++; 2244 } 2245 } 2246 } 2247 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2248 for (jb = 0; jb < bs; jb++) { 2249 for (ib = 0; ib < bs; ib++) { 2250 work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val); 2251 b_val++; 2252 } 2253 } 2254 } 2255 } else if (type == NORM_INFINITY) { 2256 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2257 for (jb = 0; jb < bs; jb++) { 2258 for (ib = 0; ib < bs; ib++) { 2259 int col = A->cmap->rstart + a_aij->j[i] * bs + jb; 2260 work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]); 2261 a_val++; 2262 } 2263 } 2264 } 2265 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2266 for (jb = 0; jb < bs; jb++) { 2267 for (ib = 0; ib < bs; ib++) { 2268 int col = garray[b_aij->j[i]] * bs + jb; 2269 work[col] = PetscMax(PetscAbsScalar(*b_val), work[col]); 2270 b_val++; 2271 } 
2272 } 2273 } 2274 } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) { 2275 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2276 for (jb = 0; jb < bs; jb++) { 2277 for (ib = 0; ib < bs; ib++) { 2278 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val); 2279 a_val++; 2280 } 2281 } 2282 } 2283 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2284 for (jb = 0; jb < bs; jb++) { 2285 for (ib = 0; ib < bs; ib++) { 2286 work[garray[b_aij->j[i]] * bs + jb] += PetscRealPart(*b_val); 2287 b_val++; 2288 } 2289 } 2290 } 2291 } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) { 2292 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2293 for (jb = 0; jb < bs; jb++) { 2294 for (ib = 0; ib < bs; ib++) { 2295 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val); 2296 a_val++; 2297 } 2298 } 2299 } 2300 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2301 for (jb = 0; jb < bs; jb++) { 2302 for (ib = 0; ib < bs; ib++) { 2303 work[garray[b_aij->j[i]] * bs + jb] += PetscImaginaryPart(*b_val); 2304 b_val++; 2305 } 2306 } 2307 } 2308 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type"); 2309 if (type == NORM_INFINITY) { 2310 PetscCall(MPIU_Allreduce(work, reductions, N, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A))); 2311 } else { 2312 PetscCall(MPIU_Allreduce(work, reductions, N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A))); 2313 } 2314 PetscCall(PetscFree(work)); 2315 if (type == NORM_2) { 2316 for (i = 0; i < N; i++) reductions[i] = PetscSqrtReal(reductions[i]); 2317 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 2318 for (i = 0; i < N; i++) reductions[i] /= m; 2319 } 2320 PetscFunctionReturn(0); 2321 } 2322 2323 PetscErrorCode MatInvertBlockDiagonal_MPIBAIJ(Mat A, const PetscScalar **values) { 2324 Mat_MPIBAIJ *a = 
(Mat_MPIBAIJ *)A->data; 2325 2326 PetscFunctionBegin; 2327 PetscCall(MatInvertBlockDiagonal(a->A, values)); 2328 A->factorerrortype = a->A->factorerrortype; 2329 A->factorerror_zeropivot_value = a->A->factorerror_zeropivot_value; 2330 A->factorerror_zeropivot_row = a->A->factorerror_zeropivot_row; 2331 PetscFunctionReturn(0); 2332 } 2333 2334 PetscErrorCode MatShift_MPIBAIJ(Mat Y, PetscScalar a) { 2335 Mat_MPIBAIJ *maij = (Mat_MPIBAIJ *)Y->data; 2336 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)maij->A->data; 2337 2338 PetscFunctionBegin; 2339 if (!Y->preallocated) { 2340 PetscCall(MatMPIBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL, 0, NULL)); 2341 } else if (!aij->nz) { 2342 PetscInt nonew = aij->nonew; 2343 PetscCall(MatSeqBAIJSetPreallocation(maij->A, Y->rmap->bs, 1, NULL)); 2344 aij->nonew = nonew; 2345 } 2346 PetscCall(MatShift_Basic(Y, a)); 2347 PetscFunctionReturn(0); 2348 } 2349 2350 PetscErrorCode MatMissingDiagonal_MPIBAIJ(Mat A, PetscBool *missing, PetscInt *d) { 2351 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2352 2353 PetscFunctionBegin; 2354 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices"); 2355 PetscCall(MatMissingDiagonal(a->A, missing, d)); 2356 if (d) { 2357 PetscInt rstart; 2358 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 2359 *d += rstart / A->rmap->bs; 2360 } 2361 PetscFunctionReturn(0); 2362 } 2363 2364 PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A, Mat *a) { 2365 PetscFunctionBegin; 2366 *a = ((Mat_MPIBAIJ *)A->data)->A; 2367 PetscFunctionReturn(0); 2368 } 2369 2370 /* -------------------------------------------------------------------*/ 2371 static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ, 2372 MatGetRow_MPIBAIJ, 2373 MatRestoreRow_MPIBAIJ, 2374 MatMult_MPIBAIJ, 2375 /* 4*/ MatMultAdd_MPIBAIJ, 2376 MatMultTranspose_MPIBAIJ, 2377 MatMultTransposeAdd_MPIBAIJ, 2378 NULL, 2379 NULL, 2380 NULL, 2381 /*10*/ NULL, 2382 NULL, 2383 NULL, 2384 MatSOR_MPIBAIJ, 2385 MatTranspose_MPIBAIJ, 
2386 /*15*/ MatGetInfo_MPIBAIJ, 2387 MatEqual_MPIBAIJ, 2388 MatGetDiagonal_MPIBAIJ, 2389 MatDiagonalScale_MPIBAIJ, 2390 MatNorm_MPIBAIJ, 2391 /*20*/ MatAssemblyBegin_MPIBAIJ, 2392 MatAssemblyEnd_MPIBAIJ, 2393 MatSetOption_MPIBAIJ, 2394 MatZeroEntries_MPIBAIJ, 2395 /*24*/ MatZeroRows_MPIBAIJ, 2396 NULL, 2397 NULL, 2398 NULL, 2399 NULL, 2400 /*29*/ MatSetUp_MPIBAIJ, 2401 NULL, 2402 NULL, 2403 MatGetDiagonalBlock_MPIBAIJ, 2404 NULL, 2405 /*34*/ MatDuplicate_MPIBAIJ, 2406 NULL, 2407 NULL, 2408 NULL, 2409 NULL, 2410 /*39*/ MatAXPY_MPIBAIJ, 2411 MatCreateSubMatrices_MPIBAIJ, 2412 MatIncreaseOverlap_MPIBAIJ, 2413 MatGetValues_MPIBAIJ, 2414 MatCopy_MPIBAIJ, 2415 /*44*/ NULL, 2416 MatScale_MPIBAIJ, 2417 MatShift_MPIBAIJ, 2418 NULL, 2419 MatZeroRowsColumns_MPIBAIJ, 2420 /*49*/ NULL, 2421 NULL, 2422 NULL, 2423 NULL, 2424 NULL, 2425 /*54*/ MatFDColoringCreate_MPIXAIJ, 2426 NULL, 2427 MatSetUnfactored_MPIBAIJ, 2428 MatPermute_MPIBAIJ, 2429 MatSetValuesBlocked_MPIBAIJ, 2430 /*59*/ MatCreateSubMatrix_MPIBAIJ, 2431 MatDestroy_MPIBAIJ, 2432 MatView_MPIBAIJ, 2433 NULL, 2434 NULL, 2435 /*64*/ NULL, 2436 NULL, 2437 NULL, 2438 NULL, 2439 NULL, 2440 /*69*/ MatGetRowMaxAbs_MPIBAIJ, 2441 NULL, 2442 NULL, 2443 NULL, 2444 NULL, 2445 /*74*/ NULL, 2446 MatFDColoringApply_BAIJ, 2447 NULL, 2448 NULL, 2449 NULL, 2450 /*79*/ NULL, 2451 NULL, 2452 NULL, 2453 NULL, 2454 MatLoad_MPIBAIJ, 2455 /*84*/ NULL, 2456 NULL, 2457 NULL, 2458 NULL, 2459 NULL, 2460 /*89*/ NULL, 2461 NULL, 2462 NULL, 2463 NULL, 2464 NULL, 2465 /*94*/ NULL, 2466 NULL, 2467 NULL, 2468 NULL, 2469 NULL, 2470 /*99*/ NULL, 2471 NULL, 2472 NULL, 2473 MatConjugate_MPIBAIJ, 2474 NULL, 2475 /*104*/ NULL, 2476 MatRealPart_MPIBAIJ, 2477 MatImaginaryPart_MPIBAIJ, 2478 NULL, 2479 NULL, 2480 /*109*/ NULL, 2481 NULL, 2482 NULL, 2483 NULL, 2484 MatMissingDiagonal_MPIBAIJ, 2485 /*114*/ MatGetSeqNonzeroStructure_MPIBAIJ, 2486 NULL, 2487 MatGetGhosts_MPIBAIJ, 2488 NULL, 2489 NULL, 2490 /*119*/ NULL, 2491 NULL, 2492 NULL, 2493 NULL, 2494 
MatGetMultiProcBlock_MPIBAIJ, 2495 /*124*/ NULL, 2496 MatGetColumnReductions_MPIBAIJ, 2497 MatInvertBlockDiagonal_MPIBAIJ, 2498 NULL, 2499 NULL, 2500 /*129*/ NULL, 2501 NULL, 2502 NULL, 2503 NULL, 2504 NULL, 2505 /*134*/ NULL, 2506 NULL, 2507 NULL, 2508 NULL, 2509 NULL, 2510 /*139*/ MatSetBlockSizes_Default, 2511 NULL, 2512 NULL, 2513 MatFDColoringSetUp_MPIXAIJ, 2514 NULL, 2515 /*144*/ MatCreateMPIMatConcatenateSeqMat_MPIBAIJ, 2516 NULL, 2517 NULL, 2518 NULL, 2519 NULL, 2520 NULL, 2521 /*150*/ NULL}; 2522 2523 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *); 2524 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *); 2525 2526 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[]) { 2527 PetscInt m, rstart, cstart, cend; 2528 PetscInt i, j, dlen, olen, nz, nz_max = 0, *d_nnz = NULL, *o_nnz = NULL; 2529 const PetscInt *JJ = NULL; 2530 PetscScalar *values = NULL; 2531 PetscBool roworiented = ((Mat_MPIBAIJ *)B->data)->roworiented; 2532 PetscBool nooffprocentries; 2533 2534 PetscFunctionBegin; 2535 PetscCall(PetscLayoutSetBlockSize(B->rmap, bs)); 2536 PetscCall(PetscLayoutSetBlockSize(B->cmap, bs)); 2537 PetscCall(PetscLayoutSetUp(B->rmap)); 2538 PetscCall(PetscLayoutSetUp(B->cmap)); 2539 PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 2540 m = B->rmap->n / bs; 2541 rstart = B->rmap->rstart / bs; 2542 cstart = B->cmap->rstart / bs; 2543 cend = B->cmap->rend / bs; 2544 2545 PetscCheck(!ii[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]); 2546 PetscCall(PetscMalloc2(m, &d_nnz, m, &o_nnz)); 2547 for (i = 0; i < m; i++) { 2548 nz = ii[i + 1] - ii[i]; 2549 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative number of columns %" PetscInt_FMT, i, nz); 2550 nz_max = PetscMax(nz_max, nz); 2551 dlen = 0; 2552 olen = 0; 2553 JJ = jj 
+ ii[i]; 2554 for (j = 0; j < nz; j++) { 2555 if (*JJ < cstart || *JJ >= cend) olen++; 2556 else dlen++; 2557 JJ++; 2558 } 2559 d_nnz[i] = dlen; 2560 o_nnz[i] = olen; 2561 } 2562 PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, d_nnz, 0, o_nnz)); 2563 PetscCall(PetscFree2(d_nnz, o_nnz)); 2564 2565 values = (PetscScalar *)V; 2566 if (!values) PetscCall(PetscCalloc1(bs * bs * nz_max, &values)); 2567 for (i = 0; i < m; i++) { 2568 PetscInt row = i + rstart; 2569 PetscInt ncols = ii[i + 1] - ii[i]; 2570 const PetscInt *icols = jj + ii[i]; 2571 if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */ 2572 const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0); 2573 PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, ncols, icols, svals, INSERT_VALUES)); 2574 } else { /* block ordering does not match so we can only insert one block at a time. */ 2575 PetscInt j; 2576 for (j = 0; j < ncols; j++) { 2577 const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0); 2578 PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, 1, &icols[j], svals, INSERT_VALUES)); 2579 } 2580 } 2581 } 2582 2583 if (!V) PetscCall(PetscFree(values)); 2584 nooffprocentries = B->nooffprocentries; 2585 B->nooffprocentries = PETSC_TRUE; 2586 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 2587 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 2588 B->nooffprocentries = nooffprocentries; 2589 2590 PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE)); 2591 PetscFunctionReturn(0); 2592 } 2593 2594 /*@C 2595 MatMPIBAIJSetPreallocationCSR - Creates a sparse parallel matrix in `MATBAIJ` format using the given nonzero structure and (optional) numerical values 2596 2597 Collective 2598 2599 Input Parameters: 2600 + B - the matrix 2601 . bs - the block size 2602 . i - the indices into j for the start of each local row (starts with zero) 2603 . 
j - the column indices for each local row (starts with zero) these must be sorted for each row 2604 - v - optional values in the matrix 2605 2606 Level: advanced 2607 2608 Notes: 2609 The order of the entries in values is specified by the `MatOption` `MAT_ROW_ORIENTED`. For example, C programs 2610 may want to use the default `MAT_ROW_ORIENTED` with value `PETSC_TRUE` and use an array v[nnz][bs][bs] where the second index is 2611 over rows within a block and the last index is over columns within a block row. Fortran programs will likely set 2612 `MAT_ROW_ORIENTED` with value `PETSC_FALSE` and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a 2613 block column and the second index is over columns within a block. 2614 2615 Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well 2616 2617 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MPIAIJ`, `MatCreateMPIBAIJWithArrays()`, `MPIBAIJ` 2618 @*/ 2619 PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[]) { 2620 PetscFunctionBegin; 2621 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 2622 PetscValidType(B, 1); 2623 PetscValidLogicalCollectiveInt(B, bs, 2); 2624 PetscTryMethod(B, "MatMPIBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v)); 2625 PetscFunctionReturn(0); 2626 } 2627 2628 PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz) { 2629 Mat_MPIBAIJ *b; 2630 PetscInt i; 2631 PetscMPIInt size; 2632 2633 PetscFunctionBegin; 2634 PetscCall(MatSetBlockSize(B, PetscAbs(bs))); 2635 PetscCall(PetscLayoutSetUp(B->rmap)); 2636 PetscCall(PetscLayoutSetUp(B->cmap)); 2637 
PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 2638 2639 if (d_nnz) { 2640 for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(d_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "d_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, d_nnz[i]); 2641 } 2642 if (o_nnz) { 2643 for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(o_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "o_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, o_nnz[i]); 2644 } 2645 2646 b = (Mat_MPIBAIJ *)B->data; 2647 b->bs2 = bs * bs; 2648 b->mbs = B->rmap->n / bs; 2649 b->nbs = B->cmap->n / bs; 2650 b->Mbs = B->rmap->N / bs; 2651 b->Nbs = B->cmap->N / bs; 2652 2653 for (i = 0; i <= b->size; i++) b->rangebs[i] = B->rmap->range[i] / bs; 2654 b->rstartbs = B->rmap->rstart / bs; 2655 b->rendbs = B->rmap->rend / bs; 2656 b->cstartbs = B->cmap->rstart / bs; 2657 b->cendbs = B->cmap->rend / bs; 2658 2659 #if defined(PETSC_USE_CTABLE) 2660 PetscCall(PetscTableDestroy(&b->colmap)); 2661 #else 2662 PetscCall(PetscFree(b->colmap)); 2663 #endif 2664 PetscCall(PetscFree(b->garray)); 2665 PetscCall(VecDestroy(&b->lvec)); 2666 PetscCall(VecScatterDestroy(&b->Mvctx)); 2667 2668 /* Because the B will have been resized we simply destroy it and create a new one each time */ 2669 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size)); 2670 PetscCall(MatDestroy(&b->B)); 2671 PetscCall(MatCreate(PETSC_COMM_SELF, &b->B)); 2672 PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ? 
B->cmap->N : 0)); 2673 PetscCall(MatSetType(b->B, MATSEQBAIJ)); 2674 PetscCall(PetscLogObjectParent((PetscObject)B, (PetscObject)b->B)); 2675 2676 if (!B->preallocated) { 2677 PetscCall(MatCreate(PETSC_COMM_SELF, &b->A)); 2678 PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n)); 2679 PetscCall(MatSetType(b->A, MATSEQBAIJ)); 2680 PetscCall(PetscLogObjectParent((PetscObject)B, (PetscObject)b->A)); 2681 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), bs, &B->bstash)); 2682 } 2683 2684 PetscCall(MatSeqBAIJSetPreallocation(b->A, bs, d_nz, d_nnz)); 2685 PetscCall(MatSeqBAIJSetPreallocation(b->B, bs, o_nz, o_nnz)); 2686 B->preallocated = PETSC_TRUE; 2687 B->was_assembled = PETSC_FALSE; 2688 B->assembled = PETSC_FALSE; 2689 PetscFunctionReturn(0); 2690 } 2691 2692 extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat, Vec); 2693 extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat, PetscReal); 2694 2695 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype, MatReuse reuse, Mat *adj) { 2696 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 2697 Mat_SeqBAIJ *d = (Mat_SeqBAIJ *)b->A->data, *o = (Mat_SeqBAIJ *)b->B->data; 2698 PetscInt M = B->rmap->n / B->rmap->bs, i, *ii, *jj, cnt, j, k, rstart = B->rmap->rstart / B->rmap->bs; 2699 const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray; 2700 2701 PetscFunctionBegin; 2702 PetscCall(PetscMalloc1(M + 1, &ii)); 2703 ii[0] = 0; 2704 for (i = 0; i < M; i++) { 2705 PetscCheck((id[i + 1] - id[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, id[i], id[i + 1]); 2706 PetscCheck((io[i + 1] - io[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, io[i], io[i + 1]); 2707 ii[i + 1] = ii[i] + id[i + 1] - id[i] + io[i + 1] - io[i]; 2708 /* remove one from count of matrix has diagonal */ 2709 for (j = id[i]; j < id[i + 1]; j++) { 
2710 if (jd[j] == i) { 2711 ii[i + 1]--; 2712 break; 2713 } 2714 } 2715 } 2716 PetscCall(PetscMalloc1(ii[M], &jj)); 2717 cnt = 0; 2718 for (i = 0; i < M; i++) { 2719 for (j = io[i]; j < io[i + 1]; j++) { 2720 if (garray[jo[j]] > rstart) break; 2721 jj[cnt++] = garray[jo[j]]; 2722 } 2723 for (k = id[i]; k < id[i + 1]; k++) { 2724 if (jd[k] != i) jj[cnt++] = rstart + jd[k]; 2725 } 2726 for (; j < io[i + 1]; j++) jj[cnt++] = garray[jo[j]]; 2727 } 2728 PetscCall(MatCreateMPIAdj(PetscObjectComm((PetscObject)B), M, B->cmap->N / B->rmap->bs, ii, jj, NULL, adj)); 2729 PetscFunctionReturn(0); 2730 } 2731 2732 #include <../src/mat/impls/aij/mpi/mpiaij.h> 2733 2734 PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType, MatReuse, Mat *); 2735 2736 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat) { 2737 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2738 Mat_MPIAIJ *b; 2739 Mat B; 2740 2741 PetscFunctionBegin; 2742 PetscCheck(A->assembled, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Matrix must be assembled"); 2743 2744 if (reuse == MAT_REUSE_MATRIX) { 2745 B = *newmat; 2746 } else { 2747 PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B)); 2748 PetscCall(MatSetType(B, MATMPIAIJ)); 2749 PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N)); 2750 PetscCall(MatSetBlockSizes(B, A->rmap->bs, A->cmap->bs)); 2751 PetscCall(MatSeqAIJSetPreallocation(B, 0, NULL)); 2752 PetscCall(MatMPIAIJSetPreallocation(B, 0, NULL, 0, NULL)); 2753 } 2754 b = (Mat_MPIAIJ *)B->data; 2755 2756 if (reuse == MAT_REUSE_MATRIX) { 2757 PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_REUSE_MATRIX, &b->A)); 2758 PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_REUSE_MATRIX, &b->B)); 2759 } else { 2760 PetscCall(MatDestroy(&b->A)); 2761 PetscCall(MatDestroy(&b->B)); 2762 PetscCall(MatDisAssemble_MPIBAIJ(A)); 2763 PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A)); 2764 
PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B)); 2765 PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY)); 2766 PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY)); 2767 } 2768 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 2769 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 2770 2771 if (reuse == MAT_INPLACE_MATRIX) { 2772 PetscCall(MatHeaderReplace(A, &B)); 2773 } else { 2774 *newmat = B; 2775 } 2776 PetscFunctionReturn(0); 2777 } 2778 2779 /*MC 2780 MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices. 2781 2782 Options Database Keys: 2783 + -mat_type mpibaij - sets the matrix type to `MATMPIBAIJ` during a call to `MatSetFromOptions()` 2784 . -mat_block_size <bs> - set the blocksize used to store the matrix 2785 . -mat_baij_mult_version version - indicate the version of the matrix-vector product to use (0 often indicates using BLAS) 2786 - -mat_use_hash_table <fact> - set hash table factor 2787 2788 Level: beginner 2789 2790 Note: 2791 `MatSetOptions`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. 
In this no 2792 space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored 2793 2794 .seealso: MATBAIJ`, MATSEQBAIJ`, `MatCreateBAIJ` 2795 M*/ 2796 2797 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat, MatType, MatReuse, Mat *); 2798 2799 PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B) { 2800 Mat_MPIBAIJ *b; 2801 PetscBool flg = PETSC_FALSE; 2802 2803 PetscFunctionBegin; 2804 PetscCall(PetscNewLog(B, &b)); 2805 B->data = (void *)b; 2806 2807 PetscCall(PetscMemcpy(B->ops, &MatOps_Values, sizeof(struct _MatOps))); 2808 B->assembled = PETSC_FALSE; 2809 2810 B->insertmode = NOT_SET_VALUES; 2811 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank)); 2812 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &b->size)); 2813 2814 /* build local table of row and column ownerships */ 2815 PetscCall(PetscMalloc1(b->size + 1, &b->rangebs)); 2816 2817 /* build cache for off array entries formed */ 2818 PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash)); 2819 2820 b->donotstash = PETSC_FALSE; 2821 b->colmap = NULL; 2822 b->garray = NULL; 2823 b->roworiented = PETSC_TRUE; 2824 2825 /* stuff used in block assembly */ 2826 b->barray = NULL; 2827 2828 /* stuff used for matrix vector multiply */ 2829 b->lvec = NULL; 2830 b->Mvctx = NULL; 2831 2832 /* stuff for MatGetRow() */ 2833 b->rowindices = NULL; 2834 b->rowvalues = NULL; 2835 b->getrowactive = PETSC_FALSE; 2836 2837 /* hash table stuff */ 2838 b->ht = NULL; 2839 b->hd = NULL; 2840 b->ht_size = 0; 2841 b->ht_flag = PETSC_FALSE; 2842 b->ht_fact = 0; 2843 b->ht_total_ct = 0; 2844 b->ht_insert_ct = 0; 2845 2846 /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */ 2847 b->ijonly = PETSC_FALSE; 2848 2849 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiadj_C", MatConvert_MPIBAIJ_MPIAdj)); 2850 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiaij_C", 
MatConvert_MPIBAIJ_MPIAIJ)); 2851 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpisbaij_C", MatConvert_MPIBAIJ_MPISBAIJ)); 2852 #if defined(PETSC_HAVE_HYPRE) 2853 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_hypre_C", MatConvert_AIJ_HYPRE)); 2854 #endif 2855 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIBAIJ)); 2856 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIBAIJ)); 2857 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocation_C", MatMPIBAIJSetPreallocation_MPIBAIJ)); 2858 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocationCSR_C", MatMPIBAIJSetPreallocationCSR_MPIBAIJ)); 2859 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIBAIJ)); 2860 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetHashTableFactor_C", MatSetHashTableFactor_MPIBAIJ)); 2861 PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_is_C", MatConvert_XAIJ_IS)); 2862 PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIBAIJ)); 2863 2864 PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Options for loading MPIBAIJ matrix 1", "Mat"); 2865 PetscCall(PetscOptionsName("-mat_use_hash_table", "Use hash table to save time in constructing matrix", "MatSetOption", &flg)); 2866 if (flg) { 2867 PetscReal fact = 1.39; 2868 PetscCall(MatSetOption(B, MAT_USE_HASH_TABLE, PETSC_TRUE)); 2869 PetscCall(PetscOptionsReal("-mat_use_hash_table", "Use hash table factor", "MatMPIBAIJSetHashTableFactor", fact, &fact, NULL)); 2870 if (fact <= 1.0) fact = 1.39; 2871 PetscCall(MatMPIBAIJSetHashTableFactor(B, fact)); 2872 PetscCall(PetscInfo(B, "Hash table Factor used %5.2g\n", (double)fact)); 2873 } 2874 PetscOptionsEnd(); 2875 PetscFunctionReturn(0); 2876 } 2877 2878 /*MC 2879 MATBAIJ - MATBAIJ = "baij" - A matrix type to be used 
for block sparse matrices. 2880 2881 This matrix type is identical to `MATSEQBAIJ` when constructed with a single process communicator, 2882 and `MATMPIBAIJ` otherwise. 2883 2884 Options Database Keys: 2885 . -mat_type baij - sets the matrix type to `MATBAIJ` during a call to `MatSetFromOptions()` 2886 2887 Level: beginner 2888 2889 .seealso: `MatCreateBAIJ()`, `MATSEQBAIJ`, `MATMPIBAIJ`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()` 2890 M*/ 2891 2892 /*@C 2893 MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in `MATMPIBAIJ` format 2894 (block compressed row). For good matrix assembly performance 2895 the user should preallocate the matrix storage by setting the parameters 2896 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2897 performance can be increased by more than a factor of 50. 2898 2899 Collective on Mat 2900 2901 Input Parameters: 2902 + B - the matrix 2903 . bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row 2904 blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs() 2905 . d_nz - number of block nonzeros per block row in diagonal portion of local 2906 submatrix (same for all local rows) 2907 . d_nnz - array containing the number of block nonzeros in the various block rows 2908 of the in diagonal portion of the local (possibly different for each block 2909 row) or NULL. If you plan to factor the matrix you must leave room for the diagonal entry and 2910 set it even if it is zero. 2911 . o_nz - number of block nonzeros per block row in the off-diagonal portion of local 2912 submatrix (same for all local rows). 2913 - o_nnz - array containing the number of nonzeros in the various block rows of the 2914 off-diagonal portion of the local submatrix (possibly different for 2915 each block row) or NULL. 
  If the *_nnz parameter is given then the *_nz parameter is ignored

  Options Database Keys:
+ -mat_block_size - size of the blocks to use
- -mat_use_hash_table <fact> - set hash table factor

  Notes:
  If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor
  then it must be used on all processors that share the object for that argument.

  Storage Information:
  For a square global matrix we define each processor's diagonal portion
  to be its local rows and the corresponding columns (a square submatrix);
  each processor's off-diagonal portion encompasses the remainder of the
  local matrix (a rectangular submatrix).

  The user can specify preallocated storage for the diagonal part of
  the local submatrix with either d_nz or d_nnz (not both). Set
  d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
  memory allocation. Likewise, specify preallocated storage for the
  off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

  Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
  the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          --------------------------
   row 3  |o o o d d d o o o o o o
   row 4  |o o o d d d o o o o o o
   row 5  |o o o d d d o o o o o o
          --------------------------
.ve

  Thus, any entries in the d locations are stored in the d (diagonal)
  submatrix, and any entries in the o locations are stored in the
  o (off-diagonal) submatrix. Note that the d and the o submatrices are
  stored simply in the `MATSEQBAIJ` format for compressed row storage.

  Now d_nz should indicate the number of block nonzeros per row in the d matrix,
  and o_nz should indicate the number of block nonzeros per row in the o matrix.
2958 In general, for PDE problems in which most nonzeros are near the diagonal, 2959 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 2960 or you will get TERRIBLE performance; see the users' manual chapter on 2961 matrices. 2962 2963 You can call `MatGetInfo()` to get information on how effective the preallocation was; 2964 for example the fields mallocs,nz_allocated,nz_used,nz_unneeded; 2965 You can also run with the option -info and look for messages with the string 2966 malloc in them to see if additional memory allocation was needed. 2967 2968 Level: intermediate 2969 2970 .seealso: `MATMPIBAIJ`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatMPIBAIJSetPreallocationCSR()`, `PetscSplitOwnership()` 2971 @*/ 2972 PetscErrorCode MatMPIBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[]) { 2973 PetscFunctionBegin; 2974 PetscValidHeaderSpecific(B, MAT_CLASSID, 1); 2975 PetscValidType(B, 1); 2976 PetscValidLogicalCollectiveInt(B, bs, 2); 2977 PetscTryMethod(B, "MatMPIBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, bs, d_nz, d_nnz, o_nz, o_nnz)); 2978 PetscFunctionReturn(0); 2979 } 2980 2981 /*@C 2982 MatCreateBAIJ - Creates a sparse parallel matrix in `MATBAIJ` format 2983 (block compressed row). For good matrix assembly performance 2984 the user should preallocate the matrix storage by setting the parameters 2985 d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately, 2986 performance can be increased by more than a factor of 50. 2987 2988 Collective 2989 2990 Input Parameters: 2991 + comm - MPI communicator 2992 . bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row 2993 blocksize always defines the size of the blocks. 
The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()` 2994 . m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given) 2995 This value should be the same as the local size used in creating the 2996 y vector for the matrix-vector product y = Ax. 2997 . n - number of local columns (or `PETSC_DECIDE` to have calculated if N is given) 2998 This value should be the same as the local size used in creating the 2999 x vector for the matrix-vector product y = Ax. 3000 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 3001 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 3002 . d_nz - number of nonzero blocks per block row in diagonal portion of local 3003 submatrix (same for all local rows) 3004 . d_nnz - array containing the number of nonzero blocks in the various block rows 3005 of the in diagonal portion of the local (possibly different for each block 3006 row) or NULL. If you plan to factor the matrix you must leave room for the diagonal entry 3007 and set it even if it is zero. 3008 . o_nz - number of nonzero blocks per block row in the off-diagonal portion of local 3009 submatrix (same for all local rows). 3010 - o_nnz - array containing the number of nonzero blocks in the various block rows of the 3011 off-diagonal portion of the local submatrix (possibly different for 3012 each block row) or NULL. 3013 3014 Output Parameter: 3015 . A - the matrix 3016 3017 Options Database Keys: 3018 + -mat_block_size - size of the blocks to use 3019 - -mat_use_hash_table <fact> - set hash table factor 3020 3021 It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`, 3022 MatXXXXSetPreallocation() paradigm instead of this routine directly. 
  [MatXXXXSetPreallocation() is, for example, `MatSeqBAIJSetPreallocation()`]

  Notes:
  If the *_nnz parameter is given then the *_nz parameter is ignored

  A nonzero block is any block that has 1 or more nonzeros in it

  The user MUST specify either the local or global matrix dimensions
  (possibly both).

  If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor
  then it must be used on all processors that share the object for that argument.

  Storage Information:
  For a square global matrix we define each processor's diagonal portion
  to be its local rows and the corresponding columns (a square submatrix);
  each processor's off-diagonal portion encompasses the remainder of the
  local matrix (a rectangular submatrix).

  The user can specify preallocated storage for the diagonal part of
  the local submatrix with either d_nz or d_nnz (not both). Set
  d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
  memory allocation. Likewise, specify preallocated storage for the
  off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

  Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
  the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          --------------------------
   row 3  |o o o d d d o o o o o o
   row 4  |o o o d d d o o o o o o
   row 5  |o o o d d d o o o o o o
          --------------------------
.ve

  Thus, any entries in the d locations are stored in the d (diagonal)
  submatrix, and any entries in the o locations are stored in the
  o (off-diagonal) submatrix. Note that the d and the o submatrices are
  stored simply in the `MATSEQBAIJ` format for compressed row storage.
3064 3065 Now d_nz should indicate the number of block nonzeros per row in the d matrix, 3066 and o_nz should indicate the number of block nonzeros per row in the o matrix. 3067 In general, for PDE problems in which most nonzeros are near the diagonal, 3068 one expects d_nz >> o_nz. For large problems you MUST preallocate memory 3069 or you will get TERRIBLE performance; see the users' manual chapter on 3070 matrices. 3071 3072 Level: intermediate 3073 3074 .seealso: `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()` 3075 @*/ 3076 PetscErrorCode MatCreateBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A) { 3077 PetscMPIInt size; 3078 3079 PetscFunctionBegin; 3080 PetscCall(MatCreate(comm, A)); 3081 PetscCall(MatSetSizes(*A, m, n, M, N)); 3082 PetscCallMPI(MPI_Comm_size(comm, &size)); 3083 if (size > 1) { 3084 PetscCall(MatSetType(*A, MATMPIBAIJ)); 3085 PetscCall(MatMPIBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz)); 3086 } else { 3087 PetscCall(MatSetType(*A, MATSEQBAIJ)); 3088 PetscCall(MatSeqBAIJSetPreallocation(*A, bs, d_nz, d_nnz)); 3089 } 3090 PetscFunctionReturn(0); 3091 } 3092 3093 static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat) { 3094 Mat mat; 3095 Mat_MPIBAIJ *a, *oldmat = (Mat_MPIBAIJ *)matin->data; 3096 PetscInt len = 0; 3097 3098 PetscFunctionBegin; 3099 *newmat = NULL; 3100 PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat)); 3101 PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N)); 3102 PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name)); 3103 3104 mat->factortype = matin->factortype; 3105 mat->preallocated = PETSC_TRUE; 3106 mat->assembled = PETSC_TRUE; 3107 mat->insertmode = NOT_SET_VALUES; 3108 3109 a = (Mat_MPIBAIJ *)mat->data; 3110 
mat->rmap->bs = matin->rmap->bs;
  /* copy cached block-scaled sizes from the source matrix */
  a->bs2 = oldmat->bs2;
  a->mbs = oldmat->mbs;
  a->nbs = oldmat->nbs;
  a->Mbs = oldmat->Mbs;
  a->Nbs = oldmat->Nbs;

  /* share (reference-count) the row/column layouts rather than rebuilding them */
  PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
  PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  /* per-instance MatGetRow() scratch state starts empty in the duplicate */
  a->rowindices   = NULL;
  a->rowvalues    = NULL;
  a->getrowactive = PETSC_FALSE;
  a->barray       = NULL;
  /* block-indexed ownership ranges */
  a->rstartbs     = oldmat->rstartbs;
  a->rendbs       = oldmat->rendbs;
  a->cstartbs     = oldmat->cstartbs;
  a->cendbs       = oldmat->cendbs;

  /* hash table stuff: the table itself is not copied, only the configuration
     flags/factor; counters restart at zero for the new matrix */
  a->ht           = NULL;
  a->hd           = NULL;
  a->ht_size      = 0;
  a->ht_flag      = oldmat->ht_flag;
  a->ht_fact      = oldmat->ht_fact;
  a->ht_total_ct  = 0;
  a->ht_insert_ct = 0;

  /* deep-copy the block ownership range table (size+1 entries) */
  PetscCall(PetscArraycpy(a->rangebs, oldmat->rangebs, a->size + 1));
  /* deep-copy the global-to-local column map, if the source has one */
  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    PetscCall(PetscTableCreateCopy(oldmat->colmap, &a->colmap));
#else
    PetscCall(PetscMalloc1(a->Nbs, &a->colmap));
    PetscCall(PetscLogObjectMemory((PetscObject)mat, (a->Nbs) * sizeof(PetscInt)));
    PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, a->Nbs));
#endif
  } else a->colmap = NULL;

  /* deep-copy the off-diagonal global column indices; len is the number of
     block columns of the source's off-diagonal part B */
  if (oldmat->garray && (len = ((Mat_SeqBAIJ *)(oldmat->B->data))->nbs)) {
    PetscCall(PetscMalloc1(len, &a->garray));
    PetscCall(PetscLogObjectMemory((PetscObject)mat, len * sizeof(PetscInt)));
    PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
  } else a->garray = NULL;

  /* fresh block stash; duplicate the communication vector and copy the scatter
     context used for matrix-vector products */
  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin), matin->rmap->bs, &mat->bstash));
  PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
  PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)a->lvec));
  PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx));
PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)a->Mvctx)); 3164 3165 PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A)); 3166 PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)a->A)); 3167 PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B)); 3168 PetscCall(PetscLogObjectParent((PetscObject)mat, (PetscObject)a->B)); 3169 PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist)); 3170 *newmat = mat; 3171 PetscFunctionReturn(0); 3172 } 3173 3174 /* Used for both MPIBAIJ and MPISBAIJ matrices */ 3175 PetscErrorCode MatLoad_MPIBAIJ_Binary(Mat mat, PetscViewer viewer) { 3176 PetscInt header[4], M, N, nz, bs, m, n, mbs, nbs, rows, cols, sum, i, j, k; 3177 PetscInt *rowidxs, *colidxs, rs, cs, ce; 3178 PetscScalar *matvals; 3179 3180 PetscFunctionBegin; 3181 PetscCall(PetscViewerSetUp(viewer)); 3182 3183 /* read in matrix header */ 3184 PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT)); 3185 PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file"); 3186 M = header[1]; 3187 N = header[2]; 3188 nz = header[3]; 3189 PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M); 3190 PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N); 3191 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIBAIJ"); 3192 3193 /* set block sizes from the viewer's .info file */ 3194 PetscCall(MatLoad_Binary_BlockSizes(mat, viewer)); 3195 /* set local sizes if not set already */ 3196 if (mat->rmap->n < 0 && M == N) mat->rmap->n = mat->cmap->n; 3197 if (mat->cmap->n < 0 && M == N) mat->cmap->n = mat->rmap->n; 3198 /* set global sizes if not set already */ 3199 if (mat->rmap->N < 0) 
mat->rmap->N = M;
  if (mat->cmap->N < 0) mat->cmap->N = N;
  PetscCall(PetscLayoutSetUp(mat->rmap));
  PetscCall(PetscLayoutSetUp(mat->cmap));

  /* check if the matrix sizes are correct */
  PetscCall(MatGetSize(mat, &rows, &cols));
  PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT ")", M, N, rows, cols);
  PetscCall(MatGetBlockSize(mat, &bs));
  PetscCall(MatGetLocalSize(mat, &m, &n));
  PetscCall(PetscLayoutGetRange(mat->rmap, &rs, NULL));
  PetscCall(PetscLayoutGetRange(mat->cmap, &cs, &ce));
  /* local row/column dimensions counted in blocks */
  mbs = m / bs;
  nbs = n / bs;

  /* read in row lengths and build row indices; the in-place prefix sum turns
     per-row lengths into CSR-style row offsets (rowidxs[m] = local nonzeros) */
  PetscCall(PetscMalloc1(m + 1, &rowidxs));
  PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT));
  rowidxs[0] = 0;
  for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i];
  /* sanity check: sum of local row lengths across ranks must match the header's nonzero count */
  PetscCall(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer)));
  PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum);

  /* read in column indices and matrix values */
  PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals));
  PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT));
  PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR));

  { /* preallocate matrix storage */
    PetscBT bt;    /* helper bit set to count diagonal nonzeros */
    PetscHSetI ht; /* helper hash set to count off-diagonal nonzeros */
    PetscBool sbaij, done;
    PetscInt *d_nnz, *o_nnz;

    PetscCall(PetscBTCreate(nbs, &bt));
PetscCall(PetscHSetICreate(&ht)); 3235 PetscCall(PetscCalloc2(mbs, &d_nnz, mbs, &o_nnz)); 3236 PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPISBAIJ, &sbaij)); 3237 for (i = 0; i < mbs; i++) { 3238 PetscCall(PetscBTMemzero(nbs, bt)); 3239 PetscCall(PetscHSetIClear(ht)); 3240 for (k = 0; k < bs; k++) { 3241 PetscInt row = bs * i + k; 3242 for (j = rowidxs[row]; j < rowidxs[row + 1]; j++) { 3243 PetscInt col = colidxs[j]; 3244 if (!sbaij || col >= row) { 3245 if (col >= cs && col < ce) { 3246 if (!PetscBTLookupSet(bt, (col - cs) / bs)) d_nnz[i]++; 3247 } else { 3248 PetscCall(PetscHSetIQueryAdd(ht, col / bs, &done)); 3249 if (done) o_nnz[i]++; 3250 } 3251 } 3252 } 3253 } 3254 } 3255 PetscCall(PetscBTDestroy(&bt)); 3256 PetscCall(PetscHSetIDestroy(&ht)); 3257 PetscCall(MatMPIBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz)); 3258 PetscCall(MatMPISBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz)); 3259 PetscCall(PetscFree2(d_nnz, o_nnz)); 3260 } 3261 3262 /* store matrix values */ 3263 for (i = 0; i < m; i++) { 3264 PetscInt row = rs + i, s = rowidxs[i], e = rowidxs[i + 1]; 3265 PetscCall((*mat->ops->setvalues)(mat, 1, &row, e - s, colidxs + s, matvals + s, INSERT_VALUES)); 3266 } 3267 3268 PetscCall(PetscFree(rowidxs)); 3269 PetscCall(PetscFree2(colidxs, matvals)); 3270 PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY)); 3271 PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY)); 3272 PetscFunctionReturn(0); 3273 } 3274 3275 PetscErrorCode MatLoad_MPIBAIJ(Mat mat, PetscViewer viewer) { 3276 PetscBool isbinary; 3277 3278 PetscFunctionBegin; 3279 PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary)); 3280 PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name); 3281 PetscCall(MatLoad_MPIBAIJ_Binary(mat, viewer)); 3282 PetscFunctionReturn(0); 3283 } 3284 3285 /*@ 3286 
MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the matrices hash table 3287 3288 Input Parameters: 3289 + mat - the matrix 3290 - fact - factor 3291 3292 Options Database Key: 3293 . -mat_use_hash_table <fact> - provide the factor 3294 3295 Level: advanced 3296 3297 .seealso: `MATMPIBAIJ`, `MatSetOption()` 3298 @*/ 3299 PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat, PetscReal fact) { 3300 PetscFunctionBegin; 3301 PetscTryMethod(mat, "MatSetHashTableFactor_C", (Mat, PetscReal), (mat, fact)); 3302 PetscFunctionReturn(0); 3303 } 3304 3305 PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat, PetscReal fact) { 3306 Mat_MPIBAIJ *baij; 3307 3308 PetscFunctionBegin; 3309 baij = (Mat_MPIBAIJ *)mat->data; 3310 baij->ht_fact = fact; 3311 PetscFunctionReturn(0); 3312 } 3313 3314 PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[]) { 3315 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 3316 PetscBool flg; 3317 3318 PetscFunctionBegin; 3319 PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIBAIJ, &flg)); 3320 PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIBAIJ matrix as input"); 3321 if (Ad) *Ad = a->A; 3322 if (Ao) *Ao = a->B; 3323 if (colmap) *colmap = a->garray; 3324 PetscFunctionReturn(0); 3325 } 3326 3327 /* 3328 Special version for direct calls from Fortran (to eliminate two function call overheads 3329 */ 3330 #if defined(PETSC_HAVE_FORTRAN_CAPS) 3331 #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED 3332 #elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE) 3333 #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked 3334 #endif 3335 3336 /*@C 3337 MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to `MatSetValuesBlocked()` 3338 3339 Collective on Mat 3340 3341 Input Parameters: 3342 + mat - the matrix 3343 . min - number of input rows 3344 . im - input rows 3345 . nin - number of input columns 3346 . in - input columns 3347 . 
v - numerical values input
- addvin - `INSERT_VALUES` or `ADD_VALUES`

  Developer Note:
  This has a complete copy of `MatSetValuesBlocked_MPIBAIJ()` which is terrible code un-reuse.

  Level: advanced

.seealso: `MatSetValuesBlocked()`
@*/
PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin, PetscInt *min, const PetscInt im[], PetscInt *nin, const PetscInt in[], const MatScalar v[], InsertMode *addvin) {
  /* convert input arguments to C version */
  Mat        mat  = *matin;
  PetscInt   m    = *min, n = *nin;
  InsertMode addv = *addvin;

  Mat_MPIBAIJ     *baij        = (Mat_MPIBAIJ *)mat->data;
  const MatScalar *value;
  MatScalar       *barray      = baij->barray;
  PetscBool        roworiented = baij->roworiented;
  PetscInt         i, j, ii, jj, row, col, rstart = baij->rstartbs;
  PetscInt         rend = baij->rendbs, cstart = baij->cstartbs, stepval;
  PetscInt         cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2;

  PetscFunctionBegin;
  /* tasks normally handled by MatSetValuesBlocked() */
  if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv;
  else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values");
  PetscCheck(!mat->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix");
  if (mat->assembled) {
    mat->was_assembled = PETSC_TRUE;
    mat->assembled     = PETSC_FALSE;
  }
  PetscCall(PetscLogEventBegin(MAT_SetValues, mat, 0, 0, 0));

  /* lazily allocate the bs x bs scratch block used when the input values must be copied */
  if (!barray) {
    PetscCall(PetscMalloc1(bs2, &barray));
    baij->barray = barray;
  }

  /* scalar stride between consecutive rows (row-oriented) or columns (column-oriented) of a block in v */
  if (roworiented) stepval = (n - 1) * bs;
  else stepval = (m - 1) * bs;

  for (i = 0; i < m; i++) {
    if (im[i] < 0) continue; /* negative block-row indices are silently ignored */
    PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large, row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1);
    if (im[i] >= rstart && im[i] < rend) { /* block row owned by this process */
      row = im[i] - rstart;
      for (j = 0; j < n; j++) {
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar *)v + i * bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar *)v + j * bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i * (stepval + bs) * bs + j * bs;
          } else {
            value = v + j * (stepval + bs) * bs + i * bs;
          }
          for (ii = 0; ii < bs; ii++, value += stepval) {
            for (jj = 0; jj < bs; jj++) *barray++ = *value++;
          }
          barray -= bs2; /* rewind to the start of the copied block */
        }

        if (in[j] >= cstart && in[j] < cend) {
          /* block column falls in the diagonal block */
          col = in[j] - cstart;
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j]));
        } else if (in[j] < 0) {
          continue; /* negative block-column indices are silently ignored */
        } else {
          PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large, col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1);
          if (mat->was_assembled) {
            /* translate global block column to the local numbering of the off-diagonal part */
            if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat));

#if defined(PETSC_USE_DEBUG)
  #if defined(PETSC_USE_CTABLE)
            {
              PetscInt data;
              PetscCall(PetscTableFind(baij->colmap, in[j] + 1, &data));
              PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
            }
  #else
            PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap");
  #endif
#endif
#if defined(PETSC_USE_CTABLE)
            PetscCall(PetscTableFind(baij->colmap, in[j] + 1, &col));
            col = (col - 1) / bs;
#else
            col = (baij->colmap[in[j]] - 1) / bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ *)(baij->A->data))->nonew) {
              /* previously-unseen off-process block column: revert to unassembled state */
              PetscCall(MatDisAssemble_MPIBAIJ(mat));
              col = in[j];
            }
          } else col = in[j];
          PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j]));
        }
      }
    } else { /* block row owned elsewhere: stash for communication unless disabled */
      if (!baij->donotstash) {
        if (roworiented) {
          PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        } else {
          PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i));
        }
      }
    }
  }

  /* task normally handled by MatSetValuesBlocked() */
  PetscCall(PetscLogEventEnd(MAT_SetValues, mat, 0, 0, 0));
  PetscFunctionReturn(0);
}

/*@
  MatCreateMPIBAIJWithArrays - creates a `MATMPIBAIJ` matrix using arrays that contain in standard block
  CSR format the local rows.

  Collective

  Input Parameters:
+ comm - MPI communicator
. bs - the block size, only a block size of 1 is supported
. m - number of local rows (Cannot be `PETSC_DECIDE`)
. n - This value should be the same as the local size used in creating the
      x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have
      calculated if N is given) For square matrices n is almost always m.
. M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
. N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
. i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that rowth block row of the matrix
. j - column indices
- a - matrix values

  Output Parameter:
. mat - the matrix

  Level: intermediate

  Notes:
  The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
  thus you CANNOT change the matrix entries by changing the values of a[] after you have
  called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays.

  The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is
  the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first
  block, followed by the second column of the first block etc etc. That is, the blocks are contiguous in memory
  with column-major ordering within blocks.

  The i and j indices are 0 based, and i indices are indices corresponding to the local j array.

.seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`,
          `MPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()`
@*/
PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat) {
  PetscFunctionBegin;
  /* validate the CSR row offsets and the local row count before creating anything */
  PetscCheck(!i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0");
  PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative");
  PetscCall(MatCreate(comm, mat));
  PetscCall(MatSetSizes(*mat, m, n, M, N));
  PetscCall(MatSetType(*mat, MATMPIBAIJ));
  PetscCall(MatSetBlockSize(*mat, bs));
  PetscCall(MatSetUp(*mat));
  /* input blocks are column-major, so insert column-oriented, then restore row orientation */
  PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_FALSE));
  PetscCall(MatMPIBAIJSetPreallocationCSR(*mat, bs, i, j, a));
  PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_TRUE));
  PetscFunctionReturn(0);
}

/* Concatenates per-process sequential BAIJ matrices (stacked by rows) into one parallel BAIJ matrix */
PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) {
  PetscInt     m, N, i, rstart, nnz, Ii, bs, cbs;
  PetscInt    *indx;
  PetscScalar *values;

  PetscFunctionBegin;
  PetscCall(MatGetSize(inmat, &m, &N));
  if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */
Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)inmat->data; 3527 PetscInt *dnz, *onz, mbs, Nbs, nbs; 3528 PetscInt *bindx, rmax = a->rmax, j; 3529 PetscMPIInt rank, size; 3530 3531 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 3532 mbs = m / bs; 3533 Nbs = N / cbs; 3534 if (n == PETSC_DECIDE) PetscCall(PetscSplitOwnershipBlock(comm, cbs, &n, &N)); 3535 nbs = n / cbs; 3536 3537 PetscCall(PetscMalloc1(rmax, &bindx)); 3538 MatPreallocateBegin(comm, mbs, nbs, dnz, onz); /* inline function, output __end and __rstart are used below */ 3539 3540 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 3541 PetscCallMPI(MPI_Comm_rank(comm, &size)); 3542 if (rank == size - 1) { 3543 /* Check sum(nbs) = Nbs */ 3544 PetscCheck(__end == Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT, __end, Nbs); 3545 } 3546 3547 rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */ 3548 for (i = 0; i < mbs; i++) { 3549 PetscCall(MatGetRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL)); /* non-blocked nnz and indx */ 3550 nnz = nnz / bs; 3551 for (j = 0; j < nnz; j++) bindx[j] = indx[j * bs] / bs; 3552 PetscCall(MatPreallocateSet(i + rstart, nnz, bindx, dnz, onz)); 3553 PetscCall(MatRestoreRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL)); 3554 } 3555 PetscCall(PetscFree(bindx)); 3556 3557 PetscCall(MatCreate(comm, outmat)); 3558 PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 3559 PetscCall(MatSetBlockSizes(*outmat, bs, cbs)); 3560 PetscCall(MatSetType(*outmat, MATBAIJ)); 3561 PetscCall(MatSeqBAIJSetPreallocation(*outmat, bs, 0, dnz)); 3562 PetscCall(MatMPIBAIJSetPreallocation(*outmat, bs, 0, dnz, 0, onz)); 3563 MatPreallocateEnd(dnz, onz); 3564 PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 3565 } 3566 3567 /* numeric phase */ 3568 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 3569 PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL)); 3570 3571 for (i = 0; i 
< m; i++) { 3572 PetscCall(MatGetRow_SeqBAIJ(inmat, i, &nnz, &indx, &values)); 3573 Ii = i + rstart; 3574 PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES)); 3575 PetscCall(MatRestoreRow_SeqBAIJ(inmat, i, &nnz, &indx, &values)); 3576 } 3577 PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY)); 3578 PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY)); 3579 PetscFunctionReturn(0); 3580 } 3581