1 #include <../src/mat/impls/baij/mpi/mpibaij.h> /*I "petscmat.h" I*/ 2 3 #include <petsc/private/hashseti.h> 4 #include <petscblaslapack.h> 5 #include <petscsf.h> 6 7 #if defined(PETSC_HAVE_HYPRE) 8 PETSC_INTERN PetscErrorCode MatConvert_AIJ_HYPRE(Mat, MatType, MatReuse, Mat *); 9 #endif 10 11 PetscErrorCode MatGetRowMaxAbs_MPIBAIJ(Mat A, Vec v, PetscInt idx[]) 12 { 13 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 14 PetscInt i, *idxb = NULL, m = A->rmap->n, bs = A->cmap->bs; 15 PetscScalar *va, *vv; 16 Vec vB, vA; 17 const PetscScalar *vb; 18 19 PetscFunctionBegin; 20 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vA)); 21 PetscCall(MatGetRowMaxAbs(a->A, vA, idx)); 22 23 PetscCall(VecGetArrayWrite(vA, &va)); 24 if (idx) { 25 for (i = 0; i < m; i++) { 26 if (PetscAbsScalar(va[i])) idx[i] += A->cmap->rstart; 27 } 28 } 29 30 PetscCall(VecCreateSeq(PETSC_COMM_SELF, m, &vB)); 31 PetscCall(PetscMalloc1(m, &idxb)); 32 PetscCall(MatGetRowMaxAbs(a->B, vB, idxb)); 33 34 PetscCall(VecGetArrayWrite(v, &vv)); 35 PetscCall(VecGetArrayRead(vB, &vb)); 36 for (i = 0; i < m; i++) { 37 if (PetscAbsScalar(va[i]) < PetscAbsScalar(vb[i])) { 38 vv[i] = vb[i]; 39 if (idx) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs); 40 } else { 41 vv[i] = va[i]; 42 if (idx && PetscAbsScalar(va[i]) == PetscAbsScalar(vb[i]) && idxb[i] != -1 && idx[i] > bs * a->garray[idxb[i] / bs] + (idxb[i] % bs)) idx[i] = bs * a->garray[idxb[i] / bs] + (idxb[i] % bs); 43 } 44 } 45 PetscCall(VecRestoreArrayWrite(vA, &vv)); 46 PetscCall(VecRestoreArrayWrite(vA, &va)); 47 PetscCall(VecRestoreArrayRead(vB, &vb)); 48 PetscCall(PetscFree(idxb)); 49 PetscCall(VecDestroy(&vA)); 50 PetscCall(VecDestroy(&vB)); 51 PetscFunctionReturn(0); 52 } 53 54 PetscErrorCode MatStoreValues_MPIBAIJ(Mat mat) 55 { 56 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 57 58 PetscFunctionBegin; 59 PetscCall(MatStoreValues(aij->A)); 60 PetscCall(MatStoreValues(aij->B)); 61 PetscFunctionReturn(0); 62 } 63 64 PetscErrorCode MatRetrieveValues_MPIBAIJ(Mat 
mat) 65 { 66 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)mat->data; 67 68 PetscFunctionBegin; 69 PetscCall(MatRetrieveValues(aij->A)); 70 PetscCall(MatRetrieveValues(aij->B)); 71 PetscFunctionReturn(0); 72 } 73 74 /* 75 Local utility routine that creates a mapping from the global column 76 number to the local number in the off-diagonal part of the local 77 storage of the matrix. This is done in a non scalable way since the 78 length of colmap equals the global matrix length. 79 */ 80 PetscErrorCode MatCreateColmap_MPIBAIJ_Private(Mat mat) 81 { 82 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 83 Mat_SeqBAIJ *B = (Mat_SeqBAIJ *)baij->B->data; 84 PetscInt nbs = B->nbs, i, bs = mat->rmap->bs; 85 86 PetscFunctionBegin; 87 #if defined(PETSC_USE_CTABLE) 88 PetscCall(PetscHMapICreateWithSize(baij->nbs, &baij->colmap)); 89 for (i = 0; i < nbs; i++) PetscCall(PetscHMapISet(baij->colmap, baij->garray[i] + 1, i * bs + 1)); 90 #else 91 PetscCall(PetscCalloc1(baij->Nbs + 1, &baij->colmap)); 92 for (i = 0; i < nbs; i++) baij->colmap[baij->garray[i]] = i * bs + 1; 93 #endif 94 PetscFunctionReturn(0); 95 } 96 97 #define MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, orow, ocol) \ 98 { \ 99 brow = row / bs; \ 100 rp = aj + ai[brow]; \ 101 ap = aa + bs2 * ai[brow]; \ 102 rmax = aimax[brow]; \ 103 nrow = ailen[brow]; \ 104 bcol = col / bs; \ 105 ridx = row % bs; \ 106 cidx = col % bs; \ 107 low = 0; \ 108 high = nrow; \ 109 while (high - low > 3) { \ 110 t = (low + high) / 2; \ 111 if (rp[t] > bcol) high = t; \ 112 else low = t; \ 113 } \ 114 for (_i = low; _i < high; _i++) { \ 115 if (rp[_i] > bcol) break; \ 116 if (rp[_i] == bcol) { \ 117 bap = ap + bs2 * _i + bs * cidx + ridx; \ 118 if (addv == ADD_VALUES) *bap += value; \ 119 else *bap = value; \ 120 goto a_noinsert; \ 121 } \ 122 } \ 123 if (a->nonew == 1) goto a_noinsert; \ 124 PetscCheck(a->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") 
into matrix", orow, ocol); \ 125 MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, brow, bcol, rmax, aa, ai, aj, rp, ap, aimax, a->nonew, MatScalar); \ 126 N = nrow++ - 1; \ 127 /* shift up all the later entries in this row */ \ 128 PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \ 129 PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \ 130 PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \ 131 rp[_i] = bcol; \ 132 ap[bs2 * _i + bs * cidx + ridx] = value; \ 133 a_noinsert:; \ 134 ailen[brow] = nrow; \ 135 } 136 137 #define MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, orow, ocol) \ 138 { \ 139 brow = row / bs; \ 140 rp = bj + bi[brow]; \ 141 ap = ba + bs2 * bi[brow]; \ 142 rmax = bimax[brow]; \ 143 nrow = bilen[brow]; \ 144 bcol = col / bs; \ 145 ridx = row % bs; \ 146 cidx = col % bs; \ 147 low = 0; \ 148 high = nrow; \ 149 while (high - low > 3) { \ 150 t = (low + high) / 2; \ 151 if (rp[t] > bcol) high = t; \ 152 else low = t; \ 153 } \ 154 for (_i = low; _i < high; _i++) { \ 155 if (rp[_i] > bcol) break; \ 156 if (rp[_i] == bcol) { \ 157 bap = ap + bs2 * _i + bs * cidx + ridx; \ 158 if (addv == ADD_VALUES) *bap += value; \ 159 else *bap = value; \ 160 goto b_noinsert; \ 161 } \ 162 } \ 163 if (b->nonew == 1) goto b_noinsert; \ 164 PetscCheck(b->nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero at global row/column (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", orow, ocol); \ 165 MatSeqXAIJReallocateAIJ(B, b->mbs, bs2, nrow, brow, bcol, rmax, ba, bi, bj, rp, ap, bimax, b->nonew, MatScalar); \ 166 N = nrow++ - 1; \ 167 /* shift up all the later entries in this row */ \ 168 PetscCall(PetscArraymove(rp + _i + 1, rp + _i, N - _i + 1)); \ 169 PetscCall(PetscArraymove(ap + bs2 * (_i + 1), ap + bs2 * _i, bs2 * (N - _i + 1))); \ 170 PetscCall(PetscArrayzero(ap + bs2 * _i, bs2)); \ 171 rp[_i] = bcol; \ 172 ap[bs2 * _i + bs * cidx + ridx] = value; \ 173 b_noinsert:; \ 174 bilen[brow] = 
nrow; \ 175 } 176 177 PetscErrorCode MatSetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) 178 { 179 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 180 MatScalar value; 181 PetscBool roworiented = baij->roworiented; 182 PetscInt i, j, row, col; 183 PetscInt rstart_orig = mat->rmap->rstart; 184 PetscInt rend_orig = mat->rmap->rend, cstart_orig = mat->cmap->rstart; 185 PetscInt cend_orig = mat->cmap->rend, bs = mat->rmap->bs; 186 187 /* Some Variables required in the macro */ 188 Mat A = baij->A; 189 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)(A)->data; 190 PetscInt *aimax = a->imax, *ai = a->i, *ailen = a->ilen, *aj = a->j; 191 MatScalar *aa = a->a; 192 193 Mat B = baij->B; 194 Mat_SeqBAIJ *b = (Mat_SeqBAIJ *)(B)->data; 195 PetscInt *bimax = b->imax, *bi = b->i, *bilen = b->ilen, *bj = b->j; 196 MatScalar *ba = b->a; 197 198 PetscInt *rp, ii, nrow, _i, rmax, N, brow, bcol; 199 PetscInt low, high, t, ridx, cidx, bs2 = a->bs2; 200 MatScalar *ap, *bap; 201 202 PetscFunctionBegin; 203 for (i = 0; i < m; i++) { 204 if (im[i] < 0) continue; 205 PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1); 206 if (im[i] >= rstart_orig && im[i] < rend_orig) { 207 row = im[i] - rstart_orig; 208 for (j = 0; j < n; j++) { 209 if (in[j] >= cstart_orig && in[j] < cend_orig) { 210 col = in[j] - cstart_orig; 211 if (roworiented) value = v[i * n + j]; 212 else value = v[i + j * m]; 213 MatSetValues_SeqBAIJ_A_Private(row, col, value, addv, im[i], in[j]); 214 } else if (in[j] < 0) { 215 continue; 216 } else { 217 PetscCheck(in[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], mat->cmap->N - 1); 218 if (mat->was_assembled) { 219 if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 220 #if defined(PETSC_USE_CTABLE) 221 
PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] / bs + 1, 0, &col)); 222 col = col - 1; 223 #else 224 col = baij->colmap[in[j] / bs] - 1; 225 #endif 226 if (col < 0 && !((Mat_SeqBAIJ *)(baij->B->data))->nonew) { 227 PetscCall(MatDisAssemble_MPIBAIJ(mat)); 228 col = in[j]; 229 /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */ 230 B = baij->B; 231 b = (Mat_SeqBAIJ *)(B)->data; 232 bimax = b->imax; 233 bi = b->i; 234 bilen = b->ilen; 235 bj = b->j; 236 ba = b->a; 237 } else { 238 PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new nonzero (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]); 239 col += in[j] % bs; 240 } 241 } else col = in[j]; 242 if (roworiented) value = v[i * n + j]; 243 else value = v[i + j * m]; 244 MatSetValues_SeqBAIJ_B_Private(row, col, value, addv, im[i], in[j]); 245 /* PetscCall(MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv)); */ 246 } 247 } 248 } else { 249 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]); 250 if (!baij->donotstash) { 251 mat->assembled = PETSC_FALSE; 252 if (roworiented) { 253 PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE)); 254 } else { 255 PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE)); 256 } 257 } 258 } 259 } 260 PetscFunctionReturn(0); 261 } 262 263 static inline PetscErrorCode MatSetValuesBlocked_SeqBAIJ_Inlined(Mat A, PetscInt row, PetscInt col, const PetscScalar v[], InsertMode is, PetscInt orow, PetscInt ocol) 264 { 265 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data; 266 PetscInt *rp, low, high, t, ii, jj, nrow, i, rmax, N; 267 PetscInt *imax = a->imax, *ai = a->i, *ailen = a->ilen; 268 PetscInt *aj = a->j, nonew = a->nonew, bs2 = a->bs2, bs = A->rmap->bs; 269 PetscBool roworiented = a->roworiented; 270 const 
PetscScalar *value = v; 271 MatScalar *ap, *aa = a->a, *bap; 272 273 PetscFunctionBegin; 274 rp = aj + ai[row]; 275 ap = aa + bs2 * ai[row]; 276 rmax = imax[row]; 277 nrow = ailen[row]; 278 value = v; 279 low = 0; 280 high = nrow; 281 while (high - low > 7) { 282 t = (low + high) / 2; 283 if (rp[t] > col) high = t; 284 else low = t; 285 } 286 for (i = low; i < high; i++) { 287 if (rp[i] > col) break; 288 if (rp[i] == col) { 289 bap = ap + bs2 * i; 290 if (roworiented) { 291 if (is == ADD_VALUES) { 292 for (ii = 0; ii < bs; ii++) { 293 for (jj = ii; jj < bs2; jj += bs) bap[jj] += *value++; 294 } 295 } else { 296 for (ii = 0; ii < bs; ii++) { 297 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 298 } 299 } 300 } else { 301 if (is == ADD_VALUES) { 302 for (ii = 0; ii < bs; ii++, value += bs) { 303 for (jj = 0; jj < bs; jj++) bap[jj] += value[jj]; 304 bap += bs; 305 } 306 } else { 307 for (ii = 0; ii < bs; ii++, value += bs) { 308 for (jj = 0; jj < bs; jj++) bap[jj] = value[jj]; 309 bap += bs; 310 } 311 } 312 } 313 goto noinsert2; 314 } 315 } 316 if (nonew == 1) goto noinsert2; 317 PetscCheck(nonew != -1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new global block indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") in the matrix", orow, ocol); 318 MatSeqXAIJReallocateAIJ(A, a->mbs, bs2, nrow, row, col, rmax, aa, ai, aj, rp, ap, imax, nonew, MatScalar); 319 N = nrow++ - 1; 320 high++; 321 /* shift up all the later entries in this row */ 322 PetscCall(PetscArraymove(rp + i + 1, rp + i, N - i + 1)); 323 PetscCall(PetscArraymove(ap + bs2 * (i + 1), ap + bs2 * i, bs2 * (N - i + 1))); 324 rp[i] = col; 325 bap = ap + bs2 * i; 326 if (roworiented) { 327 for (ii = 0; ii < bs; ii++) { 328 for (jj = ii; jj < bs2; jj += bs) bap[jj] = *value++; 329 } 330 } else { 331 for (ii = 0; ii < bs; ii++) { 332 for (jj = 0; jj < bs; jj++) *bap++ = *value++; 333 } 334 } 335 noinsert2:; 336 ailen[row] = nrow; 337 PetscFunctionReturn(0); 338 } 339 340 /* 341 This 
routine should be optimized so that the block copy at ** Here a copy is required ** below is not needed 342 by passing additional stride information into the MatSetValuesBlocked_SeqBAIJ_Inlined() routine 343 */ 344 PetscErrorCode MatSetValuesBlocked_MPIBAIJ(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) 345 { 346 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 347 const PetscScalar *value; 348 MatScalar *barray = baij->barray; 349 PetscBool roworiented = baij->roworiented; 350 PetscInt i, j, ii, jj, row, col, rstart = baij->rstartbs; 351 PetscInt rend = baij->rendbs, cstart = baij->cstartbs, stepval; 352 PetscInt cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2; 353 354 PetscFunctionBegin; 355 if (!barray) { 356 PetscCall(PetscMalloc1(bs2, &barray)); 357 baij->barray = barray; 358 } 359 360 if (roworiented) stepval = (n - 1) * bs; 361 else stepval = (m - 1) * bs; 362 363 for (i = 0; i < m; i++) { 364 if (im[i] < 0) continue; 365 PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed row too large %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1); 366 if (im[i] >= rstart && im[i] < rend) { 367 row = im[i] - rstart; 368 for (j = 0; j < n; j++) { 369 /* If NumCol = 1 then a copy is not required */ 370 if ((roworiented) && (n == 1)) { 371 barray = (MatScalar *)v + i * bs2; 372 } else if ((!roworiented) && (m == 1)) { 373 barray = (MatScalar *)v + j * bs2; 374 } else { /* Here a copy is required */ 375 if (roworiented) { 376 value = v + (i * (stepval + bs) + j) * bs; 377 } else { 378 value = v + (j * (stepval + bs) + i) * bs; 379 } 380 for (ii = 0; ii < bs; ii++, value += bs + stepval) { 381 for (jj = 0; jj < bs; jj++) barray[jj] = value[jj]; 382 barray += bs; 383 } 384 barray -= bs2; 385 } 386 387 if (in[j] >= cstart && in[j] < cend) { 388 col = in[j] - cstart; 389 PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], 
in[j])); 390 } else if (in[j] < 0) { 391 continue; 392 } else { 393 PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Block indexed column too large %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1); 394 if (mat->was_assembled) { 395 if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 396 397 #if defined(PETSC_USE_DEBUG) 398 #if defined(PETSC_USE_CTABLE) 399 { 400 PetscInt data; 401 PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &data)); 402 PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap"); 403 } 404 #else 405 PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap"); 406 #endif 407 #endif 408 #if defined(PETSC_USE_CTABLE) 409 PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col)); 410 col = (col - 1) / bs; 411 #else 412 col = (baij->colmap[in[j]] - 1) / bs; 413 #endif 414 if (col < 0 && !((Mat_SeqBAIJ *)(baij->B->data))->nonew) { 415 PetscCall(MatDisAssemble_MPIBAIJ(mat)); 416 col = in[j]; 417 } else PetscCheck(col >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Inserting a new blocked indexed nonzero block (%" PetscInt_FMT ", %" PetscInt_FMT ") into matrix", im[i], in[j]); 418 } else col = in[j]; 419 PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j])); 420 } 421 } 422 } else { 423 PetscCheck(!mat->nooffprocentries, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Setting off process block indexed row %" PetscInt_FMT " even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set", im[i]); 424 if (!baij->donotstash) { 425 if (roworiented) { 426 PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 427 } else { 428 PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 429 } 430 } 431 } 432 } 433 PetscFunctionReturn(0); 434 } 435 436 #define HASH_KEY 0.6180339887 437 #define HASH(size, key, tmp) (tmp = 
(key)*HASH_KEY, (PetscInt)((size) * (tmp - (PetscInt)tmp))) 438 /* #define HASH(size,key) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 439 /* #define HASH(size,key,tmp) ((PetscInt)((size)*fmod(((key)*HASH_KEY),1))) */ 440 PetscErrorCode MatSetValues_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) 441 { 442 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 443 PetscBool roworiented = baij->roworiented; 444 PetscInt i, j, row, col; 445 PetscInt rstart_orig = mat->rmap->rstart; 446 PetscInt rend_orig = mat->rmap->rend, Nbs = baij->Nbs; 447 PetscInt h1, key, size = baij->ht_size, bs = mat->rmap->bs, *HT = baij->ht, idx; 448 PetscReal tmp; 449 MatScalar **HD = baij->hd, value; 450 PetscInt total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct; 451 452 PetscFunctionBegin; 453 for (i = 0; i < m; i++) { 454 if (PetscDefined(USE_DEBUG)) { 455 PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row"); 456 PetscCheck(im[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], mat->rmap->N - 1); 457 } 458 row = im[i]; 459 if (row >= rstart_orig && row < rend_orig) { 460 for (j = 0; j < n; j++) { 461 col = in[j]; 462 if (roworiented) value = v[i * n + j]; 463 else value = v[i + j * m]; 464 /* Look up PetscInto the Hash Table */ 465 key = (row / bs) * Nbs + (col / bs) + 1; 466 h1 = HASH(size, key, tmp); 467 468 idx = h1; 469 if (PetscDefined(USE_DEBUG)) { 470 insert_ct++; 471 total_ct++; 472 if (HT[idx] != key) { 473 for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++) 474 ; 475 if (idx == size) { 476 for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++) 477 ; 478 PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col); 479 } 480 } 481 } else if (HT[idx] != key) { 482 for (idx = h1; (idx 
< size) && (HT[idx] != key); idx++) 483 ; 484 if (idx == size) { 485 for (idx = 0; (idx < h1) && (HT[idx] != key); idx++) 486 ; 487 PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col); 488 } 489 } 490 /* A HASH table entry is found, so insert the values at the correct address */ 491 if (addv == ADD_VALUES) *(HD[idx] + (col % bs) * bs + (row % bs)) += value; 492 else *(HD[idx] + (col % bs) * bs + (row % bs)) = value; 493 } 494 } else if (!baij->donotstash) { 495 if (roworiented) { 496 PetscCall(MatStashValuesRow_Private(&mat->stash, im[i], n, in, v + i * n, PETSC_FALSE)); 497 } else { 498 PetscCall(MatStashValuesCol_Private(&mat->stash, im[i], n, in, v + i, m, PETSC_FALSE)); 499 } 500 } 501 } 502 if (PetscDefined(USE_DEBUG)) { 503 baij->ht_total_ct += total_ct; 504 baij->ht_insert_ct += insert_ct; 505 } 506 PetscFunctionReturn(0); 507 } 508 509 PetscErrorCode MatSetValuesBlocked_MPIBAIJ_HT(Mat mat, PetscInt m, const PetscInt im[], PetscInt n, const PetscInt in[], const PetscScalar v[], InsertMode addv) 510 { 511 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 512 PetscBool roworiented = baij->roworiented; 513 PetscInt i, j, ii, jj, row, col; 514 PetscInt rstart = baij->rstartbs; 515 PetscInt rend = mat->rmap->rend, stepval, bs = mat->rmap->bs, bs2 = baij->bs2, nbs2 = n * bs2; 516 PetscInt h1, key, size = baij->ht_size, idx, *HT = baij->ht, Nbs = baij->Nbs; 517 PetscReal tmp; 518 MatScalar **HD = baij->hd, *baij_a; 519 const PetscScalar *v_t, *value; 520 PetscInt total_ct = baij->ht_total_ct, insert_ct = baij->ht_insert_ct; 521 522 PetscFunctionBegin; 523 if (roworiented) stepval = (n - 1) * bs; 524 else stepval = (m - 1) * bs; 525 526 for (i = 0; i < m; i++) { 527 if (PetscDefined(USE_DEBUG)) { 528 PetscCheck(im[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Negative row: %" PetscInt_FMT, im[i]); 529 PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, 
PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1); 530 } 531 row = im[i]; 532 v_t = v + i * nbs2; 533 if (row >= rstart && row < rend) { 534 for (j = 0; j < n; j++) { 535 col = in[j]; 536 537 /* Look up into the Hash Table */ 538 key = row * Nbs + col + 1; 539 h1 = HASH(size, key, tmp); 540 541 idx = h1; 542 if (PetscDefined(USE_DEBUG)) { 543 total_ct++; 544 insert_ct++; 545 if (HT[idx] != key) { 546 for (idx = h1; (idx < size) && (HT[idx] != key); idx++, total_ct++) 547 ; 548 if (idx == size) { 549 for (idx = 0; (idx < h1) && (HT[idx] != key); idx++, total_ct++) 550 ; 551 PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col); 552 } 553 } 554 } else if (HT[idx] != key) { 555 for (idx = h1; (idx < size) && (HT[idx] != key); idx++) 556 ; 557 if (idx == size) { 558 for (idx = 0; (idx < h1) && (HT[idx] != key); idx++) 559 ; 560 PetscCheck(idx != h1, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "(%" PetscInt_FMT ",%" PetscInt_FMT ") has no entry in the hash table", row, col); 561 } 562 } 563 baij_a = HD[idx]; 564 if (roworiented) { 565 /*value = v + i*(stepval+bs)*bs + j*bs;*/ 566 /* value = v + (i*(stepval+bs)+j)*bs; */ 567 value = v_t; 568 v_t += bs; 569 if (addv == ADD_VALUES) { 570 for (ii = 0; ii < bs; ii++, value += stepval) { 571 for (jj = ii; jj < bs2; jj += bs) baij_a[jj] += *value++; 572 } 573 } else { 574 for (ii = 0; ii < bs; ii++, value += stepval) { 575 for (jj = ii; jj < bs2; jj += bs) baij_a[jj] = *value++; 576 } 577 } 578 } else { 579 value = v + j * (stepval + bs) * bs + i * bs; 580 if (addv == ADD_VALUES) { 581 for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) { 582 for (jj = 0; jj < bs; jj++) baij_a[jj] += *value++; 583 } 584 } else { 585 for (ii = 0; ii < bs; ii++, value += stepval, baij_a += bs) { 586 for (jj = 0; jj < bs; jj++) baij_a[jj] = *value++; 587 } 588 } 589 } 590 } 591 } else { 592 
if (!baij->donotstash) { 593 if (roworiented) { 594 PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 595 } else { 596 PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 597 } 598 } 599 } 600 } 601 if (PetscDefined(USE_DEBUG)) { 602 baij->ht_total_ct += total_ct; 603 baij->ht_insert_ct += insert_ct; 604 } 605 PetscFunctionReturn(0); 606 } 607 608 PetscErrorCode MatGetValues_MPIBAIJ(Mat mat, PetscInt m, const PetscInt idxm[], PetscInt n, const PetscInt idxn[], PetscScalar v[]) 609 { 610 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 611 PetscInt bs = mat->rmap->bs, i, j, bsrstart = mat->rmap->rstart, bsrend = mat->rmap->rend; 612 PetscInt bscstart = mat->cmap->rstart, bscend = mat->cmap->rend, row, col, data; 613 614 PetscFunctionBegin; 615 for (i = 0; i < m; i++) { 616 if (idxm[i] < 0) continue; /* negative row */ 617 PetscCheck(idxm[i] < mat->rmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large: row %" PetscInt_FMT " max %" PetscInt_FMT, idxm[i], mat->rmap->N - 1); 618 if (idxm[i] >= bsrstart && idxm[i] < bsrend) { 619 row = idxm[i] - bsrstart; 620 for (j = 0; j < n; j++) { 621 if (idxn[j] < 0) continue; /* negative column */ 622 PetscCheck(idxn[j] < mat->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large: col %" PetscInt_FMT " max %" PetscInt_FMT, idxn[j], mat->cmap->N - 1); 623 if (idxn[j] >= bscstart && idxn[j] < bscend) { 624 col = idxn[j] - bscstart; 625 PetscCall(MatGetValues_SeqBAIJ(baij->A, 1, &row, 1, &col, v + i * n + j)); 626 } else { 627 if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 628 #if defined(PETSC_USE_CTABLE) 629 PetscCall(PetscHMapIGetWithDefault(baij->colmap, idxn[j] / bs + 1, 0, &data)); 630 data--; 631 #else 632 data = baij->colmap[idxn[j] / bs] - 1; 633 #endif 634 if ((data < 0) || (baij->garray[data / bs] != idxn[j] / bs)) *(v + i * n + j) = 0.0; 635 else { 636 col = data + idxn[j] % bs; 637 
PetscCall(MatGetValues_SeqBAIJ(baij->B, 1, &row, 1, &col, v + i * n + j)); 638 } 639 } 640 } 641 } else SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local values currently supported"); 642 } 643 PetscFunctionReturn(0); 644 } 645 646 PetscErrorCode MatNorm_MPIBAIJ(Mat mat, NormType type, PetscReal *nrm) 647 { 648 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 649 Mat_SeqBAIJ *amat = (Mat_SeqBAIJ *)baij->A->data, *bmat = (Mat_SeqBAIJ *)baij->B->data; 650 PetscInt i, j, bs2 = baij->bs2, bs = baij->A->rmap->bs, nz, row, col; 651 PetscReal sum = 0.0; 652 MatScalar *v; 653 654 PetscFunctionBegin; 655 if (baij->size == 1) { 656 PetscCall(MatNorm(baij->A, type, nrm)); 657 } else { 658 if (type == NORM_FROBENIUS) { 659 v = amat->a; 660 nz = amat->nz * bs2; 661 for (i = 0; i < nz; i++) { 662 sum += PetscRealPart(PetscConj(*v) * (*v)); 663 v++; 664 } 665 v = bmat->a; 666 nz = bmat->nz * bs2; 667 for (i = 0; i < nz; i++) { 668 sum += PetscRealPart(PetscConj(*v) * (*v)); 669 v++; 670 } 671 PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 672 *nrm = PetscSqrtReal(*nrm); 673 } else if (type == NORM_1) { /* max column sum */ 674 PetscReal *tmp, *tmp2; 675 PetscInt *jj, *garray = baij->garray, cstart = baij->rstartbs; 676 PetscCall(PetscCalloc1(mat->cmap->N, &tmp)); 677 PetscCall(PetscMalloc1(mat->cmap->N, &tmp2)); 678 v = amat->a; 679 jj = amat->j; 680 for (i = 0; i < amat->nz; i++) { 681 for (j = 0; j < bs; j++) { 682 col = bs * (cstart + *jj) + j; /* column index */ 683 for (row = 0; row < bs; row++) { 684 tmp[col] += PetscAbsScalar(*v); 685 v++; 686 } 687 } 688 jj++; 689 } 690 v = bmat->a; 691 jj = bmat->j; 692 for (i = 0; i < bmat->nz; i++) { 693 for (j = 0; j < bs; j++) { 694 col = bs * garray[*jj] + j; 695 for (row = 0; row < bs; row++) { 696 tmp[col] += PetscAbsScalar(*v); 697 v++; 698 } 699 } 700 jj++; 701 } 702 PetscCall(MPIU_Allreduce(tmp, tmp2, mat->cmap->N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)mat))); 703 
*nrm = 0.0; 704 for (j = 0; j < mat->cmap->N; j++) { 705 if (tmp2[j] > *nrm) *nrm = tmp2[j]; 706 } 707 PetscCall(PetscFree(tmp)); 708 PetscCall(PetscFree(tmp2)); 709 } else if (type == NORM_INFINITY) { /* max row sum */ 710 PetscReal *sums; 711 PetscCall(PetscMalloc1(bs, &sums)); 712 sum = 0.0; 713 for (j = 0; j < amat->mbs; j++) { 714 for (row = 0; row < bs; row++) sums[row] = 0.0; 715 v = amat->a + bs2 * amat->i[j]; 716 nz = amat->i[j + 1] - amat->i[j]; 717 for (i = 0; i < nz; i++) { 718 for (col = 0; col < bs; col++) { 719 for (row = 0; row < bs; row++) { 720 sums[row] += PetscAbsScalar(*v); 721 v++; 722 } 723 } 724 } 725 v = bmat->a + bs2 * bmat->i[j]; 726 nz = bmat->i[j + 1] - bmat->i[j]; 727 for (i = 0; i < nz; i++) { 728 for (col = 0; col < bs; col++) { 729 for (row = 0; row < bs; row++) { 730 sums[row] += PetscAbsScalar(*v); 731 v++; 732 } 733 } 734 } 735 for (row = 0; row < bs; row++) { 736 if (sums[row] > sum) sum = sums[row]; 737 } 738 } 739 PetscCall(MPIU_Allreduce(&sum, nrm, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)mat))); 740 PetscCall(PetscFree(sums)); 741 } else SETERRQ(PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "No support for this norm yet"); 742 } 743 PetscFunctionReturn(0); 744 } 745 746 /* 747 Creates the hash table, and sets the table 748 This table is created only once. 749 If new entried need to be added to the matrix 750 then the hash table has to be destroyed and 751 recreated. 
752 */ 753 PetscErrorCode MatCreateHashTable_MPIBAIJ_Private(Mat mat, PetscReal factor) 754 { 755 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 756 Mat A = baij->A, B = baij->B; 757 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b = (Mat_SeqBAIJ *)B->data; 758 PetscInt i, j, k, nz = a->nz + b->nz, h1, *ai = a->i, *aj = a->j, *bi = b->i, *bj = b->j; 759 PetscInt ht_size, bs2 = baij->bs2, rstart = baij->rstartbs; 760 PetscInt cstart = baij->cstartbs, *garray = baij->garray, row, col, Nbs = baij->Nbs; 761 PetscInt *HT, key; 762 MatScalar **HD; 763 PetscReal tmp; 764 #if defined(PETSC_USE_INFO) 765 PetscInt ct = 0, max = 0; 766 #endif 767 768 PetscFunctionBegin; 769 if (baij->ht) PetscFunctionReturn(0); 770 771 baij->ht_size = (PetscInt)(factor * nz); 772 ht_size = baij->ht_size; 773 774 /* Allocate Memory for Hash Table */ 775 PetscCall(PetscCalloc2(ht_size, &baij->hd, ht_size, &baij->ht)); 776 HD = baij->hd; 777 HT = baij->ht; 778 779 /* Loop Over A */ 780 for (i = 0; i < a->mbs; i++) { 781 for (j = ai[i]; j < ai[i + 1]; j++) { 782 row = i + rstart; 783 col = aj[j] + cstart; 784 785 key = row * Nbs + col + 1; 786 h1 = HASH(ht_size, key, tmp); 787 for (k = 0; k < ht_size; k++) { 788 if (!HT[(h1 + k) % ht_size]) { 789 HT[(h1 + k) % ht_size] = key; 790 HD[(h1 + k) % ht_size] = a->a + j * bs2; 791 break; 792 #if defined(PETSC_USE_INFO) 793 } else { 794 ct++; 795 #endif 796 } 797 } 798 #if defined(PETSC_USE_INFO) 799 if (k > max) max = k; 800 #endif 801 } 802 } 803 /* Loop Over B */ 804 for (i = 0; i < b->mbs; i++) { 805 for (j = bi[i]; j < bi[i + 1]; j++) { 806 row = i + rstart; 807 col = garray[bj[j]]; 808 key = row * Nbs + col + 1; 809 h1 = HASH(ht_size, key, tmp); 810 for (k = 0; k < ht_size; k++) { 811 if (!HT[(h1 + k) % ht_size]) { 812 HT[(h1 + k) % ht_size] = key; 813 HD[(h1 + k) % ht_size] = b->a + j * bs2; 814 break; 815 #if defined(PETSC_USE_INFO) 816 } else { 817 ct++; 818 #endif 819 } 820 } 821 #if defined(PETSC_USE_INFO) 822 if (k > max) max = k; 823 #endif 824 } 
}

  /* Print Summary */
#if defined(PETSC_USE_INFO)
  /* j counts occupied hash slots; ct (accumulated above) counts probe collisions */
  for (i = 0, j = 0; i < ht_size; i++) {
    if (HT[i]) j++;
  }
  PetscCall(PetscInfo(mat, "Average Search = %5.2g,max search = %" PetscInt_FMT "\n", (!j) ? (double)0.0 : (double)(((PetscReal)(ct + j)) / (double)j), max));
#endif
  PetscFunctionReturn(0);
}

/*
  MatAssemblyBegin_MPIBAIJ - Starts communication of stashed off-process entries
  (both the point stash and the block stash) toward their owning ranks.
  A no-op when stashing is disabled or no off-process entries are allowed.
*/
PetscErrorCode MatAssemblyBegin_MPIBAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  PetscInt     nstash, reallocs;

  PetscFunctionBegin;
  if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  PetscCall(MatStashScatterBegin_Private(mat, &mat->stash, mat->rmap->range));
  PetscCall(MatStashScatterBegin_Private(mat, &mat->bstash, baij->rangebs));
  PetscCall(MatStashGetInfo_Private(&mat->stash, &nstash, &reallocs));
  PetscCall(PetscInfo(mat, "Stash has %" PetscInt_FMT " entries,uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscCall(MatStashGetInfo_Private(&mat->bstash, &nstash, &reallocs));
  PetscCall(PetscInfo(mat, "Block-Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n", nstash, reallocs));
  PetscFunctionReturn(0);
}

/*
  MatAssemblyEnd_MPIBAIJ - Receives the stashed entries started in
  MatAssemblyBegin_MPIBAIJ, inserts them locally, then assembles the
  sequential diagonal and off-diagonal blocks.
*/
PetscErrorCode MatAssemblyEnd_MPIBAIJ(Mat mat, MatAssemblyType mode)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ *a    = (Mat_SeqBAIJ *)baij->A->data;
  PetscInt     i, j, rstart, ncols, flg, bs2 = baij->bs2;
  PetscInt    *row, *col;
  PetscBool    r1, r2, r3, other_disassembled;
  MatScalar   *val;
  PetscMPIInt  n;

  PetscFunctionBegin;
  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  if (!baij->donotstash && !mat->nooffprocentries) {
    /* drain the point stash: each received message is a batch of (row,col,val) triples */
    while (1) {
      PetscCall(MatStashScatterGetMesg_Private(&mat->stash, &n, &row, &col, &val, &flg));
      if (!flg) break;

      for (i = 0; i < n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j = i, rstart = row[j]; j < n; j++) {
874 if (row[j] != rstart) break; 875 } 876 if (j < n) ncols = j - i; 877 else ncols = n - i; 878 /* Now assemble all these values with a single function call */ 879 PetscCall(MatSetValues_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i, mat->insertmode)); 880 i = j; 881 } 882 } 883 PetscCall(MatStashScatterEnd_Private(&mat->stash)); 884 /* Now process the block-stash. Since the values are stashed column-oriented, 885 set the roworiented flag to column oriented, and after MatSetValues() 886 restore the original flags */ 887 r1 = baij->roworiented; 888 r2 = a->roworiented; 889 r3 = ((Mat_SeqBAIJ *)baij->B->data)->roworiented; 890 891 baij->roworiented = PETSC_FALSE; 892 a->roworiented = PETSC_FALSE; 893 894 (((Mat_SeqBAIJ *)baij->B->data))->roworiented = PETSC_FALSE; /* b->roworiented */ 895 while (1) { 896 PetscCall(MatStashScatterGetMesg_Private(&mat->bstash, &n, &row, &col, &val, &flg)); 897 if (!flg) break; 898 899 for (i = 0; i < n;) { 900 /* Now identify the consecutive vals belonging to the same row */ 901 for (j = i, rstart = row[j]; j < n; j++) { 902 if (row[j] != rstart) break; 903 } 904 if (j < n) ncols = j - i; 905 else ncols = n - i; 906 PetscCall(MatSetValuesBlocked_MPIBAIJ(mat, 1, row + i, ncols, col + i, val + i * bs2, mat->insertmode)); 907 i = j; 908 } 909 } 910 PetscCall(MatStashScatterEnd_Private(&mat->bstash)); 911 912 baij->roworiented = r1; 913 a->roworiented = r2; 914 915 ((Mat_SeqBAIJ *)baij->B->data)->roworiented = r3; /* b->roworiented */ 916 } 917 918 PetscCall(MatAssemblyBegin(baij->A, mode)); 919 PetscCall(MatAssemblyEnd(baij->A, mode)); 920 921 /* determine if any processor has disassembled, if so we must 922 also disassemble ourselves, in order that we may reassemble. 
*/ 923 /* 924 if nonzero structure of submatrix B cannot change then we know that 925 no processor disassembled thus we can skip this stuff 926 */ 927 if (!((Mat_SeqBAIJ *)baij->B->data)->nonew) { 928 PetscCall(MPIU_Allreduce(&mat->was_assembled, &other_disassembled, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat))); 929 if (mat->was_assembled && !other_disassembled) PetscCall(MatDisAssemble_MPIBAIJ(mat)); 930 } 931 932 if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) PetscCall(MatSetUpMultiply_MPIBAIJ(mat)); 933 PetscCall(MatAssemblyBegin(baij->B, mode)); 934 PetscCall(MatAssemblyEnd(baij->B, mode)); 935 936 #if defined(PETSC_USE_INFO) 937 if (baij->ht && mode == MAT_FINAL_ASSEMBLY) { 938 PetscCall(PetscInfo(mat, "Average Hash Table Search in MatSetValues = %5.2f\n", (double)((PetscReal)baij->ht_total_ct) / baij->ht_insert_ct)); 939 940 baij->ht_total_ct = 0; 941 baij->ht_insert_ct = 0; 942 } 943 #endif 944 if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) { 945 PetscCall(MatCreateHashTable_MPIBAIJ_Private(mat, baij->ht_fact)); 946 947 mat->ops->setvalues = MatSetValues_MPIBAIJ_HT; 948 mat->ops->setvaluesblocked = MatSetValuesBlocked_MPIBAIJ_HT; 949 } 950 951 PetscCall(PetscFree2(baij->rowvalues, baij->rowindices)); 952 953 baij->rowvalues = NULL; 954 955 /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */ 956 if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ *)(baij->A->data))->nonew) { 957 PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate; 958 PetscCall(MPIU_Allreduce(&state, &mat->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)mat))); 959 } 960 PetscFunctionReturn(0); 961 } 962 963 extern PetscErrorCode MatView_SeqBAIJ(Mat, PetscViewer); 964 #include <petscdraw.h> 965 static PetscErrorCode MatView_MPIBAIJ_ASCIIorDraworSocket(Mat mat, PetscViewer viewer) 966 { 967 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 968 
  PetscMPIInt       rank = baij->rank;
  PetscInt          bs   = mat->rmap->bs;
  PetscBool         iascii, isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  if (iascii) {
    PetscCall(PetscViewerGetFormat(viewer, &format));
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* per-rank summary of local row counts and nonzeros; synchronized so output is ordered by rank */
      MatInfo info;
      PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
      PetscCall(MatGetInfo(mat, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIIPushSynchronized(viewer));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] Local rows %" PetscInt_FMT " nz %" PetscInt_FMT " nz alloced %" PetscInt_FMT " bs %" PetscInt_FMT " mem %g\n", rank, mat->rmap->n, (PetscInt)info.nz_used, (PetscInt)info.nz_allocated,
                                                   mat->rmap->bs, (double)info.memory));
      PetscCall(MatGetInfo(baij->A, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] on-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(MatGetInfo(baij->B, MAT_LOCAL, &info));
      PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "[%d] off-diagonal part: nz %" PetscInt_FMT " \n", rank, (PetscInt)info.nz_used));
      PetscCall(PetscViewerFlush(viewer));
      PetscCall(PetscViewerASCIIPopSynchronized(viewer));
      PetscCall(PetscViewerASCIIPrintf(viewer, "Information on VecScatter used in matrix-vector product: \n"));
      PetscCall(VecScatterView(baij->Mvctx, viewer));
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      PetscCall(PetscViewerASCIIPrintf(viewer, " block size is %" PetscInt_FMT "\n", bs));
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;

    PetscCall(PetscViewerDrawGetDraw(viewer, 0, &draw));
    PetscCall(PetscDrawIsNull(draw, &isnull));
    if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    Mat_SeqBAIJ *Aloc;
    PetscInt     M = mat->rmap->N, N = mat->cmap->N, *ai, *aj, col, i, j, k, *rvals, mbs = baij->mbs;
    MatScalar   *a;
    const char  *matname;

    /* Here we are creating a temporary matrix, so will assume MPIBAIJ is acceptable */
    /* Perhaps this should be the type of mat? */
    PetscCall(MatCreate(PetscObjectComm((PetscObject)mat), &A));
    if (rank == 0) {
      /* rank 0 owns every row/column of the gathered copy */
      PetscCall(MatSetSizes(A, M, N, M, N));
    } else {
      PetscCall(MatSetSizes(A, 0, 0, M, N));
    }
    PetscCall(MatSetType(A, MATMPIBAIJ));
    PetscCall(MatMPIBAIJSetPreallocation(A, mat->rmap->bs, 0, NULL, 0, NULL));
    PetscCall(MatSetOption(A, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));

    /* copy over the A part */
    Aloc = (Mat_SeqBAIJ *)baij->A->data;
    ai   = Aloc->i;
    aj   = Aloc->j;
    a    = Aloc->a;
    /* rvals holds the bs consecutive global scalar row indices of one block row */
    PetscCall(PetscMalloc1(bs, &rvals));

    for (i = 0; i < mbs; i++) {
      rvals[0] = bs * (baij->rstartbs + i);
      for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
      for (j = ai[i]; j < ai[i + 1]; j++) {
        col = (baij->cstartbs + aj[j]) * bs;
        for (k = 0; k < bs; k++) {
          /* blocks are stored column-by-column: insert one scalar column of the block at a time */
          PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
          col++;
          a += bs;
        }
      }
    }
    /* copy over the B part */
    Aloc = (Mat_SeqBAIJ *)baij->B->data;
    ai   = Aloc->i;
    aj   = Aloc->j;
    a    = Aloc->a;
    for (i = 0; i < mbs; i++) {
      rvals[0] = bs * (baij->rstartbs + i);
      for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
      for (j = ai[i]; j < ai[i + 1]; j++) {
        /* garray maps compressed off-diagonal block columns to global block columns */
        col = baij->garray[aj[j]] * bs;
        for (k = 0; k < bs; k++) {
          PetscCall(MatSetValues_MPIBAIJ(A, bs, rvals, 1, &col, a, INSERT_VALUES));
          col++;
          a += bs;
        }
      }
    }
    PetscCall(PetscFree(rvals));
    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    PetscCall(PetscViewerGetSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(PetscObjectGetName((PetscObject)mat, &matname));
    if (rank == 0) {
      /* only rank 0 holds data; view its local diagonal block which is now the whole matrix */
      PetscCall(PetscObjectSetName((PetscObject)((Mat_MPIBAIJ *)(A->data))->A, matname));
      PetscCall(MatView_SeqBAIJ(((Mat_MPIBAIJ *)(A->data))->A, sviewer));
    }
    PetscCall(PetscViewerRestoreSubViewer(viewer, PETSC_COMM_SELF, &sviewer));
    PetscCall(PetscViewerFlush(viewer));
    PetscCall(MatDestroy(&A));
  }
  PetscFunctionReturn(0);
}

/* Used for both MPIBAIJ and MPISBAIJ matrices */
/*
  MatView_MPIBAIJ_Binary - Writes the matrix to a binary viewer in the
  standard PETSc (scalar AIJ) binary format: header, row lengths, column
  indices, then values, each written collectively. (body continues below)
*/
PetscErrorCode MatView_MPIBAIJ_Binary(Mat mat, PetscViewer viewer)
{
  Mat_MPIBAIJ    *aij = (Mat_MPIBAIJ *)mat->data;
  Mat_SeqBAIJ    *A   = (Mat_SeqBAIJ *)aij->A->data;
  Mat_SeqBAIJ    *B   = (Mat_SeqBAIJ *)aij->B->data;
  const PetscInt *garray = aij->garray;
  PetscInt        header[4], M, N, m, rs, cs, bs, nz, cnt, i, j, ja, jb, k, l;
  PetscInt       *rowlens, *colidxs;
  PetscScalar    *matvals;

  PetscFunctionBegin;
  PetscCall(PetscViewerSetUp(viewer));

  M  = mat->rmap->N;
  N  = mat->cmap->N;
  m  = mat->rmap->n;
  rs = mat->rmap->rstart;
  cs = mat->cmap->rstart;
  bs = mat->rmap->bs;
  /* local scalar nonzero count: each stored block contributes bs*bs scalars */
  nz = bs * bs * (A->nz + B->nz);

  /* write matrix header */
  header[0] = MAT_FILE_CLASSID;
  header[1] = M;
  header[2] = N;
  header[3] = nz;
  /* header[3] becomes the global nonzero count on rank 0, which does the writing */
  PetscCallMPI(MPI_Reduce(&nz, &header[3], 1, MPIU_INT, MPI_SUM, 0, PetscObjectComm((PetscObject)mat)));
  PetscCall(PetscViewerBinaryWrite(viewer, header, 4, PETSC_INT));

  /* fill in and store row lengths */
  PetscCall(PetscMalloc1(m, &rowlens));
  for (cnt = 0, i
= 0; i < A->mbs; i++)
    /* every scalar row inside block row i has the same length: bs * (blocks in A row + blocks in B row) */
    for (j = 0; j < bs; j++) rowlens[cnt++] = bs * (A->i[i + 1] - A->i[i] + B->i[i + 1] - B->i[i]);
  PetscCall(PetscViewerBinaryWriteAll(viewer, rowlens, m, rs, M, PETSC_INT));
  PetscCall(PetscFree(rowlens));

  /* fill in and store column indices */
  PetscCall(PetscMalloc1(nz, &colidxs));
  for (cnt = 0, i = 0; i < A->mbs; i++) {
    for (k = 0; k < bs; k++) {
      /* emit global ascending order: B columns left of the diagonal block, then A, then remaining B columns */
      for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
        if (garray[B->j[jb]] > cs / bs) break;
        for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l;
      }
      for (ja = A->i[i]; ja < A->i[i + 1]; ja++)
        for (l = 0; l < bs; l++) colidxs[cnt++] = bs * A->j[ja] + l + cs;
      for (; jb < B->i[i + 1]; jb++)
        for (l = 0; l < bs; l++) colidxs[cnt++] = bs * garray[B->j[jb]] + l;
    }
  }
  PetscCheck(cnt == nz, PETSC_COMM_SELF, PETSC_ERR_LIB, "Internal PETSc error: cnt = %" PetscInt_FMT " nz = %" PetscInt_FMT, cnt, nz);
  PetscCall(PetscViewerBinaryWriteAll(viewer, colidxs, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_INT));
  PetscCall(PetscFree(colidxs));

  /* fill in and store nonzero values */
  PetscCall(PetscMalloc1(nz, &matvals));
  for (cnt = 0, i = 0; i < A->mbs; i++) {
    for (k = 0; k < bs; k++) {
      /* same B/A/B interleave as the column-index pass; blocks are column-major,
         so scalar (row k, col l) of block jb lives at a[bs*(bs*jb + l) + k] */
      for (jb = B->i[i]; jb < B->i[i + 1]; jb++) {
        if (garray[B->j[jb]] > cs / bs) break;
        for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k];
      }
      for (ja = A->i[i]; ja < A->i[i + 1]; ja++)
        for (l = 0; l < bs; l++) matvals[cnt++] = A->a[bs * (bs * ja + l) + k];
      for (; jb = B->i[i + 1], jb < B->i[i + 1]; jb++)
        for (l = 0; l < bs; l++) matvals[cnt++] = B->a[bs * (bs * jb + l) + k];
    }
  }
  PetscCall(PetscViewerBinaryWriteAll(viewer, matvals, nz, PETSC_DECIDE, PETSC_DECIDE, PETSC_SCALAR));
  PetscCall(PetscFree(matvals));

  /* write block size option to the viewer's .info file */
  PetscCall(MatView_Binary_BlockSizes(mat, viewer));
  PetscFunctionReturn(0);
}

/*
  MatView_MPIBAIJ - Top-level viewer dispatch for MPIBAIJ: ASCII/draw/socket
  go through the gather-based path, binary through the collective writer.
*/
PetscErrorCode MatView_MPIBAIJ(Mat mat, PetscViewer viewer)
{
  PetscBool iascii, isdraw, issocket, isbinary;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERDRAW, &isdraw));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERSOCKET, &issocket));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  if (iascii || isdraw || issocket) {
    PetscCall(MatView_MPIBAIJ_ASCIIorDraworSocket(mat, viewer));
  } else if (isbinary) PetscCall(MatView_MPIBAIJ_Binary(mat, viewer));
  PetscFunctionReturn(0);
}

/*
  MatDestroy_MPIBAIJ - Frees all storage of the parallel matrix: stashes,
  the two sequential blocks, the column map, scatter context, work arrays,
  and un-composes every attached method. (continues below)
*/
PetscErrorCode MatDestroy_MPIBAIJ(Mat mat)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat, "Rows=%" PetscInt_FMT ",Cols=%" PetscInt_FMT, mat->rmap->N, mat->cmap->N);
#endif
  PetscCall(MatStashDestroy_Private(&mat->stash));
  PetscCall(MatStashDestroy_Private(&mat->bstash));
  PetscCall(MatDestroy(&baij->A));
  PetscCall(MatDestroy(&baij->B));
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&baij->colmap));
#else
  PetscCall(PetscFree(baij->colmap));
#endif
  PetscCall(PetscFree(baij->garray));
  PetscCall(VecDestroy(&baij->lvec));
  PetscCall(VecScatterDestroy(&baij->Mvctx));
  PetscCall(PetscFree2(baij->rowvalues, baij->rowindices));
  PetscCall(PetscFree(baij->barray));
  PetscCall(PetscFree2(baij->hd, baij->ht));
  PetscCall(PetscFree(baij->rangebs));
  PetscCall(PetscFree(mat->data));

  PetscCall(PetscObjectChangeTypeName((PetscObject)mat, NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatStoreValues_C", NULL));
  PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatRetrieveValues_C", NULL));
PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocation_C", NULL)); 1209 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatMPIBAIJSetPreallocationCSR_C", NULL)); 1210 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatDiagonalScaleLocal_C", NULL)); 1211 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatSetHashTableFactor_C", NULL)); 1212 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpisbaij_C", NULL)); 1213 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiadj_C", NULL)); 1214 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_mpiaij_C", NULL)); 1215 #if defined(PETSC_HAVE_HYPRE) 1216 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_hypre_C", NULL)); 1217 #endif 1218 PetscCall(PetscObjectComposeFunction((PetscObject)mat, "MatConvert_mpibaij_is_C", NULL)); 1219 PetscFunctionReturn(0); 1220 } 1221 1222 PetscErrorCode MatMult_MPIBAIJ(Mat A, Vec xx, Vec yy) 1223 { 1224 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1225 PetscInt nt; 1226 1227 PetscFunctionBegin; 1228 PetscCall(VecGetLocalSize(xx, &nt)); 1229 PetscCheck(nt == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible partition of A and xx"); 1230 PetscCall(VecGetLocalSize(yy, &nt)); 1231 PetscCheck(nt == A->rmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Incompatible parition of A and yy"); 1232 PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1233 PetscCall((*a->A->ops->mult)(a->A, xx, yy)); 1234 PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 1235 PetscCall((*a->B->ops->multadd)(a->B, a->lvec, yy, yy)); 1236 PetscFunctionReturn(0); 1237 } 1238 1239 PetscErrorCode MatMultAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz) 1240 { 1241 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1242 1243 PetscFunctionBegin; 1244 PetscCall(VecScatterBegin(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD)); 
  PetscCall((*a->A->ops->multadd)(a->A, xx, yy, zz));
  PetscCall(VecScatterEnd(a->Mvctx, xx, a->lvec, INSERT_VALUES, SCATTER_FORWARD));
  PetscCall((*a->B->ops->multadd)(a->B, a->lvec, zz, zz));
  PetscFunctionReturn(0);
}

/*
  MatMultTranspose_MPIBAIJ - yy = A^T * xx. The off-diagonal block's transpose
  product is accumulated into the local work vector, then scattered back in
  reverse (add mode) on top of the local transpose product.
*/
PetscErrorCode MatMultTranspose_MPIBAIJ(Mat A, Vec xx, Vec yy)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  /* do nondiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
  /* do local part */
  PetscCall((*a->A->ops->multtranspose)(a->A, xx, yy));
  /* add partial results together */
  PetscCall(VecScatterBegin(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
  PetscCall(VecScatterEnd(a->Mvctx, a->lvec, yy, ADD_VALUES, SCATTER_REVERSE));
  PetscFunctionReturn(0);
}

/*
  MatMultTransposeAdd_MPIBAIJ - zz = yy + A^T * xx; same reverse-scatter
  accumulation pattern as MatMultTranspose_MPIBAIJ.
*/
PetscErrorCode MatMultTransposeAdd_MPIBAIJ(Mat A, Vec xx, Vec yy, Vec zz)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  /* do nondiagonal part */
  PetscCall((*a->B->ops->multtranspose)(a->B, xx, a->lvec));
  /* do local part */
  PetscCall((*a->A->ops->multtransposeadd)(a->A, xx, yy, zz));
  /* add partial results together */
  PetscCall(VecScatterBegin(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
  PetscCall(VecScatterEnd(a->Mvctx, a->lvec, zz, ADD_VALUES, SCATTER_REVERSE));
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
PetscErrorCode MatGetDiagonal_MPIBAIJ(Mat A, Vec v)
{
  PetscFunctionBegin;
  PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_SUP, "Supports only square matrix where A->A is diag block");
  PetscCall(MatGetDiagonal(((Mat_MPIBAIJ *)A->data)->A, v));
  PetscFunctionReturn(0);
}

/* MatScale_MPIBAIJ - Scales both local blocks by aa; no communication needed. */
PetscErrorCode MatScale_MPIBAIJ(Mat A, PetscScalar aa)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatScale(a->A, aa));
  PetscCall(MatScale(a->B, aa));
  PetscFunctionReturn(0);
}

/*
  MatGetRow_MPIBAIJ - Returns one locally owned (global scalar) row, merging
  the diagonal (A) and off-diagonal (B) block rows into global column order.
  idx and/or v may be NULL when the caller does not need them. The returned
  arrays live in the matrix's work buffers until MatRestoreRow is called.
*/
PetscErrorCode MatGetRow_MPIBAIJ(Mat matin, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data;
  PetscScalar *vworkA, *vworkB, **pvA, **pvB, *v_p;
  PetscInt     bs = matin->rmap->bs, bs2 = mat->bs2, i, *cworkA, *cworkB, **pcA, **pcB;
  PetscInt     nztot, nzA, nzB, lrow, brstart = matin->rmap->rstart, brend = matin->rmap->rend;
  PetscInt    *cmap, *idx_p, cstart = mat->cstartbs;

  PetscFunctionBegin;
  PetscCheck(row >= brstart && row < brend, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only local rows");
  PetscCheck(!mat->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqBAIJ *Aa = (Mat_SeqBAIJ *)mat->A->data, *Ba = (Mat_SeqBAIJ *)mat->B->data;
    PetscInt     max = 1, mbs = mat->mbs, tmp;
    for (i = 0; i < mbs; i++) {
      tmp = Aa->i[i + 1] - Aa->i[i] + Ba->i[i + 1] - Ba->i[i];
      if (max < tmp) max = tmp;
    }
    PetscCall(PetscMalloc2(max * bs2, &mat->rowvalues, max * bs2, &mat->rowindices));
  }
  lrow = row - brstart;

  /* pass NULL for the pieces the caller did not request */
  pvA = &vworkA;
  pcA = &cworkA;
  pvB = &vworkB;
  pcB = &cworkB;
  if (!v) {
    pvA = NULL;
    pvB = NULL;
  }
  if (!idx) {
    pcA = NULL;
    if (!v) pcB = NULL;
  }
  PetscCall((*mat->A->ops->getrow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->getrow)(mat->B, lrow, &nzB, pcB, pvB));
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      /* imark = number of B entries whose global block column is left of the diagonal block */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i = 0; i < nzB; i++) {
          if (cmap[cworkB[i] / bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        /* layout: [B entries left of diagonal | all A entries | remaining B entries] */
        for (i = 0; i < nzA; i++) v_p[imark + i] = vworkA[i];
        for (i = imark; i < nzB; i++) v_p[nzA + i] = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* imark already known from the value pass */
          for (i = 0; i < imark; i++) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
        } else {
          for (i = 0; i < nzB; i++) {
            if (cmap[cworkB[i] / bs] < cstart) idx_p[i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
            else break;
          }
          imark = i;
        }
        for (i = 0; i < nzA; i++) idx_p[imark + i] = cstart * bs + cworkA[i];
        for (i = imark; i < nzB; i++) idx_p[nzA + i] = cmap[cworkB[i] / bs] * bs + cworkB[i] % bs;
      }
    } else {
      if (idx) *idx = NULL;
      if (v) *v = NULL;
    }
  }
  *nz = nztot;
  PetscCall((*mat->A->ops->restorerow)(mat->A, lrow, &nzA, pcA, pvA));
  PetscCall((*mat->B->ops->restorerow)(mat->B, lrow, &nzB, pcB, pvB));
  PetscFunctionReturn(0);
}

/* MatRestoreRow_MPIBAIJ - Marks the row obtained by MatGetRow as released. */
PetscErrorCode MatRestoreRow_MPIBAIJ(Mat mat, PetscInt row, PetscInt *nz, PetscInt **idx, PetscScalar **v)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;

  PetscFunctionBegin;
  PetscCheck(baij->getrowactive, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "MatGetRow not called");
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

/* MatZeroEntries_MPIBAIJ - Zeroes all stored values in both local blocks. */
PetscErrorCode MatZeroEntries_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatZeroEntries(l->A));
  PetscCall(MatZeroEntries(l->B));
  PetscFunctionReturn(0);
}

/*
  MatGetInfo_MPIBAIJ - Gathers storage statistics. Local statistics are the
  sum over the A and B blocks; MAT_GLOBAL_MAX/MAT_GLOBAL_SUM reduce them
  (collectively) over the communicator.
*/
PetscErrorCode MatGetInfo_MPIBAIJ(Mat matin, MatInfoType flag, MatInfo *info)
{
  Mat_MPIBAIJ   *a = (Mat_MPIBAIJ *)matin->data;
  Mat            A = a->A, B = a->B;
  PetscLogDouble isend[5], irecv[5];

  PetscFunctionBegin;
  info->block_size = (PetscReal)matin->rmap->bs;

  PetscCall(MatGetInfo(A, MAT_LOCAL, info));

  isend[0] = info->nz_used;
  isend[1] = info->nz_allocated;
  isend[2] = info->nz_unneeded;
  isend[3] = info->memory;
  isend[4] = info->mallocs;

  PetscCall(MatGetInfo(B, MAT_LOCAL, info));

  /* accumulate B's statistics on top of A's */
  isend[0] += info->nz_used;
  isend[1] += info->nz_allocated;
  isend[2] += info->nz_unneeded;
  isend[3] += info->memory;
  isend[4] += info->mallocs;

  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_MAX, PetscObjectComm((PetscObject)matin)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    PetscCall(MPIU_Allreduce(isend, irecv, 5, MPIU_PETSCLOGDOUBLE, MPI_SUM, PetscObjectComm((PetscObject)matin)));

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_ARG_WRONG, "Unknown MatInfoType argument %d", (int)flag);
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

/*
  MatSetOption_MPIBAIJ - Sets a matrix option, forwarding structural options
  to both sequential blocks and recording process-local flags directly.
  (switch body continues below)
*/
PetscErrorCode MatSetOption_MPIBAIJ(Mat A, MatOption op, PetscBool flg)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    MatCheckPreallocated(A, 1);
    /* nonzero-pattern options apply to both sequential blocks */
    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_ROW_ORIENTED:
    MatCheckPreallocated(A, 1);
    a->roworiented = flg;

    PetscCall(MatSetOption(a->A, op, flg));
    PetscCall(MatSetOption(a->B, op, flg));
    break;
  case MAT_FORCE_DIAGONAL_ENTRIES:
  case MAT_SORTED_FULL:
    PetscCall(PetscInfo(A, "Option %s ignored\n", MatOptions[op]));
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_USE_HASH_TABLE:
    a->ht_flag = flg;
    a->ht_fact = 1.39;
    break;
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
  case MAT_SUBMAT_SINGLEIS:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_STRUCTURAL_SYMMETRY_ETERNAL:
  case MAT_SPD_ETERNAL:
    /* if the diagonal matrix is square it inherits some of the properties above */
    break;
  default:
    SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "unknown option %d", op);
  }
  PetscFunctionReturn(0);
}

/*
  MatTranspose_MPIBAIJ - Forms the transpose by inserting each local block's
  scalar columns as rows of the result matrix B, then assembling (which
  communicates entries to their new owners). Supports initial, reuse, and
  in-place modes; preallocation of the result is unknown so only the block
  size is set up front.
*/
PetscErrorCode MatTranspose_MPIBAIJ(Mat A, MatReuse reuse, Mat *matout)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)A->data;
  Mat_SeqBAIJ *Aloc;
  Mat          B;
  PetscInt     M = A->rmap->N, N = A->cmap->N, *ai, *aj, i, *rvals, j, k, col;
  PetscInt     bs = A->rmap->bs, mbs = baij->mbs;
  MatScalar   *a;

  PetscFunctionBegin;
  if (reuse == MAT_REUSE_MATRIX) PetscCall(MatTransposeCheckNonzeroState_Private(A, *matout));
  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetSizes(B, A->cmap->n, A->rmap->n, N, M));
    PetscCall(MatSetType(B, ((PetscObject)A)->type_name));
    /* Do not know preallocation information, but must set block size */
    PetscCall(MatMPIBAIJSetPreallocation(B, A->rmap->bs, PETSC_DECIDE, NULL, PETSC_DECIDE, NULL));
  } else {
    B = *matout;
  }

  /* copy over the A part */
  Aloc = (Mat_SeqBAIJ *)baij->A->data;
  ai   = Aloc->i;
  aj   = Aloc->j;
  a    = Aloc->a;
  /* rvals: the bs consecutive global scalar row indices of block row i; they become columns of B */
  PetscCall(PetscMalloc1(bs, &rvals));

  for (i = 0; i < mbs; i++) {
    rvals[0] = bs * (baij->rstartbs + i);
    for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
    for (j = ai[i]; j < ai[i + 1]; j++) {
      col = (baij->cstartbs + aj[j]) * bs;
      for (k = 0; k < bs; k++) {
        /* each column-major block column of A is inserted as a row of B */
        PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES));

        col++;
        a += bs;
      }
    }
  }
  /* copy over the B part */
  Aloc = (Mat_SeqBAIJ *)baij->B->data;
  ai   = Aloc->i;
  aj   = Aloc->j;
  a    = Aloc->a;
  for (i = 0; i < mbs; i++) {
    rvals[0] = bs * (baij->rstartbs + i);
    for (j = 1; j < bs; j++) rvals[j] = rvals[j - 1] + 1;
    for (j = ai[i]; j < ai[i + 1]; j++) {
      col = baij->garray[aj[j]] * bs;
      for (k = 0; k < bs; k++) {
        PetscCall(MatSetValues_MPIBAIJ(B, 1, &col, bs, rvals, a, INSERT_VALUES));
        col++;
        a += bs;
      }
    }
  }
  PetscCall(PetscFree(rvals));
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));

  if (reuse == MAT_INITIAL_MATRIX || reuse == MAT_REUSE_MATRIX) *matout = B;
  else PetscCall(MatHeaderMerge(A, &B));
  PetscFunctionReturn(0);
}

/*
  MatDiagonalScale_MPIBAIJ - Computes diag(ll)*mat*diag(rr). The right-vector
  entries needed for the off-diagonal block are scattered while the diagonal
  block is scaled. (body continues below)
*/
PetscErrorCode MatDiagonalScale_MPIBAIJ(Mat mat, Vec ll, Vec rr)
{
  Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data;
  Mat          a = baij->A, b = baij->B;
  PetscInt     s1, s2, s3;

  PetscFunctionBegin;
  PetscCall(MatGetLocalSize(mat, &s2, &s3));
  if (rr) {
    PetscCall(VecGetLocalSize(rr, &s1));
    PetscCheck(s1 == s3, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "right vector non-conforming local size");
    /* Overlap communication with computation. 
       */
    PetscCall(VecScatterBegin(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
  }
  if (ll) {
    PetscCall(VecGetLocalSize(ll, &s1));
    PetscCheck(s1 == s2, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "left vector non-conforming local size");
    /* left scaling is purely local: rows of b are owned here */
    PetscUseTypeMethod(b, diagonalscale, ll, NULL);
  }
  /* scale the diagonal block */
  PetscUseTypeMethod(a, diagonalscale, ll, rr);

  if (rr) {
    /* Do a scatter end and then right scale the off-diagonal block */
    PetscCall(VecScatterEnd(baij->Mvctx, rr, baij->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscUseTypeMethod(b, diagonalscale, NULL, baij->lvec);
  }
  PetscFunctionReturn(0);
}

/*
  MatZeroRows_MPIBAIJ - Zeroes the given global rows (owned locally after
  mapping), optionally placing diag on the diagonal and fixing the right-hand
  side b so that b = diag * x in those rows. Collective.
*/
PetscErrorCode MatZeroRows_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIBAIJ *l = (Mat_MPIBAIJ *)A->data;
  PetscInt    *lrows;
  PetscInt     r, len;
  PetscBool    cong;

  PetscFunctionBegin;
  /* get locally owned rows */
  PetscCall(MatZeroRowsMapLocal_Private(A, N, rows, &len, &lrows));
  /* fix right hand side if needed */
  if (x && b) {
    const PetscScalar *xx;
    PetscScalar       *bb;

    PetscCall(VecGetArrayRead(x, &xx));
    PetscCall(VecGetArray(b, &bb));
    for (r = 0; r < len; ++r) bb[lrows[r]] = diag * xx[lrows[r]];
    PetscCall(VecRestoreArrayRead(x, &xx));
    PetscCall(VecRestoreArray(b, &bb));
  }

  /* actually zap the local rows */
  /*
     Zero the required rows. If the "diagonal block" of the matrix
     is square and the user wishes to set the diagonal we use separate
     code so that MatSetValues() is not called for each diagonal allocating
     new memory, thus calling lots of mallocs and slowing things down.

  */
  /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
  PetscCall(MatZeroRows_SeqBAIJ(l->B, len, lrows, 0.0, NULL, NULL));
  PetscCall(MatHasCongruentLayouts(A, &cong));
  if ((diag != 0.0) && cong) {
    /* fast path: the diagonal entry lies inside l->A and can be set there directly */
    PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, diag, NULL, NULL));
  } else if (diag != 0.0) {
    /* non-congruent layouts: diagonal entries may live off-process, so use MatSetValues + assembly */
    PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL));
    PetscCheck(!((Mat_SeqBAIJ *)l->A->data)->nonew, PETSC_COMM_SELF, PETSC_ERR_SUP, "MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
MAT_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
    for (r = 0; r < len; ++r) {
      const PetscInt row = lrows[r] + A->rmap->rstart;
      PetscCall(MatSetValues(A, 1, &row, 1, &row, &diag, INSERT_VALUES));
    }
    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
  } else {
    PetscCall(MatZeroRows_SeqBAIJ(l->A, len, lrows, 0.0, NULL, NULL));
  }
  PetscCall(PetscFree(lrows));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqBAIJ *)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(0);
}

/*
  MatZeroRowsColumns_MPIBAIJ - Zeroes both the given rows AND the matching
  columns, optionally setting diag on the diagonal and adjusting b. Uses a
  PetscSF to route the global row list to owning ranks, then masks out the
  corresponding off-diagonal columns. (body continues below)
*/
PetscErrorCode MatZeroRowsColumns_MPIBAIJ(Mat A, PetscInt N, const PetscInt rows[], PetscScalar diag, Vec x, Vec b)
{
  Mat_MPIBAIJ       *l = (Mat_MPIBAIJ *)A->data;
  PetscMPIInt        n = A->rmap->n, p = 0;
  PetscInt           i, j, k, r, len = 0, row, col, count;
  PetscInt          *lrows, *owners = A->rmap->range;
  PetscSFNode       *rrows;
  PetscSF            sf;
  const PetscScalar *xx;
  PetscScalar       *bb, *mask;
  Vec                xmask, lmask;
  Mat_SeqBAIJ       *baij = (Mat_SeqBAIJ *)l->B->data;
  PetscInt           bs =
A->rmap->bs, bs2 = baij->bs2;
  PetscScalar *aa;

  PetscFunctionBegin;
  /* Create SF where leaves are input rows and roots are owned rows */
  PetscCall(PetscMalloc1(n, &lrows));
  for (r = 0; r < n; ++r) lrows[r] = -1;
  PetscCall(PetscMalloc1(N, &rrows));
  for (r = 0; r < N; ++r) {
    const PetscInt idx = rows[r];
    PetscCheck(idx >= 0 && A->rmap->N > idx, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row %" PetscInt_FMT " out of range [0,%" PetscInt_FMT ")", idx, A->rmap->N);
    if (idx < owners[p] || owners[p + 1] <= idx) { /* short-circuit the search if the last p owns this row too */
      PetscCall(PetscLayoutFindOwner(A->rmap, idx, &p));
    }
    rrows[r].rank  = p;
    rrows[r].index = rows[r] - owners[p];
  }
  PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)A), &sf));
  PetscCall(PetscSFSetGraph(sf, n, N, NULL, PETSC_OWN_POINTER, rrows, PETSC_OWN_POINTER));
  /* Collect flags for rows to be zeroed */
  PetscCall(PetscSFReduceBegin(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFReduceEnd(sf, MPIU_INT, (PetscInt *)rows, lrows, MPI_LOR));
  PetscCall(PetscSFDestroy(&sf));
  /* Compress and put in row numbers */
  for (r = 0; r < n; ++r)
    if (lrows[r] >= 0) lrows[len++] = r;
  /* zero diagonal part of matrix */
  PetscCall(MatZeroRowsColumns(l->A, len, lrows, diag, x, b));
  /* handle off diagonal part of matrix */
  /* build a 0/1 mask marking zeroed rows, then scatter it so each rank
     knows which of its ghost columns correspond to zeroed rows */
  PetscCall(MatCreateVecs(A, &xmask, NULL));
  PetscCall(VecDuplicate(l->lvec, &lmask));
  PetscCall(VecGetArray(xmask, &bb));
  for (i = 0; i < len; i++) bb[lrows[i]] = 1;
  PetscCall(VecRestoreArray(xmask, &bb));
  PetscCall(VecScatterBegin(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecScatterEnd(l->Mvctx, xmask, lmask, ADD_VALUES, SCATTER_FORWARD));
  PetscCall(VecDestroy(&xmask));
  if (x) {
    PetscCall(VecScatterBegin(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecScatterEnd(l->Mvctx, x, l->lvec, INSERT_VALUES, SCATTER_FORWARD));
    PetscCall(VecGetArrayRead(l->lvec, &xx));
    PetscCall(VecGetArray(b, &bb));
  }
  PetscCall(VecGetArray(lmask, &mask));
  /* remove zeroed rows of off diagonal matrix */
  for (i = 0; i < len; ++i) {
    row   = lrows[i];
    count = (baij->i[row / bs + 1] - baij->i[row / bs]) * bs;
    /* stride bs walks scalar row (row % bs) across the column-major blocks of this block row */
    aa    = ((MatScalar *)(baij->a)) + baij->i[row / bs] * bs2 + (row % bs);
    for (k = 0; k < count; ++k) {
      aa[0] = 0.0;
      aa += bs;
    }
  }
  /* loop over all elements of off process part of matrix zeroing removed columns*/
  for (i = 0; i < l->B->rmap->N; ++i) {
    row = i / bs;
    for (j = baij->i[row]; j < baij->i[row + 1]; ++j) {
      for (k = 0; k < bs; ++k) {
        col = bs * baij->j[j] + k;
        if (PetscAbsScalar(mask[col])) {
          aa = ((MatScalar *)(baij->a)) + j * bs2 + (i % bs) + bs * k;
          /* move the known contribution to the right-hand side before zeroing */
          if (x) bb[i] -= aa[0] * xx[col];
          aa[0] = 0.0;
        }
      }
    }
  }
  if (x) {
    PetscCall(VecRestoreArray(b, &bb));
    PetscCall(VecRestoreArrayRead(l->lvec, &xx));
  }
  PetscCall(VecRestoreArray(lmask, &mask));
  PetscCall(VecDestroy(&lmask));
  PetscCall(PetscFree(lrows));

  /* only change matrix nonzero state if pattern was allowed to be changed */
  if (!((Mat_SeqBAIJ *)(l->A->data))->keepnonzeropattern) {
    PetscObjectState state = l->A->nonzerostate + l->B->nonzerostate;
    PetscCall(MPIU_Allreduce(&state, &A->nonzerostate, 1, MPIU_INT64, MPI_SUM, PetscObjectComm((PetscObject)A)));
  }
  PetscFunctionReturn(0);
}

/* MatSetUnfactored_MPIBAIJ - Clears the factored state of the diagonal block. */
PetscErrorCode MatSetUnfactored_MPIBAIJ(Mat A)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;

  PetscFunctionBegin;
  PetscCall(MatSetUnfactored(a->A));
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDuplicate_MPIBAIJ(Mat, MatDuplicateOption, Mat *);

/*
  MatEqual_MPIBAIJ - Compares A and B blockwise on each rank, then reduces
  the per-rank results with a logical AND so all ranks agree. (continues below)
*/
PetscErrorCode MatEqual_MPIBAIJ(Mat A, Mat B, PetscBool *flag)
{
Mat_MPIBAIJ *matB = (Mat_MPIBAIJ *)B->data, *matA = (Mat_MPIBAIJ *)A->data; 1773 Mat a, b, c, d; 1774 PetscBool flg; 1775 1776 PetscFunctionBegin; 1777 a = matA->A; 1778 b = matA->B; 1779 c = matB->A; 1780 d = matB->B; 1781 1782 PetscCall(MatEqual(a, c, &flg)); 1783 if (flg) PetscCall(MatEqual(b, d, &flg)); 1784 PetscCall(MPIU_Allreduce(&flg, flag, 1, MPIU_BOOL, MPI_LAND, PetscObjectComm((PetscObject)A))); 1785 PetscFunctionReturn(0); 1786 } 1787 1788 PetscErrorCode MatCopy_MPIBAIJ(Mat A, Mat B, MatStructure str) 1789 { 1790 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1791 Mat_MPIBAIJ *b = (Mat_MPIBAIJ *)B->data; 1792 1793 PetscFunctionBegin; 1794 /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */ 1795 if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) { 1796 PetscCall(MatCopy_Basic(A, B, str)); 1797 } else { 1798 PetscCall(MatCopy(a->A, b->A, str)); 1799 PetscCall(MatCopy(a->B, b->B, str)); 1800 } 1801 PetscCall(PetscObjectStateIncrease((PetscObject)B)); 1802 PetscFunctionReturn(0); 1803 } 1804 1805 PetscErrorCode MatSetUp_MPIBAIJ(Mat A) 1806 { 1807 PetscFunctionBegin; 1808 PetscCall(MatMPIBAIJSetPreallocation(A, A->rmap->bs, PETSC_DEFAULT, NULL, PETSC_DEFAULT, NULL)); 1809 PetscFunctionReturn(0); 1810 } 1811 1812 PetscErrorCode MatAXPYGetPreallocation_MPIBAIJ(Mat Y, const PetscInt *yltog, Mat X, const PetscInt *xltog, PetscInt *nnz) 1813 { 1814 PetscInt bs = Y->rmap->bs, m = Y->rmap->N / bs; 1815 Mat_SeqBAIJ *x = (Mat_SeqBAIJ *)X->data; 1816 Mat_SeqBAIJ *y = (Mat_SeqBAIJ *)Y->data; 1817 1818 PetscFunctionBegin; 1819 PetscCall(MatAXPYGetPreallocation_MPIX_private(m, x->i, x->j, xltog, y->i, y->j, yltog, nnz)); 1820 PetscFunctionReturn(0); 1821 } 1822 1823 PetscErrorCode MatAXPY_MPIBAIJ(Mat Y, PetscScalar a, Mat X, MatStructure str) 1824 { 1825 Mat_MPIBAIJ *xx = (Mat_MPIBAIJ *)X->data, *yy = (Mat_MPIBAIJ *)Y->data; 1826 PetscBLASInt bnz, one = 1; 1827 Mat_SeqBAIJ *x, *y; 1828 PetscInt bs2 = 
Y->rmap->bs * Y->rmap->bs; 1829 1830 PetscFunctionBegin; 1831 if (str == SAME_NONZERO_PATTERN) { 1832 PetscScalar alpha = a; 1833 x = (Mat_SeqBAIJ *)xx->A->data; 1834 y = (Mat_SeqBAIJ *)yy->A->data; 1835 PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz)); 1836 PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one)); 1837 x = (Mat_SeqBAIJ *)xx->B->data; 1838 y = (Mat_SeqBAIJ *)yy->B->data; 1839 PetscCall(PetscBLASIntCast(x->nz * bs2, &bnz)); 1840 PetscCallBLAS("BLASaxpy", BLASaxpy_(&bnz, &alpha, x->a, &one, y->a, &one)); 1841 PetscCall(PetscObjectStateIncrease((PetscObject)Y)); 1842 } else if (str == SUBSET_NONZERO_PATTERN) { /* nonzeros of X is a subset of Y's */ 1843 PetscCall(MatAXPY_Basic(Y, a, X, str)); 1844 } else { 1845 Mat B; 1846 PetscInt *nnz_d, *nnz_o, bs = Y->rmap->bs; 1847 PetscCall(PetscMalloc1(yy->A->rmap->N, &nnz_d)); 1848 PetscCall(PetscMalloc1(yy->B->rmap->N, &nnz_o)); 1849 PetscCall(MatCreate(PetscObjectComm((PetscObject)Y), &B)); 1850 PetscCall(PetscObjectSetName((PetscObject)B, ((PetscObject)Y)->name)); 1851 PetscCall(MatSetSizes(B, Y->rmap->n, Y->cmap->n, Y->rmap->N, Y->cmap->N)); 1852 PetscCall(MatSetBlockSizesFromMats(B, Y, Y)); 1853 PetscCall(MatSetType(B, MATMPIBAIJ)); 1854 PetscCall(MatAXPYGetPreallocation_SeqBAIJ(yy->A, xx->A, nnz_d)); 1855 PetscCall(MatAXPYGetPreallocation_MPIBAIJ(yy->B, yy->garray, xx->B, xx->garray, nnz_o)); 1856 PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, nnz_d, 0, nnz_o)); 1857 /* MatAXPY_BasicWithPreallocation() for BAIJ matrix is much slower than AIJ, even for bs=1 ! 
*/ 1858 PetscCall(MatAXPY_BasicWithPreallocation(B, Y, a, X, str)); 1859 PetscCall(MatHeaderMerge(Y, &B)); 1860 PetscCall(PetscFree(nnz_d)); 1861 PetscCall(PetscFree(nnz_o)); 1862 } 1863 PetscFunctionReturn(0); 1864 } 1865 1866 PetscErrorCode MatConjugate_MPIBAIJ(Mat mat) 1867 { 1868 PetscFunctionBegin; 1869 if (PetscDefined(USE_COMPLEX)) { 1870 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)mat->data; 1871 1872 PetscCall(MatConjugate_SeqBAIJ(a->A)); 1873 PetscCall(MatConjugate_SeqBAIJ(a->B)); 1874 } 1875 PetscFunctionReturn(0); 1876 } 1877 1878 PetscErrorCode MatRealPart_MPIBAIJ(Mat A) 1879 { 1880 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1881 1882 PetscFunctionBegin; 1883 PetscCall(MatRealPart(a->A)); 1884 PetscCall(MatRealPart(a->B)); 1885 PetscFunctionReturn(0); 1886 } 1887 1888 PetscErrorCode MatImaginaryPart_MPIBAIJ(Mat A) 1889 { 1890 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 1891 1892 PetscFunctionBegin; 1893 PetscCall(MatImaginaryPart(a->A)); 1894 PetscCall(MatImaginaryPart(a->B)); 1895 PetscFunctionReturn(0); 1896 } 1897 1898 PetscErrorCode MatCreateSubMatrix_MPIBAIJ(Mat mat, IS isrow, IS iscol, MatReuse call, Mat *newmat) 1899 { 1900 IS iscol_local; 1901 PetscInt csize; 1902 1903 PetscFunctionBegin; 1904 PetscCall(ISGetLocalSize(iscol, &csize)); 1905 if (call == MAT_REUSE_MATRIX) { 1906 PetscCall(PetscObjectQuery((PetscObject)*newmat, "ISAllGather", (PetscObject *)&iscol_local)); 1907 PetscCheck(iscol_local, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 1908 } else { 1909 PetscCall(ISAllGather(iscol, &iscol_local)); 1910 } 1911 PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(mat, isrow, iscol_local, csize, call, newmat)); 1912 if (call == MAT_INITIAL_MATRIX) { 1913 PetscCall(PetscObjectCompose((PetscObject)*newmat, "ISAllGather", (PetscObject)iscol_local)); 1914 PetscCall(ISDestroy(&iscol_local)); 1915 } 1916 PetscFunctionReturn(0); 1917 } 1918 1919 /* 1920 Not great since it makes two copies of the submatrix, first an 
SeqBAIJ 1921 in local and then by concatenating the local matrices the end result. 1922 Writing it directly would be much like MatCreateSubMatrices_MPIBAIJ(). 1923 This routine is used for BAIJ and SBAIJ matrices (unfortunate dependency). 1924 */ 1925 PetscErrorCode MatCreateSubMatrix_MPIBAIJ_Private(Mat mat, IS isrow, IS iscol, PetscInt csize, MatReuse call, Mat *newmat) 1926 { 1927 PetscMPIInt rank, size; 1928 PetscInt i, m, n, rstart, row, rend, nz, *cwork, j, bs; 1929 PetscInt *ii, *jj, nlocal, *dlens, *olens, dlen, olen, jend, mglobal; 1930 Mat M, Mreuse; 1931 MatScalar *vwork, *aa; 1932 MPI_Comm comm; 1933 IS isrow_new, iscol_new; 1934 Mat_SeqBAIJ *aij; 1935 1936 PetscFunctionBegin; 1937 PetscCall(PetscObjectGetComm((PetscObject)mat, &comm)); 1938 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 1939 PetscCallMPI(MPI_Comm_size(comm, &size)); 1940 /* The compression and expansion should be avoided. Doesn't point 1941 out errors, might change the indices, hence buggey */ 1942 PetscCall(ISCompressIndicesGeneral(mat->rmap->N, mat->rmap->n, mat->rmap->bs, 1, &isrow, &isrow_new)); 1943 PetscCall(ISCompressIndicesGeneral(mat->cmap->N, mat->cmap->n, mat->cmap->bs, 1, &iscol, &iscol_new)); 1944 1945 if (call == MAT_REUSE_MATRIX) { 1946 PetscCall(PetscObjectQuery((PetscObject)*newmat, "SubMatrix", (PetscObject *)&Mreuse)); 1947 PetscCheck(Mreuse, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Submatrix passed in was not used before, cannot reuse"); 1948 PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_REUSE_MATRIX, &Mreuse)); 1949 } else { 1950 PetscCall(MatCreateSubMatrices_MPIBAIJ_local(mat, 1, &isrow_new, &iscol_new, MAT_INITIAL_MATRIX, &Mreuse)); 1951 } 1952 PetscCall(ISDestroy(&isrow_new)); 1953 PetscCall(ISDestroy(&iscol_new)); 1954 /* 1955 m - number of local rows 1956 n - number of columns (same on all processors) 1957 rstart - first row in new global matrix generated 1958 */ 1959 PetscCall(MatGetBlockSize(mat, &bs)); 1960 
PetscCall(MatGetSize(Mreuse, &m, &n)); 1961 m = m / bs; 1962 n = n / bs; 1963 1964 if (call == MAT_INITIAL_MATRIX) { 1965 aij = (Mat_SeqBAIJ *)(Mreuse)->data; 1966 ii = aij->i; 1967 jj = aij->j; 1968 1969 /* 1970 Determine the number of non-zeros in the diagonal and off-diagonal 1971 portions of the matrix in order to do correct preallocation 1972 */ 1973 1974 /* first get start and end of "diagonal" columns */ 1975 if (csize == PETSC_DECIDE) { 1976 PetscCall(ISGetSize(isrow, &mglobal)); 1977 if (mglobal == n * bs) { /* square matrix */ 1978 nlocal = m; 1979 } else { 1980 nlocal = n / size + ((n % size) > rank); 1981 } 1982 } else { 1983 nlocal = csize / bs; 1984 } 1985 PetscCallMPI(MPI_Scan(&nlocal, &rend, 1, MPIU_INT, MPI_SUM, comm)); 1986 rstart = rend - nlocal; 1987 PetscCheck(rank != size - 1 || rend == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Local column sizes %" PetscInt_FMT " do not add up to total number of columns %" PetscInt_FMT, rend, n); 1988 1989 /* next, compute all the lengths */ 1990 PetscCall(PetscMalloc2(m + 1, &dlens, m + 1, &olens)); 1991 for (i = 0; i < m; i++) { 1992 jend = ii[i + 1] - ii[i]; 1993 olen = 0; 1994 dlen = 0; 1995 for (j = 0; j < jend; j++) { 1996 if (*jj < rstart || *jj >= rend) olen++; 1997 else dlen++; 1998 jj++; 1999 } 2000 olens[i] = olen; 2001 dlens[i] = dlen; 2002 } 2003 PetscCall(MatCreate(comm, &M)); 2004 PetscCall(MatSetSizes(M, bs * m, bs * nlocal, PETSC_DECIDE, bs * n)); 2005 PetscCall(MatSetType(M, ((PetscObject)mat)->type_name)); 2006 PetscCall(MatMPIBAIJSetPreallocation(M, bs, 0, dlens, 0, olens)); 2007 PetscCall(MatMPISBAIJSetPreallocation(M, bs, 0, dlens, 0, olens)); 2008 PetscCall(PetscFree2(dlens, olens)); 2009 } else { 2010 PetscInt ml, nl; 2011 2012 M = *newmat; 2013 PetscCall(MatGetLocalSize(M, &ml, &nl)); 2014 PetscCheck(ml == m, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Previous matrix must be same size/layout as request"); 2015 PetscCall(MatZeroEntries(M)); 2016 /* 2017 The next two lines are needed so we may 
call MatSetValues_MPIAIJ() below directly, 2018 rather than the slower MatSetValues(). 2019 */ 2020 M->was_assembled = PETSC_TRUE; 2021 M->assembled = PETSC_FALSE; 2022 } 2023 PetscCall(MatSetOption(M, MAT_ROW_ORIENTED, PETSC_FALSE)); 2024 PetscCall(MatGetOwnershipRange(M, &rstart, &rend)); 2025 aij = (Mat_SeqBAIJ *)(Mreuse)->data; 2026 ii = aij->i; 2027 jj = aij->j; 2028 aa = aij->a; 2029 for (i = 0; i < m; i++) { 2030 row = rstart / bs + i; 2031 nz = ii[i + 1] - ii[i]; 2032 cwork = jj; 2033 jj += nz; 2034 vwork = aa; 2035 aa += nz * bs * bs; 2036 PetscCall(MatSetValuesBlocked_MPIBAIJ(M, 1, &row, nz, cwork, vwork, INSERT_VALUES)); 2037 } 2038 2039 PetscCall(MatAssemblyBegin(M, MAT_FINAL_ASSEMBLY)); 2040 PetscCall(MatAssemblyEnd(M, MAT_FINAL_ASSEMBLY)); 2041 *newmat = M; 2042 2043 /* save submatrix used in processor for next request */ 2044 if (call == MAT_INITIAL_MATRIX) { 2045 PetscCall(PetscObjectCompose((PetscObject)M, "SubMatrix", (PetscObject)Mreuse)); 2046 PetscCall(PetscObjectDereference((PetscObject)Mreuse)); 2047 } 2048 PetscFunctionReturn(0); 2049 } 2050 2051 PetscErrorCode MatPermute_MPIBAIJ(Mat A, IS rowp, IS colp, Mat *B) 2052 { 2053 MPI_Comm comm, pcomm; 2054 PetscInt clocal_size, nrows; 2055 const PetscInt *rows; 2056 PetscMPIInt size; 2057 IS crowp, lcolp; 2058 2059 PetscFunctionBegin; 2060 PetscCall(PetscObjectGetComm((PetscObject)A, &comm)); 2061 /* make a collective version of 'rowp' */ 2062 PetscCall(PetscObjectGetComm((PetscObject)rowp, &pcomm)); 2063 if (pcomm == comm) { 2064 crowp = rowp; 2065 } else { 2066 PetscCall(ISGetSize(rowp, &nrows)); 2067 PetscCall(ISGetIndices(rowp, &rows)); 2068 PetscCall(ISCreateGeneral(comm, nrows, rows, PETSC_COPY_VALUES, &crowp)); 2069 PetscCall(ISRestoreIndices(rowp, &rows)); 2070 } 2071 PetscCall(ISSetPermutation(crowp)); 2072 /* make a local version of 'colp' */ 2073 PetscCall(PetscObjectGetComm((PetscObject)colp, &pcomm)); 2074 PetscCallMPI(MPI_Comm_size(pcomm, &size)); 2075 if (size == 1) { 2076 lcolp = 
colp; 2077 } else { 2078 PetscCall(ISAllGather(colp, &lcolp)); 2079 } 2080 PetscCall(ISSetPermutation(lcolp)); 2081 /* now we just get the submatrix */ 2082 PetscCall(MatGetLocalSize(A, NULL, &clocal_size)); 2083 PetscCall(MatCreateSubMatrix_MPIBAIJ_Private(A, crowp, lcolp, clocal_size, MAT_INITIAL_MATRIX, B)); 2084 /* clean up */ 2085 if (pcomm != comm) PetscCall(ISDestroy(&crowp)); 2086 if (size > 1) PetscCall(ISDestroy(&lcolp)); 2087 PetscFunctionReturn(0); 2088 } 2089 2090 PetscErrorCode MatGetGhosts_MPIBAIJ(Mat mat, PetscInt *nghosts, const PetscInt *ghosts[]) 2091 { 2092 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 2093 Mat_SeqBAIJ *B = (Mat_SeqBAIJ *)baij->B->data; 2094 2095 PetscFunctionBegin; 2096 if (nghosts) *nghosts = B->nbs; 2097 if (ghosts) *ghosts = baij->garray; 2098 PetscFunctionReturn(0); 2099 } 2100 2101 PetscErrorCode MatGetSeqNonzeroStructure_MPIBAIJ(Mat A, Mat *newmat) 2102 { 2103 Mat B; 2104 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2105 Mat_SeqBAIJ *ad = (Mat_SeqBAIJ *)a->A->data, *bd = (Mat_SeqBAIJ *)a->B->data; 2106 Mat_SeqAIJ *b; 2107 PetscMPIInt size, rank, *recvcounts = NULL, *displs = NULL; 2108 PetscInt sendcount, i, *rstarts = A->rmap->range, n, cnt, j, bs = A->rmap->bs; 2109 PetscInt m, *garray = a->garray, *lens, *jsendbuf, *a_jsendbuf, *b_jsendbuf; 2110 2111 PetscFunctionBegin; 2112 PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A), &size)); 2113 PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)A), &rank)); 2114 2115 /* ---------------------------------------------------------------- 2116 Tell every processor the number of nonzeros per row 2117 */ 2118 PetscCall(PetscMalloc1(A->rmap->N / bs, &lens)); 2119 for (i = A->rmap->rstart / bs; i < A->rmap->rend / bs; i++) lens[i] = ad->i[i - A->rmap->rstart / bs + 1] - ad->i[i - A->rmap->rstart / bs] + bd->i[i - A->rmap->rstart / bs + 1] - bd->i[i - A->rmap->rstart / bs]; 2120 PetscCall(PetscMalloc1(2 * size, &recvcounts)); 2121 displs = recvcounts + size; 2122 for (i = 0; 
i < size; i++) { 2123 recvcounts[i] = A->rmap->range[i + 1] / bs - A->rmap->range[i] / bs; 2124 displs[i] = A->rmap->range[i] / bs; 2125 } 2126 PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, lens, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A))); 2127 /* --------------------------------------------------------------- 2128 Create the sequential matrix of the same type as the local block diagonal 2129 */ 2130 PetscCall(MatCreate(PETSC_COMM_SELF, &B)); 2131 PetscCall(MatSetSizes(B, A->rmap->N / bs, A->cmap->N / bs, PETSC_DETERMINE, PETSC_DETERMINE)); 2132 PetscCall(MatSetType(B, MATSEQAIJ)); 2133 PetscCall(MatSeqAIJSetPreallocation(B, 0, lens)); 2134 b = (Mat_SeqAIJ *)B->data; 2135 2136 /*-------------------------------------------------------------------- 2137 Copy my part of matrix column indices over 2138 */ 2139 sendcount = ad->nz + bd->nz; 2140 jsendbuf = b->j + b->i[rstarts[rank] / bs]; 2141 a_jsendbuf = ad->j; 2142 b_jsendbuf = bd->j; 2143 n = A->rmap->rend / bs - A->rmap->rstart / bs; 2144 cnt = 0; 2145 for (i = 0; i < n; i++) { 2146 /* put in lower diagonal portion */ 2147 m = bd->i[i + 1] - bd->i[i]; 2148 while (m > 0) { 2149 /* is it above diagonal (in bd (compressed) numbering) */ 2150 if (garray[*b_jsendbuf] > A->rmap->rstart / bs + i) break; 2151 jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2152 m--; 2153 } 2154 2155 /* put in diagonal portion */ 2156 for (j = ad->i[i]; j < ad->i[i + 1]; j++) jsendbuf[cnt++] = A->rmap->rstart / bs + *a_jsendbuf++; 2157 2158 /* put in upper diagonal portion */ 2159 while (m-- > 0) jsendbuf[cnt++] = garray[*b_jsendbuf++]; 2160 } 2161 PetscCheck(cnt == sendcount, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Corrupted PETSc matrix: nz given %" PetscInt_FMT " actual nz %" PetscInt_FMT, sendcount, cnt); 2162 2163 /*-------------------------------------------------------------------- 2164 Gather all column indices to all processors 2165 */ 2166 for (i = 0; i < size; i++) { 2167 recvcounts[i] = 0; 2168 for (j = 
A->rmap->range[i] / bs; j < A->rmap->range[i + 1] / bs; j++) recvcounts[i] += lens[j]; 2169 } 2170 displs[0] = 0; 2171 for (i = 1; i < size; i++) displs[i] = displs[i - 1] + recvcounts[i - 1]; 2172 PetscCallMPI(MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, b->j, recvcounts, displs, MPIU_INT, PetscObjectComm((PetscObject)A))); 2173 /*-------------------------------------------------------------------- 2174 Assemble the matrix into useable form (note numerical values not yet set) 2175 */ 2176 /* set the b->ilen (length of each row) values */ 2177 PetscCall(PetscArraycpy(b->ilen, lens, A->rmap->N / bs)); 2178 /* set the b->i indices */ 2179 b->i[0] = 0; 2180 for (i = 1; i <= A->rmap->N / bs; i++) b->i[i] = b->i[i - 1] + lens[i - 1]; 2181 PetscCall(PetscFree(lens)); 2182 PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY)); 2183 PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY)); 2184 PetscCall(PetscFree(recvcounts)); 2185 2186 PetscCall(MatPropagateSymmetryOptions(A, B)); 2187 *newmat = B; 2188 PetscFunctionReturn(0); 2189 } 2190 2191 PetscErrorCode MatSOR_MPIBAIJ(Mat matin, Vec bb, PetscReal omega, MatSORType flag, PetscReal fshift, PetscInt its, PetscInt lits, Vec xx) 2192 { 2193 Mat_MPIBAIJ *mat = (Mat_MPIBAIJ *)matin->data; 2194 Vec bb1 = NULL; 2195 2196 PetscFunctionBegin; 2197 if (flag == SOR_APPLY_UPPER) { 2198 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2199 PetscFunctionReturn(0); 2200 } 2201 2202 if (its > 1 || ~flag & SOR_ZERO_INITIAL_GUESS) PetscCall(VecDuplicate(bb, &bb1)); 2203 2204 if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) { 2205 if (flag & SOR_ZERO_INITIAL_GUESS) { 2206 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2207 its--; 2208 } 2209 2210 while (its--) { 2211 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2212 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2213 2214 /* update 
rhs: bb1 = bb - B*x */ 2215 PetscCall(VecScale(mat->lvec, -1.0)); 2216 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 2217 2218 /* local sweep */ 2219 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_SYMMETRIC_SWEEP, fshift, lits, 1, xx)); 2220 } 2221 } else if (flag & SOR_LOCAL_FORWARD_SWEEP) { 2222 if (flag & SOR_ZERO_INITIAL_GUESS) { 2223 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2224 its--; 2225 } 2226 while (its--) { 2227 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2228 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2229 2230 /* update rhs: bb1 = bb - B*x */ 2231 PetscCall(VecScale(mat->lvec, -1.0)); 2232 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 2233 2234 /* local sweep */ 2235 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_FORWARD_SWEEP, fshift, lits, 1, xx)); 2236 } 2237 } else if (flag & SOR_LOCAL_BACKWARD_SWEEP) { 2238 if (flag & SOR_ZERO_INITIAL_GUESS) { 2239 PetscCall((*mat->A->ops->sor)(mat->A, bb, omega, flag, fshift, lits, 1, xx)); 2240 its--; 2241 } 2242 while (its--) { 2243 PetscCall(VecScatterBegin(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2244 PetscCall(VecScatterEnd(mat->Mvctx, xx, mat->lvec, INSERT_VALUES, SCATTER_FORWARD)); 2245 2246 /* update rhs: bb1 = bb - B*x */ 2247 PetscCall(VecScale(mat->lvec, -1.0)); 2248 PetscCall((*mat->B->ops->multadd)(mat->B, mat->lvec, bb, bb1)); 2249 2250 /* local sweep */ 2251 PetscCall((*mat->A->ops->sor)(mat->A, bb1, omega, SOR_BACKWARD_SWEEP, fshift, lits, 1, xx)); 2252 } 2253 } else SETERRQ(PetscObjectComm((PetscObject)matin), PETSC_ERR_SUP, "Parallel version of SOR requested not supported"); 2254 2255 PetscCall(VecDestroy(&bb1)); 2256 PetscFunctionReturn(0); 2257 } 2258 2259 PetscErrorCode MatGetColumnReductions_MPIBAIJ(Mat A, PetscInt type, PetscReal *reductions) 2260 { 2261 Mat_MPIBAIJ *aij = (Mat_MPIBAIJ *)A->data; 2262 
PetscInt m, N, i, *garray = aij->garray; 2263 PetscInt ib, jb, bs = A->rmap->bs; 2264 Mat_SeqBAIJ *a_aij = (Mat_SeqBAIJ *)aij->A->data; 2265 MatScalar *a_val = a_aij->a; 2266 Mat_SeqBAIJ *b_aij = (Mat_SeqBAIJ *)aij->B->data; 2267 MatScalar *b_val = b_aij->a; 2268 PetscReal *work; 2269 2270 PetscFunctionBegin; 2271 PetscCall(MatGetSize(A, &m, &N)); 2272 PetscCall(PetscCalloc1(N, &work)); 2273 if (type == NORM_2) { 2274 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2275 for (jb = 0; jb < bs; jb++) { 2276 for (ib = 0; ib < bs; ib++) { 2277 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val * *a_val); 2278 a_val++; 2279 } 2280 } 2281 } 2282 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2283 for (jb = 0; jb < bs; jb++) { 2284 for (ib = 0; ib < bs; ib++) { 2285 work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val * *b_val); 2286 b_val++; 2287 } 2288 } 2289 } 2290 } else if (type == NORM_1) { 2291 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2292 for (jb = 0; jb < bs; jb++) { 2293 for (ib = 0; ib < bs; ib++) { 2294 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscAbsScalar(*a_val); 2295 a_val++; 2296 } 2297 } 2298 } 2299 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2300 for (jb = 0; jb < bs; jb++) { 2301 for (ib = 0; ib < bs; ib++) { 2302 work[garray[b_aij->j[i]] * bs + jb] += PetscAbsScalar(*b_val); 2303 b_val++; 2304 } 2305 } 2306 } 2307 } else if (type == NORM_INFINITY) { 2308 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2309 for (jb = 0; jb < bs; jb++) { 2310 for (ib = 0; ib < bs; ib++) { 2311 int col = A->cmap->rstart + a_aij->j[i] * bs + jb; 2312 work[col] = PetscMax(PetscAbsScalar(*a_val), work[col]); 2313 a_val++; 2314 } 2315 } 2316 } 2317 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2318 for (jb = 0; jb < bs; jb++) { 2319 for (ib = 0; ib < bs; ib++) { 2320 int col = garray[b_aij->j[i]] * bs + jb; 2321 work[col] = 
PetscMax(PetscAbsScalar(*b_val), work[col]); 2322 b_val++; 2323 } 2324 } 2325 } 2326 } else if (type == REDUCTION_SUM_REALPART || type == REDUCTION_MEAN_REALPART) { 2327 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2328 for (jb = 0; jb < bs; jb++) { 2329 for (ib = 0; ib < bs; ib++) { 2330 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscRealPart(*a_val); 2331 a_val++; 2332 } 2333 } 2334 } 2335 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2336 for (jb = 0; jb < bs; jb++) { 2337 for (ib = 0; ib < bs; ib++) { 2338 work[garray[b_aij->j[i]] * bs + jb] += PetscRealPart(*b_val); 2339 b_val++; 2340 } 2341 } 2342 } 2343 } else if (type == REDUCTION_SUM_IMAGINARYPART || type == REDUCTION_MEAN_IMAGINARYPART) { 2344 for (i = a_aij->i[0]; i < a_aij->i[aij->A->rmap->n / bs]; i++) { 2345 for (jb = 0; jb < bs; jb++) { 2346 for (ib = 0; ib < bs; ib++) { 2347 work[A->cmap->rstart + a_aij->j[i] * bs + jb] += PetscImaginaryPart(*a_val); 2348 a_val++; 2349 } 2350 } 2351 } 2352 for (i = b_aij->i[0]; i < b_aij->i[aij->B->rmap->n / bs]; i++) { 2353 for (jb = 0; jb < bs; jb++) { 2354 for (ib = 0; ib < bs; ib++) { 2355 work[garray[b_aij->j[i]] * bs + jb] += PetscImaginaryPart(*b_val); 2356 b_val++; 2357 } 2358 } 2359 } 2360 } else SETERRQ(PetscObjectComm((PetscObject)A), PETSC_ERR_ARG_WRONG, "Unknown reduction type"); 2361 if (type == NORM_INFINITY) { 2362 PetscCall(MPIU_Allreduce(work, reductions, N, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)A))); 2363 } else { 2364 PetscCall(MPIU_Allreduce(work, reductions, N, MPIU_REAL, MPIU_SUM, PetscObjectComm((PetscObject)A))); 2365 } 2366 PetscCall(PetscFree(work)); 2367 if (type == NORM_2) { 2368 for (i = 0; i < N; i++) reductions[i] = PetscSqrtReal(reductions[i]); 2369 } else if (type == REDUCTION_MEAN_REALPART || type == REDUCTION_MEAN_IMAGINARYPART) { 2370 for (i = 0; i < N; i++) reductions[i] /= m; 2371 } 2372 PetscFunctionReturn(0); 2373 } 2374 2375 PetscErrorCode 
MatInvertBlockDiagonal_MPIBAIJ(Mat A, const PetscScalar **values) 2376 { 2377 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2378 2379 PetscFunctionBegin; 2380 PetscCall(MatInvertBlockDiagonal(a->A, values)); 2381 A->factorerrortype = a->A->factorerrortype; 2382 A->factorerror_zeropivot_value = a->A->factorerror_zeropivot_value; 2383 A->factorerror_zeropivot_row = a->A->factorerror_zeropivot_row; 2384 PetscFunctionReturn(0); 2385 } 2386 2387 PetscErrorCode MatShift_MPIBAIJ(Mat Y, PetscScalar a) 2388 { 2389 Mat_MPIBAIJ *maij = (Mat_MPIBAIJ *)Y->data; 2390 Mat_SeqBAIJ *aij = (Mat_SeqBAIJ *)maij->A->data; 2391 2392 PetscFunctionBegin; 2393 if (!Y->preallocated) { 2394 PetscCall(MatMPIBAIJSetPreallocation(Y, Y->rmap->bs, 1, NULL, 0, NULL)); 2395 } else if (!aij->nz) { 2396 PetscInt nonew = aij->nonew; 2397 PetscCall(MatSeqBAIJSetPreallocation(maij->A, Y->rmap->bs, 1, NULL)); 2398 aij->nonew = nonew; 2399 } 2400 PetscCall(MatShift_Basic(Y, a)); 2401 PetscFunctionReturn(0); 2402 } 2403 2404 PetscErrorCode MatMissingDiagonal_MPIBAIJ(Mat A, PetscBool *missing, PetscInt *d) 2405 { 2406 Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data; 2407 2408 PetscFunctionBegin; 2409 PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only works for square matrices"); 2410 PetscCall(MatMissingDiagonal(a->A, missing, d)); 2411 if (d) { 2412 PetscInt rstart; 2413 PetscCall(MatGetOwnershipRange(A, &rstart, NULL)); 2414 *d += rstart / A->rmap->bs; 2415 } 2416 PetscFunctionReturn(0); 2417 } 2418 2419 PetscErrorCode MatGetDiagonalBlock_MPIBAIJ(Mat A, Mat *a) 2420 { 2421 PetscFunctionBegin; 2422 *a = ((Mat_MPIBAIJ *)A->data)->A; 2423 PetscFunctionReturn(0); 2424 } 2425 2426 /* -------------------------------------------------------------------*/ 2427 static struct _MatOps MatOps_Values = {MatSetValues_MPIBAIJ, 2428 MatGetRow_MPIBAIJ, 2429 MatRestoreRow_MPIBAIJ, 2430 MatMult_MPIBAIJ, 2431 /* 4*/ MatMultAdd_MPIBAIJ, 2432 MatMultTranspose_MPIBAIJ, 2433 MatMultTransposeAdd_MPIBAIJ, 2434 NULL, 
2435 NULL, 2436 NULL, 2437 /*10*/ NULL, 2438 NULL, 2439 NULL, 2440 MatSOR_MPIBAIJ, 2441 MatTranspose_MPIBAIJ, 2442 /*15*/ MatGetInfo_MPIBAIJ, 2443 MatEqual_MPIBAIJ, 2444 MatGetDiagonal_MPIBAIJ, 2445 MatDiagonalScale_MPIBAIJ, 2446 MatNorm_MPIBAIJ, 2447 /*20*/ MatAssemblyBegin_MPIBAIJ, 2448 MatAssemblyEnd_MPIBAIJ, 2449 MatSetOption_MPIBAIJ, 2450 MatZeroEntries_MPIBAIJ, 2451 /*24*/ MatZeroRows_MPIBAIJ, 2452 NULL, 2453 NULL, 2454 NULL, 2455 NULL, 2456 /*29*/ MatSetUp_MPIBAIJ, 2457 NULL, 2458 NULL, 2459 MatGetDiagonalBlock_MPIBAIJ, 2460 NULL, 2461 /*34*/ MatDuplicate_MPIBAIJ, 2462 NULL, 2463 NULL, 2464 NULL, 2465 NULL, 2466 /*39*/ MatAXPY_MPIBAIJ, 2467 MatCreateSubMatrices_MPIBAIJ, 2468 MatIncreaseOverlap_MPIBAIJ, 2469 MatGetValues_MPIBAIJ, 2470 MatCopy_MPIBAIJ, 2471 /*44*/ NULL, 2472 MatScale_MPIBAIJ, 2473 MatShift_MPIBAIJ, 2474 NULL, 2475 MatZeroRowsColumns_MPIBAIJ, 2476 /*49*/ NULL, 2477 NULL, 2478 NULL, 2479 NULL, 2480 NULL, 2481 /*54*/ MatFDColoringCreate_MPIXAIJ, 2482 NULL, 2483 MatSetUnfactored_MPIBAIJ, 2484 MatPermute_MPIBAIJ, 2485 MatSetValuesBlocked_MPIBAIJ, 2486 /*59*/ MatCreateSubMatrix_MPIBAIJ, 2487 MatDestroy_MPIBAIJ, 2488 MatView_MPIBAIJ, 2489 NULL, 2490 NULL, 2491 /*64*/ NULL, 2492 NULL, 2493 NULL, 2494 NULL, 2495 NULL, 2496 /*69*/ MatGetRowMaxAbs_MPIBAIJ, 2497 NULL, 2498 NULL, 2499 NULL, 2500 NULL, 2501 /*74*/ NULL, 2502 MatFDColoringApply_BAIJ, 2503 NULL, 2504 NULL, 2505 NULL, 2506 /*79*/ NULL, 2507 NULL, 2508 NULL, 2509 NULL, 2510 MatLoad_MPIBAIJ, 2511 /*84*/ NULL, 2512 NULL, 2513 NULL, 2514 NULL, 2515 NULL, 2516 /*89*/ NULL, 2517 NULL, 2518 NULL, 2519 NULL, 2520 NULL, 2521 /*94*/ NULL, 2522 NULL, 2523 NULL, 2524 NULL, 2525 NULL, 2526 /*99*/ NULL, 2527 NULL, 2528 NULL, 2529 MatConjugate_MPIBAIJ, 2530 NULL, 2531 /*104*/ NULL, 2532 MatRealPart_MPIBAIJ, 2533 MatImaginaryPart_MPIBAIJ, 2534 NULL, 2535 NULL, 2536 /*109*/ NULL, 2537 NULL, 2538 NULL, 2539 NULL, 2540 MatMissingDiagonal_MPIBAIJ, 2541 /*114*/ MatGetSeqNonzeroStructure_MPIBAIJ, 2542 NULL, 2543 
MatGetGhosts_MPIBAIJ, 2544 NULL, 2545 NULL, 2546 /*119*/ NULL, 2547 NULL, 2548 NULL, 2549 NULL, 2550 MatGetMultiProcBlock_MPIBAIJ, 2551 /*124*/ NULL, 2552 MatGetColumnReductions_MPIBAIJ, 2553 MatInvertBlockDiagonal_MPIBAIJ, 2554 NULL, 2555 NULL, 2556 /*129*/ NULL, 2557 NULL, 2558 NULL, 2559 NULL, 2560 NULL, 2561 /*134*/ NULL, 2562 NULL, 2563 NULL, 2564 NULL, 2565 NULL, 2566 /*139*/ MatSetBlockSizes_Default, 2567 NULL, 2568 NULL, 2569 MatFDColoringSetUp_MPIXAIJ, 2570 NULL, 2571 /*144*/ MatCreateMPIMatConcatenateSeqMat_MPIBAIJ, 2572 NULL, 2573 NULL, 2574 NULL, 2575 NULL, 2576 NULL, 2577 /*150*/ NULL, 2578 NULL}; 2579 2580 PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPISBAIJ(Mat, MatType, MatReuse, Mat *); 2581 PETSC_INTERN PetscErrorCode MatConvert_XAIJ_IS(Mat, MatType, MatReuse, Mat *); 2582 2583 PetscErrorCode MatMPIBAIJSetPreallocationCSR_MPIBAIJ(Mat B, PetscInt bs, const PetscInt ii[], const PetscInt jj[], const PetscScalar V[]) 2584 { 2585 PetscInt m, rstart, cstart, cend; 2586 PetscInt i, j, dlen, olen, nz, nz_max = 0, *d_nnz = NULL, *o_nnz = NULL; 2587 const PetscInt *JJ = NULL; 2588 PetscScalar *values = NULL; 2589 PetscBool roworiented = ((Mat_MPIBAIJ *)B->data)->roworiented; 2590 PetscBool nooffprocentries; 2591 2592 PetscFunctionBegin; 2593 PetscCall(PetscLayoutSetBlockSize(B->rmap, bs)); 2594 PetscCall(PetscLayoutSetBlockSize(B->cmap, bs)); 2595 PetscCall(PetscLayoutSetUp(B->rmap)); 2596 PetscCall(PetscLayoutSetUp(B->cmap)); 2597 PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs)); 2598 m = B->rmap->n / bs; 2599 rstart = B->rmap->rstart / bs; 2600 cstart = B->cmap->rstart / bs; 2601 cend = B->cmap->rend / bs; 2602 2603 PetscCheck(!ii[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "ii[0] must be 0 but it is %" PetscInt_FMT, ii[0]); 2604 PetscCall(PetscMalloc2(m, &d_nnz, m, &o_nnz)); 2605 for (i = 0; i < m; i++) { 2606 nz = ii[i + 1] - ii[i]; 2607 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Local row %" PetscInt_FMT " has a negative 
number of columns %" PetscInt_FMT, i, nz);
    nz_max = PetscMax(nz_max, nz);
    /* split this block row's column indices into diagonal-block (inside [cstart,cend)) and off-diagonal counts */
    dlen = 0;
    olen = 0;
    JJ   = jj + ii[i];
    for (j = 0; j < nz; j++) {
      if (*JJ < cstart || *JJ >= cend) olen++;
      else dlen++;
      JJ++;
    }
    d_nnz[i] = dlen;
    o_nnz[i] = olen;
  }
  PetscCall(MatMPIBAIJSetPreallocation(B, bs, 0, d_nnz, 0, o_nnz));
  PetscCall(PetscFree2(d_nnz, o_nnz));

  /* if no values were supplied, insert zeros from a scratch buffer large enough for the longest row */
  values = (PetscScalar *)V;
  if (!values) PetscCall(PetscCalloc1(bs * bs * nz_max, &values));
  for (i = 0; i < m; i++) {
    PetscInt        row   = i + rstart;
    PetscInt        ncols = ii[i + 1] - ii[i];
    const PetscInt *icols = jj + ii[i];
    if (bs == 1 || !roworiented) { /* block ordering matches the non-nested layout of MatSetValues so we can insert entire rows */
      const PetscScalar *svals = values + (V ? (bs * bs * ii[i]) : 0);
      PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, ncols, icols, svals, INSERT_VALUES));
    } else { /* block ordering does not match so we can only insert one block at a time. */
      PetscInt j;
      for (j = 0; j < ncols; j++) {
        const PetscScalar *svals = values + (V ? (bs * bs * (ii[i] + j)) : 0);
        PetscCall(MatSetValuesBlocked_MPIBAIJ(B, 1, &row, 1, &icols[j], svals, INSERT_VALUES));
      }
    }
  }

  if (!V) PetscCall(PetscFree(values));
  /* all entries inserted above are locally owned, so skip the off-process stash communication during assembly */
  nooffprocentries    = B->nooffprocentries;
  B->nooffprocentries = PETSC_TRUE;
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));
  B->nooffprocentries = nooffprocentries;

  PetscCall(MatSetOption(B, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
  PetscFunctionReturn(0);
}

/*@C
   MatMPIBAIJSetPreallocationCSR - Creates a sparse parallel matrix in `MATBAIJ` format using the given nonzero structure and (optional) numerical values

   Collective

   Input Parameters:
+  B - the matrix
.  bs - the block size
.  i - the indices into j for the start of each local row (starts with zero)
.  j - the column indices for each local row (starts with zero) these must be sorted for each row
-  v - optional values in the matrix

   Level: advanced

   Notes:
   The order of the entries in values is specified by the `MatOption` `MAT_ROW_ORIENTED`. For example, C programs
   may want to use the default `MAT_ROW_ORIENTED` with value `PETSC_TRUE` and use an array v[nnz][bs][bs] where the second index is
   over rows within a block and the last index is over columns within a block row. Fortran programs will likely set
   `MAT_ROW_ORIENTED` with value `PETSC_FALSE` and use a Fortran array v(bs,bs,nnz) in which the first index is over rows within a
   block column and the second index is over columns within a block.

   Though this routine has Preallocation() in the name it also sets the exact nonzero locations of the matrix entries and usually the numerical values as well

.seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIBAIJSetPreallocation()`, `MatCreateAIJ()`, `MPIAIJ`, `MatCreateMPIBAIJWithArrays()`, `MPIBAIJ`
@*/
PetscErrorCode MatMPIBAIJSetPreallocationCSR(Mat B, PetscInt bs, const PetscInt i[], const PetscInt j[], const PetscScalar v[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidType(B, 1);
  PetscValidLogicalCollectiveInt(B, bs, 2);
  /* dispatch to the type-specific implementation (registered as MatMPIBAIJSetPreallocationCSR_MPIBAIJ); a no-op for other types */
  PetscTryMethod(B, "MatMPIBAIJSetPreallocationCSR_C", (Mat, PetscInt, const PetscInt[], const PetscInt[], const PetscScalar[]), (B, bs, i, j, v));
  PetscFunctionReturn(0);
}

/* Type-specific implementation of MatMPIBAIJSetPreallocation(): validates the per-block-row
   counts, (re)computes the block-size-derived bookkeeping, and (re)creates the diagonal (b->A)
   and off-diagonal (b->B) sequential BAIJ submatrices with the requested preallocation. */
PetscErrorCode MatMPIBAIJSetPreallocation_MPIBAIJ(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt *d_nnz, PetscInt o_nz, const PetscInt *o_nnz)
{
  Mat_MPIBAIJ *b;
  PetscInt     i;
  PetscMPIInt  size;

  PetscFunctionBegin;
  PetscCall(MatSetBlockSize(B, PetscAbs(bs)));
  PetscCall(PetscLayoutSetUp(B->rmap));
  PetscCall(PetscLayoutSetUp(B->cmap));
  /* the layout may have adjusted the block size; use the final value below */
  PetscCall(PetscLayoutGetBlockSize(B->rmap, &bs));

  if (d_nnz) {
    for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(d_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "d_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, d_nnz[i]);
  }
  if (o_nnz) {
    for (i = 0; i < B->rmap->n / bs; i++) PetscCheck(o_nnz[i] >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "o_nnz cannot be less than -1: local row %" PetscInt_FMT " value %" PetscInt_FMT, i, o_nnz[i]);
  }

  /* cache block-size derived sizes: local/global counts of block rows and block columns */
  b      = (Mat_MPIBAIJ *)B->data;
  b->bs2 = bs * bs;
  b->mbs = B->rmap->n / bs;
  b->nbs = B->cmap->n / bs;
  b->Mbs = B->rmap->N / bs;
  b->Nbs = B->cmap->N / bs;

  /* ownership ranges expressed in block rows */
  for (i = 0; i <= b->size; i++) b->rangebs[i] = B->rmap->range[i] / bs;
  b->rstartbs = B->rmap->rstart / bs;
  b->rendbs   = B->rmap->rend / bs;
  b->cstartbs = B->cmap->rstart / bs;
  b->cendbs   = B->cmap->rend / bs;

  /* discard any stale column map / scatter state from a previous preallocation */
#if defined(PETSC_USE_CTABLE)
  PetscCall(PetscHMapIDestroy(&b->colmap));
#else
  PetscCall(PetscFree(b->colmap));
#endif
  PetscCall(PetscFree(b->garray));
  PetscCall(VecDestroy(&b->lvec));
  PetscCall(VecScatterDestroy(&b->Mvctx));

  /* Because the B will have been resized we simply destroy it and create a new one each time */
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &size));
  PetscCall(MatDestroy(&b->B));
  PetscCall(MatCreate(PETSC_COMM_SELF, &b->B));
  PetscCall(MatSetSizes(b->B, B->rmap->n, size > 1 ? B->cmap->N : 0, B->rmap->n, size > 1 ?
B->cmap->N : 0));
  PetscCall(MatSetType(b->B, MATSEQBAIJ));

  /* the diagonal submatrix b->A keeps its sizes across preallocations, so it is created only once */
  if (!B->preallocated) {
    PetscCall(MatCreate(PETSC_COMM_SELF, &b->A));
    PetscCall(MatSetSizes(b->A, B->rmap->n, B->cmap->n, B->rmap->n, B->cmap->n));
    PetscCall(MatSetType(b->A, MATSEQBAIJ));
    PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), bs, &B->bstash));
  }

  PetscCall(MatSeqBAIJSetPreallocation(b->A, bs, d_nz, d_nnz));
  PetscCall(MatSeqBAIJSetPreallocation(b->B, bs, o_nz, o_nnz));
  B->preallocated  = PETSC_TRUE;
  B->was_assembled = PETSC_FALSE;
  B->assembled     = PETSC_FALSE;
  PetscFunctionReturn(0);
}

extern PetscErrorCode MatDiagonalScaleLocal_MPIBAIJ(Mat, Vec);
extern PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat, PetscReal);

/* Converts an assembled MPIBAIJ matrix to a MATMPIADJ adjacency matrix at the block level:
   one vertex per block row, one edge per nonzero block, with the diagonal block dropped.
   Column indices are emitted in globally increasing order by merging the off-diagonal part
   (translated through garray) around the diagonal part. */
PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAdj(Mat B, MatType newtype, MatReuse reuse, Mat *adj)
{
  Mat_MPIBAIJ    *b = (Mat_MPIBAIJ *)B->data;
  Mat_SeqBAIJ    *d = (Mat_SeqBAIJ *)b->A->data, *o = (Mat_SeqBAIJ *)b->B->data;
  PetscInt        M = B->rmap->n / B->rmap->bs, i, *ii, *jj, cnt, j, k, rstart = B->rmap->rstart / B->rmap->bs;
  const PetscInt *id = d->i, *jd = d->j, *io = o->i, *jo = o->j, *garray = b->garray;

  PetscFunctionBegin;
  /* count entries per block row (diagonal part + off-diagonal part, minus the diagonal block if present) */
  PetscCall(PetscMalloc1(M + 1, &ii));
  ii[0] = 0;
  for (i = 0; i < M; i++) {
    PetscCheck((id[i + 1] - id[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, id[i], id[i + 1]);
    PetscCheck((io[i + 1] - io[i]) >= 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Indices wrong %" PetscInt_FMT " %" PetscInt_FMT " %" PetscInt_FMT, i, io[i], io[i + 1]);
    ii[i + 1] = ii[i] + id[i + 1] - id[i] + io[i + 1] - io[i];
    /* remove one from count of matrix has diagonal */
    for (j = id[i]; j < id[i + 1]; j++) {
      if (jd[j] == i) {
        ii[i + 1]--;
        break;
      }
    }
  }
  PetscCall(PetscMalloc1(ii[M], &jj));
  cnt = 0;
  for (i = 0; i < M; i++) {
    /* off-diagonal columns that precede this rank's ownership range */
    for (j = io[i]; j < io[i + 1]; j++) {
      if (garray[jo[j]] > rstart) break;
      jj[cnt++] = garray[jo[j]];
    }
    /* diagonal-part columns (global index = rstart + local), skipping the diagonal block itself */
    for (k = id[i]; k < id[i + 1]; k++) {
      if (jd[k] != i) jj[cnt++] = rstart + jd[k];
    }
    /* remaining off-diagonal columns that follow the ownership range */
    for (; j < io[i + 1]; j++) jj[cnt++] = garray[jo[j]];
  }
  /* MatCreateMPIAdj takes ownership of ii and jj (no copy flag given) */
  PetscCall(MatCreateMPIAdj(PetscObjectComm((PetscObject)B), M, B->cmap->N / B->rmap->bs, ii, jj, NULL, adj));
  PetscFunctionReturn(0);
}

#include <../src/mat/impls/aij/mpi/mpiaij.h>

PETSC_INTERN PetscErrorCode MatConvert_SeqBAIJ_SeqAIJ(Mat, MatType, MatReuse, Mat *);

/* Converts an assembled MPIBAIJ matrix to MPIAIJ by converting the two sequential
   submatrices (diagonal and off-diagonal) independently. */
PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIAIJ(Mat A, MatType newtype, MatReuse reuse, Mat *newmat)
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
  Mat_MPIAIJ  *b;
  Mat          B;

  PetscFunctionBegin;
  PetscCheck(A->assembled, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "Matrix must be assembled");

  if (reuse == MAT_REUSE_MATRIX) {
    B = *newmat;
  } else {
    PetscCall(MatCreate(PetscObjectComm((PetscObject)A), &B));
    PetscCall(MatSetType(B, MATMPIAIJ));
    PetscCall(MatSetSizes(B, A->rmap->n, A->cmap->n, A->rmap->N, A->cmap->N));
    PetscCall(MatSetBlockSizes(B, A->rmap->bs, A->cmap->bs));
    PetscCall(MatSeqAIJSetPreallocation(B, 0, NULL));
    PetscCall(MatMPIAIJSetPreallocation(B, 0, NULL, 0, NULL));
  }
  b = (Mat_MPIAIJ *)B->data;

  if (reuse == MAT_REUSE_MATRIX) {
    PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_REUSE_MATRIX, &b->A));
    PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_REUSE_MATRIX, &b->B));
  } else {
    /* disassembling A (to use global column indices in a->B) and reassembling it below resets
       the symmetry flags, so save them here and restore them afterwards */
    PetscBool3 sym = A->symmetric, hermitian = A->hermitian, structurally_symmetric = A->structurally_symmetric, spd = A->spd;
    PetscCall(MatDestroy(&b->A));
    PetscCall(MatDestroy(&b->B));
    PetscCall(MatDisAssemble_MPIBAIJ(A));
    PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->A));
    PetscCall(MatConvert_SeqBAIJ_SeqAIJ(a->B, MATSEQAIJ, MAT_INITIAL_MATRIX, &b->B));
    PetscCall(MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY));
    /* restore the symmetry information saved before the disassemble/reassemble cycle */
    A->symmetric              = sym;
    A->hermitian              = hermitian;
    A->structurally_symmetric = structurally_symmetric;
    A->spd                    = spd;
  }
  PetscCall(MatAssemblyBegin(B, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(B, MAT_FINAL_ASSEMBLY));

  if (reuse == MAT_INPLACE_MATRIX) {
    PetscCall(MatHeaderReplace(A, &B));
  } else {
    *newmat = B;
  }
  PetscFunctionReturn(0);
}

/*MC
   MATMPIBAIJ - MATMPIBAIJ = "mpibaij" - A matrix type to be used for distributed block sparse matrices.

   Options Database Keys:
+ -mat_type mpibaij - sets the matrix type to `MATMPIBAIJ` during a call to `MatSetFromOptions()`
. -mat_block_size <bs> - set the blocksize used to store the matrix
. -mat_baij_mult_version version - indicate the version of the matrix-vector product to use (0 often indicates using BLAS)
- -mat_use_hash_table <fact> - set hash table factor

   Level: beginner

   Note:
   `MatSetOptions`(,`MAT_STRUCTURE_ONLY`,`PETSC_TRUE`) may be called for this matrix type. In this case no
   space is allocated for the nonzero entries and any entries passed with `MatSetValues()` are ignored

.seealso: `MATBAIJ`, `MATSEQBAIJ`, `MatCreateBAIJ()`
M*/

PETSC_INTERN PetscErrorCode MatConvert_MPIBAIJ_MPIBSTRM(Mat, MatType, MatReuse, Mat *);

/* Constructor for MATMPIBAIJ: allocates and zero-initializes the implementation struct,
   installs the function table, and registers the type-specific methods and conversions. */
PETSC_EXTERN PetscErrorCode MatCreate_MPIBAIJ(Mat B)
{
  Mat_MPIBAIJ *b;
  PetscBool    flg = PETSC_FALSE;

  PetscFunctionBegin;
  PetscCall(PetscNew(&b));
  B->data = (void *)b;

  PetscCall(PetscMemcpy(B->ops, &MatOps_Values, sizeof(struct _MatOps)));
  B->assembled = PETSC_FALSE;

  B->insertmode = NOT_SET_VALUES;
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)B), &b->rank));
  PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)B), &b->size));

  /* build local table of row and column ownerships */
  PetscCall(PetscMalloc1(b->size + 1, &b->rangebs));

  /* build cache for off array entries formed */
  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)B), 1, &B->stash));

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL;
  b->garray      = NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used in block assembly */
  b->barray = NULL;

  /* stuff used for matrix vector multiply */
  b->lvec  = NULL;
  b->Mvctx = NULL;

  /* stuff for MatGetRow() */
  b->rowindices   = NULL;
  b->rowvalues    = NULL;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = NULL;
  b->hd           = NULL;
  b->ht_size      = 0;
  b->ht_flag      = PETSC_FALSE;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  /* stuff for MatCreateSubMatrices_MPIBAIJ_local() */
  b->ijonly = PETSC_FALSE;

  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiadj_C", MatConvert_MPIBAIJ_MPIAdj));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpiaij_C",
MatConvert_MPIBAIJ_MPIAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_mpisbaij_C", MatConvert_MPIBAIJ_MPISBAIJ));
#if defined(PETSC_HAVE_HYPRE)
  /* conversion to hypre goes through the generic AIJ path */
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_hypre_C", MatConvert_AIJ_HYPRE));
#endif
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatStoreValues_C", MatStoreValues_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatRetrieveValues_C", MatRetrieveValues_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocation_C", MatMPIBAIJSetPreallocation_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatMPIBAIJSetPreallocationCSR_C", MatMPIBAIJSetPreallocationCSR_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatDiagonalScaleLocal_C", MatDiagonalScaleLocal_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatSetHashTableFactor_C", MatSetHashTableFactor_MPIBAIJ));
  PetscCall(PetscObjectComposeFunction((PetscObject)B, "MatConvert_mpibaij_is_C", MatConvert_XAIJ_IS));
  PetscCall(PetscObjectChangeTypeName((PetscObject)B, MATMPIBAIJ));

  PetscOptionsBegin(PetscObjectComm((PetscObject)B), NULL, "Options for loading MPIBAIJ matrix 1", "Mat");
  PetscCall(PetscOptionsName("-mat_use_hash_table", "Use hash table to save time in constructing matrix", "MatSetOption", &flg));
  if (flg) {
    PetscReal fact = 1.39;
    PetscCall(MatSetOption(B, MAT_USE_HASH_TABLE, PETSC_TRUE));
    PetscCall(PetscOptionsReal("-mat_use_hash_table", "Use hash table factor", "MatMPIBAIJSetHashTableFactor", fact, &fact, NULL));
    if (fact <= 1.0) fact = 1.39; /* the factor must exceed 1; fall back to the default */
    PetscCall(MatMPIBAIJSetHashTableFactor(B, fact));
    PetscCall(PetscInfo(B, "Hash table Factor used %5.2g\n", (double)fact));
  }
  PetscOptionsEnd();
  PetscFunctionReturn(0);
}

/*MC
   MATBAIJ - MATBAIJ = "baij" - A matrix type to be used for block sparse matrices.

   This matrix type is identical to `MATSEQBAIJ` when constructed with a single process communicator,
   and `MATMPIBAIJ` otherwise.

   Options Database Keys:
. -mat_type baij - sets the matrix type to `MATBAIJ` during a call to `MatSetFromOptions()`

   Level: beginner

.seealso: `MatCreateBAIJ()`, `MATSEQBAIJ`, `MATMPIBAIJ`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()`
M*/

/*@C
   MatMPIBAIJSetPreallocation - Allocates memory for a sparse parallel matrix in `MATMPIBAIJ` format
   (block compressed row). For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective

   Input Parameters:
+  B - the matrix
.  bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
        blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with MatCreateVecs()
.  d_nz - number of block nonzeros per block row in diagonal portion of local
          submatrix (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           of the in diagonal portion of the local (possibly different for each block
           row) or NULL. If you plan to factor the matrix you must leave room for the diagonal entry and
           set it even if it is zero.
.  o_nz - number of block nonzeros per block row in the off-diagonal portion of local
          submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or NULL.

   If the *_nnz parameter is given then the *_nz parameter is ignored

   Options Database Keys:
+  -mat_block_size - size of the blocks to use
-  -mat_use_hash_table <fact> - set hash table factor

   Notes:
   If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor
   than it must be used on all processors that share the object for that argument.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both). Set
   d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
   memory allocation. Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          --------------------------
   row 3  |o o o d d d o o o o  o  o
   row 4  |o o o d d d o o o o  o  o
   row 5  |o o o d d d o o o o  o  o
          --------------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix. Note that the d and the o submatrices are
   stored simply in the `MATSEQBAIJ` format for compressed row storage.

   Now d_nz should indicate the number of block nonzeros per row in the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz. For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.

   You can call `MatGetInfo()` to get information on how effective the preallocation was;
   for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
   You can also run with the option -info and look for messages with the string
   malloc in them to see if additional memory allocation was needed.

   Level: intermediate

.seealso: `MATMPIBAIJ`, `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatMPIBAIJSetPreallocationCSR()`, `PetscSplitOwnership()`
@*/
PetscErrorCode MatMPIBAIJSetPreallocation(Mat B, PetscInt bs, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[])
{
  PetscFunctionBegin;
  PetscValidHeaderSpecific(B, MAT_CLASSID, 1);
  PetscValidType(B, 1);
  PetscValidLogicalCollectiveInt(B, bs, 2);
  /* dispatch to the type-specific implementation (MatMPIBAIJSetPreallocation_MPIBAIJ); a no-op for other types */
  PetscTryMethod(B, "MatMPIBAIJSetPreallocation_C", (Mat, PetscInt, PetscInt, const PetscInt[], PetscInt, const PetscInt[]), (B, bs, d_nz, d_nnz, o_nz, o_nnz));
  PetscFunctionReturn(0);
}

/*@C
   MatCreateBAIJ - Creates a sparse parallel matrix in `MATBAIJ` format
   (block compressed row). For good matrix assembly performance
   the user should preallocate the matrix storage by setting the parameters
   d_nz (or d_nnz) and o_nz (or o_nnz). By setting these parameters accurately,
   performance can be increased by more than a factor of 50.

   Collective

   Input Parameters:
+  comm - MPI communicator
.  bs - size of block, the blocks are ALWAYS square. One can use `MatSetBlockSizes()` to set a different row and column blocksize but the row
        blocksize always defines the size of the blocks. The column blocksize sets the blocksize of the vectors obtained with `MatCreateVecs()`
.  m - number of local rows (or `PETSC_DECIDE` to have calculated if M is given)
     This value should be the same as the local size used in creating the
     y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or `PETSC_DECIDE` to have calculated if N is given)
     This value should be the same as the local size used in creating the
     x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given)
.  N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given)
.  d_nz - number of nonzero blocks per block row in diagonal portion of local
          submatrix (same for all local rows)
.  d_nnz - array containing the number of nonzero blocks in the various block rows
           of the in diagonal portion of the local (possibly different for each block
           row) or NULL. If you plan to factor the matrix you must leave room for the diagonal entry
           and set it even if it is zero.
.  o_nz - number of nonzero blocks per block row in the off-diagonal portion of local
          submatrix (same for all local rows).
-  o_nnz - array containing the number of nonzero blocks in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or NULL.

   Output Parameter:
.  A - the matrix

   Options Database Keys:
+  -mat_block_size - size of the blocks to use
-  -mat_use_hash_table <fact> - set hash table factor

   It is recommended that one use the `MatCreate()`, `MatSetType()` and/or `MatSetFromOptions()`,
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, `MatSeqBAIJSetPreallocation()`]

   Notes:
   If the *_nnz parameter is given then the *_nz parameter is ignored

   A nonzero block is any block that as 1 or more nonzeros in it

   The user MUST specify either the local or global matrix dimensions
   (possibly both).

   If `PETSC_DECIDE` or `PETSC_DETERMINE` is used for a particular argument on one processor
   than it must be used on all processors that share the object for that argument.

   Storage Information:
   For a square global matrix we define each processor's diagonal portion
   to be its local rows and the corresponding columns (a square submatrix);
   each processor's off-diagonal portion encompasses the remainder of the
   local matrix (a rectangular submatrix).

   The user can specify preallocated storage for the diagonal part of
   the local submatrix with either d_nz or d_nnz (not both). Set
   d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
   memory allocation. Likewise, specify preallocated storage for the
   off-diagonal part of the local submatrix with o_nz or o_nnz (not both).

   Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
   the figure below we depict these three local rows and all columns (0-11).

.vb
           0 1 2 3 4 5 6 7 8 9 10 11
          --------------------------
   row 3  |o o o d d d o o o o  o  o
   row 4  |o o o d d d o o o o  o  o
   row 5  |o o o d d d o o o o  o  o
          --------------------------
.ve

   Thus, any entries in the d locations are stored in the d (diagonal)
   submatrix, and any entries in the o locations are stored in the
   o (off-diagonal) submatrix. Note that the d and the o submatrices are
   stored simply in the `MATSEQBAIJ` format for compressed row storage.
   Now d_nz should indicate the number of block nonzeros per row in the d matrix,
   and o_nz should indicate the number of block nonzeros per row in the o matrix.
   In general, for PDE problems in which most nonzeros are near the diagonal,
   one expects d_nz >> o_nz. For large problems you MUST preallocate memory
   or you will get TERRIBLE performance; see the users' manual chapter on
   matrices.

   Level: intermediate

.seealso: `MatCreate()`, `MatCreateSeqBAIJ()`, `MatSetValues()`, `MatCreateBAIJ()`, `MatMPIBAIJSetPreallocation()`, `MatMPIBAIJSetPreallocationCSR()`
@*/
PetscErrorCode MatCreateBAIJ(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, PetscInt d_nz, const PetscInt d_nnz[], PetscInt o_nz, const PetscInt o_nnz[], Mat *A)
{
  PetscMPIInt size;

  PetscFunctionBegin;
  PetscCall(MatCreate(comm, A));
  PetscCall(MatSetSizes(*A, m, n, M, N));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  /* a single-process communicator gets the sequential type; otherwise the parallel type */
  if (size > 1) {
    PetscCall(MatSetType(*A, MATMPIBAIJ));
    PetscCall(MatMPIBAIJSetPreallocation(*A, bs, d_nz, d_nnz, o_nz, o_nnz));
  } else {
    PetscCall(MatSetType(*A, MATSEQBAIJ));
    PetscCall(MatSeqBAIJSetPreallocation(*A, bs, d_nz, d_nnz));
  }
  PetscFunctionReturn(0);
}

/* Duplicates an MPIBAIJ matrix: copies the scalar bookkeeping, the block ownership ranges,
   the column map and garray (if present), and duplicates the two sequential submatrices
   and the communication objects. Values are copied or not according to cpvalues. */
static PetscErrorCode MatDuplicate_MPIBAIJ(Mat matin, MatDuplicateOption cpvalues, Mat *newmat)
{
  Mat          mat;
  Mat_MPIBAIJ *a, *oldmat = (Mat_MPIBAIJ *)matin->data;
  PetscInt     len = 0;

  PetscFunctionBegin;
  *newmat = NULL;
  PetscCall(MatCreate(PetscObjectComm((PetscObject)matin), &mat));
  PetscCall(MatSetSizes(mat, matin->rmap->n, matin->cmap->n, matin->rmap->N, matin->cmap->N));
  PetscCall(MatSetType(mat, ((PetscObject)matin)->type_name));

  mat->factortype   = matin->factortype;
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;

  a             = (Mat_MPIBAIJ *)mat->data;
  mat->rmap->bs = matin->rmap->bs;
  a->bs2        = oldmat->bs2;
  a->mbs        = oldmat->mbs;
  a->nbs        = oldmat->nbs;
  a->Mbs        = oldmat->Mbs;
  a->Nbs        = oldmat->Nbs;

  /* share the layouts with the original matrix rather than rebuilding them */
  PetscCall(PetscLayoutReference(matin->rmap, &mat->rmap));
  PetscCall(PetscLayoutReference(matin->cmap, &mat->cmap));

  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = NULL;
  a->rowvalues    = NULL;
  a->getrowactive = PETSC_FALSE;
  a->barray       = NULL;
  a->rstartbs     = oldmat->rstartbs;
  a->rendbs       = oldmat->rendbs;
  a->cstartbs     = oldmat->cstartbs;
  a->cendbs       = oldmat->cendbs;

  /* hash table stuff: the table itself is not copied, only the configuration */
  a->ht           = NULL;
  a->hd           = NULL;
  a->ht_size      = 0;
  a->ht_flag      = oldmat->ht_flag;
  a->ht_fact      = oldmat->ht_fact;
  a->ht_total_ct  = 0;
  a->ht_insert_ct = 0;

  PetscCall(PetscArraycpy(a->rangebs, oldmat->rangebs, a->size + 1));
  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    PetscCall(PetscHMapIDuplicate(oldmat->colmap, &a->colmap));
#else
    PetscCall(PetscMalloc1(a->Nbs, &a->colmap));
    PetscCall(PetscArraycpy(a->colmap, oldmat->colmap, a->Nbs));
#endif
  } else a->colmap = NULL;

  if (oldmat->garray && (len = ((Mat_SeqBAIJ *)(oldmat->B->data))->nbs)) {
    PetscCall(PetscMalloc1(len, &a->garray));
    PetscCall(PetscArraycpy(a->garray, oldmat->garray, len));
  } else a->garray = NULL;

  PetscCall(MatStashCreate_Private(PetscObjectComm((PetscObject)matin), matin->rmap->bs, &mat->bstash));
  PetscCall(VecDuplicate(oldmat->lvec, &a->lvec));
  PetscCall(VecScatterCopy(oldmat->Mvctx, &a->Mvctx));

  PetscCall(MatDuplicate(oldmat->A, cpvalues, &a->A));
  PetscCall(MatDuplicate(oldmat->B, cpvalues, &a->B));
  PetscCall(PetscFunctionListDuplicate(((PetscObject)matin)->qlist, &((PetscObject)mat)->qlist));
  *newmat = mat;
PetscFunctionReturn(0); 3235 } 3236 3237 /* Used for both MPIBAIJ and MPISBAIJ matrices */ 3238 PetscErrorCode MatLoad_MPIBAIJ_Binary(Mat mat, PetscViewer viewer) 3239 { 3240 PetscInt header[4], M, N, nz, bs, m, n, mbs, nbs, rows, cols, sum, i, j, k; 3241 PetscInt *rowidxs, *colidxs, rs, cs, ce; 3242 PetscScalar *matvals; 3243 3244 PetscFunctionBegin; 3245 PetscCall(PetscViewerSetUp(viewer)); 3246 3247 /* read in matrix header */ 3248 PetscCall(PetscViewerBinaryRead(viewer, header, 4, NULL, PETSC_INT)); 3249 PetscCheck(header[0] == MAT_FILE_CLASSID, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Not a matrix object in file"); 3250 M = header[1]; 3251 N = header[2]; 3252 nz = header[3]; 3253 PetscCheck(M >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix row size (%" PetscInt_FMT ") in file is negative", M); 3254 PetscCheck(N >= 0, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Matrix column size (%" PetscInt_FMT ") in file is negative", N); 3255 PetscCheck(nz >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix stored in special format on disk, cannot load as MPIBAIJ"); 3256 3257 /* set block sizes from the viewer's .info file */ 3258 PetscCall(MatLoad_Binary_BlockSizes(mat, viewer)); 3259 /* set local sizes if not set already */ 3260 if (mat->rmap->n < 0 && M == N) mat->rmap->n = mat->cmap->n; 3261 if (mat->cmap->n < 0 && M == N) mat->cmap->n = mat->rmap->n; 3262 /* set global sizes if not set already */ 3263 if (mat->rmap->N < 0) mat->rmap->N = M; 3264 if (mat->cmap->N < 0) mat->cmap->N = N; 3265 PetscCall(PetscLayoutSetUp(mat->rmap)); 3266 PetscCall(PetscLayoutSetUp(mat->cmap)); 3267 3268 /* check if the matrix sizes are correct */ 3269 PetscCall(MatGetSize(mat, &rows, &cols)); 3270 PetscCheck(M == rows && N == cols, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Matrix in file of different sizes (%" PetscInt_FMT ", %" PetscInt_FMT ") than the input matrix (%" PetscInt_FMT ", %" PetscInt_FMT 
")", M, N, rows, cols); 3271 PetscCall(MatGetBlockSize(mat, &bs)); 3272 PetscCall(MatGetLocalSize(mat, &m, &n)); 3273 PetscCall(PetscLayoutGetRange(mat->rmap, &rs, NULL)); 3274 PetscCall(PetscLayoutGetRange(mat->cmap, &cs, &ce)); 3275 mbs = m / bs; 3276 nbs = n / bs; 3277 3278 /* read in row lengths and build row indices */ 3279 PetscCall(PetscMalloc1(m + 1, &rowidxs)); 3280 PetscCall(PetscViewerBinaryReadAll(viewer, rowidxs + 1, m, PETSC_DECIDE, M, PETSC_INT)); 3281 rowidxs[0] = 0; 3282 for (i = 0; i < m; i++) rowidxs[i + 1] += rowidxs[i]; 3283 PetscCall(MPIU_Allreduce(&rowidxs[m], &sum, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)viewer))); 3284 PetscCheck(sum == nz, PetscObjectComm((PetscObject)viewer), PETSC_ERR_FILE_UNEXPECTED, "Inconsistent matrix data in file: nonzeros = %" PetscInt_FMT ", sum-row-lengths = %" PetscInt_FMT, nz, sum); 3285 3286 /* read in column indices and matrix values */ 3287 PetscCall(PetscMalloc2(rowidxs[m], &colidxs, rowidxs[m], &matvals)); 3288 PetscCall(PetscViewerBinaryReadAll(viewer, colidxs, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_INT)); 3289 PetscCall(PetscViewerBinaryReadAll(viewer, matvals, rowidxs[m], PETSC_DETERMINE, PETSC_DETERMINE, PETSC_SCALAR)); 3290 3291 { /* preallocate matrix storage */ 3292 PetscBT bt; /* helper bit set to count diagonal nonzeros */ 3293 PetscHSetI ht; /* helper hash set to count off-diagonal nonzeros */ 3294 PetscBool sbaij, done; 3295 PetscInt *d_nnz, *o_nnz; 3296 3297 PetscCall(PetscBTCreate(nbs, &bt)); 3298 PetscCall(PetscHSetICreate(&ht)); 3299 PetscCall(PetscCalloc2(mbs, &d_nnz, mbs, &o_nnz)); 3300 PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATMPISBAIJ, &sbaij)); 3301 for (i = 0; i < mbs; i++) { 3302 PetscCall(PetscBTMemzero(nbs, bt)); 3303 PetscCall(PetscHSetIClear(ht)); 3304 for (k = 0; k < bs; k++) { 3305 PetscInt row = bs * i + k; 3306 for (j = rowidxs[row]; j < rowidxs[row + 1]; j++) { 3307 PetscInt col = colidxs[j]; 3308 if (!sbaij || col >= row) { 3309 if (col >= 
cs && col < ce) {
              if (!PetscBTLookupSet(bt, (col - cs) / bs)) d_nnz[i]++;
            } else {
              PetscCall(PetscHSetIQueryAdd(ht, col / bs, &done));
              if (done) o_nnz[i]++;
            }
          }
        }
      }
    }
    PetscCall(PetscBTDestroy(&bt));
    PetscCall(PetscHSetIDestroy(&ht));
    /* this routine serves both types; each call below presumably takes effect only for its
       matching matrix type (PetscTryMethod dispatch) — the other is a no-op */
    PetscCall(MatMPIBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz));
    PetscCall(MatMPISBAIJSetPreallocation(mat, bs, 0, d_nnz, 0, o_nnz));
    PetscCall(PetscFree2(d_nnz, o_nnz));
  }

  /* store matrix values */
  for (i = 0; i < m; i++) {
    PetscInt row = rs + i, s = rowidxs[i], e = rowidxs[i + 1];
    PetscCall((*mat->ops->setvalues)(mat, 1, &row, e - s, colidxs + s, matvals + s, INSERT_VALUES));
  }

  PetscCall(PetscFree(rowidxs));
  PetscCall(PetscFree2(colidxs, matvals));
  PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
  PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
  PetscFunctionReturn(0);
}

/* MatLoad implementation for MPIBAIJ: only binary viewers are supported */
PetscErrorCode MatLoad_MPIBAIJ(Mat mat, PetscViewer viewer)
{
  PetscBool isbinary;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERBINARY, &isbinary));
  PetscCheck(isbinary, PetscObjectComm((PetscObject)viewer), PETSC_ERR_SUP, "Viewer type %s not yet supported for reading %s matrices", ((PetscObject)viewer)->type_name, ((PetscObject)mat)->type_name);
  PetscCall(MatLoad_MPIBAIJ_Binary(mat, viewer));
  PetscFunctionReturn(0);
}

/*@
   MatMPIBAIJSetHashTableFactor - Sets the factor required to compute the size of the matrices hash table

   Input Parameters:
+  mat - the matrix
-  fact - factor

   Options Database Key:
.  -mat_use_hash_table <fact> - provide the factor

   Level: advanced

.seealso: `MATMPIBAIJ`, `MatSetOption()`
@*/
PetscErrorCode MatMPIBAIJSetHashTableFactor(Mat mat, PetscReal fact)
{
  PetscFunctionBegin;
  /* dispatch to the type-specific implementation; a no-op for types without the method */
  PetscTryMethod(mat, "MatSetHashTableFactor_C", (Mat, PetscReal), (mat, fact));
  PetscFunctionReturn(0);
}

/* Type-specific implementation registered as "MatSetHashTableFactor_C" */
PetscErrorCode MatSetHashTableFactor_MPIBAIJ(Mat mat, PetscReal fact)
{
  Mat_MPIBAIJ *baij;

  PetscFunctionBegin;
  baij          = (Mat_MPIBAIJ *)mat->data;
  baij->ht_fact = fact;
  PetscFunctionReturn(0);
}

/* Gives access to the internal diagonal (Ad) and off-diagonal (Ao) sequential submatrices
   and the local-to-global block column map (colmap); any of the outputs may be NULL */
PetscErrorCode MatMPIBAIJGetSeqBAIJ(Mat A, Mat *Ad, Mat *Ao, const PetscInt *colmap[])
{
  Mat_MPIBAIJ *a = (Mat_MPIBAIJ *)A->data;
  PetscBool    flg;

  PetscFunctionBegin;
  PetscCall(PetscObjectTypeCompare((PetscObject)A, MATMPIBAIJ, &flg));
  PetscCheck(flg, PetscObjectComm((PetscObject)A), PETSC_ERR_SUP, "This function requires a MATMPIBAIJ matrix as input");
  if (Ad) *Ad = a->A;
  if (Ao) *Ao = a->B;
  if (colmap) *colmap = a->garray;
  PetscFunctionReturn(0);
}

/*
    Special version for direct calls from Fortran (to eliminate two function call overheads
*/
#if defined(PETSC_HAVE_FORTRAN_CAPS)
  #define matmpibaijsetvaluesblocked_ MATMPIBAIJSETVALUESBLOCKED
#elif !defined(PETSC_HAVE_FORTRAN_UNDERSCORE)
  #define matmpibaijsetvaluesblocked_ matmpibaijsetvaluesblocked
#endif

/*@C
   MatMPIBAIJSetValuesBlocked - Direct Fortran call to replace call to `MatSetValuesBlocked()`

   Collective

   Input Parameters:
+  mat - the matrix
.  min - number of input rows
.  im - input rows
.  nin - number of input columns
.  in - input columns
.  v - numerical values input
-  addvin - `INSERT_VALUES` or `ADD_VALUES`

   Developer Note:
   This has a complete copy of `MatSetValuesBlocked_MPIBAIJ()` which is terrible code un-reuse.
3420 3421 Level: advanced 3422 3423 .seealso: `MatSetValuesBlocked()` 3424 @*/ 3425 PetscErrorCode matmpibaijsetvaluesblocked_(Mat *matin, PetscInt *min, const PetscInt im[], PetscInt *nin, const PetscInt in[], const MatScalar v[], InsertMode *addvin) 3426 { 3427 /* convert input arguments to C version */ 3428 Mat mat = *matin; 3429 PetscInt m = *min, n = *nin; 3430 InsertMode addv = *addvin; 3431 3432 Mat_MPIBAIJ *baij = (Mat_MPIBAIJ *)mat->data; 3433 const MatScalar *value; 3434 MatScalar *barray = baij->barray; 3435 PetscBool roworiented = baij->roworiented; 3436 PetscInt i, j, ii, jj, row, col, rstart = baij->rstartbs; 3437 PetscInt rend = baij->rendbs, cstart = baij->cstartbs, stepval; 3438 PetscInt cend = baij->cendbs, bs = mat->rmap->bs, bs2 = baij->bs2; 3439 3440 PetscFunctionBegin; 3441 /* tasks normally handled by MatSetValuesBlocked() */ 3442 if (mat->insertmode == NOT_SET_VALUES) mat->insertmode = addv; 3443 else PetscCheck(mat->insertmode == addv, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot mix add values and insert values"); 3444 PetscCheck(!mat->factortype, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Not for factored matrix"); 3445 if (mat->assembled) { 3446 mat->was_assembled = PETSC_TRUE; 3447 mat->assembled = PETSC_FALSE; 3448 } 3449 PetscCall(PetscLogEventBegin(MAT_SetValues, mat, 0, 0, 0)); 3450 3451 if (!barray) { 3452 PetscCall(PetscMalloc1(bs2, &barray)); 3453 baij->barray = barray; 3454 } 3455 3456 if (roworiented) stepval = (n - 1) * bs; 3457 else stepval = (m - 1) * bs; 3458 3459 for (i = 0; i < m; i++) { 3460 if (im[i] < 0) continue; 3461 PetscCheck(im[i] < baij->Mbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Row too large, row %" PetscInt_FMT " max %" PetscInt_FMT, im[i], baij->Mbs - 1); 3462 if (im[i] >= rstart && im[i] < rend) { 3463 row = im[i] - rstart; 3464 for (j = 0; j < n; j++) { 3465 /* If NumCol = 1 then a copy is not required */ 3466 if ((roworiented) && (n == 1)) { 3467 barray = (MatScalar *)v + i * bs2; 3468 } else 
if ((!roworiented) && (m == 1)) { 3469 barray = (MatScalar *)v + j * bs2; 3470 } else { /* Here a copy is required */ 3471 if (roworiented) { 3472 value = v + i * (stepval + bs) * bs + j * bs; 3473 } else { 3474 value = v + j * (stepval + bs) * bs + i * bs; 3475 } 3476 for (ii = 0; ii < bs; ii++, value += stepval) { 3477 for (jj = 0; jj < bs; jj++) *barray++ = *value++; 3478 } 3479 barray -= bs2; 3480 } 3481 3482 if (in[j] >= cstart && in[j] < cend) { 3483 col = in[j] - cstart; 3484 PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->A, row, col, barray, addv, im[i], in[j])); 3485 } else if (in[j] < 0) { 3486 continue; 3487 } else { 3488 PetscCheck(in[j] < baij->Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Column too large, col %" PetscInt_FMT " max %" PetscInt_FMT, in[j], baij->Nbs - 1); 3489 if (mat->was_assembled) { 3490 if (!baij->colmap) PetscCall(MatCreateColmap_MPIBAIJ_Private(mat)); 3491 3492 #if defined(PETSC_USE_DEBUG) 3493 #if defined(PETSC_USE_CTABLE) 3494 { 3495 PetscInt data; 3496 PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &data)); 3497 PetscCheck((data - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap"); 3498 } 3499 #else 3500 PetscCheck((baij->colmap[in[j]] - 1) % bs == 0, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Incorrect colmap"); 3501 #endif 3502 #endif 3503 #if defined(PETSC_USE_CTABLE) 3504 PetscCall(PetscHMapIGetWithDefault(baij->colmap, in[j] + 1, 0, &col)); 3505 col = (col - 1) / bs; 3506 #else 3507 col = (baij->colmap[in[j]] - 1) / bs; 3508 #endif 3509 if (col < 0 && !((Mat_SeqBAIJ *)(baij->A->data))->nonew) { 3510 PetscCall(MatDisAssemble_MPIBAIJ(mat)); 3511 col = in[j]; 3512 } 3513 } else col = in[j]; 3514 PetscCall(MatSetValuesBlocked_SeqBAIJ_Inlined(baij->B, row, col, barray, addv, im[i], in[j])); 3515 } 3516 } 3517 } else { 3518 if (!baij->donotstash) { 3519 if (roworiented) { 3520 PetscCall(MatStashValuesRowBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 3521 } else { 3522 
PetscCall(MatStashValuesColBlocked_Private(&mat->bstash, im[i], n, in, v, m, n, i)); 3523 } 3524 } 3525 } 3526 } 3527 3528 /* task normally handled by MatSetValuesBlocked() */ 3529 PetscCall(PetscLogEventEnd(MAT_SetValues, mat, 0, 0, 0)); 3530 PetscFunctionReturn(0); 3531 } 3532 3533 /*@ 3534 MatCreateMPIBAIJWithArrays - creates a `MATMPIBAIJ` matrix using arrays that contain in standard block 3535 CSR format the local rows. 3536 3537 Collective 3538 3539 Input Parameters: 3540 + comm - MPI communicator 3541 . bs - the block size, only a block size of 1 is supported 3542 . m - number of local rows (Cannot be `PETSC_DECIDE`) 3543 . n - This value should be the same as the local size used in creating the 3544 x vector for the matrix-vector product y = Ax. (or `PETSC_DECIDE` to have 3545 calculated if N is given) For square matrices n is almost always m. 3546 . M - number of global rows (or `PETSC_DETERMINE` to have calculated if m is given) 3547 . N - number of global columns (or `PETSC_DETERMINE` to have calculated if n is given) 3548 . i - row indices; that is i[0] = 0, i[row] = i[row-1] + number of block elements in that rowth block row of the matrix 3549 . j - column indices 3550 - a - matrix values 3551 3552 Output Parameter: 3553 . mat - the matrix 3554 3555 Level: intermediate 3556 3557 Notes: 3558 The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc; 3559 thus you CANNOT change the matrix entries by changing the values of a[] after you have 3560 called this routine. Use `MatCreateMPIAIJWithSplitArrays()` to avoid needing to copy the arrays. 3561 3562 The order of the entries in values is the same as the block compressed sparse row storage format; that is, it is 3563 the same as a three dimensional array in Fortran values(bs,bs,nnz) that contains the first column of the first 3564 block, followed by the second column of the first block etc etc. 
That is, the blocks are contiguous in memory 3565 with column-major ordering within blocks. 3566 3567 The i and j indices are 0 based, and i indices are indices corresponding to the local j array. 3568 3569 .seealso: `MatCreate()`, `MatCreateSeqAIJ()`, `MatSetValues()`, `MatMPIAIJSetPreallocation()`, `MatMPIAIJSetPreallocationCSR()`, 3570 `MPIAIJ`, `MatCreateAIJ()`, `MatCreateMPIAIJWithSplitArrays()` 3571 @*/ 3572 PetscErrorCode MatCreateMPIBAIJWithArrays(MPI_Comm comm, PetscInt bs, PetscInt m, PetscInt n, PetscInt M, PetscInt N, const PetscInt i[], const PetscInt j[], const PetscScalar a[], Mat *mat) 3573 { 3574 PetscFunctionBegin; 3575 PetscCheck(!i[0], PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "i (row indices) must start with 0"); 3576 PetscCheck(m >= 0, PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "local number of rows (m) cannot be PETSC_DECIDE, or negative"); 3577 PetscCall(MatCreate(comm, mat)); 3578 PetscCall(MatSetSizes(*mat, m, n, M, N)); 3579 PetscCall(MatSetType(*mat, MATMPIBAIJ)); 3580 PetscCall(MatSetBlockSize(*mat, bs)); 3581 PetscCall(MatSetUp(*mat)); 3582 PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_FALSE)); 3583 PetscCall(MatMPIBAIJSetPreallocationCSR(*mat, bs, i, j, a)); 3584 PetscCall(MatSetOption(*mat, MAT_ROW_ORIENTED, PETSC_TRUE)); 3585 PetscFunctionReturn(0); 3586 } 3587 3588 PetscErrorCode MatCreateMPIMatConcatenateSeqMat_MPIBAIJ(MPI_Comm comm, Mat inmat, PetscInt n, MatReuse scall, Mat *outmat) 3589 { 3590 PetscInt m, N, i, rstart, nnz, Ii, bs, cbs; 3591 PetscInt *indx; 3592 PetscScalar *values; 3593 3594 PetscFunctionBegin; 3595 PetscCall(MatGetSize(inmat, &m, &N)); 3596 if (scall == MAT_INITIAL_MATRIX) { /* symbolic phase */ 3597 Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)inmat->data; 3598 PetscInt *dnz, *onz, mbs, Nbs, nbs; 3599 PetscInt *bindx, rmax = a->rmax, j; 3600 PetscMPIInt rank, size; 3601 3602 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 3603 mbs = m / bs; 3604 Nbs = N / cbs; 3605 if (n == PETSC_DECIDE) 
PetscCall(PetscSplitOwnershipBlock(comm, cbs, &n, &N)); 3606 nbs = n / cbs; 3607 3608 PetscCall(PetscMalloc1(rmax, &bindx)); 3609 MatPreallocateBegin(comm, mbs, nbs, dnz, onz); /* inline function, output __end and __rstart are used below */ 3610 3611 PetscCallMPI(MPI_Comm_rank(comm, &rank)); 3612 PetscCallMPI(MPI_Comm_rank(comm, &size)); 3613 if (rank == size - 1) { 3614 /* Check sum(nbs) = Nbs */ 3615 PetscCheck(__end == Nbs, PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP, "Sum of local block columns %" PetscInt_FMT " != global block columns %" PetscInt_FMT, __end, Nbs); 3616 } 3617 3618 rstart = __rstart; /* block rstart of *outmat; see inline function MatPreallocateBegin */ 3619 for (i = 0; i < mbs; i++) { 3620 PetscCall(MatGetRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL)); /* non-blocked nnz and indx */ 3621 nnz = nnz / bs; 3622 for (j = 0; j < nnz; j++) bindx[j] = indx[j * bs] / bs; 3623 PetscCall(MatPreallocateSet(i + rstart, nnz, bindx, dnz, onz)); 3624 PetscCall(MatRestoreRow_SeqBAIJ(inmat, i * bs, &nnz, &indx, NULL)); 3625 } 3626 PetscCall(PetscFree(bindx)); 3627 3628 PetscCall(MatCreate(comm, outmat)); 3629 PetscCall(MatSetSizes(*outmat, m, n, PETSC_DETERMINE, PETSC_DETERMINE)); 3630 PetscCall(MatSetBlockSizes(*outmat, bs, cbs)); 3631 PetscCall(MatSetType(*outmat, MATBAIJ)); 3632 PetscCall(MatSeqBAIJSetPreallocation(*outmat, bs, 0, dnz)); 3633 PetscCall(MatMPIBAIJSetPreallocation(*outmat, bs, 0, dnz, 0, onz)); 3634 MatPreallocateEnd(dnz, onz); 3635 PetscCall(MatSetOption(*outmat, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE)); 3636 } 3637 3638 /* numeric phase */ 3639 PetscCall(MatGetBlockSizes(inmat, &bs, &cbs)); 3640 PetscCall(MatGetOwnershipRange(*outmat, &rstart, NULL)); 3641 3642 for (i = 0; i < m; i++) { 3643 PetscCall(MatGetRow_SeqBAIJ(inmat, i, &nnz, &indx, &values)); 3644 Ii = i + rstart; 3645 PetscCall(MatSetValues(*outmat, 1, &Ii, nnz, indx, values, INSERT_VALUES)); 3646 PetscCall(MatRestoreRow_SeqBAIJ(inmat, i, &nnz, &indx, &values)); 3647 } 3648 
PetscCall(MatAssemblyBegin(*outmat, MAT_FINAL_ASSEMBLY)); 3649 PetscCall(MatAssemblyEnd(*outmat, MAT_FINAL_ASSEMBLY)); 3650 PetscFunctionReturn(0); 3651 } 3652